11 #ifndef TMVA_DNN_MINIMIZERS    12 #define TMVA_DNN_MINIMIZERS    53 template<
typename Architecture_t>
    57    using Scalar_t = 
typename Architecture_t::Scalar_t;
    58    using Matrix_t = 
typename Architecture_t::Matrix_t;
    77                     size_t   convergenceSteps,
    83       fMinimumError = std::numeric_limits<Scalar_t>::infinity();
    84       fConvergenceCount = 0;
    90    template <
typename Data_t, 
typename Net_t>
    91    Scalar_t Train(
const Data_t & TrainingDataIn, 
size_t nTrainingSamples,
    92                   const Data_t & TestDataIn, 
size_t nTestSamples,
    93                   Net_t & net, 
size_t nThreads = 1);
    96    template <
typename Data_t, 
typename Net_t>
    98                           const Data_t & TestDataIn, 
size_t nTestSamples,
    99                           Net_t & net, 
Scalar_t momentum, 
size_t nThreads = 1);
   106    template <
typename Net_t>
   111    template <
typename Net_t>
   120    template <
typename Net_t>
   121    void Step(Net_t &master,
   122              std::vector<Net_t> &nets,
   126    template <
typename Net_t>
   128                      std::vector<Net_t> &nets,
   131    template <
typename Net_t>
   136                      std::vector<Net_t> &nets,
   143    template <
typename Net_t>
   148    template <
typename Net_t>
   174 template <
typename Architecture_t>
   183 template <
typename Architecture_t>
   192 template<
typename Architecture_t>
   193 template <
typename Data_t, 
typename Net_t>
   195                                                  size_t nTrainingSamples,
   196                                                  const Data_t & testData,
   208                                                    net.GetOutputWidth(), nThreads);
   209    auto testNet = net.CreateClone(nTestSamples);
   211                                                   testNet.GetBatchSize(),
   212                                                   testNet.GetInputWidth(),
   213                                                   net.GetOutputWidth());
   214    std::vector<Net_t> nets{};
   215    nets.reserve(nThreads);
   216    for (
size_t i = 0; i < nThreads; i++) {
   218        for (
size_t j = 0; j < net.GetDepth(); j++)
   220            auto &masterLayer = net.GetLayer(j);
   221            auto &layer = nets.back().GetLayer(j);
   223                                 masterLayer.GetWeights());
   225                                 masterLayer.GetBiases());
   229    size_t batchesInEpoch = nTrainingSamples / net.GetBatchSize();
   230    std::vector<TBatch<Architecture_t>> batches{};
   231    batches.reserve(nThreads);
   235          trainLoader.Shuffle();
   236          for (
size_t i = 0; i < batchesInEpoch; i += nThreads) {
   238             for (
size_t j = 0; j < nThreads; j++) batches.push_back(trainLoader.GetBatch());
   239             Step(net, nets, batches);
   243       auto b = *testLoader.begin();
   244       auto inputMatrix = 
b.GetInput();
   245       auto outputMatrix = 
b.GetOutput();
   246       auto weightMatrix = 
b.GetWeights();
   247       fTestError = testNet.Loss(inputMatrix, outputMatrix, weightMatrix);
   255 template<
typename Architecture_t>
   256 template <
typename Data_t, 
typename Net_t>
   258                                                      size_t nTrainingSamples,
   259                                                      const Data_t & testData,
   272                                                    net.GetOutputWidth(), nThreads);
   273    auto testNet = net.CreateClone(net.GetBatchSize());
   275                                                   testNet.GetBatchSize(),
   276                                                   testNet.GetInputWidth(),
   277                                                   net.GetOutputWidth());
   279    net.InitializeGradients();
   280    std::vector<Net_t> nets{};
   281    nets.reserve(nThreads);
   282    for (
size_t i = 0; i < nThreads; i++) {
   284        for (
size_t j = 0; j < net.GetDepth(); j++)
   286            auto &masterLayer = net.GetLayer(j);
   287            auto &layer = nets.back().GetLayer(j);
   289                                 masterLayer.GetWeights());
   291                                 masterLayer.GetBiases());
   295    size_t batchesInEpoch = nTrainingSamples / net.GetBatchSize();
   296    std::vector<TBatch<Architecture_t>> batches{};
   297    batches.reserve(nThreads);
   301          trainLoader.Shuffle();
   302          for (
size_t i = 0; i < batchesInEpoch; i += nThreads) {
   304             for (
size_t j = 0; j < nThreads; j++) batches.push_back(trainLoader.GetBatch());
   305             if (momentum != 0.0) {
   308                Step(net, nets, batches);
   314       for (
size_t i = 0; i < batchesInEpoch; i++) {
   315          auto b = testLoader.GetBatch();
   316          auto inputMatrix = 
b.GetInput();
   317          auto outputMatrix = 
b.GetOutput();
   318          auto weightMatrix = 
b.GetWeights();
   319          fTestError += testNet.Loss(inputMatrix, outputMatrix, weightMatrix);
   327 template <
typename Architecture_t>
   328 template <
typename Net_t>
   332    net.Forward(input, 
true);
   333    net.Backward(input, output, weights);
   335    for (
size_t i = 0; i < net.GetDepth(); i++)
   337       auto &layer = net.GetLayer(i);
   338       Architecture_t::ScaleAdd(layer.GetWeights(),
   339                                layer.GetWeightGradients(),
   341       Architecture_t::ScaleAdd(layer.GetBiases(),
   342                                layer.GetBiasGradients(),
   348 template <
typename Architecture_t>
   349 template <
typename Net_t>
   354    net.Backward(input, 
output);
   356    for (
size_t i = 0; i < net.GetDepth(); i++)
   358       auto &layer = net.GetLayer(i);
   359       Architecture_t::ScaleAdd(layer.GetWeights(),
   360                                layer.GetWeightGradients(),
   362       Architecture_t::ScaleAdd(layer.GetBiases(),
   363                                layer.GetBiasGradients(),
   370 template<
typename Architecture_t>
   371     template <
typename Net_t>
   374         std::vector<Net_t> & nets,
   377    typename Architecture_t::Matrix_t 
dummy(0,0);
   378    size_t depth = master.GetDepth();
   381    for (
size_t j = 0; j < nets.size(); j++) {
   382       nets[j].GetLayer(0).Forward(batches[j].GetInput(), 
true);
   385    for (
size_t i = 1; i < depth; i++)
   387       for (
size_t j = 0; j < nets.size(); j++) {
   388          nets[j].GetLayer(i).Forward(nets[j].GetLayer(i-1).GetOutput(), 
true);
   392    for (
size_t j = 0; j < nets.size(); j++) {
   393       evaluateGradients<Architecture_t>(nets[j].GetLayer(depth - 1).GetActivationGradients(), nets[j].GetLossFunction(),
   394                                         batches[j].GetOutput(), nets[j].GetLayer(depth - 1).GetOutput(),
   395                                         batches[j].GetWeights());
   398    for (
size_t i = depth - 1; i > 0; i--)
   400       for (
size_t j = 0; j < nets.size(); j++) {
   401          nets[j].GetLayer(i).Backward(nets[j].GetLayer(i-1).GetActivationGradients(),
   402                                       nets[j].GetLayer(i-1).GetOutput(),
   403                                       nets[j].GetRegularization(),
   404                                       nets[j].GetWeightDecay());
   407    for (
size_t j = 0; j < nets.size(); j++) {
   408       nets[j].GetLayer(0).Backward(dummy,
   409                                    batches[j].GetInput(),
   410                                    nets[j].GetRegularization(),
   411                                    nets[j].GetWeightDecay());
   414    for (
size_t j = 0; j < nets.size(); j++) {
   415       for (
size_t i = 0; i < depth; i++)
   417          auto &masterLayer = master.GetLayer(i);
   418          auto &layer       = nets[j].GetLayer(i);
   419          Architecture_t::ScaleAdd(masterLayer.GetWeights(),
   420                                   layer.GetWeightGradients(),
   423                               masterLayer.GetWeights());
   424          Architecture_t::ScaleAdd(masterLayer.GetBiases(),
   425                                   layer.GetBiasGradients(),
   428                               masterLayer.GetBiases());
   434 template<
typename Architecture_t>
   435 template <
typename Net_t>
   438         std::vector<Net_t> & nets,
   442    typename Architecture_t::Matrix_t 
dummy(0,0);
   443    size_t depth = master.GetDepth();
   446    for (
size_t j = 0; j < nets.size(); j++) {
   447       nets[j].GetLayer(0).Forward(batches[j].GetInput(), 
true);
   450    for (
size_t i = 1; i < depth; i++)
   452       for (
size_t j = 0; j < nets.size(); j++) {
   453          nets[j].GetLayer(i).Forward(nets[j].GetLayer(i-1).GetOutput(), 
true);
   457    for (
size_t j = 0; j < nets.size(); j++) {
   458       evaluateGradients<Architecture_t>(nets[j].GetLayer(depth - 1).GetActivationGradients(), nets[j].GetLossFunction(),
   459                                         batches[j].GetOutput(), nets[j].GetLayer(depth - 1).GetOutput(),
   460                                         batches[j].GetWeights());
   463    for (
size_t i = depth - 1; i > 0; i--)
   465       for (
size_t j = 0; j < nets.size(); j++) {
   466          nets[j].GetLayer(i).Backward(nets[j].GetLayer(i-1).GetActivationGradients(),
   467                                       nets[j].GetLayer(i-1).GetOutput(),
   468                                       nets[j].GetRegularization(),
   469                                       nets[j].GetWeightDecay());
   470          Architecture_t::ScaleAdd(master.GetLayer(i).GetWeightGradients(),
   471                                   nets[j].GetLayer(i).GetWeightGradients(),
   473          Architecture_t::ScaleAdd(master.GetLayer(i).GetBiasGradients(),
   474                                   nets[j].GetLayer(i).GetBiasGradients(),
   477       Architecture_t::ScaleAdd(master.GetLayer(i).GetWeightGradients(),
   478                                master.GetLayer(i).GetWeightGradients(),
   480       Architecture_t::ScaleAdd(master.GetLayer(i).GetBiasGradients(),
   481                                master.GetLayer(i).GetBiasGradients(),
   484    for (
size_t j = 0; j < nets.size(); j++) {
   485       nets[j].GetLayer(0).Backward(dummy,
   486                                    batches[j].GetInput(),
   487                                    nets[j].GetRegularization(),
   488                                    nets[j].GetWeightDecay());
   489       Architecture_t::ScaleAdd(master.GetLayer(0).GetWeightGradients(),
   490                                nets[j].GetLayer(0).GetWeightGradients(),
   492       Architecture_t::ScaleAdd(master.GetLayer(0).GetBiasGradients(),
   493                                nets[j].GetLayer(0).GetBiasGradients(),
   497    Architecture_t::ScaleAdd(master.GetLayer(0).GetWeightGradients(),
   498                             master.GetLayer(0).GetWeightGradients(),
   500    Architecture_t::ScaleAdd(master.GetLayer(0).GetBiasGradients(),
   501                             master.GetLayer(0).GetBiasGradients(),
   504    for (
size_t i = 0; i < depth; i++)
   506        auto &masterLayer = master.GetLayer(i);
   507        Architecture_t::ScaleAdd(masterLayer.GetWeights(),
   508                                 masterLayer.GetWeightGradients(),
   510        Architecture_t::ScaleAdd(masterLayer.GetBiases(),
   511                                 masterLayer.GetBiasGradients(),
   513        for (
size_t j = 0; j < nets.size(); j++) {
   514          auto &layer       = nets[j].GetLayer(i);
   516                               masterLayer.GetWeights());
   518                               masterLayer.GetBiases());
   524 template<
typename Architecture_t>
   525 template <
typename Net_t>
   528         std::vector<Net_t> & nets,
   532    typename Architecture_t::Matrix_t 
dummy(0,0);
   533    size_t depth = master.GetDepth();
   536    for (
size_t j = 0; j < nets.size(); j++) {
   537       nets[j].GetLayer(0).Forward(batches[j].GetInput(), 
true);
   540    for (
size_t i = 1; i < depth; i++)
   542       for (
size_t j = 0; j < nets.size(); j++) {
   543          nets[j].GetLayer(i).Forward(nets[j].GetLayer(i-1).GetOutput(), 
true);
   548    for (
size_t j = 0; j < nets.size(); j++) {
   549       evaluateGradients<Architecture_t>(nets[j].GetLayer(depth - 1).GetActivationGradients(), nets[j].GetLossFunction(),
   550                                         batches[j].GetOutput(), nets[j].GetLayer(depth - 1).GetOutput(),
   551                                         batches[j].GetWeights());
   555    for (
size_t i = depth - 1; i > 0; i--)
   557       for (
size_t j = 0; j < nets.size(); j++) {
   558          nets[j].GetLayer(i).Backward(nets[j].GetLayer(i-1).GetActivationGradients(),
   559                                       nets[j].GetLayer(i-1).GetOutput(),
   560                                       nets[j].GetRegularization(),
   561                                       nets[j].GetWeightDecay());
   565    for (
size_t j = 0; j < nets.size(); j++) {
   566       nets[j].GetLayer(0).Backward(dummy,
   567                                    batches[j].GetInput(),
   568                                    nets[j].GetRegularization(),
   569                                    nets[j].GetWeightDecay());
   572    for (
size_t i = 0; i < depth; i++)
   574       auto &masterLayer = master.GetLayer(i);
   575       for (
size_t j = 0; j < nets.size(); j++) {
   576          auto &layer       = nets[j].GetLayer(i);
   578                               masterLayer.GetWeights());
   580                               masterLayer.GetBiases());
   581          Architecture_t::ScaleAdd(layer.GetWeights(),
   582                                   masterLayer.GetWeightGradients(),
   584          Architecture_t::ScaleAdd(layer.GetBiases(),
   585                                   masterLayer.GetBiasGradients(),
   588       for (
size_t j = 0; j < nets.size(); j++) {
   589          auto &layer       = nets[j].GetLayer(i);
   590          Architecture_t::ScaleAdd(masterLayer.GetWeightGradients(),
   591                                   layer.GetWeightGradients(),
   593          Architecture_t::ScaleAdd(masterLayer.GetBiasGradients(),
   594                                   layer.GetBiasGradients(),
   597       Architecture_t::ScaleAdd(masterLayer.GetWeightGradients(),
   598                                masterLayer.GetWeightGradients(),
   600       Architecture_t::ScaleAdd(masterLayer.GetBiasGradients(),
   601                                masterLayer.GetBiasGradients(),
   603       Architecture_t::ScaleAdd(masterLayer.GetWeights(),
   604                                masterLayer.GetWeightGradients(),
   606       Architecture_t::ScaleAdd(masterLayer.GetBiases(),
   607                                masterLayer.GetBiasGradients(),
   613 template<
typename Architecture_t>
   614 template <
typename Net_t>
   620    net.Forward(input, 
true);
   621    net.Backward(input, output);
   623    for (
size_t i = 0; i < net.GetDepth(); i++)
   625       auto &layer = net.GetLayer(i);
   626       Architecture_t::ScaleAdd(layer.GetWeights(),
   627                                layer.GetWeightGradients(),
   630          Architecture_t::ScaleAdd(layer.GetBiases(),
   631                                   layer.GetBiasGradients(),
   638 template <
typename Architecture_t>
   639 template <
typename Net_t>
   646    net.Backward(input, 
output, weights);
   648    for (
size_t i = 0; i < net.GetDepth(); i++)
   650       auto &layer = net.GetLayer(i);
   651       Architecture_t::ScaleAdd(layer.GetWeights(),
   652                                layer.GetWeightGradients(),
   655          Architecture_t::ScaleAdd(layer.GetBiases(),
   656                                   layer.GetBiasGradients(),
   664 template<
typename Architecture_t>
   678 template<
typename Architecture_t>
 typename Architecture_t::Scalar_t Scalar_t
Scalar_t GetTrainingError() const
Scalar_t Train(const Data_t &TrainingDataIn, size_t nTrainingSamples, const Data_t &TestDataIn, size_t nTestSamples, Net_t &net, size_t nThreads=1)
Train the given net using the given training input data (events), training output data (labels)...
Scalar_t TrainMomentum(const Data_t &TrainingDataIn, size_t nTrainingSamples, const Data_t &TestDataIn, size_t nTestSamples, Net_t &net, Scalar_t momentum, size_t nThreads=1)
Same as Train(...) but uses the given momentum. 
size_t fStepCount
Number of steps performed in the current training session. 
size_t fBatchSize
Batch size to use for the training. 
Scalar_t fMinimumError
The minimum loss achieved on the training set. 
void Step(Net_t &net, Matrix_t &input, const Matrix_t &output, const Matrix_t &weights)
Perform a single optimization step on a given batch. 
Scalar_t GetTestError() const
size_t fConvergenceSteps
Number of training epochs without considerable decrease of the test error required before convergence is declared.
Scalar_t StepReducedWeightsLoss(Net_t &net, Matrix_t &input, const Matrix_t &output, const Matrix_t &weights)
Similar to StepReducedWeights(...) but also evaluates the loss. 
Scalar_t fTestError
Holds the most recently computed test loss. 
void SetBatchSize(Scalar_t rate)
void SetConvergenceSteps(size_t steps)
void StepReducedWeights(Net_t &net, Matrix_t &input, const Matrix_t &output)
Does not evaluate the loss and therefore does not trigger a possible synchronization with the device...
size_t fConvergenceCount
Current number of training epochs without considerable decrease of the test error.
size_t fTestInterval
Interval for the computation of the test error. 
Scalar_t StepLoss(Net_t &net, Matrix_t &input, const Matrix_t &output, const Matrix_t &weights)
Same as Step(...) but also evaluate the loss on the given training data. 
void Reset()
Reset minimizer object to default state. 
bool HasConverged()
Increases the minimization step counter by the test error evaluation period and uses the current internal value of the test error to check for convergence.
void StepNesterov(Net_t &master, std::vector< Net_t > &nets, std::vector< TBatch< Architecture_t >> &batches, Scalar_t momentum)
Same as the Step(...) method for multiple batches but uses Nesterov momentum. 
void SetLearningRate(Scalar_t rate)
size_t GetTestInterval() const
void Copy(void *source, void *dest)
size_t GetConvergenceSteps() const
static RooMathCoreReg dummy
void StepMomentum(Net_t &master, std::vector< Net_t > &nets, std::vector< TBatch< Architecture_t >> &batches, Scalar_t momentum)
Same as the Step(...) method for multiple batches but uses momentum. 
Abstract ClassifierFactory template that handles arbitrary types. 
typename Architecture_t::Matrix_t Matrix_t
Scalar_t fLearningRate
Learning rate . 
you should not use this method at all Int_t Int_t Double_t Double_t Double_t Int_t Double_t Double_t Double_t Double_t b
void SetTestInterval(size_t interval)
size_t GetConvergenceCount() const
Scalar_t fTrainingError
Holds the most recently computed training loss.