doc/v614/Cpu_2LossFunctions_8cxx_source.html

 // @(#)root/tmva/tmva/dnn:$Id$
 // Author: Simon Pfreundschuh 20/07/16

 /*************************************************************************
  * Copyright (C) 2016, Simon Pfreundschuh                                *
  * All rights reserved.                                                  *
  *                                                                       *
  * For the licensing terms see $ROOTSYS/LICENSE.                         *
  * For the list of contributors see $ROOTSYS/README/CREDITS.             *
  *************************************************************************/

  /////////////////////////////////////////////////////////////////////
  // Implementation of the loss functions for the multi-threaded CPU //
  // implementation using Roots TThreadExecutor and BLAS.                 //
  /////////////////////////////////////////////////////////////////////

 #include "TMVA/DNN/Architectures/Reference.h"

 namespace TMVA
 {
 namespace DNN
 {

 //______________________________________________________________________________
 template <typename AFloat>
 AFloat TCpu<AFloat>::MeanSquaredError(const TCpuMatrix<AFloat> &Y, const TCpuMatrix<AFloat> &output,
                                       const TCpuMatrix<AFloat> &weights)
 {
    const AFloat *dataY = Y.GetRawDataPointer();
    const AFloat *dataOutput = output.GetRawDataPointer();
    const AFloat *dataWeights = weights.GetRawDataPointer();
    std::vector<AFloat> temp(Y.GetNElements());
    size_t m = Y.GetNrows();
    AFloat norm = 1.0 / ((AFloat) Y.GetNrows() * Y.GetNcols());

    auto f = [&dataY, &dataOutput, &dataWeights, &temp, m](UInt_t workerID) {
       AFloat dy = dataY[workerID] - dataOutput[workerID];
       temp[workerID] = dataWeights[workerID % m] * dy * dy;
       return 0;
    };

    auto reduction = [](const std::vector<AFloat> & v )
    {
       return std::accumulate(v.begin(),v.end(),AFloat{});
    };

    Y.GetThreadExecutor().Map(f, ROOT::TSeqI(Y.GetNElements()));
    return norm * Y.GetThreadExecutor().Reduce(temp, reduction);
 }

 //______________________________________________________________________________
 template <typename AFloat>
 void TCpu<AFloat>::MeanSquaredErrorGradients(TCpuMatrix<AFloat> &dY, const TCpuMatrix<AFloat> &Y,
                                              const TCpuMatrix<AFloat> &output, const TCpuMatrix<AFloat> &weights)
 {

          AFloat  *dataDY     = dY.GetRawDataPointer();
    const AFloat  *dataY      = Y.GetRawDataPointer();
    const AFloat  *dataOutput = output.GetRawDataPointer();
    const AFloat *dataWeights = weights.GetRawDataPointer();

    size_t m = Y.GetNrows();
    AFloat norm = 1.0 / ((AFloat) Y.GetNrows() * Y.GetNcols());

    auto f = [&dataDY, &dataY, &dataOutput, &dataWeights, m, norm](UInt_t workerID) {
       dataDY[workerID] = -2.0 * norm * (dataY[workerID] - dataOutput[workerID]);
       dataDY[workerID] *= dataWeights[workerID % m];
       return 0;
    };

    Y.GetThreadExecutor().Map(f, ROOT::TSeqI(Y.GetNElements()));
 }

 //______________________________________________________________________________
 template <typename AFloat>
 AFloat TCpu<AFloat>::CrossEntropy(const TCpuMatrix<AFloat> &Y, const TCpuMatrix<AFloat> &output,
                                   const TCpuMatrix<AFloat> &weights)
 {
    const AFloat *dataY = Y.GetRawDataPointer();
    const AFloat *dataOutput = output.GetRawDataPointer();
    const AFloat *dataWeights = weights.GetRawDataPointer();
    std::vector<AFloat> temp(Y.GetNElements());

    size_t m = Y.GetNrows();
    AFloat norm = 1.0 / ((AFloat) Y.GetNrows() * Y.GetNcols());

    auto f = [&dataY, &dataOutput, &dataWeights, &temp, m](UInt_t workerID) {
       AFloat y   = dataY[workerID];
       AFloat sig = 1.0 / (1.0 + exp(- dataOutput[workerID]));
       temp[workerID] = - (y * log(sig) + (1.0 - y) * log(1.0 - sig));
       temp[workerID] *= dataWeights[workerID % m];
       return 0;
    };

    auto reduction = [](const std::vector<AFloat> & v )
    {
       return std::accumulate(v.begin(),v.end(),AFloat{});
    };

    Y.GetThreadExecutor().Map(f, ROOT::TSeqI(Y.GetNElements()));
    return norm * Y.GetThreadExecutor().Reduce(temp, reduction);
 }

 //______________________________________________________________________________
 template <typename AFloat>
 void TCpu<AFloat>::CrossEntropyGradients(TCpuMatrix<AFloat> &dY, const TCpuMatrix<AFloat> &Y,
                                          const TCpuMatrix<AFloat> &output, const TCpuMatrix<AFloat> &weights)
 {
          AFloat  *dataDY     = dY.GetRawDataPointer();
    const AFloat  *dataY      = Y.GetRawDataPointer();
    const AFloat  *dataOutput = output.GetRawDataPointer();
    const AFloat *dataWeights = weights.GetRawDataPointer();

    size_t m = Y.GetNrows();
    AFloat norm = 1.0 / ((AFloat) Y.GetNrows() * Y.GetNcols());

    auto f = [&dataDY, &dataY, &dataOutput, &dataWeights, m, norm](UInt_t workerID) {
       AFloat y   = dataY[workerID];
       AFloat sig = 1.0 / (1.0 + exp(- dataOutput[workerID]));
       dataDY[workerID] = norm * (sig - y);
       dataDY[workerID] *= dataWeights[workerID % m];
       return 0;
    };

    Y.GetThreadExecutor().Map(f, ROOT::TSeqI(Y.GetNElements()));
 }

 //______________________________________________________________________________
 template <typename AFloat>
 AFloat TCpu<AFloat>::SoftmaxCrossEntropy(const TCpuMatrix<AFloat> &Y, const TCpuMatrix<AFloat> &output,
                                          const TCpuMatrix<AFloat> &weights)
 {
    const AFloat  *dataY      = Y.GetRawDataPointer();
    const AFloat  *dataOutput = output.GetRawDataPointer();
    const AFloat *dataWeights = weights.GetRawDataPointer();

    std::vector<AFloat> temp(Y.GetNrows());
    size_t m = Y.GetNrows();
    size_t n = Y.GetNcols();
    AFloat norm = 1.0 / ((AFloat) m);

    auto f = [&dataY, &dataOutput, &dataWeights, &temp, n, m](UInt_t workerID) {
       AFloat sum = 0.0;
       for (size_t j = 0; j < n; j++) {
          sum += exp(dataOutput[workerID + j * m]);
       }
       for (size_t j = 0; j < n; j++) {
          temp[workerID] -=
             dataY[workerID + j * m] * log(exp(dataOutput[workerID + j * m]) / sum);
       }
       temp[workerID] *= dataWeights[workerID];
       return 0;
    };

    auto reduction = [](const std::vector<AFloat> & v )
    {
       return std::accumulate(v.begin(),v.end(),AFloat{});
    };

    Y.GetThreadExecutor().Map(f, ROOT::TSeqI(Y.GetNrows()));
    return norm * Y.GetThreadExecutor().Reduce(temp, reduction);
 }

 //______________________________________________________________________________
 template <typename AFloat>
 void TCpu<AFloat>::SoftmaxCrossEntropyGradients(TCpuMatrix<AFloat> &dY, const TCpuMatrix<AFloat> &Y,
                                                 const TCpuMatrix<AFloat> &output, const TCpuMatrix<AFloat> &weights)
 {
          AFloat  *dataDY     = dY.GetRawDataPointer();
    const AFloat  *dataY      = Y.GetRawDataPointer();
    const AFloat  *dataOutput = output.GetRawDataPointer();
    const AFloat *dataWeights = weights.GetRawDataPointer();

    size_t m = Y.GetNrows();
    size_t n = Y.GetNcols();
    AFloat norm = 1.0 / ((AFloat) m);

    auto f = [&dataDY, &dataY, &dataOutput, &dataWeights, norm, n, m](UInt_t workerID) {
       AFloat sum  = 0.0;
       AFloat sumY = 0.0;
       AFloat weight = dataWeights[workerID];
       for (size_t j = 0; j < n; j++) {
          sum  += exp(dataOutput[workerID + j * m]);
          sumY += dataY[workerID + j * m];
       }
       for (size_t j = 0; j < n; j++) {
          dataDY[workerID + j * m] =
             norm * (exp(dataOutput[workerID + j * m]) / sum * sumY - dataY[workerID + j * m]);
          dataDY[workerID + j * m] *= weight;
       }
       return 0;
    };

    Y.GetThreadExecutor().Map(f, ROOT::TSeqI(Y.GetNrows()));
 }

 } // namespace DNN
 } // namespace TMVA
sum
static long int sum(long int i)
Definition: Factory.cxx:2258

TMVA::DNN::TCpu::MeanSquaredErrorGradients
static void MeanSquaredErrorGradients(TCpuMatrix< Scalar_t > &dY, const TCpuMatrix< Scalar_t > &Y, const TCpuMatrix< Scalar_t > &output, const TCpuMatrix< Scalar_t > &weights)
Definition: LossFunctions.cxx:53

TMVA::DNN::TCpuMatrix
The TCpuMatrix class.
Definition: CpuMatrix.h:72

m
auto * m
Definition: textangle.C:8

TMVA::DNN::TCpuMatrix::GetNcols
size_t GetNcols() const
Definition: CpuMatrix.h:127

TMVA::DNN::TCpu::CrossEntropy
static Scalar_t CrossEntropy(const TCpuMatrix< Scalar_t > &Y, const TCpuMatrix< Scalar_t > &output, const TCpuMatrix< Scalar_t > &weights)
Sigmoid transformation is implicitly applied, thus output should hold the linear activations of the l...
Definition: LossFunctions.cxx:76

f
#define f(i)
Definition: RSha256.hxx:104

Reference.h

TMVA::DNN::TCpu::MeanSquaredError
static Scalar_t MeanSquaredError(const TCpuMatrix< Scalar_t > &Y, const TCpuMatrix< Scalar_t > &output, const TCpuMatrix< Scalar_t > &weights)
Definition: LossFunctions.cxx:26

TMVA::DNN::TCpuMatrix::GetNElements
size_t GetNElements() const
Definition: CpuMatrix.h:128

ROOT::TThreadExecutor::Reduce
auto Reduce(const std::vector< T > &objs, BINARYOP redfunc) -> decltype(redfunc(objs.front(), objs.front()))
"Reduce" an std::vector into a single object in parallel by passing a binary operator as the second a...
Definition: TThreadExecutor.hxx:372

v
SVector< double, 2 > v
Definition: Dict.h:5

ClassificationKeras.output
output
Definition: ClassificationKeras.py:16

UInt_t
unsigned int UInt_t
Definition: RtypesCore.h:42

TMVA::DNN::TCpuMatrix::GetThreadExecutor
static ROOT::TThreadExecutor & GetThreadExecutor()
Definition: CpuMatrix.h:139

TMVA::DNN::TCpuMatrix::GetRawDataPointer
AFloat * GetRawDataPointer()
Return raw pointer to the elements stored contiguously in column-major order.
Definition: CpuMatrix.h:136

ROOT::TSeq
A pseudo container class which is a generator of indices.
Definition: TSeq.hxx:66

y
Double_t y[n]
Definition: legend1.C:17

TMVA::DNN::TCpu::SoftmaxCrossEntropy
static Scalar_t SoftmaxCrossEntropy(const TCpuMatrix< Scalar_t > &Y, const TCpuMatrix< Scalar_t > &output, const TCpuMatrix< Scalar_t > &weights)
Softmax transformation is implicitly applied, thus output should hold the linear activations of the l...
Definition: LossFunctions.cxx:130

TMVA
Abstract ClassifierFactory template that handles arbitrary types.
Definition: GeneticMinimizer.h:21

ROOT::TThreadExecutor::Map
auto Map(F func, unsigned nTimes) -> std::vector< typename std::result_of< F()>::type >
Execute func (with no arguments) nTimes in parallel.
Definition: TThreadExecutor.hxx:162

TMVA::DNN::TCpuMatrix::GetNrows
size_t GetNrows() const
Definition: CpuMatrix.h:126

TMVA::DNN::TCpu::SoftmaxCrossEntropyGradients
static void SoftmaxCrossEntropyGradients(TCpuMatrix< Scalar_t > &dY, const TCpuMatrix< Scalar_t > &Y, const TCpuMatrix< Scalar_t > &output, const TCpuMatrix< Scalar_t > &weights)
Definition: LossFunctions.cxx:166

exp
double exp(double)

TMVA::DNN::TCpu::CrossEntropyGradients
static void CrossEntropyGradients(TCpuMatrix< Scalar_t > &dY, const TCpuMatrix< Scalar_t > &Y, const TCpuMatrix< Scalar_t > &output, const TCpuMatrix< Scalar_t > &weights)
Definition: LossFunctions.cxx:106

n
const Int_t n
Definition: legend1.C:16

log
double log(double)