LossFunctions.cxx
// @(#)root/tmva/tmva/dnn:$Id$
// Author: Simon Pfreundschuh 20/07/16

/*************************************************************************
 * Copyright (C) 2016, Simon Pfreundschuh                                *
 * All rights reserved.                                                  *
 *                                                                       *
 * For the licensing terms see $ROOTSYS/LICENSE.                         *
 * For the list of contributors see $ROOTSYS/README/CREDITS.             *
 *************************************************************************/

/////////////////////////////////////////////////////////////////////
// Implementation of the loss functions for the multi-threaded CPU //
// implementation using ROOT's TThreadExecutor and BLAS.           //
/////////////////////////////////////////////////////////////////////

#include "TMVA/DNN/Architectures/Cpu.h"

namespace TMVA
{
namespace DNN
{
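// The three scalar loss functions below follow the same pattern: a lambda is
// mapped over the matrix elements (or over the rows, in the softmax case)
// with ROOT's TThreadExecutor, the per-element contributions are collected in
// a temporary buffer, and a parallel Reduce sums them up. The corresponding
// gradient functions write their result directly into dY instead.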
//______________________________________________________________________________
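// Weighted mean squared error over all matrix elements:
//    L = 1/(m*n) * sum_{i,j} w_i * (Y_ij - output_ij)^2
// Y and output are m x n matrices stored in column-major order, so
// workerID % m recovers the row (event) index of the per-event weight.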
template <typename AFloat>
AFloat TCpu<AFloat>::MeanSquaredError(const TCpuMatrix<AFloat> &Y, const TCpuMatrix<AFloat> &output,
                                      const TCpuMatrix<AFloat> &weights)
{
   const AFloat *dataY = Y.GetRawDataPointer();
   const AFloat *dataOutput = output.GetRawDataPointer();
   const AFloat *dataWeights = weights.GetRawDataPointer();
   std::vector<AFloat> temp(Y.GetNoElements());
   size_t m = Y.GetNrows();
   AFloat norm = 1.0 / ((AFloat) Y.GetNrows() * Y.GetNcols());

   auto f = [&dataY, &dataOutput, &dataWeights, &temp, m](UInt_t workerID) {
      AFloat dy = dataY[workerID] - dataOutput[workerID];
      temp[workerID] = dataWeights[workerID % m] * dy * dy;
      return 0;
   };

   auto reduction = [](const std::vector<AFloat> &v) {
      return std::accumulate(v.begin(), v.end(), AFloat{});
   };

   Y.GetThreadExecutor().Map(f, ROOT::TSeqI(Y.GetNoElements()));
   return norm * Y.GetThreadExecutor().Reduce(temp, reduction);
}

//______________________________________________________________________________
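// Gradient of the weighted mean squared error with respect to the network
// output:
//    dL/d(output_ij) = -2/(m*n) * w_i * (Y_ij - output_ij)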
template <typename AFloat>
void TCpu<AFloat>::MeanSquaredErrorGradients(TCpuMatrix<AFloat> &dY, const TCpuMatrix<AFloat> &Y,
                                             const TCpuMatrix<AFloat> &output, const TCpuMatrix<AFloat> &weights)
{
   AFloat *dataDY = dY.GetRawDataPointer();
   const AFloat *dataY = Y.GetRawDataPointer();
   const AFloat *dataOutput = output.GetRawDataPointer();
   const AFloat *dataWeights = weights.GetRawDataPointer();

   size_t m = Y.GetNrows();
   AFloat norm = 1.0 / ((AFloat) Y.GetNrows() * Y.GetNcols());

   auto f = [&dataDY, &dataY, &dataOutput, &dataWeights, m, norm](UInt_t workerID) {
      dataDY[workerID] = -2.0 * norm * (dataY[workerID] - dataOutput[workerID]);
      dataDY[workerID] *= dataWeights[workerID % m];
      return 0;
   };

   Y.GetThreadExecutor().Map(f, ROOT::TSeqI(Y.GetNoElements()));
}

//______________________________________________________________________________
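// Weighted sigmoid cross entropy. The sigmoid is applied internally, so
// output must hold the linear activations of the last layer:
//    sig_ij = 1 / (1 + exp(-output_ij))
//    L = -1/(m*n) * sum_{i,j} w_i * [ Y_ij * log(sig_ij) + (1 - Y_ij) * log(1 - sig_ij) ]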
template <typename AFloat>
AFloat TCpu<AFloat>::CrossEntropy(const TCpuMatrix<AFloat> &Y, const TCpuMatrix<AFloat> &output,
                                  const TCpuMatrix<AFloat> &weights)
{
   const AFloat *dataY = Y.GetRawDataPointer();
   const AFloat *dataOutput = output.GetRawDataPointer();
   const AFloat *dataWeights = weights.GetRawDataPointer();
   std::vector<AFloat> temp(Y.GetNoElements());

   size_t m = Y.GetNrows();
   AFloat norm = 1.0 / ((AFloat) Y.GetNrows() * Y.GetNcols());

   auto f = [&dataY, &dataOutput, &dataWeights, &temp, m](UInt_t workerID) {
      AFloat y = dataY[workerID];
      AFloat sig = 1.0 / (1.0 + exp(-dataOutput[workerID]));
      temp[workerID] = -(y * log(sig) + (1.0 - y) * log(1.0 - sig));
      temp[workerID] *= dataWeights[workerID % m];
      return 0;
   };

   auto reduction = [](const std::vector<AFloat> &v) {
      return std::accumulate(v.begin(), v.end(), AFloat{});
   };

   Y.GetThreadExecutor().Map(f, ROOT::TSeqI(Y.GetNoElements()));
   return norm * Y.GetThreadExecutor().Reduce(temp, reduction);
}

//______________________________________________________________________________
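// Gradient of the weighted sigmoid cross entropy with respect to the linear
// activations:
//    dL/d(output_ij) = 1/(m*n) * w_i * (sig_ij - Y_ij)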
template <typename AFloat>
void TCpu<AFloat>::CrossEntropyGradients(TCpuMatrix<AFloat> &dY, const TCpuMatrix<AFloat> &Y,
                                         const TCpuMatrix<AFloat> &output, const TCpuMatrix<AFloat> &weights)
{
   AFloat *dataDY = dY.GetRawDataPointer();
   const AFloat *dataY = Y.GetRawDataPointer();
   const AFloat *dataOutput = output.GetRawDataPointer();
   const AFloat *dataWeights = weights.GetRawDataPointer();

   size_t m = Y.GetNrows();
   AFloat norm = 1.0 / ((AFloat) Y.GetNrows() * Y.GetNcols());

   auto f = [&dataDY, &dataY, &dataOutput, &dataWeights, m, norm](UInt_t workerID) {
      AFloat y = dataY[workerID];
      AFloat sig = 1.0 / (1.0 + exp(-dataOutput[workerID]));
      dataDY[workerID] = norm * (sig - y);
      dataDY[workerID] *= dataWeights[workerID % m];
      return 0;
   };

   Y.GetThreadExecutor().Map(f, ROOT::TSeqI(Y.GetNoElements()));
}

//______________________________________________________________________________
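// Weighted softmax cross entropy for multi-class targets. The softmax is
// applied internally, so output must hold the linear activations of the last
// layer. Each worker processes one event (one matrix row):
//    p_ij = exp(output_ij) / sum_k exp(output_ik)
//    L = -1/m * sum_i w_i * sum_j Y_ij * log(p_ij)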
template <typename AFloat>
AFloat TCpu<AFloat>::SoftmaxCrossEntropy(const TCpuMatrix<AFloat> &Y, const TCpuMatrix<AFloat> &output,
                                         const TCpuMatrix<AFloat> &weights)
{
   const AFloat *dataY = Y.GetRawDataPointer();
   const AFloat *dataOutput = output.GetRawDataPointer();
   const AFloat *dataWeights = weights.GetRawDataPointer();

   std::vector<AFloat> temp(Y.GetNrows());
   size_t m = Y.GetNrows();
   size_t n = Y.GetNcols();
   AFloat norm = 1.0 / ((AFloat) m);

   auto f = [&dataY, &dataOutput, &dataWeights, &temp, n, m](UInt_t workerID) {
      AFloat sum = 0.0;
      for (size_t j = 0; j < n; j++) {
         sum += exp(dataOutput[workerID + j * m]);
      }
      for (size_t j = 0; j < n; j++) {
         temp[workerID] -=
            dataY[workerID + j * m] * log(exp(dataOutput[workerID + j * m]) / sum);
      }
      temp[workerID] *= dataWeights[workerID];
      return 0;
   };

   auto reduction = [](const std::vector<AFloat> &v) {
      return std::accumulate(v.begin(), v.end(), AFloat{});
   };

   Y.GetThreadExecutor().Map(f, ROOT::TSeqI(Y.GetNrows()));
   return norm * Y.GetThreadExecutor().Reduce(temp, reduction);
}

//______________________________________________________________________________
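// Gradient of the weighted softmax cross entropy with respect to the linear
// activations. With p_ij as above and s_i = sum_j Y_ij:
//    dL/d(output_ij) = 1/m * w_i * (p_ij * s_i - Y_ij)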
template <typename AFloat>
void TCpu<AFloat>::SoftmaxCrossEntropyGradients(TCpuMatrix<AFloat> &dY, const TCpuMatrix<AFloat> &Y,
                                                const TCpuMatrix<AFloat> &output, const TCpuMatrix<AFloat> &weights)
{
   AFloat *dataDY = dY.GetRawDataPointer();
   const AFloat *dataY = Y.GetRawDataPointer();
   const AFloat *dataOutput = output.GetRawDataPointer();
   const AFloat *dataWeights = weights.GetRawDataPointer();

   size_t m = Y.GetNrows();
   size_t n = Y.GetNcols();
   AFloat norm = 1.0 / ((AFloat) m);

   auto f = [&dataDY, &dataY, &dataOutput, &dataWeights, norm, n, m](UInt_t workerID) {
      AFloat sum = 0.0;
      AFloat sumY = 0.0;
      AFloat weight = dataWeights[workerID];
      for (size_t j = 0; j < n; j++) {
         sum += exp(dataOutput[workerID + j * m]);
         sumY += dataY[workerID + j * m];
      }
      for (size_t j = 0; j < n; j++) {
         dataDY[workerID + j * m] =
            norm * (exp(dataOutput[workerID + j * m]) / sum * sumY - dataY[workerID + j * m]);
         dataDY[workerID + j * m] *= weight;
      }
      return 0;
   };

   Y.GetThreadExecutor().Map(f, ROOT::TSeqI(Y.GetNrows()));
}

} // namespace DNN
} // namespace TMVA
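For orientation, a minimal usage sketch follows; it is not part of the original file. It assumes a TMVA build in which TCpu is instantiated for Double_t, and that TCpuMatrix provides an (nRows, nCols) constructor and operator()(row, column) element access, as suggested by the accessors listed below; names such as nEvents and nOutputs are illustrative only.

// Hypothetical usage sketch (not part of LossFunctions.cxx).
#include "TMVA/DNN/Architectures/Cpu.h"

#include <cstddef>
#include <cstdio>

using TMVA::DNN::TCpu;
using TMVA::DNN::TCpuMatrix;

int main()
{
   const size_t nEvents = 4, nOutputs = 2;
   TCpuMatrix<Double_t> Y(nEvents, nOutputs);       // targets, one row per event
   TCpuMatrix<Double_t> output(nEvents, nOutputs);  // linear activations of the last layer
   TCpuMatrix<Double_t> weights(nEvents, 1);        // per-event weights

   for (size_t i = 0; i < nEvents; ++i) {
      weights(i, 0) = 1.0;
      for (size_t j = 0; j < nOutputs; ++j) {
         Y(i, j) = (j == i % nOutputs) ? 1.0 : 0.0;
         output(i, j) = 0.1 * (i + j);
      }
   }

   // Scalar loss value.
   Double_t mse = TCpu<Double_t>::MeanSquaredError(Y, output, weights);
   std::printf("MSE = %f\n", mse);

   // Gradients with respect to the network output, written into dY.
   TCpuMatrix<Double_t> dY(nEvents, nOutputs);
   TCpu<Double_t>::MeanSquaredErrorGradients(dY, Y, output, weights);

   return 0;
}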
A pseudo container class which is a generator of indices.
Definition: TSeq.hxx:66
auto Map(F func, unsigned nTimes) -> std::vector< typename std::result_of< F()>::type >
Execute func (with no arguments) nTimes in parallel.
auto Reduce(const std::vector< T > &objs, BINARYOP redfunc) -> decltype(redfunc(objs.front(), objs.front()))
"Reduce" an std::vector into a single object in parallel by passing a binary operator as the second argument.
The TCpuMatrix class.
Definition: CpuMatrix.h:89
size_t GetNcols() const
Definition: CpuMatrix.h:143
AFloat * GetRawDataPointer()
Return raw pointer to the elements stored contiguously in column-major order.
Definition: CpuMatrix.h:152
static ROOT::TThreadExecutor & GetThreadExecutor()
Definition: CpuMatrix.h:155
size_t GetNrows() const
Definition: CpuMatrix.h:142
size_t GetNoElements() const
Definition: CpuMatrix.h:144
static void CrossEntropyGradients(TCpuMatrix< Scalar_t > &dY, const TCpuMatrix< Scalar_t > &Y, const TCpuMatrix< Scalar_t > &output, const TCpuMatrix< Scalar_t > &weights)
static Scalar_t SoftmaxCrossEntropy(const TCpuMatrix< Scalar_t > &Y, const TCpuMatrix< Scalar_t > &output, const TCpuMatrix< Scalar_t > &weights)
Softmax transformation is implicitly applied, thus output should hold the linear activations of the last layer.
static Scalar_t MeanSquaredError(const TCpuMatrix< Scalar_t > &Y, const TCpuMatrix< Scalar_t > &output, const TCpuMatrix< Scalar_t > &weights)
static void SoftmaxCrossEntropyGradients(TCpuMatrix< Scalar_t > &dY, const TCpuMatrix< Scalar_t > &Y, const TCpuMatrix< Scalar_t > &output, const TCpuMatrix< Scalar_t > &weights)
static void MeanSquaredErrorGradients(TCpuMatrix< Scalar_t > &dY, const TCpuMatrix< Scalar_t > &Y, const TCpuMatrix< Scalar_t > &output, const TCpuMatrix< Scalar_t > &weights)
static Scalar_t CrossEntropy(const TCpuMatrix< Scalar_t > &Y, const TCpuMatrix< Scalar_t > &output, const TCpuMatrix< Scalar_t > &weights)
Sigmoid transformation is implicitly applied, thus output should hold the linear activations of the last layer.