doc/v620/Cpu_2LossFunctions_8hxx_source.html

// @(#)root/tmva/tmva/dnn:$Id$

// Author: Simon Pfreundschuh 20/07/16


/*************************************************************************

 * Copyright (C) 2016, Simon Pfreundschuh                                *

 * All rights reserved.                                                  *

 *                                                                       *

 * For the licensing terms see $ROOTSYS/LICENSE.                         *

 * For the list of contributors see $ROOTSYS/README/CREDITS.             *

 *************************************************************************/


 /////////////////////////////////////////////////////////////////////

 // Implementation of the loss functions for the multi-threaded CPU //

 // implementation using ROOT's TThreadExecutor and BLAS.          //

 /////////////////////////////////////////////////////////////////////


#include "TMVA/DNN/Architectures/Reference.h"

#include "TMVA/DNN/Architectures/Cpu.h"


namespace TMVA

{

namespace DNN

{


//______________________________________________________________________________

/// Weighted mean squared error between the targets \p Y and the predictions
/// \p output.
///
/// Every element contributes `w * (Y - output)^2`, where the weight `w` is
/// looked up with the element's linear index modulo the number of rows of
/// \p Y (the raw buffers are column-major, so this is the row index). The
/// per-element terms are computed through the matrix thread executor, summed,
/// and divided by the total number of elements (rows * columns).
///
/// \param Y       target values
/// \param output  predicted values, read with the same linear indices as \p Y
/// \param weights weights, read at indices `[0, Y.GetNrows())`
/// \return the weighted sum of squared differences divided by rows * columns
template <typename AFloat>
AFloat TCpu<AFloat>::MeanSquaredError(const TCpuMatrix<AFloat> &Y, const TCpuMatrix<AFloat> &output,
                                      const TCpuMatrix<AFloat> &weights)
{
   const size_t nRows = Y.GetNrows();
   const size_t nElements = Y.GetNoElements();
   const AFloat scale = 1.0 / (static_cast<AFloat>(nRows) * Y.GetNcols());

   const AFloat *truth = Y.GetRawDataPointer();
   const AFloat *prediction = output.GetRawDataPointer();
   const AFloat *eventWeights = weights.GetRawDataPointer();

   // One slot per matrix element; each Map invocation writes only its own slot.
   std::vector<AFloat> squaredErrors(nElements);

   auto computeTerm = [truth, prediction, eventWeights, &squaredErrors, nRows](UInt_t idx) {
      const AFloat residual = truth[idx] - prediction[idx];
      squaredErrors[idx] = eventWeights[idx % nRows] * residual * residual;
      return 0;
   };

   auto sumAll = [](const std::vector<AFloat> &terms) {
      return std::accumulate(terms.begin(), terms.end(), AFloat{});
   };

   Y.GetThreadExecutor().Map(computeTerm, ROOT::TSeqI(nElements));
   return scale * Y.GetThreadExecutor().Reduce(squaredErrors, sumAll);
}


//______________________________________________________________________________

/// Gradient of the weighted mean squared error with respect to \p output.
///
/// For every element the value `-2 / (rows * columns) * (Y - output)` is
/// stored in \p dY and then multiplied by the weight found at the element's
/// linear index modulo the number of rows of \p Y.
///
/// \param dY      destination buffer, written at every linear index of \p Y
/// \param Y       target values
/// \param output  predicted values
/// \param weights weights, read at indices `[0, Y.GetNrows())`
template <typename AFloat>
void TCpu<AFloat>::MeanSquaredErrorGradients(TCpuMatrix<AFloat> &dY, const TCpuMatrix<AFloat> &Y,
                                             const TCpuMatrix<AFloat> &output, const TCpuMatrix<AFloat> &weights)
{
   const size_t nRows = Y.GetNrows();
   const AFloat scale = 1.0 / (static_cast<AFloat>(nRows) * Y.GetNcols());

   AFloat *gradient = dY.GetRawDataPointer();
   const AFloat *truth = Y.GetRawDataPointer();
   const AFloat *prediction = output.GetRawDataPointer();
   const AFloat *eventWeights = weights.GetRawDataPointer();

   auto computeGradient = [gradient, truth, prediction, eventWeights, nRows, scale](UInt_t idx) {
      // Two separate steps on purpose: the unweighted value is rounded to
      // AFloat before the weight is applied.
      AFloat value = -2.0 * scale * (truth[idx] - prediction[idx]);
      value *= eventWeights[idx % nRows];
      gradient[idx] = value;
      return 0;
   };

   Y.GetThreadExecutor().Map(computeGradient, ROOT::TSeqI(Y.GetNoElements()));
}


//______________________________________________________________________________

/// Weighted binary cross entropy with an implicit sigmoid.
///
/// \p output holds the linear activations; the sigmoid is folded into the
/// loss. For a target `y` and an activation `x` the per-element loss is
///
///     -( y * log(sigmoid(x)) + (1 - y) * log(1 - sigmoid(x)) )
///
/// multiplied by the weight found at the element's linear index modulo the
/// number of rows of \p Y. The terms are summed and divided by the total
/// number of elements (rows * columns).
///
/// The two logarithms are evaluated in their softplus form,
///
///     -log(sigmoid(x))     = max(-x, 0) + log(1 + exp(-|x|))
///     -log(1 - sigmoid(x)) = max( x, 0) + log(1 + exp(-|x|))
///
/// so the result stays finite for activations of large magnitude. Computing
/// the sigmoid first and then `log(sig)` / `log(1 - sig)` yields `-inf` (and
/// NaN after multiplication by zero) once the sigmoid rounds to 0 or 1.
///
/// \param Y       target values
/// \param output  linear activations (before the sigmoid)
/// \param weights weights, read at indices `[0, Y.GetNrows())`
/// \return the weighted cross entropy divided by rows * columns
template <typename AFloat>
AFloat TCpu<AFloat>::CrossEntropy(const TCpuMatrix<AFloat> &Y, const TCpuMatrix<AFloat> &output,
                                  const TCpuMatrix<AFloat> &weights)
{
   const AFloat *dataY = Y.GetRawDataPointer();
   const AFloat *dataOutput = output.GetRawDataPointer();
   const AFloat *dataWeights = weights.GetRawDataPointer();
   std::vector<AFloat> temp(Y.GetNoElements());

   size_t m = Y.GetNrows();
   AFloat norm = 1.0 / ((AFloat) Y.GetNrows() * Y.GetNcols());

   auto f = [&dataY, &dataOutput, &dataWeights, &temp, m](UInt_t workerID) {
      const AFloat y = dataY[workerID];
      const AFloat x = dataOutput[workerID];

      // The exponent is never positive, so exp() cannot overflow and the
      // logarithm's argument lies in (1, 2].
      const AFloat absX = (x < 0) ? -x : x;
      const AFloat tail = log(1.0 + exp(-absX));

      const AFloat negLogSig         = ((x < 0) ? -x : static_cast<AFloat>(0)) + tail; // -log(sigmoid(x))
      const AFloat negLogOneMinusSig = ((x > 0) ?  x : static_cast<AFloat>(0)) + tail; // -log(1 - sigmoid(x))

      // Both factors are finite, so y == 0 and y == 1 need no special casing.
      temp[workerID] = y * negLogSig + (1.0 - y) * negLogOneMinusSig;
      temp[workerID] *= dataWeights[workerID % m];
      return 0;
   };

   auto reduction = [](const std::vector<AFloat> & v )
   {
      return std::accumulate(v.begin(),v.end(),AFloat{});
   };

   Y.GetThreadExecutor().Map(f, ROOT::TSeqI(Y.GetNoElements()));
   return norm * Y.GetThreadExecutor().Reduce(temp, reduction);
}


//______________________________________________________________________________

/// Gradient of the weighted binary cross entropy (implicit sigmoid) with
/// respect to the linear activations in \p output.
///
/// For every element the value `(sigmoid(output) - Y) / (rows * columns)` is
/// stored in \p dY and then multiplied by the weight found at the element's
/// linear index modulo the number of rows of \p Y.
///
/// \param dY      destination buffer, written at every linear index of \p Y
/// \param Y       target values
/// \param output  linear activations (before the sigmoid)
/// \param weights weights, read at indices `[0, Y.GetNrows())`
template <typename AFloat>
void TCpu<AFloat>::CrossEntropyGradients(TCpuMatrix<AFloat> &dY, const TCpuMatrix<AFloat> &Y,
                                         const TCpuMatrix<AFloat> &output, const TCpuMatrix<AFloat> &weights)
{
   const size_t nRows = Y.GetNrows();
   const AFloat scale = 1.0 / (static_cast<AFloat>(nRows) * Y.GetNcols());

   AFloat *gradient = dY.GetRawDataPointer();
   const AFloat *truth = Y.GetRawDataPointer();
   const AFloat *activation = output.GetRawDataPointer();
   const AFloat *eventWeights = weights.GetRawDataPointer();

   auto computeGradient = [gradient, truth, activation, eventWeights, nRows, scale](UInt_t idx) {
      const AFloat target = truth[idx];
      const AFloat sigmoid = 1.0 / (1.0 + exp(- activation[idx]));
      // Store the unweighted gradient first, then apply the weight.
      AFloat value = scale * (sigmoid - target);
      value *= eventWeights[idx % nRows];
      gradient[idx] = value;
      return 0;
   };

   Y.GetThreadExecutor().Map(computeGradient, ROOT::TSeqI(Y.GetNoElements()));
}


//______________________________________________________________________________

/// Weighted categorical cross entropy with an implicit softmax.
///
/// \p output holds the linear activations; the softmax over each row is
/// folded into the loss. Element (i, j) is read at linear index `i + j * m`
/// with `m = Y.GetNrows()`. For row `i` the loss is
///
///     - weights[i] * sum_j Y(i, j) * log(softmax(output)(i, j))
///
/// and the result is the sum over all rows divided by the number of rows.
///
/// The log-softmax is evaluated as `x_j - max - log(sum_k exp(x_k - max))`
/// with `max` the largest activation of the row. This is mathematically the
/// same as `log(exp(x_j) / sum_k exp(x_k))`, but the exponentials cannot
/// overflow (which would give `inf / inf` = NaN), and a probability that
/// underflows to zero no longer produces `0 * log(0)` = NaN.
///
/// \param Y       target values, one row per event
/// \param output  linear activations (before the softmax)
/// \param weights weights, read at indices `[0, Y.GetNrows())`
/// \return the weighted cross entropy divided by the number of rows
template <typename AFloat>
AFloat TCpu<AFloat>::SoftmaxCrossEntropy(const TCpuMatrix<AFloat> &Y, const TCpuMatrix<AFloat> &output,
                                         const TCpuMatrix<AFloat> &weights)
{
   const AFloat  *dataY      = Y.GetRawDataPointer();
   const AFloat  *dataOutput = output.GetRawDataPointer();
   const AFloat *dataWeights = weights.GetRawDataPointer();

   std::vector<AFloat> temp(Y.GetNrows());
   size_t m = Y.GetNrows();
   size_t n = Y.GetNcols();
   AFloat norm = 1.0 / ((AFloat) m);

   auto f = [&dataY, &dataOutput, &dataWeights, &temp, n, m](UInt_t workerID) {
      // Largest activation of this row; subtracting it keeps every exponent <= 0.
      AFloat maxOutput = (n > 0) ? dataOutput[workerID] : static_cast<AFloat>(0);
      for (size_t j = 1; j < n; j++) {
         const AFloat value = dataOutput[workerID + j * m];
         if (value > maxOutput) maxOutput = value;
      }

      // The maximum itself contributes exp(0) == 1, so sum >= 1 and log(sum) is finite.
      AFloat sum = 0.0;
      for (size_t j = 0; j < n; j++) {
         sum += exp(dataOutput[workerID + j * m] - maxOutput);
      }
      const AFloat logSum = log(sum);

      AFloat loss = 0.0;
      for (size_t j = 0; j < n; j++) {
         const AFloat logSoftmax = dataOutput[workerID + j * m] - maxOutput - logSum;
         loss -= dataY[workerID + j * m] * logSoftmax;
      }
      temp[workerID] = loss * dataWeights[workerID];
      return 0;
   };

   auto reduction = [](const std::vector<AFloat> & v )
   {
      return std::accumulate(v.begin(),v.end(),AFloat{});
   };

   Y.GetThreadExecutor().Map(f, ROOT::TSeqI(Y.GetNrows()));
   return norm * Y.GetThreadExecutor().Reduce(temp, reduction);
}


//______________________________________________________________________________

/// Gradient of the weighted softmax cross entropy with respect to the linear
/// activations in \p output.
///
/// Element (i, j) is addressed at linear index `i + j * m` with
/// `m = Y.GetNrows()`. For row `i` and column `j` the value written to \p dY is
///
///     weights[i] / m * ( softmax(output)(i, j) * sum_k Y(i, k) - Y(i, j) )
///
/// The softmax is evaluated as `exp(x_j - max) / sum_k exp(x_k - max)` with
/// `max` the largest activation of the row. Shifting by the maximum leaves
/// the softmax unchanged mathematically but keeps the exponentials from
/// overflowing, which would otherwise turn the quotient into `inf / inf` = NaN.
///
/// \param dY      destination buffer, written at every linear index of \p Y
/// \param Y       target values, one row per event
/// \param output  linear activations (before the softmax)
/// \param weights weights, read at indices `[0, Y.GetNrows())`
template <typename AFloat>
void TCpu<AFloat>::SoftmaxCrossEntropyGradients(TCpuMatrix<AFloat> &dY, const TCpuMatrix<AFloat> &Y,
                                                const TCpuMatrix<AFloat> &output, const TCpuMatrix<AFloat> &weights)
{
         AFloat  *dataDY     = dY.GetRawDataPointer();
   const AFloat  *dataY      = Y.GetRawDataPointer();
   const AFloat  *dataOutput = output.GetRawDataPointer();
   const AFloat *dataWeights = weights.GetRawDataPointer();

   size_t m = Y.GetNrows();
   size_t n = Y.GetNcols();
   AFloat norm = 1.0 / ((AFloat) m);

   auto f = [&dataDY, &dataY, &dataOutput, &dataWeights, norm, n, m](UInt_t workerID) {
      AFloat weight = dataWeights[workerID];

      // Largest activation of this row; subtracting it keeps every exponent <= 0.
      AFloat maxOutput = (n > 0) ? dataOutput[workerID] : static_cast<AFloat>(0);
      for (size_t j = 1; j < n; j++) {
         const AFloat value = dataOutput[workerID + j * m];
         if (value > maxOutput) maxOutput = value;
      }

      // Softmax denominator (>= 1 thanks to the shift) and the row sum of the targets.
      AFloat sum  = 0.0;
      AFloat sumY = 0.0;
      for (size_t j = 0; j < n; j++) {
         sum  += exp(dataOutput[workerID + j * m] - maxOutput);
         sumY += dataY[workerID + j * m];
      }

      for (size_t j = 0; j < n; j++) {
         dataDY[workerID + j * m] =
            norm * (exp(dataOutput[workerID + j * m] - maxOutput) / sum * sumY - dataY[workerID + j * m]);
         dataDY[workerID + j * m] *= weight;
      }
      return 0;
   };

   Y.GetThreadExecutor().Map(f, ROOT::TSeqI(Y.GetNrows()));
}


} // namespace DNN

} // namespace TMVA

Cpu.h

f
#define f(i)
Definition: RSha256.hxx:104

Reference.h

UInt_t
unsigned int UInt_t
Definition: RtypesCore.h:42

exp
double exp(double)

log
double log(double)

ROOT::TSeq
A pseudo container class which is a generator of indices.
Definition: TSeq.hxx:66

TMVA::DNN::TCpuMatrix
The TCpuMatrix class.
Definition: CpuMatrix.h:87

TMVA::DNN::TCpuMatrix::GetNcols
size_t GetNcols() const
Definition: CpuMatrix.h:152

TMVA::DNN::TCpuMatrix::GetRawDataPointer
AFloat * GetRawDataPointer()
Return raw pointer to the elements stored contiguously in column-major order.
Definition: CpuMatrix.h:162

TMVA::DNN::TCpuMatrix::GetNrows
size_t GetNrows() const
Definition: CpuMatrix.h:151

TMVA::DNN::TCpuMatrix::GetThreadExecutor
static Executor & GetThreadExecutor()
Definition: CpuMatrix.h:165

TMVA::DNN::TCpuMatrix::GetNoElements
size_t GetNoElements() const
Definition: CpuMatrix.h:153

TMVA::DNN::TCpu::SoftmaxCrossEntropyGradients
static void SoftmaxCrossEntropyGradients(Matrix_t &dY, const Matrix_t &Y, const Matrix_t &output, const Matrix_t &weights)
Definition: LossFunctions.hxx:173

TMVA::DNN::TCpu::CrossEntropyGradients
static void CrossEntropyGradients(Matrix_t &dY, const Matrix_t &Y, const Matrix_t &output, const Matrix_t &weights)
Definition: LossFunctions.hxx:113

TMVA::DNN::TCpu::MeanSquaredErrorGradients
static void MeanSquaredErrorGradients(Matrix_t &dY, const Matrix_t &Y, const Matrix_t &output, const Matrix_t &weights)
Definition: LossFunctions.hxx:54

TMVA::DNN::TCpu::MeanSquaredError
static Scalar_t MeanSquaredError(const Matrix_t &Y, const Matrix_t &output, const Matrix_t &weights)
Definition: LossFunctions.hxx:27

TMVA::DNN::TCpu::CrossEntropy
static Scalar_t CrossEntropy(const Matrix_t &Y, const Matrix_t &output, const Matrix_t &weights)
Sigmoid transformation is implicitly applied, thus output should hold the linear activations of the l...
Definition: LossFunctions.hxx:77

TMVA::DNN::TCpu::SoftmaxCrossEntropy
static Scalar_t SoftmaxCrossEntropy(const Matrix_t &Y, const Matrix_t &output, const Matrix_t &weights)
Softmax transformation is implicitly applied, thus output should hold the linear activations of the l...
Definition: LossFunctions.hxx:137

TMVA::Executor::Reduce
auto Reduce(const std::vector< T > &objs, R redfunc) -> decltype(redfunc(objs))
Wrap Reduce function.
Definition: Executor.h:157

TMVA::Executor::Map
auto Map(F func, unsigned nTimes) -> std::vector< typename std::result_of< F()>::type >
Wrap TExecutor::Map functions.
Definition: Executor.h:133

y
Double_t y[n]
Definition: legend1.C:17

n
const Int_t n
Definition: legend1.C:16

TMVA
create variable transformations
Definition: GeneticMinimizer.h:21

v
@ v
Definition: rootcling_impl.cxx:3622

m
auto * m
Definition: textangle.C:8

sum
static long int sum(long int i)
Definition: Factory.cxx:2276

output
static void output(int code)
Definition: gifencode.c:226