LCOV - code coverage report
Current view: top level - ml - Loss.cpp (source / functions) Hit Total Coverage
Test: test_coverage.info.cleaned Lines: 106 110 96.4 %
Date: 2022-07-06 02:47:47 Functions: 19 20 95.0 %

          Line data    Source code
       1             : #include "Loss.h"
       2             : #include "TypeCasts.hpp"
       3             : 
       4             : namespace elsa::ml
       5             : {
       6             :     // We assume the batch-size (along which we will block) is the last dimension
       7           8 :     static IdenticalBlocksDescriptor getBlockedBatchDescriptor(const DataDescriptor& desc)
       8             :     {
       9           8 :         index_t batchSize = desc.getNumberOfCoefficientsPerDimension().tail(1)(0);
      10          16 :         IndexVector_t blockDims = desc.getNumberOfCoefficientsPerDimension().head(
      11          16 :             desc.getNumberOfCoefficientsPerDimension().size() - 1);
      12          16 :         return IdenticalBlocksDescriptor(batchSize, VolumeDescriptor(blockDims));
      13             :     }
      14             : 
      15             :     template <typename data_t>
      16             :     static std::pair<index_t, index_t> getSizeParameters(const DataContainer<data_t>& x)
      17             :     {
      18             :         // As always we assume the batch-size to be the last dimension
      19             :         index_t batchSize = x.getDataDescriptor().getNumberOfCoefficientsPerDimension().tail(1)(0);
      20             : 
      21             :         index_t size = x.getDataDescriptor().getNumberOfCoefficients();
      22             : 
      23             :         return std::make_pair<index_t, index_t>(std::move(size), std::move(batchSize));
      24             :     }
      25             : 
      26             :     template <typename data_t>
      27           8 :     static data_t reduceLoss(LossReduction reduction, const std::vector<data_t>& batchLoss)
      28             :     {
      29           8 :         switch (reduction) {
      30           4 :             case LossReduction::SumOverBatchSize:
      31           4 :                 return std::accumulate(batchLoss.begin(), batchLoss.end(), data_t(0))
      32           4 :                        / static_cast<data_t>(batchLoss.size());
      33           4 :             case LossReduction::Sum:
      34           4 :                 return std::accumulate(batchLoss.begin(), batchLoss.end(), data_t(0));
      35           0 :             default:
      36           0 :                 throw std::invalid_argument("Unknown loss-reduction");
      37             :         }
      38             :     }
      39             : 
      40             :     template <typename data_t>
      41           8 :     static DataContainer<data_t> unreduceGradient(LossReduction reduction,
      42             :                                                   const DataContainer<data_t>& gradient)
      43             :     {
      44           8 :         if (reduction == LossReduction::SumOverBatchSize) {
      45           4 :             return gradient / gradient.getDataDescriptor().getNumberOfCoefficientsPerDimension()(1);
      46             :         }
      47           4 :         return gradient;
      48             :     }
      49             : 
    /// Construct a loss with the given reduction strategy and a human-readable
    /// name used for identification/logging.
    template <typename data_t>
    Loss<data_t>::Loss(LossReduction reduction, const std::string& name)
        : reduction_(reduction), name_(name)
    {
    }
    /// Compute the (reduced) loss of predictions x against labels y by
    /// dispatching to the implementation function installed by the derived
    /// class's constructor.
    template <typename data_t>
    data_t Loss<data_t>::getLoss(const DataContainer<data_t>& x,
                                 const DataContainer<data_t>& y) const
    {
        return lossFunction_(reduction_, x, y);
    }
      61             : 
    /// Compute the gradient of the loss for predictions x and labels y by
    /// dispatching to the gradient function installed by the derived class's
    /// constructor.
    template <typename data_t>
    DataContainer<data_t> Loss<data_t>::getLossGradient(const DataContainer<data_t>& x,
                                                        const DataContainer<data_t>& y) const
    {
        return lossGradientFunction_(reduction_, x, y);
    }
      68             : 
    /// Convenience call operator; equivalent to getLoss(x, y).
    template <typename data_t>
    data_t Loss<data_t>::operator()(const DataContainer<data_t>& x, const DataContainer<data_t>& y)
    {
        return getLoss(x, y);
    }
      74             : 
    /// \returns the name set at construction time (e.g. "MeanSquaredError").
    template <typename data_t>
    std::string Loss<data_t>::getName() const
    {
        return name_;
    }
      80             : 
    /// Construct a BinaryCrossentropy loss and register its static loss and
    /// gradient implementations with the Loss base class.
    template <typename data_t>
    BinaryCrossentropy<data_t>::BinaryCrossentropy(LossReduction reduction)
        : Loss<data_t>(reduction, "BinaryCrossentropy")
    {
        this->lossFunction_ = &BinaryCrossentropy<data_t>::lossImpl;
        this->lossGradientFunction_ = &BinaryCrossentropy<data_t>::lossGradientImpl;
    }
      88             : 
    /// Binary cross-entropy of predictions x against labels y:
    /// per batch element the mean of
    ///   y * log(x) + (1 - y) * log(1 - x)
    /// is computed (with log arguments clamped to epsilon to avoid -inf),
    /// the per-batch values are reduced according to `reduction`, and the
    /// result is negated at the end.
    template <typename data_t>
    data_t BinaryCrossentropy<data_t>::lossImpl(LossReduction reduction,
                                                const DataContainer<data_t>& x,
                                                const DataContainer<data_t>& y)
    {
        // Get blocked descriptor where each block represents a single batch
        auto batchDesc = getBlockedBatchDescriptor(x.getDataDescriptor());

        // One partial loss per batch element.
        std::vector<data_t> batchLoss(asUnsigned(batchDesc.getNumberOfBlocks()), data_t(0));

        // Calculate binary-crossentropy for each batch
        for (index_t b = 0; b < batchDesc.getNumberOfBlocks(); ++b) {
#ifndef ELSA_CUDA_VECTOR
            // Eigen path: evaluate the block once, then work on array views.
            auto x_expr = (data_t(1) * x.viewAs(batchDesc).getBlock(b)).eval().array();
            auto y_expr = (data_t(1) * y.viewAs(batchDesc).getBlock(b)).eval().array();
            // Clamp both log() arguments to epsilon before taking the log.
            batchLoss[asUnsigned(b)] =
                (y_expr * x_expr.max(std::numeric_limits<data_t>::epsilon()).log()
                 + (1 - y_expr) * (1 - x_expr).max(std::numeric_limits<data_t>::epsilon()).log())
                    .mean();
#else
            // CUDA path: clamp element-wise, then combine via DataContainer ops.
            DataContainer<data_t> x_expr = x.viewAs(batchDesc).getBlock(b);
            DataContainer<data_t> x2_expr = 1 - x_expr;
            DataContainer<data_t> y_expr = y.viewAs(batchDesc).getBlock(b);

            for (index_t i = 0; i < x_expr.getSize(); ++i) {
                x_expr[i] = std::max(x_expr[i], std::numeric_limits<data_t>::epsilon());
                x2_expr[i] = std::max(x2_expr[i], std::numeric_limits<data_t>::epsilon());
            }
            DataContainer<data_t> l = y_expr * log(x_expr) + (1 - y_expr) * log(x2_expr);
            // Mean over the block, matching the Eigen path's .mean().
            batchLoss[asUnsigned(b)] = l.sum() / x_expr.getSize();
#endif
        }

        // reduce loss
        data_t loss = reduceLoss(reduction, batchLoss);

        // Cross-entropy is the negative of the accumulated log-likelihood.
        loss *= data_t(-1);
        return loss;
    }
     128             : 
     129             :     template <typename data_t>
     130           2 :     DataContainer<data_t> BinaryCrossentropy<data_t>::lossGradientImpl(
     131             :         LossReduction reduction, const DataContainer<data_t>& x, const DataContainer<data_t>& y)
     132             :     {
     133             : #ifndef ELSA_CUDA_VECTOR
     134           2 :         auto x_expr = (data_t(1) * x).eval().array().max(std::numeric_limits<data_t>::epsilon());
     135           2 :         auto y_expr = (data_t(1) * y).eval().array();
     136           6 :         Eigen::VectorXf data =
     137           2 :             data_t(-1) / data_t(2)
     138             :             * (y_expr * data_t(1) / x_expr
     139             :                + (data_t(1) - y_expr) * data_t(1)
     140           2 :                      / (data_t(1) - x_expr).max(std::numeric_limits<data_t>::epsilon()));
     141           4 :         return unreduceGradient(reduction, DataContainer<data_t>(x.getDataDescriptor(), data));
     142             : #else
     143             :         DataContainer<data_t> x_expr = x;
     144             :         DataContainer<data_t> x2_expr = 1 - x;
     145             :         for (index_t i = 0; i < x_expr.getSize(); ++i) {
     146             :             x_expr[i] = std::max(x_expr[i], std::numeric_limits<data_t>::epsilon());
     147             :             x2_expr[i] = std::max(x2_expr[i], std::numeric_limits<data_t>::epsilon());
     148             :         }
     149             :         DataContainer<data_t> data = y / x_expr + (1 - y) / x2_expr;
     150             :         data *= data_t(-1) / data_t(2);
     151             :         return unreduceGradient(reduction, data.viewAs(x.getDataDescriptor()));
     152             : #endif
     153             :     }
     154             : 
    /// Construct a CategoricalCrossentropy loss and register its static loss
    /// and gradient implementations with the Loss base class.
    template <typename data_t>
    CategoricalCrossentropy<data_t>::CategoricalCrossentropy(LossReduction reduction)
        : Loss<data_t>(reduction, "CategoricalCrossentropy")
    {
        this->lossFunction_ = &CategoricalCrossentropy<data_t>::lossImpl;
        this->lossGradientFunction_ = &CategoricalCrossentropy<data_t>::lossGradientImpl;
    }
     162             : 
     163             :     template <typename data_t>
     164           4 :     data_t CategoricalCrossentropy<data_t>::lossImpl(LossReduction reduction,
     165             :                                                      const DataContainer<data_t>& x,
     166             :                                                      const DataContainer<data_t>& y)
     167             :     {
     168             :         // Get blocked descriptor where each block represents a single batch
     169           8 :         auto batchDesc = getBlockedBatchDescriptor(x.getDataDescriptor());
     170             : 
     171             :         // Calculate loss for each batch
     172           4 :         std::vector<data_t> batchLoss(asUnsigned(batchDesc.getNumberOfBlocks()), data_t(0));
     173          20 :         for (int b = 0; b < batchDesc.getNumberOfBlocks(); ++b) {
     174             : #ifndef ELSA_CUDA_VECTOR
     175          32 :             auto x_expr = (data_t(1) * x.viewAs(batchDesc).getBlock(b))
     176             :                               .eval()
     177             :                               .array()
     178          16 :                               .max(std::numeric_limits<data_t>::epsilon());
     179          16 :             auto y_expr = (data_t(1) * y.viewAs(batchDesc).getBlock(b)).eval();
     180          16 :             batchLoss[asUnsigned(b)] = y_expr.dot(x_expr.log().matrix());
     181             : #else
     182             :             DataContainer<data_t> x_expr = x.viewAs(batchDesc).getBlock(b);
     183             :             for (index_t i = 0; i < x_expr.getSize(); ++i) {
     184             :                 x_expr[i] = std::max(x_expr[i], std::numeric_limits<data_t>::epsilon());
     185             :             }
     186             :             DataContainer<data_t> y_expr = y.viewAs(batchDesc).getBlock(b);
     187             :             batchLoss[asUnsigned(b)] = y_expr.dot(log(x_expr));
     188             : #endif
     189             :         }
     190           4 :         data_t loss = reduceLoss(reduction, batchLoss);
     191           4 :         loss *= data_t(-1);
     192           8 :         return loss;
     193             :     }
     194             : 
     195             :     template <typename data_t>
     196           2 :     DataContainer<data_t> CategoricalCrossentropy<data_t>::lossGradientImpl(
     197             :         LossReduction reduction, const DataContainer<data_t>& x, const DataContainer<data_t>& y)
     198             :     {
     199             : #ifndef ELSA_CUDA_VECTOR
     200           2 :         auto x_expr = (data_t(1) * x).eval().array().max(std::numeric_limits<data_t>::epsilon());
     201           2 :         auto y_expr = (data_t(1) * y).eval().array();
     202           2 :         Eigen::VectorXf data = -data_t(1) * (y_expr * data_t(1) / x_expr);
     203           4 :         return unreduceGradient(reduction, DataContainer<data_t>(y.getDataDescriptor(), data));
     204             : #else
     205             :         DataContainer<data_t> x_expr = x;
     206             :         for (index_t i = 0; i < x.getSize(); ++i) {
     207             :             x_expr[i] = std::max(x_expr[i], std::numeric_limits<data_t>::epsilon());
     208             :         }
     209             :         DataContainer<data_t> data = -data_t(1) * y / x_expr;
     210             :         return unreduceGradient(reduction, data.viewAs(y.getDataDescriptor()));
     211             : #endif
     212             :     }
     213             : 
    /// Construct a SparseCategoricalCrossentropy loss and register its static
    /// loss and gradient implementations with the Loss base class.
    template <typename data_t>
    SparseCategoricalCrossentropy<data_t>::SparseCategoricalCrossentropy(LossReduction reduction)
        : Loss<data_t>(reduction, "SparseCategoricalCrossentropy")
    {
        this->lossFunction_ = &SparseCategoricalCrossentropy<data_t>::lossImpl;
        this->lossGradientFunction_ = &SparseCategoricalCrossentropy<data_t>::lossGradientImpl;
    }
     221             : 
     222             :     template <typename data_t>
     223           2 :     data_t SparseCategoricalCrossentropy<data_t>::lossImpl(LossReduction reduction,
     224             :                                                            const DataContainer<data_t>& x,
     225             :                                                            const DataContainer<data_t>& y)
     226             :     {
     227             :         // This loss is the same as CategoricalCrossentropy but doesn't require
     228             :         // one-hot encoded labels. We therefore translate all labels to one-hot
     229             :         // and call CategoricalCrossentropy.
     230             : 
     231             :         // x has shape (num_classes, batch_size)
     232           2 :         index_t numClasses = x.getDataDescriptor().getNumberOfCoefficientsPerDimension()(0);
     233           2 :         index_t batchSize = x.getDataDescriptor().getNumberOfCoefficientsPerDimension()(1);
     234             : 
     235           4 :         return CategoricalCrossentropy<data_t>(reduction)(
     236           4 :             x, Utils::Encoding::toOneHot(y, numClasses, batchSize));
     237             :     }
     238             : 
     239             :     template <typename data_t>
     240           2 :     DataContainer<data_t> SparseCategoricalCrossentropy<data_t>::lossGradientImpl(
     241             :         LossReduction reduction, const DataContainer<data_t>& x, const DataContainer<data_t>& y)
     242             :     {
     243             :         // x has shape (num_classes, batch_size)
     244           2 :         index_t numClasses = x.getDataDescriptor().getNumberOfCoefficientsPerDimension()(0);
     245           2 :         index_t batchSize = x.getDataDescriptor().getNumberOfCoefficientsPerDimension()(1);
     246             : 
     247           4 :         DataContainer<data_t> oneHot = Utils::Encoding::toOneHot(y, numClasses, batchSize);
     248             : #ifndef ELSA_CUDA_VECTOR
     249           2 :         auto x_expr = (data_t(1) * x).eval().array().max(std::numeric_limits<data_t>::epsilon());
     250           2 :         auto y_expr = (data_t(1) * oneHot).eval().array();
     251           2 :         Eigen::VectorXf data = -data_t(1) * (y_expr * data_t(1) / x_expr);
     252           4 :         return unreduceGradient(reduction, DataContainer<data_t>(oneHot.getDataDescriptor(), data));
     253             : #else
     254             :         DataContainer<data_t> x_expr = x;
     255             :         for (index_t i = 0; i < x_expr.getSize(); ++i) {
     256             :             x_expr[i] = std::max(x_expr[i], std::numeric_limits<data_t>::epsilon());
     257             :         }
     258             :         DataContainer<data_t> data = -data_t(1) * (oneHot / x_expr);
     259             :         return unreduceGradient(reduction, data.viewAs(oneHot.getDataDescriptor()));
     260             : #endif
     261             :     }
     262             : 
    /// Construct a MeanSquaredError loss and register its static loss and
    /// gradient implementations with the Loss base class.
    template <typename data_t>
    MeanSquaredError<data_t>::MeanSquaredError(LossReduction reduction)
        : Loss<data_t>(reduction, "MeanSquaredError")
    {
        this->lossFunction_ = &MeanSquaredError<data_t>::lossImpl;
        this->lossGradientFunction_ = &MeanSquaredError<data_t>::lossGradientImpl;
    }
     270             : 
     271             :     template <typename data_t>
     272           2 :     data_t MeanSquaredError<data_t>::lossImpl(LossReduction reduction,
     273             :                                               const DataContainer<data_t>& x,
     274             :                                               const DataContainer<data_t>& y)
     275             :     {
     276             : 
     277             :         // Get blocked descriptor where each block represents a single batch
     278           4 :         auto batchDesc = getBlockedBatchDescriptor(x.getDataDescriptor());
     279             : 
     280             :         // Calculate loss for each batch
     281           2 :         std::vector<data_t> batchLoss(asUnsigned(batchDesc.getNumberOfBlocks()), data_t(0));
     282           6 :         for (index_t b = 0; b < batchDesc.getNumberOfBlocks(); ++b) {
     283             : #ifndef ELSA_CUDA_VECTOR
     284           4 :             auto x_expr = (data_t(1) * x.viewAs(batchDesc).getBlock(b)).eval().array();
     285           4 :             auto y_expr = (data_t(1) * y.viewAs(batchDesc).getBlock(b)).eval().array();
     286           4 :             batchLoss[asUnsigned(b)] = ((y_expr - x_expr) * (y_expr - x_expr)).mean();
     287             : #else
     288             :             DataContainer<data_t> x_expr = x.viewAs(batchDesc).getBlock(b);
     289             :             DataContainer<data_t> y_expr = y.viewAs(batchDesc).getBlock(b);
     290             :             DataContainer<data_t> l = ((y_expr - x_expr) * (y_expr - x_expr));
     291             :             batchLoss[asUnsigned(b)] = l.sum() / x_expr.getSize();
     292             : #endif
     293             :         }
     294           2 :         data_t loss = reduceLoss(reduction, batchLoss);
     295           4 :         return loss;
     296             :     }
     297             : 
     298             :     template <typename data_t>
     299           2 :     DataContainer<data_t> MeanSquaredError<data_t>::lossGradientImpl(LossReduction reduction,
     300             :                                                                      const DataContainer<data_t>& x,
     301             :                                                                      const DataContainer<data_t>& y)
     302             :     {
     303           6 :         DataContainer<data_t> gradient =
     304             :             data_t(2)
     305           4 :             / static_cast<data_t>(x.getDataDescriptor().getNumberOfCoefficientsPerDimension()(0))
     306             :             * (y - x);
     307           4 :         return unreduceGradient(reduction, gradient);
     308             :     }
     309             : 
    // Explicit template instantiations: only float is currently provided by
    // this translation unit.
    template class Loss<float>;
    template class BinaryCrossentropy<float>;
    template class CategoricalCrossentropy<float>;
    template class SparseCategoricalCrossentropy<float>;
    template class MeanSquaredError<float>;
     315             : 
     316             : } // namespace elsa::ml

Generated by: LCOV version 1.15