Line data Source code
1 : #include "Loss.h"
2 : #include "TypeCasts.hpp"
3 :
4 : namespace elsa::ml
5 : {
6 : // We assume the batch-size (along which we will block) is the last dimension
7 8 : static IdenticalBlocksDescriptor getBlockedBatchDescriptor(const DataDescriptor& desc)
8 : {
9 8 : index_t batchSize = desc.getNumberOfCoefficientsPerDimension().tail(1)(0);
10 16 : IndexVector_t blockDims = desc.getNumberOfCoefficientsPerDimension().head(
11 16 : desc.getNumberOfCoefficientsPerDimension().size() - 1);
12 16 : return IdenticalBlocksDescriptor(batchSize, VolumeDescriptor(blockDims));
13 : }
14 :
15 : template <typename data_t>
16 : static std::pair<index_t, index_t> getSizeParameters(const DataContainer<data_t>& x)
17 : {
18 : // As always we assume the batch-size to be the last dimension
19 : index_t batchSize = x.getDataDescriptor().getNumberOfCoefficientsPerDimension().tail(1)(0);
20 :
21 : index_t size = x.getDataDescriptor().getNumberOfCoefficients();
22 :
23 : return std::make_pair<index_t, index_t>(std::move(size), std::move(batchSize));
24 : }
25 :
26 : template <typename data_t>
27 8 : static data_t reduceLoss(LossReduction reduction, const std::vector<data_t>& batchLoss)
28 : {
29 8 : switch (reduction) {
30 4 : case LossReduction::SumOverBatchSize:
31 4 : return std::accumulate(batchLoss.begin(), batchLoss.end(), data_t(0))
32 4 : / static_cast<data_t>(batchLoss.size());
33 4 : case LossReduction::Sum:
34 4 : return std::accumulate(batchLoss.begin(), batchLoss.end(), data_t(0));
35 0 : default:
36 0 : throw std::invalid_argument("Unknown loss-reduction");
37 : }
38 : }
39 :
40 : template <typename data_t>
41 8 : static DataContainer<data_t> unreduceGradient(LossReduction reduction,
42 : const DataContainer<data_t>& gradient)
43 : {
44 8 : if (reduction == LossReduction::SumOverBatchSize) {
45 4 : return gradient / gradient.getDataDescriptor().getNumberOfCoefficientsPerDimension()(1);
46 : }
47 4 : return gradient;
48 : }
49 :
    /// Construct a loss with the given reduction mode and display name. Derived
    /// classes install their implementations into lossFunction_ /
    /// lossGradientFunction_ (see the concrete-loss constructors below).
    template <typename data_t>
    Loss<data_t>::Loss(LossReduction reduction, const std::string& name)
        : reduction_(reduction), name_(name)
    {
    }
55 : template <typename data_t>
56 10 : data_t Loss<data_t>::getLoss(const DataContainer<data_t>& x,
57 : const DataContainer<data_t>& y) const
58 : {
59 10 : return lossFunction_(reduction_, x, y);
60 : }
61 :
62 : template <typename data_t>
63 8 : DataContainer<data_t> Loss<data_t>::getLossGradient(const DataContainer<data_t>& x,
64 : const DataContainer<data_t>& y) const
65 : {
66 8 : return lossGradientFunction_(reduction_, x, y);
67 : }
68 :
    /// Convenience call operator; equivalent to getLoss(x, y).
    template <typename data_t>
    data_t Loss<data_t>::operator()(const DataContainer<data_t>& x, const DataContainer<data_t>& y)
    {
        return getLoss(x, y);
    }
74 :
    /// Return the human-readable name of this loss (set by the derived class,
    /// e.g. "MeanSquaredError").
    template <typename data_t>
    std::string Loss<data_t>::getName() const
    {
        return name_;
    }
80 :
    /// Construct a binary-crossentropy loss with the given reduction and wire up
    /// the static loss / gradient implementations in the base class.
    template <typename data_t>
    BinaryCrossentropy<data_t>::BinaryCrossentropy(LossReduction reduction)
        : Loss<data_t>(reduction, "BinaryCrossentropy")
    {
        this->lossFunction_ = &BinaryCrossentropy<data_t>::lossImpl;
        this->lossGradientFunction_ = &BinaryCrossentropy<data_t>::lossGradientImpl;
    }
88 :
    /// Binary-crossentropy:
    ///   -mean( y * log(x) + (1 - y) * log(1 - x) )  per batch, then reduced.
    /// x and 1-x are clamped at machine epsilon so the logarithms stay finite.
    template <typename data_t>
    data_t BinaryCrossentropy<data_t>::lossImpl(LossReduction reduction,
                                                const DataContainer<data_t>& x,
                                                const DataContainer<data_t>& y)
    {
        // Get blocked descriptor where each block represents a single batch
        auto batchDesc = getBlockedBatchDescriptor(x.getDataDescriptor());

        std::vector<data_t> batchLoss(asUnsigned(batchDesc.getNumberOfBlocks()), data_t(0));

        // Calculate binary-crossentropy for each batch
        for (index_t b = 0; b < batchDesc.getNumberOfBlocks(); ++b) {
#ifndef ELSA_CUDA_VECTOR
            // data_t(1) * ... forces evaluation into an Eigen expression we can
            // treat element-wise via .array().
            auto x_expr = (data_t(1) * x.viewAs(batchDesc).getBlock(b)).eval().array();
            auto y_expr = (data_t(1) * y.viewAs(batchDesc).getBlock(b)).eval().array();
            batchLoss[asUnsigned(b)] =
                (y_expr * x_expr.max(std::numeric_limits<data_t>::epsilon()).log()
                 + (1 - y_expr) * (1 - x_expr).max(std::numeric_limits<data_t>::epsilon()).log())
                    .mean();
#else
            DataContainer<data_t> x_expr = x.viewAs(batchDesc).getBlock(b);
            DataContainer<data_t> x2_expr = 1 - x_expr;
            DataContainer<data_t> y_expr = y.viewAs(batchDesc).getBlock(b);

            // Clamp both x and 1-x away from zero before taking logs.
            for (index_t i = 0; i < x_expr.getSize(); ++i) {
                x_expr[i] = std::max(x_expr[i], std::numeric_limits<data_t>::epsilon());
                x2_expr[i] = std::max(x2_expr[i], std::numeric_limits<data_t>::epsilon());
            }
            DataContainer<data_t> l = y_expr * log(x_expr) + (1 - y_expr) * log(x2_expr);
            batchLoss[asUnsigned(b)] = l.sum() / x_expr.getSize();
#endif
        }

        // reduce loss
        data_t loss = reduceLoss(reduction, batchLoss);

        // The accumulation above is the positive log-likelihood; the loss is its
        // negation.
        loss *= data_t(-1);
        return loss;
    }
128 :
129 : template <typename data_t>
130 2 : DataContainer<data_t> BinaryCrossentropy<data_t>::lossGradientImpl(
131 : LossReduction reduction, const DataContainer<data_t>& x, const DataContainer<data_t>& y)
132 : {
133 : #ifndef ELSA_CUDA_VECTOR
134 2 : auto x_expr = (data_t(1) * x).eval().array().max(std::numeric_limits<data_t>::epsilon());
135 2 : auto y_expr = (data_t(1) * y).eval().array();
136 6 : Eigen::VectorXf data =
137 2 : data_t(-1) / data_t(2)
138 : * (y_expr * data_t(1) / x_expr
139 : + (data_t(1) - y_expr) * data_t(1)
140 2 : / (data_t(1) - x_expr).max(std::numeric_limits<data_t>::epsilon()));
141 4 : return unreduceGradient(reduction, DataContainer<data_t>(x.getDataDescriptor(), data));
142 : #else
143 : DataContainer<data_t> x_expr = x;
144 : DataContainer<data_t> x2_expr = 1 - x;
145 : for (index_t i = 0; i < x_expr.getSize(); ++i) {
146 : x_expr[i] = std::max(x_expr[i], std::numeric_limits<data_t>::epsilon());
147 : x2_expr[i] = std::max(x2_expr[i], std::numeric_limits<data_t>::epsilon());
148 : }
149 : DataContainer<data_t> data = y / x_expr + (1 - y) / x2_expr;
150 : data *= data_t(-1) / data_t(2);
151 : return unreduceGradient(reduction, data.viewAs(x.getDataDescriptor()));
152 : #endif
153 : }
154 :
    /// Construct a categorical-crossentropy loss with the given reduction and
    /// wire up the static loss / gradient implementations in the base class.
    template <typename data_t>
    CategoricalCrossentropy<data_t>::CategoricalCrossentropy(LossReduction reduction)
        : Loss<data_t>(reduction, "CategoricalCrossentropy")
    {
        this->lossFunction_ = &CategoricalCrossentropy<data_t>::lossImpl;
        this->lossGradientFunction_ = &CategoricalCrossentropy<data_t>::lossGradientImpl;
    }
162 :
163 : template <typename data_t>
164 4 : data_t CategoricalCrossentropy<data_t>::lossImpl(LossReduction reduction,
165 : const DataContainer<data_t>& x,
166 : const DataContainer<data_t>& y)
167 : {
168 : // Get blocked descriptor where each block represents a single batch
169 8 : auto batchDesc = getBlockedBatchDescriptor(x.getDataDescriptor());
170 :
171 : // Calculate loss for each batch
172 4 : std::vector<data_t> batchLoss(asUnsigned(batchDesc.getNumberOfBlocks()), data_t(0));
173 20 : for (int b = 0; b < batchDesc.getNumberOfBlocks(); ++b) {
174 : #ifndef ELSA_CUDA_VECTOR
175 32 : auto x_expr = (data_t(1) * x.viewAs(batchDesc).getBlock(b))
176 : .eval()
177 : .array()
178 16 : .max(std::numeric_limits<data_t>::epsilon());
179 16 : auto y_expr = (data_t(1) * y.viewAs(batchDesc).getBlock(b)).eval();
180 16 : batchLoss[asUnsigned(b)] = y_expr.dot(x_expr.log().matrix());
181 : #else
182 : DataContainer<data_t> x_expr = x.viewAs(batchDesc).getBlock(b);
183 : for (index_t i = 0; i < x_expr.getSize(); ++i) {
184 : x_expr[i] = std::max(x_expr[i], std::numeric_limits<data_t>::epsilon());
185 : }
186 : DataContainer<data_t> y_expr = y.viewAs(batchDesc).getBlock(b);
187 : batchLoss[asUnsigned(b)] = y_expr.dot(log(x_expr));
188 : #endif
189 : }
190 4 : data_t loss = reduceLoss(reduction, batchLoss);
191 4 : loss *= data_t(-1);
192 8 : return loss;
193 : }
194 :
195 : template <typename data_t>
196 2 : DataContainer<data_t> CategoricalCrossentropy<data_t>::lossGradientImpl(
197 : LossReduction reduction, const DataContainer<data_t>& x, const DataContainer<data_t>& y)
198 : {
199 : #ifndef ELSA_CUDA_VECTOR
200 2 : auto x_expr = (data_t(1) * x).eval().array().max(std::numeric_limits<data_t>::epsilon());
201 2 : auto y_expr = (data_t(1) * y).eval().array();
202 2 : Eigen::VectorXf data = -data_t(1) * (y_expr * data_t(1) / x_expr);
203 4 : return unreduceGradient(reduction, DataContainer<data_t>(y.getDataDescriptor(), data));
204 : #else
205 : DataContainer<data_t> x_expr = x;
206 : for (index_t i = 0; i < x.getSize(); ++i) {
207 : x_expr[i] = std::max(x_expr[i], std::numeric_limits<data_t>::epsilon());
208 : }
209 : DataContainer<data_t> data = -data_t(1) * y / x_expr;
210 : return unreduceGradient(reduction, data.viewAs(y.getDataDescriptor()));
211 : #endif
212 : }
213 :
    /// Construct a sparse categorical-crossentropy loss with the given reduction
    /// and wire up the static loss / gradient implementations in the base class.
    template <typename data_t>
    SparseCategoricalCrossentropy<data_t>::SparseCategoricalCrossentropy(LossReduction reduction)
        : Loss<data_t>(reduction, "SparseCategoricalCrossentropy")
    {
        this->lossFunction_ = &SparseCategoricalCrossentropy<data_t>::lossImpl;
        this->lossGradientFunction_ = &SparseCategoricalCrossentropy<data_t>::lossGradientImpl;
    }
221 :
222 : template <typename data_t>
223 2 : data_t SparseCategoricalCrossentropy<data_t>::lossImpl(LossReduction reduction,
224 : const DataContainer<data_t>& x,
225 : const DataContainer<data_t>& y)
226 : {
227 : // This loss is the same as CategoricalCrossentropy but doesn't require
228 : // one-hot encoded labels. We therefore translate all labels to one-hot
229 : // and call CategoricalCrossentropy.
230 :
231 : // x has shape (num_classes, batch_size)
232 2 : index_t numClasses = x.getDataDescriptor().getNumberOfCoefficientsPerDimension()(0);
233 2 : index_t batchSize = x.getDataDescriptor().getNumberOfCoefficientsPerDimension()(1);
234 :
235 4 : return CategoricalCrossentropy<data_t>(reduction)(
236 4 : x, Utils::Encoding::toOneHot(y, numClasses, batchSize));
237 : }
238 :
239 : template <typename data_t>
240 2 : DataContainer<data_t> SparseCategoricalCrossentropy<data_t>::lossGradientImpl(
241 : LossReduction reduction, const DataContainer<data_t>& x, const DataContainer<data_t>& y)
242 : {
243 : // x has shape (num_classes, batch_size)
244 2 : index_t numClasses = x.getDataDescriptor().getNumberOfCoefficientsPerDimension()(0);
245 2 : index_t batchSize = x.getDataDescriptor().getNumberOfCoefficientsPerDimension()(1);
246 :
247 4 : DataContainer<data_t> oneHot = Utils::Encoding::toOneHot(y, numClasses, batchSize);
248 : #ifndef ELSA_CUDA_VECTOR
249 2 : auto x_expr = (data_t(1) * x).eval().array().max(std::numeric_limits<data_t>::epsilon());
250 2 : auto y_expr = (data_t(1) * oneHot).eval().array();
251 2 : Eigen::VectorXf data = -data_t(1) * (y_expr * data_t(1) / x_expr);
252 4 : return unreduceGradient(reduction, DataContainer<data_t>(oneHot.getDataDescriptor(), data));
253 : #else
254 : DataContainer<data_t> x_expr = x;
255 : for (index_t i = 0; i < x_expr.getSize(); ++i) {
256 : x_expr[i] = std::max(x_expr[i], std::numeric_limits<data_t>::epsilon());
257 : }
258 : DataContainer<data_t> data = -data_t(1) * (oneHot / x_expr);
259 : return unreduceGradient(reduction, data.viewAs(oneHot.getDataDescriptor()));
260 : #endif
261 : }
262 :
    /// Construct a mean-squared-error loss with the given reduction and wire up
    /// the static loss / gradient implementations in the base class.
    template <typename data_t>
    MeanSquaredError<data_t>::MeanSquaredError(LossReduction reduction)
        : Loss<data_t>(reduction, "MeanSquaredError")
    {
        this->lossFunction_ = &MeanSquaredError<data_t>::lossImpl;
        this->lossGradientFunction_ = &MeanSquaredError<data_t>::lossGradientImpl;
    }
270 :
    /// Mean-squared-error: mean((y - x)^2) per batch, then reduced.
    template <typename data_t>
    data_t MeanSquaredError<data_t>::lossImpl(LossReduction reduction,
                                              const DataContainer<data_t>& x,
                                              const DataContainer<data_t>& y)
    {

        // Get blocked descriptor where each block represents a single batch
        auto batchDesc = getBlockedBatchDescriptor(x.getDataDescriptor());

        // Calculate loss for each batch
        std::vector<data_t> batchLoss(asUnsigned(batchDesc.getNumberOfBlocks()), data_t(0));
        for (index_t b = 0; b < batchDesc.getNumberOfBlocks(); ++b) {
#ifndef ELSA_CUDA_VECTOR
            auto x_expr = (data_t(1) * x.viewAs(batchDesc).getBlock(b)).eval().array();
            auto y_expr = (data_t(1) * y.viewAs(batchDesc).getBlock(b)).eval().array();
            batchLoss[asUnsigned(b)] = ((y_expr - x_expr) * (y_expr - x_expr)).mean();
#else
            // DataContainer has no element-wise mean, so sum and divide manually.
            DataContainer<data_t> x_expr = x.viewAs(batchDesc).getBlock(b);
            DataContainer<data_t> y_expr = y.viewAs(batchDesc).getBlock(b);
            DataContainer<data_t> l = ((y_expr - x_expr) * (y_expr - x_expr));
            batchLoss[asUnsigned(b)] = l.sum() / x_expr.getSize();
#endif
        }
        data_t loss = reduceLoss(reduction, batchLoss);
        return loss;
    }
297 :
    /// Gradient of the mean-squared-error: 2/N * (y - x).
    template <typename data_t>
    DataContainer<data_t> MeanSquaredError<data_t>::lossGradientImpl(LossReduction reduction,
                                                                     const DataContainer<data_t>& x,
                                                                     const DataContainer<data_t>& y)
    {
        // NOTE(review): lossImpl averages over the *whole* per-batch block, but N
        // here is only the first dimension's size -- these agree only for 1d
        // samples; confirm intended behavior for multi-dimensional inputs.
        // NOTE(review): this returns 2/N * (y - x), the *negative* of
        // d(mean((y-x)^2))/dx = -2/N * (y - x) -- presumably the caller expects
        // this sign convention; verify against the backward pass.
        DataContainer<data_t> gradient =
            data_t(2)
            / static_cast<data_t>(x.getDataDescriptor().getNumberOfCoefficientsPerDimension()(0))
            * (y - x);
        return unreduceGradient(reduction, gradient);
    }
309 :
    // Explicit template instantiations: the losses are currently shipped for
    // single-precision (float) only.
    template class Loss<float>;
    template class BinaryCrossentropy<float>;
    template class CategoricalCrossentropy<float>;
    template class SparseCategoricalCrossentropy<float>;
    template class MeanSquaredError<float>;
315 :
316 : } // namespace elsa::ml
|