#include "DnnlLayer.h"
#include "TypeCasts.hpp"
#include <iostream>
#include <sstream>

namespace elsa::ml
{
    namespace detail
    {
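        /// Construct a layer with a single input by delegating to the
        /// multi-input constructor with a one-element descriptor list.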
        template <typename data_t>
        DnnlLayer<data_t>::DnnlLayer(const VolumeDescriptor& inputDescriptor,
                                     const VolumeDescriptor& outputDescriptor,
                                     const std::string& name, int allowedNumberOfInputs)
            : DnnlLayer(std::vector<VolumeDescriptor>{inputDescriptor}, outputDescriptor, name,
                        allowedNumberOfInputs)
        {
        }

        template <typename data_t>
        DnnlLayer<data_t>::DnnlLayer(const std::vector<VolumeDescriptor>& inputDescriptor,
                                     const VolumeDescriptor& outputDescriptor,
                                     const std::string& name, int allowedNumberOfInputs)
            : _input(inputDescriptor.size()),
              _inputGradient(inputDescriptor.size()),
              _outputGradient(1), // we need at least one output-gradient
              _outputDescriptor(outputDescriptor.clone()),
              _engine(std::make_shared<dnnl::engine>(dnnl::engine::kind::cpu, 0)),
              _allowedNumberOfInputs(allowedNumberOfInputs),
              _name(name)
        {
            // A layer can have several inputs but only a single output.
            // For the gradients the situation is different: a layer can have
            // multiple output-gradients (gradients coming from different layers
            // connected after the current layer) and multiple input-gradients
            // (e.g., in the case of a concatenation layer).

            // Set input descriptors and input dimensions
            for (std::size_t i = 0; i < inputDescriptor.size(); ++i) {
                // Clone input-descriptor
                _inputDescriptor.push_back(inputDescriptor[i].clone());

                // Get memory-format tag for input and input-gradient
                _input[asUnsigned(i)].formatTag =
                    dataDescriptorToDnnlMemoryFormatTag(inputDescriptor[asUnsigned(i)], true);
                _inputGradient[asUnsigned(i)].formatTag = _input[asUnsigned(i)].formatTag;

                assert(_input[asUnsigned(i)].formatTag != dnnl::memory::format_tag::undef
                       && "Could not resolve Dnnl source memory format tag");

                // Get input and input-gradient dimensions
                for (const auto& dim :
                     _inputDescriptor[asUnsigned(i)]->getNumberOfCoefficientsPerDimension()) {
                    _input[asUnsigned(i)].dimensions.push_back(dim);
                    _inputGradient[asUnsigned(i)].dimensions.push_back(dim);
                }

                // Get input and input-gradient Dnnl descriptors
                _input[asUnsigned(i)].descriptor = dnnl::memory::desc(
                    {_input[asUnsigned(i)].dimensions}, _typeTag, dnnl::memory::format_tag::any);

                _inputGradient[asUnsigned(i)].descriptor = _input[asUnsigned(i)].descriptor;
            }

            // Set output memory descriptor
            for (const auto& dim : outputDescriptor.getNumberOfCoefficientsPerDimension())
                _output.dimensions.push_back(dim);

            // The shapes of all output-gradients match the shape of the single
            // layer output; only the memory format can differ.
            _output.descriptor =
                dnnl::memory::desc({_output.dimensions}, _typeTag, dnnl::memory::format_tag::any);
            _output.formatTag = dataDescriptorToDnnlMemoryFormatTag(outputDescriptor, true);

            for (auto&& outGrad : _outputGradient) {
                outGrad.dimensions = _output.dimensions;
                outGrad.descriptor = _output.descriptor;
                outGrad.formatTag = _output.formatTag;
            }
        }

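        /// Copy the contents of a DataContainer element-by-element into the raw
        /// buffer of an already allocated Dnnl memory object.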
        template <typename data_t>
        void DnnlLayer<data_t>::writeToDnnlMemory(const DataContainer<data_t>& data,
                                                  dnnl::memory& memory)
        {
            assert(data.getSize() == memory.get_desc().get_size() / sizeof(data_t));
            assert(memory.get_data_handle() != nullptr);

            data_t* dst = static_cast<data_t*>(memory.get_data_handle());
            for (int i = 0; i < data.getSize(); ++i)
                dst[i] = data[i];
        }

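        /// Copy the contents of a Dnnl memory object element-by-element into a
        /// DataContainer of matching size.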
        template <typename data_t>
        void DnnlLayer<data_t>::readFromDnnlMemory(DataContainer<data_t>& data,
                                                   const dnnl::memory& memory)
        {
            assert(data.getSize() == memory.get_desc().get_size() / sizeof(data_t));
            assert(memory.get_data_handle() != nullptr);
            const data_t* src = static_cast<const data_t*>(memory.get_data_handle());
            for (int i = 0; i < data.getSize(); ++i)
                data[i] = src[i];
        }

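        /// Translate the number of dimensions of a VolumeDescriptor into a Dnnl
        /// memory-format tag, either for input data (e.g. nchw) or for weights
        /// (e.g. oihw). Returns format_tag::undef for unsupported dimensions.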
        template <typename data_t>
        dnnl::memory::format_tag
            DnnlLayer<data_t>::dataDescriptorToDnnlMemoryFormatTag(const VolumeDescriptor& desc,
                                                                   bool isInput)
        {
            using ft = dnnl::memory::format_tag;

            switch (desc.getNumberOfDimensions()) {
                case 2:
                    return (isInput ? ft::nc : ft::oi);
                case 3:
                    return (isInput ? ft::ncw : ft::oiw);
                case 4:
                    return (isInput ? ft::nchw : ft::oihw);
                case 5:
                    return (isInput ? ft::ncdhw : ft::oidhw);
                default:
                    return ft::undef;
            }
        }

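        /// Return a human-readable string for a Dnnl memory-format tag, naming
        /// both the input format and the corresponding weights format.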
        template <typename data_t>
        std::string DnnlLayer<data_t>::dnnlMemoryFormatTagToString(dnnl::memory::format_tag tag)
        {
            auto formatStr = [](const std::string& input, const std::string& weights) {
                std::stringstream ss;
                ss << "dnnl::memory::format_tag::" << input
                   << " (input), dnnl::memory::format_tag::" << weights << " (weights)";
                return ss.str();
            };

            using ft = dnnl::memory::format_tag;

            switch (tag) {
                case ft::undef:
                    return formatStr("undef", "undef");
                case ft::nc:
                    return formatStr("nc", "oi");
                case ft::ncw:
                    return formatStr("ncw", "oiw");
                case ft::nchw:
                    return formatStr("nchw", "oihw");
                case ft::ncdhw:
                    return formatStr("ncdhw", "oidhw");
                default:
                    assert(false && "This execution path of the code should never be reached");
            }
            assert(false && "This execution path of the code should never be reached");
            return "";
        }

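        /// Execute all primitives of the compiled forward-stream on the given
        /// execution stream. Requires that the forward-stream has been compiled.
        ///
        /// A rough usage sketch (illustrative only; the exact call sequence is
        /// driven by the code owning the layer):
        ///
        ///     layer.setInput(input);
        ///     layer.compile(PropagationKind::Forward);
        ///     dnnl::stream stream(*layer.getEngine());
        ///     layer.forwardPropagate(stream);
        ///     auto output = layer.getOutput();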
        template <typename data_t>
        void DnnlLayer<data_t>::forwardPropagate(dnnl::stream& executionStream)
        {
            Logger::get(_name)->trace("Forward propagate");
            assert((_input.size() == _allowedNumberOfInputs
                    || _allowedNumberOfInputs == DnnlLayer::anyNumberOfInputs)
                   && "Too many inputs provided");

            assert(_input.size() == _inputDescriptor.size()
                   && "Number of provided inputs does not match number of input-descriptors");

            assert(_forwardStream.isCompiled
                   && "Cannot forward propagate because forward-stream has not been compiled");

            assert(_forwardStream.primitives.size() == _forwardStream.arguments.size()
                   && "Number of Dnnl primitives and number of primitive arguments must match");

            for (std::size_t i = 0; i < _forwardStream.primitives.size(); ++i)
                _forwardStream.primitives[i].execute(executionStream, _forwardStream.arguments[i]);

            if (needsForwardSynchronisation()) {
                executionStream.wait();
            }
        }

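        /// Execute all primitives of the compiled backward-stream on the given
        /// execution stream. Requires that the backward-stream has been compiled.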
        template <typename data_t>
        void DnnlLayer<data_t>::backwardPropagate(dnnl::stream& executionStream)
        {
            Logger::get(_name)->trace("Backward propagate");
            assert((_input.size() == _allowedNumberOfInputs
                    || _allowedNumberOfInputs == DnnlLayer::anyNumberOfInputs)
                   && "Too many inputs provided");

            assert(_backwardStream.isCompiled
                   && "Cannot backward propagate because backward-stream has not been compiled");

            assert(_backwardStream.primitives.size() == _backwardStream.arguments.size()
                   && "Number of Dnnl primitives and number of primitive arguments must match");

            for (std::size_t i = 0; i < _backwardStream.primitives.size(); ++i)
                _backwardStream.primitives[i].execute(executionStream,
                                                      _backwardStream.arguments[i]);

            if (needsBackwardSynchronisation()) {
                executionStream.wait();
            }
        }

        template <typename data_t>
        std::shared_ptr<dnnl::engine> DnnlLayer<data_t>::getEngine() const
        {
            return _engine;
        }

        template <typename data_t>
        void DnnlLayer<data_t>::setEngine(std::shared_ptr<dnnl::engine> engine)
        {
            _engine = engine;
        }

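        /// Set the layer input at the given index from a DataContainer. Allocates
        /// the described Dnnl memory on first use and copies the data into it.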
        template <typename data_t>
        void DnnlLayer<data_t>::setInput(const DataContainer<data_t>& input, index_t index)
        {
            Logger::get(_name)->trace("Set layer input from DataContainer at index {}", index);
            // Check if index is valid
            validateVectorIndex(_input, index);

            // If no input has been set yet, we allocate the described memory
            if (!_input[asUnsigned(index)].describedMemory) {
                _input[asUnsigned(index)].describedMemory = std::make_shared<dnnl::memory>(
                    dnnl::memory::desc({{_input[asUnsigned(index)].dimensions},
                                        _typeTag,
                                        _input[asUnsigned(index)].formatTag}),
                    *_engine);
            }

            writeToDnnlMemory(input, *_input[asUnsigned(index)].describedMemory);
        }

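        /// Set the layer's described input memory at the given index to an
        /// externally provided Dnnl memory object.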
        template <typename data_t>
        void DnnlLayer<data_t>::setInputMemory(std::shared_ptr<dnnl::memory> input, index_t index)
        {
            Logger::get(_name)->trace("Set layer input memory at index {}", index);

            // Check if index is valid
            validateVectorIndex(_input, index);

            // Set input memory
            _input[asUnsigned(index)].describedMemory = input;
            validateDnnlMemory(_input[asUnsigned(index)].describedMemory);
        }

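        /// Set the given memory as input at the next free input index, e.g. when
        /// a layer receives inputs from several preceding layers.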
        template <typename data_t>
        void DnnlLayer<data_t>::setNextInputMemory(std::shared_ptr<dnnl::memory> input)
        {
            index_t nextIndex = _currentInputMemoryIndex++;
            setInputMemory(input, nextIndex);
        }

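        /// Set the output-gradient at the given index from a DataContainer.
        /// Allocates the described Dnnl memory on first use and copies the data.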
        template <typename data_t>
        void DnnlLayer<data_t>::setOutputGradient(const DataContainer<data_t>& gradient,
                                                  index_t index)
        {
            // Check if index is valid
            validateVectorIndex(_outputGradient, index);

            if (!_outputGradient[asUnsigned(index)].describedMemory) {
                _outputGradient[asUnsigned(index)].describedMemory = std::make_shared<dnnl::memory>(
                    dnnl::memory::desc({{_outputGradient[asUnsigned(index)].dimensions},
                                        _typeTag,
                                        _outputGradient[asUnsigned(index)].formatTag}),
                    *_engine);
            }
            writeToDnnlMemory(gradient, *_outputGradient[asUnsigned(index)].describedMemory);
        }

        template <typename data_t>
        void
            DnnlLayer<data_t>::setOutputGradientMemory(std::shared_ptr<dnnl::memory> outputGradient,
                                                       index_t index)
        {
            // Check if index is valid
            validateVectorIndex(_outputGradient, index);

            // Set output-gradient memory
            _outputGradient[asUnsigned(index)].describedMemory = outputGradient;
            validateDnnlMemory(_outputGradient[asUnsigned(index)].describedMemory);
        }

        template <typename data_t>
        void DnnlLayer<data_t>::setNextOutputGradientMemory(
            std::shared_ptr<dnnl::memory> outputGradient)
        {
            index_t nextIndex = _currentOutputGradientMemoryIndex++;
            setOutputGradientMemory(outputGradient, nextIndex);
        }

        /// Reverse a volume-descriptor
        ///
        /// If we have a descriptor
        ///    {w, h, c, n}
        /// this creates a descriptor
        ///    {n, c, h, w}.
        static inline VolumeDescriptor reverseDataDescriptor(const DataDescriptor& desc)
        {
            IndexVector_t dims = desc.getNumberOfCoefficientsPerDimension().reverse();
            return VolumeDescriptor(dims);
        }

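        /// Return the layer output as a DataContainer. If the output memory was
        /// reordered, it is reordered back into the layer's described format
        /// before being copied out.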
        template <typename data_t>
        DataContainer<data_t> DnnlLayer<data_t>::getOutput() const
        {
            DataContainer<data_t> output(reverseDataDescriptor(*_outputDescriptor));

            // TODO(tellenbach): Check if we really need this reorder based on
            // forwardPrimitive.dst_desc(). This can potentially save a copy.

            // If memory has been reordered, we have to check whether the output
            // memory also needs to be reordered
            // TODO(tellenbach): Add reordering to layer compilation
            auto outMem = *getOutputMemory();
            if (_output.wasReordered) {
                // We reorder directly and open a new execution stream for this. Note that this
                // could be relatively expensive and should be used for reporting the final net
                // output or for debugging purposes only
                outMem =
                    dnnl::memory({{_output.dimensions}, _typeTag, _output.formatTag}, *_engine);
                dnnl::stream execStream(*_engine);
                dnnl::reorder(*_output.effectiveMemory, outMem)
                    .execute(execStream,
                             {{DNNL_ARG_FROM, *_output.effectiveMemory}, {DNNL_ARG_TO, outMem}});
                execStream.wait();
            }

            // Write reordered memory to output DataContainer. This performs a copy.
            readFromDnnlMemory(output, outMem);
            return output;
        }

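        /// Base-class part of forward-stream compilation: allocate described input
        /// memory where it is still missing and, for inputs that must not be
        /// reordered, let the effective memory point to the described memory.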
        template <typename data_t>
        void DnnlLayer<data_t>::compileForwardStream()
        {
            for (auto&& input : _input) {
                if (!input.describedMemory) {
                    input.describedMemory = std::make_shared<dnnl::memory>(
                        dnnl::memory::desc({{input.dimensions}, _typeTag, input.formatTag}),
                        *_engine);
                }

                // If this layer may not reorder source or destination memory, we
                // let described and effective memory point to the same object
                if (!input.canBeReordered) {
                    input.effectiveMemory = input.describedMemory;
                    input.descriptor = input.describedMemory->get_desc();
                }
            }
            _forwardStream.isCompiled = true;
        }

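        /// Base-class part of backward-stream compilation: allocate described
        /// output-gradient memory where it is still missing, sum up multiple
        /// output-gradients if necessary and mark the stream as compiled.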
        template <typename data_t>
        void DnnlLayer<data_t>::compileBackwardStream()
        {
            Logger::get(_name)->trace("Compile backward stream (base)");

            for (auto&& outGrad : _outputGradient) {
                if (!outGrad.describedMemory) {
                    outGrad.describedMemory = std::make_shared<dnnl::memory>(
                        dnnl::memory::desc({{outGrad.dimensions}, _typeTag, outGrad.formatTag}),
                        *_engine);
                }

                // If this layer may not reorder source or destination memory, we
                // let described and effective memory point to the same object
                if (!outGrad.canBeReordered) {
                    outGrad.effectiveMemory = outGrad.describedMemory;
                    outGrad.descriptor = outGrad.describedMemory->get_desc();
                }
            }

            // Handle multiple output-gradients
            handleMultipleOutputGradients();

            assert(_outputGradient.size() != 0
                   && "Cannot compile backward-stream without output gradient");
            _backwardStream.isCompiled = true;
        }

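        /// Compile the layer for the requested propagation kind. Backward and full
        /// propagation also require the forward-stream, so it is compiled first if
        /// necessary.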
        template <typename data_t>
        void DnnlLayer<data_t>::compile(PropagationKind propagation)
        {
            assert(_engine != nullptr && "Failed to compile layer: Dnnl engine is null");

            switch (propagation) {
                case PropagationKind::Forward:
                    if (!_forwardStream.isCompiled)
                        compileForwardStream();
                    break;
                case PropagationKind::Backward:
                case PropagationKind::Full:
                    if (!_forwardStream.isCompiled)
                        compileForwardStream();
                    if (!_backwardStream.isCompiled) {
                        compileBackwardStream();
                    }
                    break;
                default:
                    assert(false && "This execution path of the code should never be reached");
            }
        }

        template <typename data_t>
        std::shared_ptr<dnnl::memory> DnnlLayer<data_t>::getOutputMemory() const
        {
            validateDnnlMemory(_output.effectiveMemory);
            return _output.effectiveMemory;
        }

        template <typename data_t>
        std::shared_ptr<dnnl::memory> DnnlLayer<data_t>::getInputGradientMemory(index_t index)
        {
            validateVectorIndex(_inputGradient, index);
            validateDnnlMemory(_inputGradient[asUnsigned(index)].effectiveMemory);
            return _inputGradient[asUnsigned(index)].effectiveMemory;
        }

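        /// Return the input-gradient at the given index as a DataContainer,
        /// reordering the effective memory into the described format if the two
        /// differ.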
        template <typename data_t>
        DataContainer<data_t> DnnlLayer<data_t>::getInputGradient(index_t index) const
        {
            auto i = asUnsigned(index);

            validateVectorIndex(_inputGradient, index);
            validateDnnlMemory(_inputGradient[i].effectiveMemory);

            DataContainer<data_t> output(reverseDataDescriptor(*_inputDescriptor[i]));

            // By default we read from the effective memory; the reorder below only
            // replaces it if the memory formats differ.
            dnnl::memory outMem = *_inputGradient[i].effectiveMemory;
            if (_inputGradient[i].effectiveMemory->get_desc() != _inputGradient[i].descriptor) {
                outMem = dnnl::memory(
                    {{_inputGradient[i].dimensions}, _typeTag, _inputGradient[i].formatTag},
                    *_engine);
                dnnl::stream execStream(*_engine);
                dnnl::reorder(*_inputGradient[i].effectiveMemory, outMem)
                    .execute(execStream, {{DNNL_ARG_FROM, *_inputGradient[i].effectiveMemory},
                                          {DNNL_ARG_TO, outMem}});
                execStream.wait();
            }

            // Write reordered memory to output DataContainer. This performs a copy.
            readFromDnnlMemory(output, outMem);
            return output;
        }

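        /// Reorder described memory into the format required by a primitive. If
        /// the primitive's memory description differs from the described one, a
        /// reorder primitive is appended to the given propagation stream;
        /// otherwise effective and described memory stay the same.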
        template <typename data_t>
        void DnnlLayer<data_t>::reorderMemory(const dnnl::memory::desc& memoryDesc,
                                              DnnlLayer<data_t>::DnnlMemory& memory,
                                              DnnlLayer<data_t>::PropagationStream& stream)
        {
            validateDnnlMemory(memory.describedMemory);
            // Default case: effective memory and described memory are the same
            memory.effectiveMemory = memory.describedMemory;

            // We need reordering if the memory description differs from the description defined by
            // the primitive. In this case we reorder from the manual description to the one chosen
            // by Dnnl (via a primitive)
            if (memory.describedMemory->get_desc() != memoryDesc) {
                memory.wasReordered = true;
                memory.effectiveMemory = std::make_shared<dnnl::memory>(memoryDesc, *_engine);

                // Add reordering primitive and its arguments to the execution stream
                ELSA_ML_ADD_DNNL_PRIMITIVE(
                    stream, dnnl::reorder(*memory.describedMemory, *memory.effectiveMemory));
                stream.arguments.push_back({{DNNL_ARG_FROM, *memory.describedMemory},
                                            {DNNL_ARG_TO, *memory.effectiveMemory}});
            }
        }

        template <typename data_t>
        bool DnnlLayer<data_t>::isTrainable() const
        {
            return false;
        }

        template <typename data_t>
        bool DnnlLayer<data_t>::canMerge() const
        {
            return false;
        }

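        /// If more than one output-gradient has been attached to this layer, add a
        /// Dnnl sum primitive to the backward-stream that accumulates all of them
        /// into the first output-gradient.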
        template <typename data_t>
        void DnnlLayer<data_t>::handleMultipleOutputGradients()
        {
            // Check that all output-gradient descriptors are equal and that
            // they match this layer's output-descriptor
            assert(!_outputGradient.empty() && "List of output-gradients is empty");
            assert(std::adjacent_find(
                       _outputGradient.begin(), _outputGradient.end(),
                       [](const auto& a, const auto& b) { return a.dimensions != b.dimensions; })
                       == _outputGradient.end()
                   && "All output-gradient descriptors must be equal");
            assert(_outputGradient.front().dimensions == _output.dimensions
                   && "Dimensions of output-gradients must match dimensions of output");

            if (_outputGradient.size() > 1) {
                Logger::get(_name)->trace("Found multiple output-gradients");
                std::vector<dnnl::memory> mem;
                std::vector<dnnl::memory::desc> memDesc;
                for (std::size_t i = 0; i < _outputGradient.size(); ++i) {
                    memDesc.push_back(_outputGradient[i].descriptor);
                    validateDnnlMemory(_outputGradient[i].effectiveMemory);
                    mem.push_back(*_outputGradient[i].effectiveMemory);
                }

                // Do not scale during summation
                std::vector<data_t> scales(_outputGradient.size(), data_t(1));

                // Create primitive-descriptor
                dnnl::sum::primitive_desc sumPrimitiveDesc(scales, memDesc, *_engine);

                // Add sum primitive to list of primitives
                ELSA_ML_ADD_DNNL_PRIMITIVE(_backwardStream, dnnl::sum(sumPrimitiveDesc));

                // We replace the first output-gradient by the sum of all output-gradients
                _backwardStream.arguments.push_back(
                    {{DNNL_ARG_DST, *_outputGradient.front().effectiveMemory}});
                for (std::size_t i = 0; i < _outputGradient.size(); ++i) {
                    _backwardStream.arguments.back().insert(
                        {DNNL_ARG_MULTIPLE_SRC + i, mem[asUnsigned(i)]});
                }
            }
        }

        template class DnnlLayer<float>;
    } // namespace detail
} // namespace elsa::ml