#include "DnnlLayer.h"
#include "TypeCasts.hpp"
#include <iostream>
#include <sstream>

namespace elsa::ml
{
    namespace detail
    {
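        /// Construct a layer with a single input by delegating to the
        /// multi-input constructor with a one-element descriptor list.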
        template <typename data_t>
        DnnlLayer<data_t>::DnnlLayer(const VolumeDescriptor& inputDescriptor,
                                     const VolumeDescriptor& outputDescriptor,
                                     const std::string& name, int allowedNumberOfInputs)
            : DnnlLayer(std::vector<VolumeDescriptor>{inputDescriptor}, outputDescriptor, name,
                        allowedNumberOfInputs)
        {
        }

        template <typename data_t>
        DnnlLayer<data_t>::DnnlLayer(const std::vector<VolumeDescriptor>& inputDescriptor,
                                     const VolumeDescriptor& outputDescriptor,
                                     const std::string& name, int allowedNumberOfInputs)
            : _input(inputDescriptor.size()),
              _inputGradient(inputDescriptor.size()),
              _outputGradient(1), // we need at least one output-gradient
              _outputDescriptor(outputDescriptor.clone()),
              _engine(std::make_shared<dnnl::engine>(dnnl::engine::kind::cpu, 0)),
              _allowedNumberOfInputs(allowedNumberOfInputs),
              _name(name)
        {
            // A layer can have several inputs but only a single output.
            // For the gradients the situation is different: a layer can have
            // multiple output-gradients (gradients coming from different layers
            // connected after the current layer) and multiple input-gradients
            // (e.g., in the case of a concatenation layer).

            // Set input descriptors and input dimensions
            for (std::size_t i = 0; i < inputDescriptor.size(); ++i) {
                // Clone input-descriptor
                _inputDescriptor.push_back(inputDescriptor[i].clone());

                // Get memory-format tag for input and input-gradient
                _input[asUnsigned(i)].formatTag =
                    dataDescriptorToDnnlMemoryFormatTag(inputDescriptor[asUnsigned(i)], true);
                _inputGradient[asUnsigned(i)].formatTag = _input[asUnsigned(i)].formatTag;

                assert(_input[asUnsigned(i)].formatTag != dnnl::memory::format_tag::undef
                       && "Could not resolve Dnnl source memory format tag");

                // Get input and input-gradient dimensions
                for (const auto& dim :
                     _inputDescriptor[asUnsigned(i)]->getNumberOfCoefficientsPerDimension()) {
                    _input[asUnsigned(i)].dimensions.push_back(dim);
                    _inputGradient[asUnsigned(i)].dimensions.push_back(dim);
                }

                // Get input and input-gradient Dnnl descriptors
                _input[asUnsigned(i)].descriptor = dnnl::memory::desc(
                    {_input[asUnsigned(i)].dimensions}, _typeTag, dnnl::memory::format_tag::any);

                _inputGradient[asUnsigned(i)].descriptor = _input[asUnsigned(i)].descriptor;
            }

            // Set output memory descriptor
            for (const auto& dim : outputDescriptor.getNumberOfCoefficientsPerDimension())
                _output.dimensions.push_back(dim);

            // The shapes of all output-gradients match the shape of the single
            // layer output; only the memory format can differ.
            _output.descriptor =
                dnnl::memory::desc({_output.dimensions}, _typeTag, dnnl::memory::format_tag::any);
            _output.formatTag = dataDescriptorToDnnlMemoryFormatTag(outputDescriptor, true);

            for (auto&& outGrad : _outputGradient) {
                outGrad.dimensions = _output.dimensions;
                outGrad.descriptor = _output.descriptor;
                outGrad.formatTag = _output.formatTag;
            }
        }

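        /// Copy the contents of a DataContainer element-by-element into the raw
        /// buffer of an already allocated Dnnl memory object.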
        template <typename data_t>
        void DnnlLayer<data_t>::writeToDnnlMemory(const DataContainer<data_t>& data,
                                                  dnnl::memory& memory)
        {
            assert(data.getSize() == memory.get_desc().get_size() / sizeof(data_t));
            assert(memory.get_data_handle() != nullptr);

            data_t* dst = static_cast<data_t*>(memory.get_data_handle());
            for (int i = 0; i < data.getSize(); ++i)
                dst[i] = data[i];
        }

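        /// Copy the contents of a Dnnl memory object element-by-element into a
        /// DataContainer of matching size.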
        template <typename data_t>
        void DnnlLayer<data_t>::readFromDnnlMemory(DataContainer<data_t>& data,
                                                   const dnnl::memory& memory)
        {
            assert(data.getSize() == memory.get_desc().get_size() / sizeof(data_t));
            assert(memory.get_data_handle() != nullptr);
            const data_t* src = static_cast<const data_t*>(memory.get_data_handle());
            for (int i = 0; i < data.getSize(); ++i)
                data[i] = src[i];
        }

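        /// Translate the number of dimensions of a VolumeDescriptor into a Dnnl
        /// memory-format tag, either for input data (e.g. nchw) or for weights
        /// (e.g. oihw). Returns format_tag::undef for unsupported dimensions.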
        template <typename data_t>
        dnnl::memory::format_tag
            DnnlLayer<data_t>::dataDescriptorToDnnlMemoryFormatTag(const VolumeDescriptor& desc,
                                                                   bool isInput)
        {
            using ft = dnnl::memory::format_tag;

            switch (desc.getNumberOfDimensions()) {
                case 2:
                    return (isInput ? ft::nc : ft::oi);
                case 3:
                    return (isInput ? ft::ncw : ft::oiw);
                case 4:
                    return (isInput ? ft::nchw : ft::oihw);
                case 5:
                    return (isInput ? ft::ncdhw : ft::oidhw);
                default:
                    return ft::undef;
            }
        }

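        /// Return a human-readable string for a Dnnl memory-format tag, naming
        /// both the input format and the corresponding weights format.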
        template <typename data_t>
        std::string DnnlLayer<data_t>::dnnlMemoryFormatTagToString(dnnl::memory::format_tag tag)
        {
            auto formatStr = [](const std::string& input, const std::string& weights) {
                std::stringstream ss;
                ss << "dnnl::memory::format_tag::" << input
                   << " (input), dnnl::memory::format_tag::" << weights << " (weights)";
                return ss.str();
            };

            using ft = dnnl::memory::format_tag;

            switch (tag) {
                case ft::undef:
                    return formatStr("undef", "undef");
                case ft::nc:
                    return formatStr("nc", "oi");
                case ft::ncw:
                    return formatStr("ncw", "oiw");
                case ft::nchw:
                    return formatStr("nchw", "oihw");
                case ft::ncdhw:
                    return formatStr("ncdhw", "oidhw");
                default:
                    assert(false && "This execution path of the code should never be reached");
            }
            assert(false && "This execution path of the code should never be reached");
            return "";
        }

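        /// Execute all primitives of the compiled forward-stream on the given
        /// execution stream. Requires that the forward-stream has been compiled.
        ///
        /// A rough usage sketch (illustrative only; the exact call sequence is
        /// driven by the code owning the layer):
        ///
        ///     layer.setInput(input);
        ///     layer.compile(PropagationKind::Forward);
        ///     dnnl::stream stream(*layer.getEngine());
        ///     layer.forwardPropagate(stream);
        ///     auto output = layer.getOutput();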
        template <typename data_t>
        void DnnlLayer<data_t>::forwardPropagate(dnnl::stream& executionStream)
        {
            Logger::get(_name)->trace("Forward propagate");
            assert((_input.size() == _allowedNumberOfInputs
                    || _allowedNumberOfInputs == DnnlLayer::anyNumberOfInputs)
                   && "Too many inputs provided");

            assert(_input.size() == _inputDescriptor.size()
                   && "Number of provided inputs does not match number of input-descriptors");

            assert(_forwardStream.isCompiled
                   && "Cannot forward propagate because forward-stream has not been compiled");

            assert(_forwardStream.primitives.size() == _forwardStream.arguments.size()
                   && "Number of Dnnl primitives and number of primitive arguments must match");

            for (std::size_t i = 0; i < _forwardStream.primitives.size(); ++i)
                _forwardStream.primitives[i].execute(executionStream, _forwardStream.arguments[i]);

            if (needsForwardSynchronisation()) {
                executionStream.wait();
            }
        }

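        /// Execute all primitives of the compiled backward-stream on the given
        /// execution stream. Requires that the backward-stream has been compiled.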
        template <typename data_t>
        void DnnlLayer<data_t>::backwardPropagate(dnnl::stream& executionStream)
        {
            Logger::get(_name)->trace("Backward propagate");
            assert((_input.size() == _allowedNumberOfInputs
                    || _allowedNumberOfInputs == DnnlLayer::anyNumberOfInputs)
                   && "Too many inputs provided");

            assert(_backwardStream.isCompiled
                   && "Cannot backward propagate because backward-stream has not been compiled");

            assert(_backwardStream.primitives.size() == _backwardStream.arguments.size()
                   && "Number of Dnnl primitives and number of primitive arguments must match");

            for (std::size_t i = 0; i < _backwardStream.primitives.size(); ++i)
                _backwardStream.primitives[i].execute(executionStream,
                                                      _backwardStream.arguments[i]);

            if (needsBackwardSynchronisation()) {
                executionStream.wait();
            }
        }

        template <typename data_t>
        std::shared_ptr<dnnl::engine> DnnlLayer<data_t>::getEngine() const
        {
            return _engine;
        }

        template <typename data_t>
        void DnnlLayer<data_t>::setEngine(std::shared_ptr<dnnl::engine> engine)
        {
            _engine = engine;
        }

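        /// Set the layer input at the given index from a DataContainer. Allocates
        /// the described Dnnl memory on first use and copies the data into it.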
        template <typename data_t>
        void DnnlLayer<data_t>::setInput(const DataContainer<data_t>& input, index_t index)
        {
            Logger::get(_name)->trace("Set layer input from DataContainer at index {}", index);
            // Check if index is valid
            validateVectorIndex(_input, index);

            // If no input has been set yet, we allocate the described memory
            if (!_input[asUnsigned(index)].describedMemory) {
                _input[asUnsigned(index)].describedMemory = std::make_shared<dnnl::memory>(
                    dnnl::memory::desc({{_input[asUnsigned(index)].dimensions},
                                        _typeTag,
                                        _input[asUnsigned(index)].formatTag}),
                    *_engine);
            }

            writeToDnnlMemory(input, *_input[asUnsigned(index)].describedMemory);
        }

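        /// Set the layer's described input memory at the given index to an
        /// externally provided Dnnl memory object.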
        template <typename data_t>
        void DnnlLayer<data_t>::setInputMemory(std::shared_ptr<dnnl::memory> input, index_t index)
        {
            Logger::get(_name)->trace("Set layer input memory at index {}", index);

            // Check if index is valid
            validateVectorIndex(_input, index);

            // Set input memory
            _input[asUnsigned(index)].describedMemory = input;
            validateDnnlMemory(_input[asUnsigned(index)].describedMemory);
        }

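        /// Set the given memory as input at the next free input index, e.g. when
        /// a layer receives inputs from several preceding layers.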
        template <typename data_t>
        void DnnlLayer<data_t>::setNextInputMemory(std::shared_ptr<dnnl::memory> input)
        {
            index_t nextIndex = _currentInputMemoryIndex++;
            setInputMemory(input, nextIndex);
        }

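        /// Set the output-gradient at the given index from a DataContainer.
        /// Allocates the described Dnnl memory on first use and copies the data.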
        template <typename data_t>
        void DnnlLayer<data_t>::setOutputGradient(const DataContainer<data_t>& gradient,
                                                  index_t index)
        {
            // Check if index is valid
            validateVectorIndex(_outputGradient, index);

            if (!_outputGradient[asUnsigned(index)].describedMemory) {
                _outputGradient[asUnsigned(index)].describedMemory = std::make_shared<dnnl::memory>(
                    dnnl::memory::desc({{_outputGradient[asUnsigned(index)].dimensions},
                                        _typeTag,
                                        _outputGradient[asUnsigned(index)].formatTag}),
                    *_engine);
            }
            writeToDnnlMemory(gradient, *_outputGradient[asUnsigned(index)].describedMemory);
        }

        template <typename data_t>
        void
            DnnlLayer<data_t>::setOutputGradientMemory(std::shared_ptr<dnnl::memory> outputGradient,
                                                       index_t index)
        {
            // Check if index is valid
            validateVectorIndex(_outputGradient, index);

            // Set output-gradient memory
            _outputGradient[asUnsigned(index)].describedMemory = outputGradient;
            validateDnnlMemory(_outputGradient[asUnsigned(index)].describedMemory);
        }

        template <typename data_t>
        void DnnlLayer<data_t>::setNextOutputGradientMemory(
            std::shared_ptr<dnnl::memory> outputGradient)
        {
            index_t nextIndex = _currentOutputGradientMemoryIndex++;
            setOutputGradientMemory(outputGradient, nextIndex);
        }

        /// Reverse a volume-descriptor
        ///
        /// If we have a descriptor
        ///    {w, h, c, n}
        /// this creates a descriptor
        ///    {n, c, h, w}.
        static inline VolumeDescriptor reverseDataDescriptor(const DataDescriptor& desc)
        {
            IndexVector_t dims = desc.getNumberOfCoefficientsPerDimension().reverse();
            return VolumeDescriptor(dims);
        }

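        /// Return the layer output as a DataContainer. If the output memory was
        /// reordered, it is reordered back into the layer's described format
        /// before being copied out.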
        template <typename data_t>
        DataContainer<data_t> DnnlLayer<data_t>::getOutput() const
        {
            DataContainer<data_t> output(reverseDataDescriptor(*_outputDescriptor));

            // TODO(tellenbach): Check if we really need this reorder based on
            // forwardPrimitive.dst_desc(). This can potentially save a copy.

            // If memory has been reordered, we have to check whether the output
            // memory also needs to be reordered
            // TODO(tellenbach): Add reordering to layer compilation
            auto outMem = *getOutputMemory();
            if (_output.wasReordered) {
                // We reorder directly and open a new execution stream for this. Note that this
                // could be relatively expensive and should be used for reporting the final net
                // output or for debugging purposes only
                outMem =
                    dnnl::memory({{_output.dimensions}, _typeTag, _output.formatTag}, *_engine);
                dnnl::stream execStream(*_engine);
                dnnl::reorder(*_output.effectiveMemory, outMem)
                    .execute(execStream,
                             {{DNNL_ARG_FROM, *_output.effectiveMemory}, {DNNL_ARG_TO, outMem}});
                execStream.wait();
            }

            // Write reordered memory to output DataContainer. This performs a copy.
            readFromDnnlMemory(output, outMem);
            return output;
        }

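        /// Base-class part of forward-stream compilation: allocate described input
        /// memory where it is still missing and, for inputs that must not be
        /// reordered, let the effective memory point to the described memory.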
        template <typename data_t>
        void DnnlLayer<data_t>::compileForwardStream()
        {
            for (auto&& input : _input) {
                if (!input.describedMemory) {
                    input.describedMemory = std::make_shared<dnnl::memory>(
                        dnnl::memory::desc({{input.dimensions}, _typeTag, input.formatTag}),
                        *_engine);
                }

                // If this layer may not reorder source or destination memory, we
                // let described and effective memory point to the same object
                if (!input.canBeReordered) {
                    input.effectiveMemory = input.describedMemory;
                    input.descriptor = input.describedMemory->get_desc();
                }
            }
            _forwardStream.isCompiled = true;
        }

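        /// Base-class part of backward-stream compilation: allocate described
        /// output-gradient memory where it is still missing, sum up multiple
        /// output-gradients if necessary and mark the stream as compiled.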
        template <typename data_t>
        void DnnlLayer<data_t>::compileBackwardStream()
        {
            Logger::get(_name)->trace("Compile backward stream (base)");

            for (auto&& outGrad : _outputGradient) {
                if (!outGrad.describedMemory) {
                    outGrad.describedMemory = std::make_shared<dnnl::memory>(
                        dnnl::memory::desc({{outGrad.dimensions}, _typeTag, outGrad.formatTag}),
                        *_engine);
                }

                // If this layer may not reorder source or destination memory, we
                // let described and effective memory point to the same object
                if (!outGrad.canBeReordered) {
                    outGrad.effectiveMemory = outGrad.describedMemory;
                    outGrad.descriptor = outGrad.describedMemory->get_desc();
                }
            }

            // Handle multiple output-gradients
            handleMultipleOutputGradients();

            assert(_outputGradient.size() != 0
                   && "Cannot compile backward-stream without output gradient");
            _backwardStream.isCompiled = true;
        }

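        /// Compile the layer for the requested propagation kind. Backward and full
        /// propagation also require the forward-stream, so it is compiled first if
        /// necessary.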
        template <typename data_t>
        void DnnlLayer<data_t>::compile(PropagationKind propagation)
        {
            assert(_engine != nullptr && "Failed to compile layer: Dnnl engine is null");

            switch (propagation) {
                case PropagationKind::Forward:
                    if (!_forwardStream.isCompiled)
                        compileForwardStream();
                    break;
                case PropagationKind::Backward:
                case PropagationKind::Full:
                    if (!_forwardStream.isCompiled)
                        compileForwardStream();
                    if (!_backwardStream.isCompiled) {
                        compileBackwardStream();
                    }
                    break;
                default:
                    assert(false && "This execution path of the code should never be reached");
            }
        }

        template <typename data_t>
        std::shared_ptr<dnnl::memory> DnnlLayer<data_t>::getOutputMemory() const
        {
            validateDnnlMemory(_output.effectiveMemory);
            return _output.effectiveMemory;
        }

        template <typename data_t>
        std::shared_ptr<dnnl::memory> DnnlLayer<data_t>::getInputGradientMemory(index_t index)
        {
            validateVectorIndex(_inputGradient, index);
            validateDnnlMemory(_inputGradient[asUnsigned(index)].effectiveMemory);
            return _inputGradient[asUnsigned(index)].effectiveMemory;
        }

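        /// Return the input-gradient at the given index as a DataContainer,
        /// reordering the effective memory into the described format if the two
        /// differ.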
        template <typename data_t>
        DataContainer<data_t> DnnlLayer<data_t>::getInputGradient(index_t index) const
        {
            auto i = asUnsigned(index);

            validateVectorIndex(_inputGradient, index);
            validateDnnlMemory(_inputGradient[i].effectiveMemory);

            DataContainer<data_t> output(reverseDataDescriptor(*_inputDescriptor[i]));

            // By default we read from the effective memory; the reorder below only
            // replaces it if the memory formats differ.
            dnnl::memory outMem = *_inputGradient[i].effectiveMemory;
            if (_inputGradient[i].effectiveMemory->get_desc() != _inputGradient[i].descriptor) {
                outMem = dnnl::memory(
                    {{_inputGradient[i].dimensions}, _typeTag, _inputGradient[i].formatTag},
                    *_engine);
                dnnl::stream execStream(*_engine);
                dnnl::reorder(*_inputGradient[i].effectiveMemory, outMem)
                    .execute(execStream, {{DNNL_ARG_FROM, *_inputGradient[i].effectiveMemory},
                                          {DNNL_ARG_TO, outMem}});
                execStream.wait();
            }

            // Write reordered memory to output DataContainer. This performs a copy.
            readFromDnnlMemory(output, outMem);
            return output;
        }

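        /// Reorder described memory into the format required by a primitive. If
        /// the primitive's memory description differs from the described one, a
        /// reorder primitive is appended to the given propagation stream;
        /// otherwise effective and described memory stay the same.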
        template <typename data_t>
        void DnnlLayer<data_t>::reorderMemory(const dnnl::memory::desc& memoryDesc,
                                              DnnlLayer<data_t>::DnnlMemory& memory,
                                              DnnlLayer<data_t>::PropagationStream& stream)
        {
            validateDnnlMemory(memory.describedMemory);
            // Default case: effective memory and described memory are the same
            memory.effectiveMemory = memory.describedMemory;

            // We need reordering if the memory description differs from the description defined by
            // the primitive. In this case we reorder from the manual description to the one chosen
            // by Dnnl (via a primitive)
            if (memory.describedMemory->get_desc() != memoryDesc) {
                memory.wasReordered = true;
                memory.effectiveMemory = std::make_shared<dnnl::memory>(memoryDesc, *_engine);

                // Add reordering primitive and its arguments to the execution stream
                ELSA_ML_ADD_DNNL_PRIMITIVE(
                    stream, dnnl::reorder(*memory.describedMemory, *memory.effectiveMemory));
                stream.arguments.push_back({{DNNL_ARG_FROM, *memory.describedMemory},
                                            {DNNL_ARG_TO, *memory.effectiveMemory}});
            }
        }

        template <typename data_t>
        bool DnnlLayer<data_t>::isTrainable() const
        {
            return false;
        }

        template <typename data_t>
        bool DnnlLayer<data_t>::canMerge() const
        {
            return false;
        }

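        /// If more than one output-gradient has been attached to this layer, add a
        /// Dnnl sum primitive to the backward-stream that accumulates all of them
        /// into the first output-gradient.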
        template <typename data_t>
        void DnnlLayer<data_t>::handleMultipleOutputGradients()
        {
            // Check that all output-gradient descriptors are equal and that
            // they match this layer's output-descriptor
            assert(!_outputGradient.empty() && "List of output-gradients is empty");
            assert(std::adjacent_find(
                       _outputGradient.begin(), _outputGradient.end(),
                       [](const auto& a, const auto& b) { return a.dimensions != b.dimensions; })
                       == _outputGradient.end()
                   && "All output-gradient descriptors must be equal");
            assert(_outputGradient.front().dimensions == _output.dimensions
                   && "Dimensions of output-gradients must match dimensions of output");

            if (_outputGradient.size() > 1) {
                Logger::get(_name)->trace("Found multiple output-gradients");
                std::vector<dnnl::memory> mem;
                std::vector<dnnl::memory::desc> memDesc;
                for (std::size_t i = 0; i < _outputGradient.size(); ++i) {
                    memDesc.push_back(_outputGradient[i].descriptor);
                    validateDnnlMemory(_outputGradient[i].effectiveMemory);
                    mem.push_back(*_outputGradient[i].effectiveMemory);
                }

                // Do not scale during summation
                std::vector<data_t> scales(_outputGradient.size(), data_t(1));

                // Create primitive-descriptor
                dnnl::sum::primitive_desc sumPrimitiveDesc(scales, memDesc, *_engine);

                // Add sum primitive to list of primitives
                ELSA_ML_ADD_DNNL_PRIMITIVE(_backwardStream, dnnl::sum(sumPrimitiveDesc));

                // We replace the first output-gradient by the sum of all output-gradients
                _backwardStream.arguments.push_back(
                    {{DNNL_ARG_DST, *_outputGradient.front().effectiveMemory}});
                for (std::size_t i = 0; i < _outputGradient.size(); ++i) {
                    _backwardStream.arguments.back().insert(
                        {DNNL_ARG_MULTIPLE_SRC + i, mem[asUnsigned(i)]});
                }
            }
        }

        template class DnnlLayer<float>;
    } // namespace detail
} // namespace elsa::ml