LCOV - code coverage report
Current view: top level - ml/backend/Dnnl - DnnlConvolution.cpp (source / functions) Hit Total Coverage
Test: test_coverage.info.cleaned Lines: 0 128 0.0 %
Date: 2022-07-06 02:47:47 Functions: 0 5 0.0 %

          Line data    Source code
       1             : #include "DnnlConvolution.h"
       2             : 
       3             : namespace elsa::ml
       4             : {
       5             :     namespace detail
       6             :     {
       7             :         template <typename data_t>
       8           0 :         DnnlConvolution<data_t>::DnnlConvolution(const VolumeDescriptor& inputDescriptor,
       9             :                                                  const VolumeDescriptor& outputDescriptor,
      10             :                                                  const VolumeDescriptor& weightsDescriptor,
      11             :                                                  const IndexVector_t& strides,
      12             :                                                  const IndexVector_t& paddingLow,
      13             :                                                  const IndexVector_t& paddingHigh,
      14             :                                                  Initializer initializer)
      15             :             : DnnlTrainableLayer<data_t>(inputDescriptor, outputDescriptor, weightsDescriptor,
      16           0 :                                          initializer)
      17             :         {
      18           0 :             for (const auto& dim : strides)
      19           0 :                 _stridesDimensions.push_back(dim);
      20             : 
      21           0 :             for (const auto& dim : paddingLow)
      22           0 :                 _paddingLowDimensions.push_back(dim);
      23             : 
      24           0 :             for (const auto& dim : paddingHigh)
      25           0 :                 _paddingHighDimensions.push_back(dim);
      26           0 :         }
      27             : 
      28             :         template <typename data_t>
      29           0 :         void DnnlConvolution<data_t>::compileForwardStream()
      30             :         {
      31           0 :             BaseType::compileForwardStream();
      32             : 
      33             :             // TODO(tellenbach): Add support for dilated convolution
      34           0 :             auto desc = dnnl::convolution_forward::desc(
      35             :                 /* Propagation kind */ dnnl::prop_kind::forward_training,
      36             :                 /* Convolution algorithm */ dnnl::algorithm::convolution_auto,
      37           0 :                 /* Input descriptor */ _input.front().descriptor,
      38           0 :                 /* Weights descriptor */ _weights.descriptor,
      39           0 :                 /* Bias descriptor*/ _bias.descriptor,
      40           0 :                 /* Output descriptor */ _output.descriptor,
      41           0 :                 /* Strides for spatial dims */ _stridesDimensions,
      42           0 :                 /* Lower padding for spatial dims */ _paddingLowDimensions,
      43           0 :                 /* Higher padding for spatial dims */ _paddingHighDimensions);
      44             : 
      45           0 :             _forwardPrimitiveDescriptor = dnnl::convolution_forward::primitive_desc(desc, *_engine);
      46             : 
      47             :             // Do we need to reorder?
      48           0 :             this->reorderMemory(_forwardPrimitiveDescriptor.src_desc(), _input.front(),
      49           0 :                                 _forwardStream);
      50           0 :             this->reorderMemory(_forwardPrimitiveDescriptor.weights_desc(), _weights,
      51           0 :                                 _forwardStream);
      52             : 
      53           0 :             _output.describedMemory =
      54           0 :                 std::make_shared<dnnl::memory>(_forwardPrimitiveDescriptor.dst_desc(), *_engine);
      55             : 
      56           0 :             ELSA_ML_ADD_DNNL_PRIMITIVE(_forwardStream,
      57             :                                        dnnl::convolution_forward(_forwardPrimitiveDescriptor));
      58             : 
      59           0 :             BaseType::validateDnnlMemory(_input.front().effectiveMemory, _weights.effectiveMemory,
      60           0 :                                          _bias.effectiveMemory, _output.describedMemory);
      61             : 
      62           0 :             _forwardStream.arguments.push_back({{DNNL_ARG_SRC, *_input.front().effectiveMemory},
      63           0 :                                                 {DNNL_ARG_WEIGHTS, *_weights.effectiveMemory},
      64           0 :                                                 {DNNL_ARG_BIAS, *_bias.effectiveMemory},
      65           0 :                                                 {DNNL_ARG_DST, *_output.describedMemory}});
      66             : 
      67             :             // If either the input or weights have been reordered there could potential reordering
      68             :             // for output
      69           0 :             _output.effectiveMemory = _output.describedMemory;
      70           0 :             if (_input.front().wasReordered || _weights.wasReordered) {
      71           0 :                 _output.wasReordered = true;
      72           0 :                 _output.effectiveMemory = std::make_shared<dnnl::memory>(
      73           0 :                     dnnl::memory::desc({{_output.dimensions}, _typeTag, _output.formatTag}),
      74           0 :                     *_engine);
      75           0 :                 ELSA_ML_ADD_DNNL_PRIMITIVE(_forwardStream, dnnl::reorder(*_output.describedMemory,
      76             :                                                                          *_output.effectiveMemory));
      77           0 :                 _forwardStream.arguments.push_back({{DNNL_ARG_FROM, *_output.describedMemory},
      78           0 :                                                     {DNNL_ARG_TO, *_output.effectiveMemory}});
      79             :             }
      80           0 :         }
      81             : 
      82             :         template <typename data_t>
      83           0 :         void DnnlConvolution<data_t>::compileBackwardDataStream()
      84             :         {
      85           0 :             auto desc = dnnl::convolution_backward_data::desc(
      86             :                 /* Convolution algorithm */ dnnl::algorithm::convolution_auto,
      87           0 :                 /* Input Gradient descriptor */ _inputGradient.front().descriptor,
      88           0 :                 /* Weights descriptor */ _weights.descriptor,
      89           0 :                 /* Output gradient descriptor */ _outputGradient.front().descriptor,
      90           0 :                 /* Strides */ _stridesDimensions,
      91           0 :                 /* Padding */ _paddingLowDimensions, _paddingHighDimensions);
      92             : 
      93           0 :             _backwardDataPrimitiveDescriptor = dnnl::convolution_backward_data::primitive_desc(
      94           0 :                 desc, *_engine, _forwardPrimitiveDescriptor);
      95             : 
      96             :             // Reorder output gradient of necessary
      97           0 :             this->reorderMemory(_backwardDataPrimitiveDescriptor.diff_dst_desc(),
      98           0 :                                 _outputGradient.front(), _backwardStream);
      99             : 
     100             :             // Reorder weights if necessary
     101           0 :             this->reorderMemory(_backwardDataPrimitiveDescriptor.weights_desc(), _weights,
     102           0 :                                 _backwardStream);
     103             : 
     104             :             // Set input gradient memory
     105           0 :             _inputGradient.front().describedMemory = std::make_shared<dnnl::memory>(
     106           0 :                 _backwardDataPrimitiveDescriptor.diff_src_desc(), *_engine);
     107             : 
     108             :             // Push backward data primitive
     109           0 :             ELSA_ML_ADD_DNNL_PRIMITIVE(
     110             :                 _backwardStream, dnnl::convolution_backward_data(_backwardDataPrimitiveDescriptor));
     111             : 
     112           0 :             BaseType::validateDnnlMemory(_inputGradient.front().describedMemory,
     113           0 :                                          _weights.effectiveMemory,
     114           0 :                                          _outputGradient.front().effectiveMemory);
     115             : 
     116           0 :             _backwardStream.arguments.push_back(
     117           0 :                 {/*  Input gradient */ {DNNL_ARG_DIFF_SRC, *_inputGradient.front().describedMemory},
     118           0 :                  /*  Weights */ {DNNL_ARG_WEIGHTS, *_weights.effectiveMemory},
     119             :                  /*  Output gradient */
     120           0 :                  {DNNL_ARG_DIFF_DST, *_outputGradient.front().effectiveMemory}});
     121           0 :         }
     122             : 
     123             :         template <typename data_t>
     124           0 :         void DnnlConvolution<data_t>::compileBackwardWeightsStream()
     125             :         {
     126             :             // Backward descriptor for weights backprop
     127           0 :             auto desc = dnnl::convolution_backward_weights::desc(
     128             :                 /* Convolution algorithm */ dnnl::algorithm::convolution_auto,
     129           0 :                 /* Input gradient descriptor */ _input.front().descriptor,
     130           0 :                 /* Weights gradient descriptor */ _weightsGradient.descriptor,
     131           0 :                 /* Bias gradient descriptor */ _biasGradient.descriptor,
     132           0 :                 /* Output gradient descriptor */ _outputGradient.front().descriptor,
     133           0 :                 /* Strides */ _stridesDimensions,
     134           0 :                 /* Padding */ _paddingLowDimensions, _paddingHighDimensions);
     135             : 
     136           0 :             _backwardWeightsPrimitiveDescriptor =
     137           0 :                 dnnl::convolution_backward_weights::primitive_desc(desc, *_engine,
     138           0 :                                                                    _forwardPrimitiveDescriptor);
     139             : 
     140             :             // Do we need reorder for gradient src memory?
     141           0 :             this->reorderMemory(_backwardWeightsPrimitiveDescriptor.src_desc(), _input.front(),
     142           0 :                                 _backwardStream);
     143             : 
     144             :             // Do we need to reorder gradient destination memory?
     145           0 :             this->reorderMemory(_backwardWeightsPrimitiveDescriptor.diff_dst_desc(),
     146           0 :                                 _outputGradient.front(), _backwardStream);
     147             : 
     148           0 :             BaseType::validateDnnlMemory(
     149           0 :                 _input.front().effectiveMemory, _biasGradient.effectiveMemory,
     150           0 :                 _outputGradient.front().effectiveMemory, _weightsGradient.describedMemory);
     151             : 
     152           0 :             ELSA_ML_ADD_DNNL_PRIMITIVE(_backwardStream, dnnl::convolution_backward_weights(
     153             :                                                             _backwardWeightsPrimitiveDescriptor));
     154             : 
     155           0 :             _backwardStream.arguments.push_back(
     156             :                 {/* Input */
     157           0 :                  {DNNL_ARG_SRC, *_input.front().effectiveMemory},
     158           0 :                  {DNNL_ARG_DIFF_DST, *_outputGradient.front().effectiveMemory},
     159             :                  /* Output */
     160           0 :                  {DNNL_ARG_DIFF_WEIGHTS, *_weightsGradient.describedMemory},
     161           0 :                  {DNNL_ARG_DIFF_BIAS, *_biasGradient.effectiveMemory}});
     162             : 
     163           0 :             _weightsGradient.effectiveMemory = _weightsGradient.describedMemory;
     164             : 
     165           0 :             _weightsGradient.wasReordered = true;
     166           0 :             _weightsGradient.effectiveMemory = std::make_shared<dnnl::memory>(
     167             :                 dnnl::memory::desc(
     168           0 :                     {{_weightsGradient.dimensions}, _typeTag, _weightsGradient.formatTag}),
     169           0 :                 *_engine);
     170           0 :             ELSA_ML_ADD_DNNL_PRIMITIVE(_forwardStream,
     171             :                                        dnnl::reorder(*_weightsGradient.describedMemory,
     172             :                                                      *_weightsGradient.effectiveMemory));
     173           0 :             _forwardStream.arguments.push_back({{DNNL_ARG_FROM, *_weightsGradient.describedMemory},
     174           0 :                                                 {DNNL_ARG_TO, *_weightsGradient.effectiveMemory}});
     175             : 
     176           0 :             if (_weightsGradient.describedMemory->get_desc()
     177           0 :                 != _backwardWeightsPrimitiveDescriptor.diff_weights_desc()) {
     178           0 :                 _weightsGradient.wasReordered = true;
     179           0 :                 _weightsGradient.describedMemory = std::make_shared<dnnl::memory>(
     180           0 :                     _backwardWeightsPrimitiveDescriptor.diff_weights_desc(), *_engine);
     181           0 :                 _backwardStream.arguments.back().insert(
     182           0 :                     {DNNL_ARG_DIFF_WEIGHTS, *_weightsGradient.describedMemory});
     183           0 :                 ELSA_ML_ADD_DNNL_PRIMITIVE(_backwardStream,
     184             :                                            dnnl::reorder(*_weightsGradient.describedMemory,
     185             :                                                          *_weightsGradient.effectiveMemory));
     186           0 :                 _backwardStream.arguments.push_back(
     187           0 :                     {{DNNL_ARG_FROM, *_weightsGradient.describedMemory},
     188           0 :                      {DNNL_ARG_TO, *_weightsGradient.effectiveMemory}});
     189             :             } else {
     190           0 :                 _backwardStream.arguments.back().insert(
     191           0 :                     {DNNL_ARG_DIFF_WEIGHTS, *_weightsGradient.effectiveMemory});
     192             :             }
     193           0 :         }
     194             : 
     195             :         template <typename data_t>
     196           0 :         void DnnlConvolution<data_t>::compileBackwardStream()
     197             :         {
     198           0 :             BaseType::compileBackwardStream();
     199           0 :             compileBackwardWeightsStream();
     200           0 :             compileBackwardDataStream();
     201             : 
     202           0 :             _inputGradient.front().effectiveMemory = _inputGradient.front().describedMemory;
     203             :             if (1) {
     204           0 :                 _inputGradient.front().wasReordered = true;
     205           0 :                 _inputGradient.front().effectiveMemory = std::make_shared<dnnl::memory>(
     206           0 :                     dnnl::memory::desc({{_inputGradient.front().dimensions},
     207             :                                         _typeTag,
     208           0 :                                         _inputGradient.front().formatTag}),
     209           0 :                     *_engine);
     210           0 :                 ELSA_ML_ADD_DNNL_PRIMITIVE(_forwardStream,
     211             :                                            dnnl::reorder(*_inputGradient.front().describedMemory,
     212             :                                                          *_inputGradient.front().effectiveMemory));
     213           0 :                 _forwardStream.arguments.push_back(
     214           0 :                     {{DNNL_ARG_FROM, *_inputGradient.front().describedMemory},
     215           0 :                      {DNNL_ARG_TO, *_inputGradient.front().effectiveMemory}});
     216             :             }
     217           0 :         }
     218             : 
     219             :         template class DnnlConvolution<float>;
     220             : 
     221             :         template <typename data_t>
     222             :         DnnlDeconvolution<data_t>::DnnlDeconvolution(const VolumeDescriptor& inputDescriptor,
     223             :                                                      const VolumeDescriptor& outputDescriptor,
     224             :                                                      const VolumeDescriptor& weightsDescriptor,
     225             :                                                      const IndexVector_t& strides,
     226             :                                                      const IndexVector_t& paddingLow,
     227             :                                                      const IndexVector_t& paddingHigh,
     228             :                                                      Initializer initializer)
     229             :             : DnnlTrainableLayer<data_t>(inputDescriptor, outputDescriptor, weightsDescriptor,
     230             :                                          initializer)
     231             :         {
     232             :             for (const auto& dim : strides)
     233             :                 _stridesDimensions.push_back(dim);
     234             : 
     235             :             for (const auto& dim : paddingLow)
     236             :                 _paddingLowDimensions.push_back(dim);
     237             : 
     238             :             for (const auto& dim : paddingHigh)
     239             :                 _paddingHighDimensions.push_back(dim);
     240             :         }
     241             : 
     242             :         template <typename data_t>
     243             :         void DnnlDeconvolution<data_t>::compileForwardStream()
     244             :         {
     245             :             BaseType::compileForwardStream();
     246             : 
     247             :             // TODO(todo): Add support for dilated convolution, we currently assume dilation of 0
     248             :             auto desc = dnnl::deconvolution_forward::desc(
     249             :                 /* Propagation kind */ dnnl::prop_kind::forward_training,
     250             :                 /* Convolution algorithm */ dnnl::algorithm::convolution_auto,
     251             :                 /* Input descriptor */ _input.descriptor,
     252             :                 /* Weights descriptor */ _weights.descriptor,
     253             :                 /* Bias descriptor*/ _bias.descriptor,
     254             :                 /* Output descriptor */ _output.descriptor,
     255             :                 /* Strides for spatial dims */ _stridesDimensions,
     256             :                 /* Dilation for spatial dims */ {0, 0},
     257             :                 /* Lower padding for spatial dims */ _paddingLowDimensions,
     258             :                 /* Higher padding for spatial dims */ _paddingHighDimensions);
     259             : 
     260             :             _forwardPrimitiveDescriptor =
     261             :                 dnnl::deconvolution_forward::primitive_desc(desc, *_engine);
     262             : 
     263             :             // Do we need to reorder?
     264             :             this->reorderMemory(_forwardPrimitiveDescriptor.src_desc(), _input.front(),
     265             :                                 _forwardStream);
     266             :             this->reorderMemory(_forwardPrimitiveDescriptor.weights_desc(), _weights,
     267             :                                 _forwardStream);
     268             : 
     269             :             _output.describedMemory =
     270             :                 std::make_shared<dnnl::memory>(_forwardPrimitiveDescriptor.dst_desc(), *_engine);
     271             : 
     272             :             ELSA_ML_ADD_DNNL_PRIMITIVE(_forwardStream,
     273             :                                        dnnl::deconvolution_forward(_forwardPrimitiveDescriptor));
     274             : 
     275             :             BaseType::validateDnnlMemory(_input.front().effectiveMemory, _weights.effectiveMemory,
     276             :                                          _bias.effectiveMemory, _output.describedMemory);
     277             : 
     278             :             _forwardStream.arguments.push_back({{DNNL_ARG_SRC, *_input.front().effectiveMemory},
     279             :                                                 {DNNL_ARG_WEIGHTS, *_weights.effectiveMemory},
     280             :                                                 {DNNL_ARG_BIAS, *_bias.effectiveMemory},
     281             :                                                 {DNNL_ARG_DST, *_output.describedMemory}});
     282             : 
     283             :             // If either the input or weights have been reordered there could potential reordering
     284             :             // for output
     285             :             _output.effectiveMemory = _output.describedMemory;
     286             :             if (_input.front().wasReordered || _weights.wasReordered) {
     287             :                 _output.wasReordered = true;
     288             :                 _output.effectiveMemory = std::make_shared<dnnl::memory>(
     289             :                     dnnl::memory::desc({{_output.dimensions}, _typeTag, _output.formatTag}),
     290             :                     *_engine);
     291             :                 ELSA_ML_ADD_DNNL_PRIMITIVE(_forwardStream, dnnl::reorder(*_output.describedMemory,
     292             :                                                                          *_output.effectiveMemory));
     293             :                 _forwardStream.arguments.push_back({{DNNL_ARG_FROM, *_output.describedMemory},
     294             :                                                     {DNNL_ARG_TO, *_output.effectiveMemory}});
     295             :             }
     296             :         }
     297             : 
     298             :         template <typename data_t>
     299             :         void DnnlDeconvolution<data_t>::compileBackwardStream()
     300             :         {
     301             :             BaseType::compileBackwardStream();
     302             :             compileBackwardWeightsStream();
     303             :             compileBackwardDataStream();
     304             : 
     305             :             _inputGradient.front().effectiveMemory = _inputGradient.front().describedMemory;
     306             :             if (1) {
     307             :                 _inputGradient.front().wasReordered = true;
     308             :                 _inputGradient.front().effectiveMemory = std::make_shared<dnnl::memory>(
     309             :                     dnnl::memory::desc({{_inputGradient.front().dimensions},
     310             :                                         _typeTag,
     311             :                                         _inputGradient.front().formatTag}),
     312             :                     *_engine);
     313             :                 ELSA_ML_ADD_DNNL_PRIMITIVE(_forwardStream,
     314             :                                            dnnl::reorder(*_inputGradient.front().describedMemory,
     315             :                                                          *_inputGradient.front().effectiveMemory));
     316             :                 _forwardStream.arguments.push_back(
     317             :                     {{DNNL_ARG_FROM, *_inputGradient.front().describedMemory},
     318             :                      {DNNL_ARG_TO, *_inputGradient.front().effectiveMemory}});
     319             :             }
     320             :         }
     321             : 
     322             :         template <typename data_t>
     323             :         void DnnlDeconvolution<data_t>::compileBackwardDataStream()
     324             :         {
     325             :             auto desc = dnnl::deconvolution_backward_data::desc(
     326             :                 /* Convolution algorithm */ dnnl::algorithm::convolution_auto,
     327             :                 /* Input Gradient descriptor */ _inputGradient.front().descriptor,
     328             :                 /* Weights descriptor */ _weights.descriptor,
     329             :                 /* Output gradient descriptor */ _outputGradient.front().descriptor,
     330             :                 /* Strides */ _stridesDimensions,
     331             :                 /* Padding */ _paddingLowDimensions, _paddingHighDimensions);
     332             : 
     333             :             _backwardDataPrimitiveDescriptor = dnnl::deconvolution_backward_data::primitive_desc(
     334             :                 desc, *_engine, _forwardPrimitiveDescriptor);
     335             : 
     336             :             // Reorder output gradient of necessary
     337             :             this->reorderMemory(_backwardDataPrimitiveDescriptor.diff_dst_desc(),
     338             :                                 _outputGradient.front(), _backwardStream);
     339             : 
     340             :             // Reorder weights if necessary
     341             :             this->reorderMemory(_backwardDataPrimitiveDescriptor.weights_desc(), _weights,
     342             :                                 _backwardStream);
     343             : 
     344             :             // Set input gradient memory
     345             :             _inputGradient.front().describedMemory = std::make_shared<dnnl::memory>(
     346             :                 _backwardDataPrimitiveDescriptor.diff_src_desc(), *_engine);
     347             : 
     348             :             // Push backward data primitive
     349             :             ELSA_ML_ADD_DNNL_PRIMITIVE(_backwardStream, dnnl::deconvolution_backward_data(
     350             :                                                             _backwardDataPrimitiveDescriptor));
     351             : 
     352             :             BaseType::validateDnnlMemory(_inputGradient.front().describedMemory,
     353             :                                          _weights.effectiveMemory,
     354             :                                          _outputGradient.front().effectiveMemory);
     355             : 
     356             :             _backwardStream.arguments.push_back(
     357             :                 {/*  Input gradient */ {DNNL_ARG_DIFF_SRC, *_inputGradient.front().describedMemory},
     358             :                  /*  Weights */ {DNNL_ARG_WEIGHTS, *_weights.effectiveMemory},
     359             :                  /*  Output gradient */
     360             :                  {DNNL_ARG_DIFF_DST, *_outputGradient.front().effectiveMemory}});
     361             :         }
     362             : 
     363             :         template <typename data_t>
     364             :         void DnnlDeconvolution<data_t>::compileBackwardWeightsStream()
     365             :         {
     366             :             // Backward descriptor for weights backprop
     367             :             auto desc = dnnl::deconvolution_backward_weights::desc(
     368             :                 /* Convolution algorithm */ dnnl::algorithm::convolution_auto,
     369             :                 /* Input gradient descriptor */ _input.front().descriptor,
     370             :                 /* Weights gradient descriptor */ _weightsGradient.descriptor,
     371             :                 /* Bias gradient descriptor */ _biasGradient.descriptor,
     372             :                 /* Output gradient descriptor */ _outputGradient.front().descriptor,
     373             :                 /* Strides */ _stridesDimensions,
     374             :                 /* Padding */ _paddingLowDimensions, _paddingHighDimensions);
     375             : 
     376             :             _backwardWeightsPrimitiveDescriptor =
     377             :                 dnnl::deconvolution_backward_weights::primitive_desc(desc, *_engine,
     378             :                                                                      _forwardPrimitiveDescriptor);
     379             : 
     380             :             // Do we need reorder for gradient src memory?
     381             :             this->reorderMemory(_backwardWeightsPrimitiveDescriptor.src_desc(), _input.front(),
     382             :                                 _backwardStream);
     383             : 
     384             :             // Do we need to reorder gradient destination memory?
     385             :             this->reorderMemory(_backwardWeightsPrimitiveDescriptor.diff_dst_desc(),
     386             :                                 _outputGradient.front(), _backwardStream);
     387             : 
     388             :             BaseType::validateDnnlMemory(
     389             :                 _input.front().effectiveMemory, _biasGradient.effectiveMemory,
     390             :                 _outputGradient.front().effectiveMemory, _weightsGradient.describedMemory);
     391             : 
     392             :             ELSA_ML_ADD_DNNL_PRIMITIVE(_backwardStream, dnnl::deconvolution_backward_weights(
     393             :                                                             _backwardWeightsPrimitiveDescriptor));
     394             : 
     395             :             _backwardStream.arguments.push_back(
     396             :                 {/* Input */
     397             :                  {DNNL_ARG_SRC, *_input.front().effectiveMemory},
     398             :                  {DNNL_ARG_DIFF_DST, *_outputGradient.front().effectiveMemory},
     399             :                  /* Output */
     400             :                  {DNNL_ARG_DIFF_WEIGHTS, *_weightsGradient.describedMemory},
     401             :                  {DNNL_ARG_DIFF_BIAS, *_biasGradient.effectiveMemory}});
     402             : 
     403             :             _weightsGradient.effectiveMemory = _weightsGradient.describedMemory;
     404             : 
     405             :             _weightsGradient.wasReordered = true;
     406             :             _weightsGradient.effectiveMemory = std::make_shared<dnnl::memory>(
     407             :                 dnnl::memory::desc(
     408             :                     {{_weightsGradient.dimensions}, _typeTag, _weightsGradient.formatTag}),
     409             :                 *_engine);
     410             :             ELSA_ML_ADD_DNNL_PRIMITIVE(_forwardStream,
     411             :                                        dnnl::reorder(*_weightsGradient.describedMemory,
     412             :                                                      *_weightsGradient.effectiveMemory));
     413             :             _forwardStream.arguments.push_back({{DNNL_ARG_FROM, *_weightsGradient.describedMemory},
     414             :                                                 {DNNL_ARG_TO, *_weightsGradient.effectiveMemory}});
     415             : 
     416             :             if (_weightsGradient.describedMemory->get_desc()
     417             :                 != _backwardWeightsPrimitiveDescriptor.diff_weights_desc()) {
     418             :                 _weightsGradient.wasReordered = true;
     419             :                 _weightsGradient.describedMemory = std::make_shared<dnnl::memory>(
     420             :                     _backwardWeightsPrimitiveDescriptor.diff_weights_desc(), *_engine);
     421             :                 _backwardStream.arguments.back().insert(
     422             :                     {DNNL_ARG_DIFF_WEIGHTS, *_weightsGradient.describedMemory});
     423             :                 ELSA_ML_ADD_DNNL_PRIMITIVE(_backwardStream,
     424             :                                            dnnl::reorder(*_weightsGradient.describedMemory,
     425             :                                                          *_weightsGradient.effectiveMemory));
     426             :                 _backwardStream.arguments.push_back(
     427             :                     {{DNNL_ARG_FROM, *_weightsGradient.describedMemory},
     428             :                      {DNNL_ARG_TO, *_weightsGradient.effectiveMemory}});
     429             :             } else {
     430             :                 _backwardStream.arguments.back().insert(
     431             :                     {DNNL_ARG_DIFF_WEIGHTS, *_weightsGradient.effectiveMemory});
     432             :             }
     433             :         }
     434             :     } // namespace detail
     435             : } // namespace elsa::ml

Generated by: LCOV version 1.15