LCOV - code coverage report
Current view: top level - projectors_cuda - SiddonsMethodCUDA.cpp (source / functions) Hit Total Coverage
Test: test_coverage.info.cleaned Lines: 0 158 0.0 %
Date: 2022-08-04 03:43:28 Functions: 0 22 0.0 %

          Line data    Source code
       1             : #include "SiddonsMethodCUDA.h"
       2             : #include "LogGuard.h"
       3             : #include "Timer.h"
       4             : #include "TypeCasts.hpp"
       5             : 
       6             : #include "Logger.h"
       7             : namespace elsa
       8             : {
       9             :     template <typename data_t>
      10           0 :     SiddonsMethodCUDA<data_t>::SiddonsMethodCUDA(const VolumeDescriptor& domainDescriptor,
      11             :                                                  const DetectorDescriptor& rangeDescriptor)
      12             :         : base_type(domainDescriptor, rangeDescriptor),
      13           0 :           _detectorDescriptor(static_cast<DetectorDescriptor&>(*_rangeDescriptor)),
      14           0 :           _volumeDescriptor(static_cast<VolumeDescriptor&>(*_domainDescriptor))
      15             :     {
      16           0 :         auto dim = static_cast<std::size_t>(_domainDescriptor->getNumberOfDimensions());
      17           0 :         if (dim != static_cast<std::size_t>(_rangeDescriptor->getNumberOfDimensions())) {
      18           0 :             throw LogicError(
      19           0 :                 std::string("SiddonsMethodCUDA: domain and range dimension need to match"));
      20             :         }
      21             : 
      22           0 :         if (dim != 2 && dim != 3) {
      23           0 :             throw LogicError("SiddonsMethodCUDA: only supporting 2d/3d operations");
      24             :         }
      25             : 
      26           0 :         if (_detectorDescriptor.getNumberOfGeometryPoses() == 0) {
      27           0 :             throw LogicError("SiddonsMethodCUDA: geometry list was empty");
      28             :         }
      29             : 
      30           0 :         const index_t numGeometry = _detectorDescriptor.getNumberOfGeometryPoses();
      31             : 
      32             :         // allocate device memory and copy ray origins and the inverse of the significant part of
      33             :         // projection matrices to device
      34           0 :         cudaExtent extent = make_cudaExtent(dim * sizeof(real_t), dim, asUnsigned(numGeometry));
      35             : 
      36           0 :         if (cudaMallocPitch(&_rayOrigins.ptr, &_rayOrigins.pitch, dim * sizeof(real_t),
      37             :                             asUnsigned(numGeometry))
      38           0 :             != cudaSuccess)
      39           0 :             throw std::bad_alloc();
      40           0 :         _rayOrigins.xsize = dim;
      41           0 :         _rayOrigins.ysize = asUnsigned(numGeometry);
      42             : 
      43           0 :         if (cudaMalloc3D(&_projInvMatrices, extent) != cudaSuccess)
      44           0 :             throw std::bad_alloc();
      45             : 
      46           0 :         auto* projPtr = (int8_t*) _projInvMatrices.ptr;
      47           0 :         auto projPitch = _projInvMatrices.pitch;
      48           0 :         auto* rayBasePtr = (int8_t*) _rayOrigins.ptr;
      49           0 :         auto rayPitch = _rayOrigins.pitch;
      50             : 
      51           0 :         for (unsigned i = 0; i < numGeometry; i++) {
      52           0 :             auto geometry = _detectorDescriptor.getGeometryAt(i);
      53             : 
      54           0 :             if (!geometry)
      55           0 :                 throw LogicError("JosephsMethodCUDA: Access not existing geometry pose");
      56             : 
      57           0 :             RealMatrix_t P = geometry->getInverseProjectionMatrix().block(0, 0, dim, dim);
      58           0 :             int8_t* slice = projPtr + i * projPitch * dim;
      59             :             // CUDA also uses a column-major representation, directly transfer matrix
      60             :             // transfer inverse of projection matrix
      61           0 :             if (cudaMemcpy2DAsync(slice, projPitch, P.data(), dim * sizeof(real_t),
      62             :                                   dim * sizeof(real_t), dim, cudaMemcpyDefault)
      63           0 :                 != cudaSuccess)
      64           0 :                 throw LogicError(
      65             :                     "SiddonsMethodCUDA: Could not transfer inverse projection matrices to GPU.");
      66             : 
      67           0 :             int8_t* rayPtr = rayBasePtr + i * rayPitch;
      68             :             // get camera center using direct inverse
      69           0 :             RealVector_t ro =
      70           0 :                 -P * geometry->getProjectionMatrix().block(0, static_cast<index_t>(dim), dim, 1);
      71             :             // transfer ray origin
      72           0 :             if (cudaMemcpyAsync(rayPtr, ro.data(), dim * sizeof(real_t), cudaMemcpyDefault)
      73           0 :                 != cudaSuccess)
      74           0 :                 throw LogicError("SiddonsMethodCUDA: Could not transfer ray origins to GPU.");
      75             :         }
      76           0 :     }
      77             : 
      78             :     template <typename data_t>
      79           0 :     SiddonsMethodCUDA<data_t>::~SiddonsMethodCUDA()
      80             :     {
      81             :         // Free CUDA resources
      82           0 :         if (cudaFree(_rayOrigins.ptr) != cudaSuccess
      83           0 :             || cudaFree(_projInvMatrices.ptr) != cudaSuccess)
      84             :             Logger::get("SiddonsMethodCUDA")
      85           0 :                 ->error("Couldn't free GPU memory; This may cause problems later.");
      86           0 :     }
      87             : 
      88             :     template <typename data_t>
      89           0 :     SiddonsMethodCUDA<data_t>* SiddonsMethodCUDA<data_t>::_cloneImpl() const
      90             :     {
      91           0 :         return new SiddonsMethodCUDA<data_t>(_volumeDescriptor, _detectorDescriptor);
      92             :     }
      93             : 
      94             :     template <typename data_t>
      95           0 :     void SiddonsMethodCUDA<data_t>::forward(const BoundingBox& aabb, const DataContainer<data_t>& x,
      96             :                                             DataContainer<data_t>& Ax) const
      97             :     {
      98           0 :         Timer timeGuard("SiddonsMethodCUDA", "apply");
      99             : 
     100           0 :         traverseVolume<false>(aabb, (void*) &(x[0]), (void*) &(Ax[0]));
     101           0 :     }
     102             : 
     103             :     template <typename data_t>
     104           0 :     void SiddonsMethodCUDA<data_t>::backward(const BoundingBox& aabb,
     105             :                                              const DataContainer<data_t>& y,
     106             :                                              DataContainer<data_t>& Aty) const
     107             :     {
     108           0 :         Timer timeguard("SiddonsMethodCUDA", "applyAdjoint");
     109             : 
     110           0 :         traverseVolume<true>(aabb, (void*) &(Aty[0]), (void*) &(y[0]));
     111           0 :     }
     112             : 
     113             :     template <typename data_t>
     114           0 :     bool SiddonsMethodCUDA<data_t>::_isEqual(const LinearOperator<data_t>& other) const
     115             :     {
     116           0 :         if (!LinearOperator<data_t>::isEqual(other))
     117           0 :             return false;
     118             : 
     119           0 :         auto otherSM = downcast_safe<SiddonsMethodCUDA>(&other);
     120           0 :         if (!otherSM)
     121           0 :             return false;
     122             : 
     123           0 :         return true;
     124             :     }
     125             : 
     126             :     template <typename data_t>
     127             :     template <bool adjoint>
     128           0 :     void SiddonsMethodCUDA<data_t>::traverseVolume(const BoundingBox& aabb, void* volumePtr,
     129             :                                                    void* sinoPtr) const
     130             :     {
     131           0 :         auto domainDims = _domainDescriptor->getNumberOfCoefficientsPerDimension();
     132           0 :         auto domainDimsui = domainDims.template cast<unsigned int>();
     133           0 :         IndexVector_t rangeDims = _rangeDescriptor->getNumberOfCoefficientsPerDimension();
     134           0 :         auto rangeDimsui = rangeDims.template cast<unsigned int>();
     135             :         cudaPitchedPtr dvolumePtr;
     136             :         cudaPitchedPtr dsinoPtr;
     137             : 
     138           0 :         if (_domainDescriptor->getNumberOfDimensions() == 3) {
     139             :             typename TraverseSiddonsCUDA<data_t, 3>::BoundingBox boundingBox;
     140           0 :             boundingBox._max[0] = aabb._max.template cast<uint32_t>()[0];
     141           0 :             boundingBox._max[1] = aabb._max.template cast<uint32_t>()[1];
     142           0 :             boundingBox._max[2] = aabb._max.template cast<uint32_t>()[2];
     143             : 
     144             :             // transfer volume and sinogram
     145             :             cudaExtent volExt =
     146           0 :                 make_cudaExtent(domainDimsui[0] * sizeof(data_t), domainDimsui[1], domainDimsui[2]);
     147           0 :             if (cudaMalloc3D(&dvolumePtr, volExt) != cudaSuccess)
     148           0 :                 throw std::bad_alloc();
     149             : 
     150             :             cudaExtent sinoExt =
     151           0 :                 make_cudaExtent(rangeDimsui[0] * sizeof(data_t), rangeDimsui[1], rangeDimsui[2]);
     152           0 :             if (cudaMalloc3D(&dsinoPtr, sinoExt) != cudaSuccess)
     153           0 :                 throw std::bad_alloc();
     154             : 
     155             :             if (adjoint) {
     156           0 :                 copy3DDataContainerGPU<ContainerCpyKind::cpyContainerToRawGPU>(sinoPtr, dsinoPtr,
     157             :                                                                                sinoExt);
     158           0 :                 if (cudaMemset3DAsync(dvolumePtr, 0, volExt) != cudaSuccess)
     159           0 :                     throw LogicError("SiddonsMethodCUDA::traverseVolume: Could not "
     160             :                                      "zero-initialize volume on GPU.");
     161             :             } else {
     162           0 :                 copy3DDataContainerGPU<ContainerCpyKind::cpyContainerToRawGPU>(volumePtr,
     163             :                                                                                dvolumePtr, volExt);
     164             :             }
     165             : 
     166           0 :             dim3 sinogramDims(rangeDimsui[2], rangeDimsui[1], rangeDimsui[0]);
     167             :             // synchronize because we are using multiple streams
     168           0 :             cudaDeviceSynchronize();
     169             :             if (adjoint) {
     170           0 :                 TraverseSiddonsCUDA<data_t, 3>::traverseAdjoint(
     171           0 :                     sinogramDims, THREADS_PER_BLOCK, (int8_t*) dvolumePtr.ptr, dvolumePtr.pitch,
     172           0 :                     (int8_t*) dsinoPtr.ptr, dsinoPtr.pitch, (int8_t*) _rayOrigins.ptr,
     173           0 :                     static_cast<uint32_t>(_rayOrigins.pitch), (int8_t*) _projInvMatrices.ptr,
     174           0 :                     static_cast<uint32_t>(_projInvMatrices.pitch), boundingBox);
     175             :             } else {
     176           0 :                 TraverseSiddonsCUDA<data_t, 3>::traverseForward(
     177           0 :                     sinogramDims, THREADS_PER_BLOCK, (int8_t*) dvolumePtr.ptr, dvolumePtr.pitch,
     178           0 :                     (int8_t*) dsinoPtr.ptr, dsinoPtr.pitch, (int8_t*) _rayOrigins.ptr,
     179           0 :                     static_cast<uint32_t>(_rayOrigins.pitch), (int8_t*) _projInvMatrices.ptr,
     180           0 :                     static_cast<uint32_t>(_projInvMatrices.pitch), boundingBox);
     181             :             }
     182             :             // synchonize because we are using multiple streams
     183           0 :             cudaDeviceSynchronize();
     184             : 
     185             :             // retrieve results from GPU
     186             :             if (adjoint)
     187           0 :                 copy3DDataContainerGPU<ContainerCpyKind::cpyRawGPUToContainer, false>(
     188             :                     volumePtr, dvolumePtr, volExt);
     189             :             else
     190           0 :                 copy3DDataContainerGPU<ContainerCpyKind::cpyRawGPUToContainer, false>(
     191             :                     sinoPtr, dsinoPtr, sinoExt);
     192             : 
     193             :         } else {
     194             :             typename TraverseSiddonsCUDA<data_t, 2>::BoundingBox boundingBox;
     195           0 :             boundingBox._max[0] = aabb._max.template cast<uint32_t>()[0];
     196           0 :             boundingBox._max[1] = aabb._max.template cast<uint32_t>()[1];
     197             : 
     198             :             // transfer volume and sinogram
     199             : 
     200           0 :             if (cudaMallocPitch(&dvolumePtr.ptr, &dvolumePtr.pitch,
     201           0 :                                 domainDimsui[0] * sizeof(data_t), domainDimsui[1])
     202           0 :                 != cudaSuccess)
     203           0 :                 throw std::bad_alloc();
     204             : 
     205           0 :             if (cudaMallocPitch(&dsinoPtr.ptr, &dsinoPtr.pitch, rangeDimsui[0] * sizeof(data_t),
     206             :                                 rangeDimsui[1])
     207           0 :                 != cudaSuccess)
     208           0 :                 throw std::bad_alloc();
     209             : 
     210             :             if (adjoint) {
     211           0 :                 if (cudaMemcpy2DAsync(
     212           0 :                         dsinoPtr.ptr, dsinoPtr.pitch, sinoPtr, rangeDimsui[0] * sizeof(data_t),
     213           0 :                         rangeDimsui[0] * sizeof(data_t), rangeDimsui[1], cudaMemcpyDefault)
     214           0 :                     != cudaSuccess)
     215           0 :                     throw LogicError(
     216             :                         "SiddonsMethodCUDA::traverseVolume: Couldn't transfer sinogram to GPU.");
     217             : 
     218           0 :                 if (cudaMemset2DAsync(dvolumePtr.ptr, dvolumePtr.pitch, 0,
     219           0 :                                       domainDimsui[0] * sizeof(data_t), domainDimsui[1])
     220           0 :                     != cudaSuccess)
     221           0 :                     throw LogicError("SiddonsMethodCUDA::traverseVolume: Could not "
     222             :                                      "zero-initialize volume on GPU.");
     223             :             } else {
     224           0 :                 if (cudaMemcpy2DAsync(dvolumePtr.ptr, dvolumePtr.pitch, volumePtr,
     225           0 :                                       domainDimsui[0] * sizeof(data_t),
     226           0 :                                       domainDimsui[0] * sizeof(data_t), domainDimsui[1],
     227             :                                       cudaMemcpyDefault)
     228           0 :                     != cudaSuccess)
     229           0 :                     throw LogicError(
     230             :                         "SiddonsMethodCUDA::traverseVolume: Couldn't transfer volume to GPU.");
     231             :             }
     232             : 
     233             :             // perform projection
     234           0 :             dim3 sinogramDims(rangeDimsui[1], 1, rangeDimsui[0]);
     235             :             // synchronize because we are using multiple streams
     236           0 :             cudaDeviceSynchronize();
     237             :             if (adjoint) {
     238           0 :                 TraverseSiddonsCUDA<data_t, 2>::traverseAdjoint(
     239           0 :                     sinogramDims, THREADS_PER_BLOCK, (int8_t*) dvolumePtr.ptr, dvolumePtr.pitch,
     240           0 :                     (int8_t*) dsinoPtr.ptr, dsinoPtr.pitch, (int8_t*) _rayOrigins.ptr,
     241           0 :                     static_cast<uint32_t>(_rayOrigins.pitch), (int8_t*) _projInvMatrices.ptr,
     242           0 :                     static_cast<uint32_t>(_projInvMatrices.pitch), boundingBox);
     243             :             } else {
     244           0 :                 TraverseSiddonsCUDA<data_t, 2>::traverseForward(
     245           0 :                     sinogramDims, THREADS_PER_BLOCK, (int8_t*) dvolumePtr.ptr, dvolumePtr.pitch,
     246           0 :                     (int8_t*) dsinoPtr.ptr, dsinoPtr.pitch, (int8_t*) _rayOrigins.ptr,
     247           0 :                     static_cast<uint32_t>(_rayOrigins.pitch), (int8_t*) _projInvMatrices.ptr,
     248           0 :                     static_cast<uint32_t>(_projInvMatrices.pitch), boundingBox);
     249             :             }
     250             :             // synchonize because we are using multiple streams
     251           0 :             cudaDeviceSynchronize();
     252             : 
     253             :             // retrieve results from GPU
     254             :             if (adjoint) {
     255           0 :                 if (cudaMemcpy2D(volumePtr, domainDimsui[0] * sizeof(data_t), dvolumePtr.ptr,
     256           0 :                                  dvolumePtr.pitch, domainDimsui[0] * sizeof(data_t),
     257           0 :                                  domainDimsui[1], cudaMemcpyDefault)
     258           0 :                     != cudaSuccess)
     259           0 :                     throw LogicError(
     260             :                         "SiddonsMethodCUDA::traverseVolume: Couldn't retrieve results from GPU.");
     261             :             } else {
     262           0 :                 if (cudaMemcpy2D(sinoPtr, rangeDimsui[0] * sizeof(data_t), dsinoPtr.ptr,
     263           0 :                                  dsinoPtr.pitch, rangeDimsui[0] * sizeof(data_t), rangeDimsui[1],
     264             :                                  cudaMemcpyDefault)
     265           0 :                     != cudaSuccess)
     266           0 :                     throw LogicError(
     267             :                         "SiddonsMethodCUDA::traverseVolume: Couldn't retrieve results from GPU");
     268             :             }
     269             :         }
     270             : 
     271             :         // free allocated memory
     272           0 :         if (cudaFree(dvolumePtr.ptr) != cudaSuccess || cudaFree(dsinoPtr.ptr) != cudaSuccess)
     273             :             Logger::get("SiddonsMethodCUDA")
     274           0 :                 ->error("Couldn't free GPU memory; This may cause problems later.");
     275           0 :     }
     276             : 
     277             :     template <typename data_t>
     278             :     template <typename SiddonsMethodCUDA<data_t>::ContainerCpyKind direction, bool async>
     279           0 :     void SiddonsMethodCUDA<data_t>::copy3DDataContainerGPU(void* hostData,
     280             :                                                            const cudaPitchedPtr& gpuData,
     281             :                                                            const cudaExtent& extent) const
     282             :     {
     283           0 :         cudaMemcpy3DParms cpyParams = {};
     284           0 :         cpyParams.extent = extent;
     285           0 :         cpyParams.kind = cudaMemcpyDefault;
     286             : 
     287             :         cudaPitchedPtr tmp =
     288           0 :             make_cudaPitchedPtr(hostData, extent.width, extent.width, extent.height);
     289             : 
     290             :         if (direction == ContainerCpyKind::cpyContainerToRawGPU) {
     291           0 :             cpyParams.dstPtr = gpuData;
     292           0 :             cpyParams.srcPtr = tmp;
     293             :         } else if (direction == ContainerCpyKind::cpyRawGPUToContainer) {
     294           0 :             cpyParams.srcPtr = gpuData;
     295           0 :             cpyParams.dstPtr = tmp;
     296             :         }
     297             : 
     298             :         if (async) {
     299           0 :             if (cudaMemcpy3DAsync(&cpyParams) != cudaSuccess)
     300           0 :                 throw LogicError("Failed copying data between device and host");
     301             :         } else {
     302           0 :             if (cudaMemcpy3D(&cpyParams) != cudaSuccess)
     303           0 :                 throw LogicError("Failed copying data between device acudaMemcpyKindnd host");
     304             :         }
     305           0 :     }
     306             : 
     307             :     // ------------------------------------------
     308             :     // explicit template instantiation
     309             :     template class SiddonsMethodCUDA<float>;
     310             :     template class SiddonsMethodCUDA<double>;
     311             : } // namespace elsa

Generated by: LCOV version 1.14