diff --git a/cuda/include/parallelproj_cuda.h b/cuda/include/parallelproj_cuda.h index d1e2384b..5c6d2383 100644 --- a/cuda/include/parallelproj_cuda.h +++ b/cuda/include/parallelproj_cuda.h @@ -5,350 +5,337 @@ #ifndef __PARALLELPROJ_CUDA_H__ #define __PARALLELPROJ_CUDA_H__ -#ifdef __cplusplus -extern "C" { +#ifdef __cplusplus +extern "C" +{ #endif -/** @brief 3D non-tof joseph back projector CUDA wrapper - * - * @param h_xstart array of shape [3*nlors] with the coordinates of the start points of the LORs. - * The start coordinates of the n-th LOR are at xstart[n*3 + i] with i = 0,1,2. - * Units are the ones of voxsize. - * @param h_xend array of shape [3*nlors] with the coordinates of the end points of the LORs. - * The start coordinates of the n-th LOR are at xstart[n*3 + i] with i = 0,1,2. - * Units are the ones of voxsize. - * @param d_img Pointer to device arrays of shape [n0*n1*n2] containing the 3D image to - * used to store the back projections. - * The pixel [i,j,k] is stored at [n1*n2*i + n2*j + k]. - * The backprojector adds existing values. - * @param h_img_origin array [x0_0,x0_1,x0_2] of coordinates of the center of the [0,0,0] voxel - * @param h_voxsize array [vs0, vs1, vs2] of the voxel sizes - * @param h_p array of length nlors containg the values to be back projected - * @param nlors number of projections (length of p array) - * @param h_img_dim array with dimensions of image [n0,n1,n2] - * @param threadsperblock number of threads per block - */ -void joseph3d_back_cuda(const float *h_xstart, - const float *h_xend, - float **d_img, - const float *h_img_origin, - const float *h_voxsize, - const float *h_p, - long long nlors, - const int *h_img_dim, - int threadsperblock); - - - -/** @brief 3D listmode tof joseph back projector CUDA wrapper - * - * @param h_xstart array of shape [3*nlors] with the coordinates of the start points of the LORs. - * The start coordinates of the n-th LOR are at xstart[n*3 + i] with i = 0,1,2. - * Units are the ones of voxsize. - * @param h_xend array of shape [3*nlors] with the coordinates of the end points of the LORs. - * The start coordinates of the n-th LOR are at xstart[n*3 + i] with i = 0,1,2. - * Units are the ones of voxsize. - * @param d_img Pointer to device arrays of shape [n0*n1*n2] containing the 3D image to - * used to store the back projections. - * The pixel [i,j,k] is stored at [n1*n2*i + n2*j + k]. - * The backprojector adds existing values. - * @param h_img_origin array [x0_0,x0_1,x0_2] of coordinates of the center of the [0,0,0] voxel - * @param h_voxsize array [vs0, vs1, vs2] of the voxel sizes - * @param h_p array of length nlors containg the values to be back projected - * @param nlors number of projections (length of p array) - * @param h_img_dim array with dimensions of image [n0,n1,n2] - * @param tofbin_width width of the TOF bins in spatial units (units of xstart and xend) - * @param h_sigma_tof array of length 1 or nlors (depending on lor_dependent_sigma_tof) - * with the TOF resolution (sigma) for each LOR in - * spatial units (units of xstart and xend) - * @param h_tofcenter_offset array of length 1 or nlors (depending on lor_dependent_tofcenter_offset) - * with the offset of the central TOF bin from the - * midpoint of each LOR in spatial units (units of xstart and xend). - * A positive value means a shift towards the end point of the LOR. - * @param n_sigmas number of sigmas to consider for calculation of TOF kernel - * @param h_tof_bin array containing the TOF bin of each event - * @param lor_dependent_sigma_tof unsigned char 0 or 1 - * 1 means that the TOF sigmas are LOR dependent - * any other value means that the first value in the sigma_tof - * array is used for all LORs - * @param lor_dependent_tofcenter_offset unsigned char 0 or 1 - * 1 means that the TOF center offsets are LOR dependent - * any other value means that the first value in the - * tofcenter_offset array is used for all LORs - * @param threadsperblock number of threads per block - */ -void joseph3d_back_tof_lm_cuda(const float *h_xstart, - const float *h_xend, - float **d_img, - const float *h_img_origin, - const float *h_voxsize, - const float *h_p, - long long nlors, - const int *h_img_dim, - float tofbin_width, - const float *h_sigma_tof, - const float *h_tofcenter_offset, - float n_sigmas, - const short *h_tof_bin, - unsigned char lor_dependent_sigma_tof, - unsigned char lor_dependent_tofcenter_offset, - int threadsperblock); - - - -/** @brief 3D sinogram tof joseph back projector CUDA wrapper - * - * @param h_xstart array of shape [3*nlors] with the coordinates of the start points of the LORs. - * The start coordinates of the n-th LOR are at xstart[n*3 + i] with i = 0,1,2. - * Units are the ones of voxsize. - * @param h_xend array of shape [3*nlors] with the coordinates of the end points of the LORs. - * The start coordinates of the n-th LOR are at xstart[n*3 + i] with i = 0,1,2. - * Units are the ones of voxsize. - * @param d_img Pointer to device arrays of shape [n0*n1*n2] containing the 3D image to - * used to store the back projections. - * The pixel [i,j,k] is stored at [n1*n2*i + n2*j + k]. - * The backprojector adds existing values. - * @param h_img_origin array [x0_0,x0_1,x0_2] of coordinates of the center of the [0,0,0] voxel - * @param h_voxsize array [vs0, vs1, vs2] of the voxel sizes - * @param h_p array of length nlors*n_tofbins with the values to be back projected - * the order of the array is - * [LOR0-TOFBIN-0, LOR0-TOFBIN-1, ... LOR0_TOFBIN-(n-1), - * LOR1-TOFBIN-0, LOR1-TOFBIN-1, ... LOR1_TOFBIN-(n-1), - * ... - * LOR(N-1)-TOFBIN-0, LOR(N-1)-TOFBIN-1, ... LOR(N-1)_TOFBIN-(n-1)] - * @param nlors number of gemeometrical projections - * @param h_img_dim array with dimensions of image [n0,n1,n2] - * @param tofbin_width width of the TOF bins in spatial units (units of xstart and xend) - * @param h_sigma_tof array of length 1 or nlors (depending on lor_dependent_sigma_tof) - * with the TOF resolution (sigma) for each LOR in - * spatial units (units of xstart and xend) - * @param h_tofcenter_offset array of length 1 or nlors (depending on lor_dependent_tofcenter_offset) - * with the offset of the central TOF bin from the - * midpoint of each LOR in spatial units (units of xstart and xend). - * A positive value means a shift towards the end point of the LOR. - * @param n_sigmas number of sigmas to consider for calculation of TOF kernel - * @param n_tofbins number of TOF bins - * @param lor_dependent_sigma_tof unsigned char 0 or 1 - * 1 means that the TOF sigmas are LOR dependent - * any other value means that the first value in the sigma_tof - * array is used for all LORs - * @param lor_dependent_tofcenter_offset unsigned char 0 or 1 - * 1 means that the TOF center offsets are LOR dependent - * any other value means that the first value in the - * tofcenter_offset array is used for all LORs - * @param threadsperblock number of threads per block - */ -void joseph3d_back_tof_sino_cuda(const float *h_xstart, - const float *h_xend, - float **d_img, - const float *h_img_origin, - const float *h_voxsize, - const float *h_p, - long long nlors, - const int *h_img_dim, - float tofbin_width, - const float *h_sigma_tof, - const float *h_tofcenter_offset, - float n_sigmas, - short n_tofbins, - unsigned char lor_dependent_sigma_tof, - unsigned char lor_dependent_tofcenter_offset, - int threadsperblock); - - - -/** @brief 3D non-tof joseph forward projector CUDA wrapper - * - * @param h_xstart array of shape [3*nlors] with the coordinates of the start points of the LORs. - * The start coordinates of the n-th LOR are at xstart[n*3 + i] with i = 0,1,2. - * Units are the ones of voxsize. - * @param h_xend array of shape [3*nlors] with the coordinates of the end points of the LORs. - * The start coordinates of the n-th LOR are at xstart[n*3 + i] with i = 0,1,2. - * Units are the ones of voxsize. - * @param d_img Pointer to device arrays of shape [n0*n1*n2] containing the 3D image to - * be projected. - * The pixel [i,j,k] ist stored at [n1*n2*i + n2*j + k]. - * @param h_img_origin array [x0_0,x0_1,x0_2] of coordinates of the center of the [0,0,0] voxel - * @param h_voxsize array [vs0, vs1, vs2] of the voxel sizes - * @param h_p array of length nlors (output) used to store the projections - * @param nlors number of projections (length of p array) - * @param h_img_dim array with dimensions of image [n0,n1,n2] - * @param threadsperblock number of threads per block - */ -void joseph3d_fwd_cuda(const float *h_xstart, - const float *h_xend, - float **d_img, - const float *h_img_origin, - const float *h_voxsize, - float *h_p, - long long nlors, - const int *h_img_dim, - int threadsperblock); - - - -/** @brief 3D listmode tof joseph forward projector CUDA wrapper - * - * @param h_xstart array of shape [3*nlors] with the coordinates of the start points of the LORs. - * The start coordinates of the n-th LOR are at xstart[n*3 + i] with i = 0,1,2. - * Units are the ones of voxsize. - * @param h_xend array of shape [3*nlors] with the coordinates of the end points of the LORs. - * The start coordinates of the n-th LOR are at xstart[n*3 + i] with i = 0,1,2. - * Units are the ones of voxsize. - * @param d_img Pointer to device arrays of shape [n0*n1*n2] containing the 3D image to - * used to store the back projections. - * The pixel [i,j,k] is stored at [n1*n2*i + n2*j + k]. - * @param h_img_origin array [x0_0,x0_1,x0_2] of coordinates of the center of the [0,0,0] voxel - * @param h_voxsize array [vs0, vs1, vs2] of the voxel sizes - * @param h_p array of length nlors (output) used to store the projections - * @param nlors number of projections (length of p array) - * @param h_img_dim array with dimensions of image [n0,n1,n2] - * @param tofbin_width width of the TOF bins in spatial units (units of xstart and xend) - * @param h_sigma_tof array of length 1 or nlors (depending on lor_dependent_sigma_tof) - * with the TOF resolution (sigma) for each LOR in - * spatial units (units of xstart and xend) - * @param h_tofcenter_offset array of length 1 or nlors (depending on lor_dependent_tofcenter_offset) - * with the offset of the central TOF bin from the - * midpoint of each LOR in spatial units (units of xstart and xend). - * A positive value means a shift towards the end point of the LOR. - * @param n_sigmas number of sigmas to consider for calculation of TOF kernel - * @param h_tof_bin array of length nlors with the tofbin of every event - * @param lor_dependent_sigma_tof unsigned char 0 or 1 - * 1 means that the TOF sigmas are LOR dependent - * any other value means that the first value in the sigma_tof - * array is used for all LORs - * @param lor_dependent_tofcenter_offset unsigned char 0 or 1 - * 1 means that the TOF center offsets are LOR dependent - * any other value means that the first value in the - * tofcenter_offset array is used for all LORs - * @param threadsperblock number of threads per block - */ -void joseph3d_fwd_tof_lm_cuda(const float *h_xstart, - const float *h_xend, - float **d_img, - const float *h_img_origin, - const float *h_voxsize, - float *h_p, - long long nlors, - const int *h_img_dim, - float tofbin_width, - const float *h_sigma_tof, - const float *h_tofcenter_offset, - float n_sigmas, - const short *h_tof_bin, - unsigned char lor_dependent_sigma_tof, - unsigned char lor_dependent_tofcenter_offset, - int threadsperblock); - - - -/** @brief 3D sinogram tof joseph forward projector CUDA wrapper - * - * @param h_xstart array of shape [3*nlors] with the coordinates of the start points of the LORs. - * The start coordinates of the n-th LOR are at xstart[n*3 + i] with i = 0,1,2. - * Units are the ones of voxsize. - * @param h_xend array of shape [3*nlors] with the coordinates of the end points of the LORs. - * The start coordinates of the n-th LOR are at xstart[n*3 + i] with i = 0,1,2. - * Units are the ones of voxsize. - * @param d_img Pointer to device arrays of shape [n0*n1*n2] containing the 3D image to - * used to store the back projections. - * The pixel [i,j,k] is stored at [n1*n2*i + n2*j + k]. - * @param h_img_origin array [x0_0,x0_1,x0_2] of coordinates of the center of the [0,0,0] voxel - * @param h_voxsize array [vs0, vs1, vs2] of the voxel sizes - * @param h_p array of length nlors*n_tofbins (output) used to store the projections - * the order of the array is - * [LOR0-TOFBIN-0, LOR0-TOFBIN-1, ... LOR0_TOFBIN-(n-1), - * LOR1-TOFBIN-0, LOR1-TOFBIN-1, ... LOR1_TOFBIN-(n-1), - * ... - * LOR(N-1)-TOFBIN-0, LOR(N-1)-TOFBIN-1, ... LOR(N-1)_TOFBIN-(n-1)] - * @param nlors number of geometrical LORs - * @param h_img_dim array with dimensions of image [n0,n1,n2] - * @param tofbin_width width of the TOF bins in spatial units (units of xstart and xend) - * @param h_sigma_tof array of length 1 or nlors (depending on lor_dependent_sigma_tof) - * with the TOF resolution (sigma) for each LOR in - * spatial units (units of xstart and xend) - * @param h_tofcenter_offset array of length 1 or nlors (depending on lor_dependent_tofcenter_offset) - * with the offset of the central TOF bin from the - * midpoint of each LOR in spatial units (units of xstart and xend). - * A positive value means a shift towards the end point of the LOR. - * @param n_sigmas number of sigmas to consider for calculation of TOF kernel - * @param n_tofbins number of TOF bins - * @param lor_dependent_sigma_tof unsigned char 0 or 1 - * 1 means that the TOF sigmas are LOR dependent - * any other value means that the first value in the sigma_tof - * array is used for all LORs - * @param lor_dependent_tofcenter_offset unsigned char 0 or 1 - * 1 means that the TOF center offsets are LOR dependent - * any other value means that the first value in the - * tofcenter_offset array is used for all LORs - * @param threadsperblock number of threads per block - */ -void joseph3d_fwd_tof_sino_cuda(const float *h_xstart, - const float *h_xend, - float **d_img, - const float *h_img_origin, - const float *h_voxsize, - float *h_p, - long long nlors, - const int *h_img_dim, - float tofbin_width, - const float *h_sigma_tof, - const float *h_tofcenter_offset, - float n_sigmas, - short n_tofbins, - unsigned char lor_dependent_sigma_tof, - unsigned char lor_dependent_tofcenter_offset, - int threadsperblock); - - - -/** @brief copy a float array to all visible cuda devices - * - * The number of visible cuda devices is determined automatically via the CUDA API - * - * @param h_array array of shape [n] on the host - * @param n number of array elements - * @return a pointer to all devices arrays - */ -float** copy_float_array_to_all_devices(const float *h_array, long long n); - - - -/** @brief free device array on all visible cuda devices - * - * The number of visible cuda devices is determined automatically via the CUDA API - * - * @param d_array a pointer to all devices arrays - */ -void free_float_array_on_all_devices(float **d_array); - - - -/** @brief sum multiple versions of an array on different devices on first device - * - * The number of visible cuda devices is determined automatically via the CUDA API - * This becomes usefule when multiple devices backproject into separate images. - * - * @param d_array a pointer to all devices arrays - * @param n number of array elements - */ -void sum_float_arrays_on_first_device(float **d_array, long long n); - - - -/** @brief copy a (summed) float array from first device back to host - * - * The number of visible cuda devices is determined automatically via the CUDA API - * - * @param d_array a pointer to all devices arrays of shape [n] - * @param n number of array elements - * @param i_dev device number - * @param h_array array of shape [n] on the host used for output - */ -void get_float_array_from_device(float **d_array, long long n, int i_dev, float *h_array); + /** @brief 3D non-tof joseph back projector CUDA wrapper + * + * @param h_xstart array of shape [3*nlors] with the coordinates of the start points of the LORs. + * The start coordinates of the n-th LOR are at xstart[n*3 + i] with i = 0,1,2. + * Units are the ones of voxsize. + * @param h_xend array of shape [3*nlors] with the coordinates of the end points of the LORs. + * The start coordinates of the n-th LOR are at xstart[n*3 + i] with i = 0,1,2. + * Units are the ones of voxsize. + * @param d_img Pointer to device arrays of shape [n0*n1*n2] containing the 3D image to + * used to store the back projections. + * The pixel [i,j,k] is stored at [n1*n2*i + n2*j + k]. + * The backprojector adds existing values. + * @param h_img_origin array [x0_0,x0_1,x0_2] of coordinates of the center of the [0,0,0] voxel + * @param h_voxsize array [vs0, vs1, vs2] of the voxel sizes + * @param h_p array of length nlors containg the values to be back projected + * @param nlors number of projections (length of p array) + * @param h_img_dim array with dimensions of image [n0,n1,n2] + * @param threadsperblock number of threads per block + */ + void joseph3d_back_cuda(const float *h_xstart, + const float *h_xend, + float **d_img, + const float *h_img_origin, + const float *h_voxsize, + const float *h_p, + long long nlors, + const int *h_img_dim, + int threadsperblock); + + /** @brief 3D listmode tof joseph back projector CUDA wrapper + * + * @param h_xstart array of shape [3*nlors] with the coordinates of the start points of the LORs. + * The start coordinates of the n-th LOR are at xstart[n*3 + i] with i = 0,1,2. + * Units are the ones of voxsize. + * @param h_xend array of shape [3*nlors] with the coordinates of the end points of the LORs. + * The start coordinates of the n-th LOR are at xstart[n*3 + i] with i = 0,1,2. + * Units are the ones of voxsize. + * @param d_img Pointer to device arrays of shape [n0*n1*n2] containing the 3D image to + * used to store the back projections. + * The pixel [i,j,k] is stored at [n1*n2*i + n2*j + k]. + * The backprojector adds existing values. + * @param h_img_origin array [x0_0,x0_1,x0_2] of coordinates of the center of the [0,0,0] voxel + * @param h_voxsize array [vs0, vs1, vs2] of the voxel sizes + * @param h_p array of length nlors containg the values to be back projected + * @param nlors number of projections (length of p array) + * @param h_img_dim array with dimensions of image [n0,n1,n2] + * @param tofbin_width width of the TOF bins in spatial units (units of xstart and xend) + * @param h_sigma_tof array of length 1 or nlors (depending on lor_dependent_sigma_tof) + * with the TOF resolution (sigma) for each LOR in + * spatial units (units of xstart and xend) + * @param h_tofcenter_offset array of length 1 or nlors (depending on lor_dependent_tofcenter_offset) + * with the offset of the central TOF bin from the + * midpoint of each LOR in spatial units (units of xstart and xend). + * A positive value means a shift towards the end point of the LOR. + * @param n_sigmas number of sigmas to consider for calculation of TOF kernel + * @param h_tof_bin array containing the TOF bin of each event + * @param lor_dependent_sigma_tof unsigned char 0 or 1 + * 1 means that the TOF sigmas are LOR dependent + * any other value means that the first value in the sigma_tof + * array is used for all LORs + * @param lor_dependent_tofcenter_offset unsigned char 0 or 1 + * 1 means that the TOF center offsets are LOR dependent + * any other value means that the first value in the + * tofcenter_offset array is used for all LORs + * @param threadsperblock number of threads per block + */ + void joseph3d_back_tof_lm_cuda(const float *h_xstart, + const float *h_xend, + float **d_img, + const float *h_img_origin, + const float *h_voxsize, + const float *h_p, + long long nlors, + const int *h_img_dim, + float tofbin_width, + const float *h_sigma_tof, + const float *h_tofcenter_offset, + float n_sigmas, + const short *h_tof_bin, + unsigned char lor_dependent_sigma_tof, + unsigned char lor_dependent_tofcenter_offset, + int threadsperblock); + + /** @brief 3D sinogram tof joseph back projector CUDA wrapper + * + * @param h_xstart array of shape [3*nlors] with the coordinates of the start points of the LORs. + * The start coordinates of the n-th LOR are at xstart[n*3 + i] with i = 0,1,2. + * Units are the ones of voxsize. + * @param h_xend array of shape [3*nlors] with the coordinates of the end points of the LORs. + * The start coordinates of the n-th LOR are at xstart[n*3 + i] with i = 0,1,2. + * Units are the ones of voxsize. + * @param d_img Pointer to device arrays of shape [n0*n1*n2] containing the 3D image to + * used to store the back projections. + * The pixel [i,j,k] is stored at [n1*n2*i + n2*j + k]. + * The backprojector adds existing values. + * @param h_img_origin array [x0_0,x0_1,x0_2] of coordinates of the center of the [0,0,0] voxel + * @param h_voxsize array [vs0, vs1, vs2] of the voxel sizes + * @param h_p array of length nlors*n_tofbins with the values to be back projected + * the order of the array is + * [LOR0-TOFBIN-0, LOR0-TOFBIN-1, ... LOR0_TOFBIN-(n-1), + * LOR1-TOFBIN-0, LOR1-TOFBIN-1, ... LOR1_TOFBIN-(n-1), + * ... + * LOR(N-1)-TOFBIN-0, LOR(N-1)-TOFBIN-1, ... LOR(N-1)_TOFBIN-(n-1)] + * @param nlors number of gemeometrical projections + * @param h_img_dim array with dimensions of image [n0,n1,n2] + * @param tofbin_width width of the TOF bins in spatial units (units of xstart and xend) + * @param h_sigma_tof array of length 1 or nlors (depending on lor_dependent_sigma_tof) + * with the TOF resolution (sigma) for each LOR in + * spatial units (units of xstart and xend) + * @param h_tofcenter_offset array of length 1 or nlors (depending on lor_dependent_tofcenter_offset) + * with the offset of the central TOF bin from the + * midpoint of each LOR in spatial units (units of xstart and xend). + * A positive value means a shift towards the end point of the LOR. + * @param n_sigmas number of sigmas to consider for calculation of TOF kernel + * @param n_tofbins number of TOF bins + * @param lor_dependent_sigma_tof unsigned char 0 or 1 + * 1 means that the TOF sigmas are LOR dependent + * any other value means that the first value in the sigma_tof + * array is used for all LORs + * @param lor_dependent_tofcenter_offset unsigned char 0 or 1 + * 1 means that the TOF center offsets are LOR dependent + * any other value means that the first value in the + * tofcenter_offset array is used for all LORs + * @param threadsperblock number of threads per block + */ + void joseph3d_back_tof_sino_cuda(const float *h_xstart, + const float *h_xend, + float **d_img, + const float *h_img_origin, + const float *h_voxsize, + const float *h_p, + long long nlors, + const int *h_img_dim, + float tofbin_width, + const float *h_sigma_tof, + const float *h_tofcenter_offset, + float n_sigmas, + short n_tofbins, + unsigned char lor_dependent_sigma_tof, + unsigned char lor_dependent_tofcenter_offset, + int threadsperblock); + + /** @brief 3D non-tof joseph forward projector CUDA wrapper + * + * @param h_xstart array of shape [3*nlors] with the coordinates of the start points of the LORs. + * The start coordinates of the n-th LOR are at xstart[n*3 + i] with i = 0,1,2. + * Units are the ones of voxsize. + * @param h_xend array of shape [3*nlors] with the coordinates of the end points of the LORs. + * The start coordinates of the n-th LOR are at xstart[n*3 + i] with i = 0,1,2. + * Units are the ones of voxsize. + * @param d_img Pointer to device arrays of shape [n0*n1*n2] containing the 3D image to + * be projected. + * The pixel [i,j,k] ist stored at [n1*n2*i + n2*j + k]. + * @param h_img_origin array [x0_0,x0_1,x0_2] of coordinates of the center of the [0,0,0] voxel + * @param h_voxsize array [vs0, vs1, vs2] of the voxel sizes + * @param h_p array of length nlors (output) used to store the projections + * @param nlors number of projections (length of p array) + * @param h_img_dim array with dimensions of image [n0,n1,n2] + * @param threadsperblock number of threads per block + */ + void joseph3d_fwd_cuda(const float *h_xstart, + const float *h_xend, + float **d_img, + const float *h_img_origin, + const float *h_voxsize, + float *h_p, + long long nlors, + const int *h_img_dim, + int threadsperblock); + + /** @brief 3D listmode tof joseph forward projector CUDA wrapper + * + * @param h_xstart array of shape [3*nlors] with the coordinates of the start points of the LORs. + * The start coordinates of the n-th LOR are at xstart[n*3 + i] with i = 0,1,2. + * Units are the ones of voxsize. + * @param h_xend array of shape [3*nlors] with the coordinates of the end points of the LORs. + * The start coordinates of the n-th LOR are at xstart[n*3 + i] with i = 0,1,2. + * Units are the ones of voxsize. + * @param d_img Pointer to device arrays of shape [n0*n1*n2] containing the 3D image to + * used to store the back projections. + * The pixel [i,j,k] is stored at [n1*n2*i + n2*j + k]. + * @param h_img_origin array [x0_0,x0_1,x0_2] of coordinates of the center of the [0,0,0] voxel + * @param h_voxsize array [vs0, vs1, vs2] of the voxel sizes + * @param h_p array of length nlors (output) used to store the projections + * @param nlors number of projections (length of p array) + * @param h_img_dim array with dimensions of image [n0,n1,n2] + * @param tofbin_width width of the TOF bins in spatial units (units of xstart and xend) + * @param h_sigma_tof array of length 1 or nlors (depending on lor_dependent_sigma_tof) + * with the TOF resolution (sigma) for each LOR in + * spatial units (units of xstart and xend) + * @param h_tofcenter_offset array of length 1 or nlors (depending on lor_dependent_tofcenter_offset) + * with the offset of the central TOF bin from the + * midpoint of each LOR in spatial units (units of xstart and xend). + * A positive value means a shift towards the end point of the LOR. + * @param n_sigmas number of sigmas to consider for calculation of TOF kernel + * @param h_tof_bin array of length nlors with the tofbin of every event + * @param lor_dependent_sigma_tof unsigned char 0 or 1 + * 1 means that the TOF sigmas are LOR dependent + * any other value means that the first value in the sigma_tof + * array is used for all LORs + * @param lor_dependent_tofcenter_offset unsigned char 0 or 1 + * 1 means that the TOF center offsets are LOR dependent + * any other value means that the first value in the + * tofcenter_offset array is used for all LORs + * @param threadsperblock number of threads per block + */ + void joseph3d_fwd_tof_lm_cuda(const float *h_xstart, + const float *h_xend, + float **d_img, + const float *h_img_origin, + const float *h_voxsize, + float *h_p, + long long nlors, + const int *h_img_dim, + float tofbin_width, + const float *h_sigma_tof, + const float *h_tofcenter_offset, + float n_sigmas, + const short *h_tof_bin, + unsigned char lor_dependent_sigma_tof, + unsigned char lor_dependent_tofcenter_offset, + int threadsperblock); + + /** @brief 3D sinogram tof joseph forward projector CUDA wrapper + * + * @param h_xstart array of shape [3*nlors] with the coordinates of the start points of the LORs. + * The start coordinates of the n-th LOR are at xstart[n*3 + i] with i = 0,1,2. + * Units are the ones of voxsize. + * @param h_xend array of shape [3*nlors] with the coordinates of the end points of the LORs. + * The start coordinates of the n-th LOR are at xstart[n*3 + i] with i = 0,1,2. + * Units are the ones of voxsize. + * @param d_img Pointer to device arrays of shape [n0*n1*n2] containing the 3D image to + * used to store the back projections. + * The pixel [i,j,k] is stored at [n1*n2*i + n2*j + k]. + * @param h_img_origin array [x0_0,x0_1,x0_2] of coordinates of the center of the [0,0,0] voxel + * @param h_voxsize array [vs0, vs1, vs2] of the voxel sizes + * @param h_p array of length nlors*n_tofbins (output) used to store the projections + * the order of the array is + * [LOR0-TOFBIN-0, LOR0-TOFBIN-1, ... LOR0_TOFBIN-(n-1), + * LOR1-TOFBIN-0, LOR1-TOFBIN-1, ... LOR1_TOFBIN-(n-1), + * ... + * LOR(N-1)-TOFBIN-0, LOR(N-1)-TOFBIN-1, ... LOR(N-1)_TOFBIN-(n-1)] + * @param nlors number of geometrical LORs + * @param h_img_dim array with dimensions of image [n0,n1,n2] + * @param tofbin_width width of the TOF bins in spatial units (units of xstart and xend) + * @param h_sigma_tof array of length 1 or nlors (depending on lor_dependent_sigma_tof) + * with the TOF resolution (sigma) for each LOR in + * spatial units (units of xstart and xend) + * @param h_tofcenter_offset array of length 1 or nlors (depending on lor_dependent_tofcenter_offset) + * with the offset of the central TOF bin from the + * midpoint of each LOR in spatial units (units of xstart and xend). + * A positive value means a shift towards the end point of the LOR. + * @param n_sigmas number of sigmas to consider for calculation of TOF kernel + * @param n_tofbins number of TOF bins + * @param lor_dependent_sigma_tof unsigned char 0 or 1 + * 1 means that the TOF sigmas are LOR dependent + * any other value means that the first value in the sigma_tof + * array is used for all LORs + * @param lor_dependent_tofcenter_offset unsigned char 0 or 1 + * 1 means that the TOF center offsets are LOR dependent + * any other value means that the first value in the + * tofcenter_offset array is used for all LORs + * @param threadsperblock number of threads per block + */ + void joseph3d_fwd_tof_sino_cuda(const float *h_xstart, + const float *h_xend, + float **d_img, + const float *h_img_origin, + const float *h_voxsize, + float *h_p, + long long nlors, + const int *h_img_dim, + float tofbin_width, + const float *h_sigma_tof, + const float *h_tofcenter_offset, + float n_sigmas, + short n_tofbins, + unsigned char lor_dependent_sigma_tof, + unsigned char lor_dependent_tofcenter_offset, + int threadsperblock); + + /** @brief copy a float array to all visible cuda devices + * + * The number of visible cuda devices is determined automatically via the CUDA API + * + * @param h_array array of shape [n] on the host + * @param n number of array elements + * @return a pointer to all devices arrays + */ + float **copy_float_array_to_all_devices(const float *h_array, long long n); + + /** @brief free device array on all visible cuda devices + * + * The number of visible cuda devices is determined automatically via the CUDA API + * + * @param d_array a pointer to all devices arrays + */ + void free_float_array_on_all_devices(float **d_array); + + /** @brief sum multiple versions of an array on different devices on first device + * + * The number of visible cuda devices is determined automatically via the CUDA API + * This becomes usefule when multiple devices backproject into separate images. + * + * @param d_array a pointer to all devices arrays + * @param n number of array elements + */ + void sum_float_arrays_on_first_device(float **d_array, long long n); + + /** @brief copy a (summed) float array from first device back to host + * + * The number of visible cuda devices is determined automatically via the CUDA API + * + * @param d_array a pointer to all devices arrays of shape [n] + * @param n number of array elements + * @param i_dev device number + * @param h_array array of shape [n] on the host used for output + */ + void get_float_array_from_device(float **d_array, long long n, int i_dev, float *h_array); + + /** @brief get the number of visible cuda devices + */ + int get_cuda_device_count(); #ifdef __cplusplus -} /* extern "C" */ +} /* extern "C" */ #endif #endif diff --git a/cuda/src/utils_cuda.cu b/cuda/src/utils_cuda.cu index fa7d88e7..420fc1a6 100644 --- a/cuda/src/utils_cuda.cu +++ b/cuda/src/utils_cuda.cu @@ -2,60 +2,60 @@ * @file utils_cuda.cu */ -#include -#include +#include +#include -extern "C" __global__ void add_to_first_kernel(float* a, float* b, unsigned long long n) +extern "C" __global__ void add_to_first_kernel(float *a, float *b, unsigned long long n) { -// add a vector b onto a vector a both of length n + // add a vector b onto a vector a both of length n unsigned long long i = blockDim.x * blockIdx.x + threadIdx.x; - if(i < n) + if (i < n) { a[i] += b[i]; } } ////////////////////////////////////////////////////////////////////////////////////////// -extern "C" __global__ void print_int_device_array(int* a) +extern "C" __global__ void print_int_device_array(int *a) { unsigned long long i = blockDim.x * blockIdx.x + threadIdx.x; printf("%lld %d\n", i, a[i]); } ////////////////////////////////////////////////////////////////////////////////////////// -extern "C" __global__ void print_float_device_array(float* a) +extern "C" __global__ void print_float_device_array(float *a) { unsigned long long i = blockDim.x * blockIdx.x + threadIdx.x; printf("%lld %f\n", i, a[i]); } ////////////////////////////////////////////////////////////////////////////////////////// -extern "C" float** copy_float_array_to_all_devices(const float *h_array, long long n) +extern "C" float **copy_float_array_to_all_devices(const float *h_array, long long n) { - cudaError_t error; + cudaError_t error; // get number of visible devices int num_devices; - cudaGetDeviceCount(&num_devices); + cudaGetDeviceCount(&num_devices); // create pointer to device arrays - float **d_array = new float * [num_devices]; + float **d_array = new float *[num_devices]; - long long array_bytes = n*sizeof(float); + long long array_bytes = n * sizeof(float); - # pragma omp parallel for schedule(static) - for (int i_dev = 0; i_dev < num_devices; i_dev++) +#pragma omp parallel for schedule(static) + for (int i_dev = 0; i_dev < num_devices; i_dev++) { cudaSetDevice(i_dev); error = cudaMalloc(&d_array[i_dev], array_bytes); if (error != cudaSuccess) { - printf("cudaMalloc returned error %s (code %d), line(%d)\n", cudaGetErrorString(error), - error, __LINE__); - exit(EXIT_FAILURE); + printf("cudaMalloc returned error %s (code %d), line(%d)\n", cudaGetErrorString(error), + error, __LINE__); + exit(EXIT_FAILURE); } cudaMemcpyAsync(d_array[i_dev], h_array, array_bytes, cudaMemcpyHostToDevice); } @@ -74,8 +74,8 @@ extern "C" void free_float_array_on_all_devices(float **d_array) int num_devices; cudaGetDeviceCount(&num_devices); - # pragma omp parallel for schedule(static) - for (int i_dev = 0; i_dev < num_devices; i_dev++) +#pragma omp parallel for schedule(static) + for (int i_dev = 0; i_dev < num_devices; i_dev++) { cudaFree(d_array[i_dev]); } @@ -88,7 +88,7 @@ extern "C" void free_float_array_on_all_devices(float **d_array) extern "C" void sum_float_arrays_on_first_device(float **d_array, long long n) { - cudaError_t error; + cudaError_t error; int threadsperblock = 32; dim3 block(threadsperblock); int blockspergrid = (int)ceil((float)n / threadsperblock); @@ -99,21 +99,21 @@ extern "C" void sum_float_arrays_on_first_device(float **d_array, long long n) float *d_array2; - long long array_bytes = n*sizeof(float); + long long array_bytes = n * sizeof(float); - if(num_devices > 1) + if (num_devices > 1) { cudaSetDevice(0); - for (int i_dev = 0; i_dev < num_devices; i_dev++) + for (int i_dev = 0; i_dev < num_devices; i_dev++) { - if(i_dev == 0) + if (i_dev == 0) { // allocate memory for aux array to sum arrays on device 0 error = cudaMalloc(&d_array2, array_bytes); if (error != cudaSuccess) { - printf("cudaMalloc returned error %s (code %d), line(%d)\n", + printf("cudaMalloc returned error %s (code %d), line(%d)\n", cudaGetErrorString(error), error, __LINE__); exit(EXIT_FAILURE); } @@ -125,12 +125,12 @@ extern "C" void sum_float_arrays_on_first_device(float **d_array, long long n) cudaMemcpyPeer(d_array2, 0, d_array[i_dev], i_dev, array_bytes); // call summation kernel to add d_array2 to d_array on device 0 - add_to_first_kernel<<>>(d_array[0], d_array2, n); + add_to_first_kernel<<>>(d_array[0], d_array2, n); } cudaDeviceSynchronize(); } - + cudaFree(d_array2); } } @@ -139,5 +139,17 @@ extern "C" void sum_float_arrays_on_first_device(float **d_array, long long n) extern "C" void get_float_array_from_device(float **d_array, long long n, int i_dev, float *h_array) { cudaSetDevice(i_dev); - cudaMemcpy(h_array, d_array[i_dev], n*sizeof(float), cudaMemcpyDeviceToHost); + cudaMemcpy(h_array, d_array[i_dev], n * sizeof(float), cudaMemcpyDeviceToHost); } + +//////////////////////////////////////////////////////////////////////////////////////////// +extern "C" int get_cuda_device_count() +{ + int num_devices = 0; + cudaError_t err = cudaGetDeviceCount(&num_devices); + + if (err != cudaSuccess) + num_devices = 0; + + return num_devices; +} \ No newline at end of file diff --git a/python/parallelproj/__init__.py b/python/parallelproj/__init__.py index df645737..147403cf 100644 --- a/python/parallelproj/__init__.py +++ b/python/parallelproj/__init__.py @@ -1,4 +1,5 @@ -from .config import cuda_enabled, cupy_enabled, get_array_module, XPArray, XPFloat32Array, XPShortArray +from .backend import cuda_present, cupy_enabled, get_array_module, XPArray, XPFloat32Array, XPShortArray +from .backend import num_visible_cuda_devices from .backend import joseph3d_fwd, joseph3d_back from .backend import joseph3d_fwd_tof_sino, joseph3d_back_tof_sino from .backend import joseph3d_fwd_tof_lm, joseph3d_back_tof_lm \ No newline at end of file diff --git a/python/parallelproj/backend.py b/python/parallelproj/backend.py index cd6b4434..289f29d9 100644 --- a/python/parallelproj/backend.py +++ b/python/parallelproj/backend.py @@ -1,4 +1,6 @@ import os +import importlib +import distutils import math import ctypes @@ -10,8 +12,28 @@ import numpy as np import numpy.ctypeslib as npct +import numpy.typing as npt -from parallelproj.config import cuda_enabled, cupy_enabled, XPShortArray, XPFloat32Array, get_array_module +from typing import Union +from types import ModuleType + +# check if cuda is present +cuda_present = distutils.spawn.find_executable('nvidia-smi') is not None + +# check if cupy is available +cupy_enabled = (importlib.util.find_spec('cupy') is not None) + +# define type for cupy or numpy array +if cupy_enabled: + import cupy as cp + import cupy.typing as cpt + XPArray = Union[npt.NDArray, cpt.NDArray] + XPFloat32Array = Union[npt.NDArray[np.float32], cpt.NDArray[np.float32]] + XPShortArray = Union[npt.NDArray[np.int16], cpt.NDArray[np.int16]] +else: + XPArray = npt.NDArray + XPFloat32Array = npt.NDArray[np.float32] + XPShortArray = npt.NDArray[np.int16] # numpy ctypes lib array definitions ar_1d_single = npct.ndpointer(dtype=ctypes.c_float, ndim=1, flags='C') @@ -137,7 +159,7 @@ #--------------------------------------------------------------------------------------- -if cuda_enabled: +if cuda_present: if 'PARALLELPROJ_CUDA_LIB' in os.environ: lib_parallelproj_cuda_fname = os.environ['PARALLELPROJ_CUDA_LIB'] else: @@ -152,6 +174,14 @@ os.path.basename(lib_parallelproj_cuda_fname), os.path.dirname(lib_parallelproj_cuda_fname)) + # get the number of visible cuda devices + lib_parallelproj_cuda.get_cuda_device_count.restype = np.int32 + num_visible_cuda_devices = lib_parallelproj_cuda.get_cuda_device_count( + ) + + if (num_visible_cuda_devices == 0) and cupy_enabled: + cupy_enabled = False + lib_parallelproj_cuda.joseph3d_fwd_cuda.restype = None lib_parallelproj_cuda.joseph3d_fwd_cuda.argtypes = [ ar_1d_single, # h_xstart @@ -387,7 +417,7 @@ def joseph3d_fwd(xstart: XPFloat32Array, xp.asarray(img_dim))) xp.cuda.Device().synchronize() else: - if cuda_enabled: + if num_visible_cuda_devices > 0: # projection of numpy array using the cuda parallelproj lib num_voxel = ctypes.c_longlong(img_dim[0] * img_dim[1] * img_dim[2]) @@ -461,7 +491,7 @@ def joseph3d_back(xstart: XPFloat32Array, np.int64(nLORs), xp.asarray(img_dim))) xp.cuda.Device().synchronize() else: - if cuda_enabled: + if num_visible_cuda_devices > 0: # back projection of numpy array using the cuda parallelproj lib num_voxel = ctypes.c_longlong(img_dim[0] * img_dim[1] * img_dim[2]) @@ -569,7 +599,7 @@ def joseph3d_fwd_tof_sino(xstart: XPFloat32Array, lor_dependent_sigma_tof, lor_dependent_tofcenter_offset)) xp.cuda.Device().synchronize() else: - if cuda_enabled: + if num_visible_cuda_devices > 0: # back projection of numpy array using the cuda parallelproj lib num_voxel = ctypes.c_longlong(img_dim[0] * img_dim[1] * img_dim[2]) @@ -689,7 +719,7 @@ def joseph3d_back_tof_sino(xstart: XPFloat32Array, lor_dependent_sigma_tof, lor_dependent_tofcenter_offset)) xp.cuda.Device().synchronize() else: - if cuda_enabled: + if num_visible_cuda_devices > 0: # back projection of numpy array using the cuda parallelproj lib num_voxel = ctypes.c_longlong(img_dim[0] * img_dim[1] * img_dim[2]) @@ -815,7 +845,7 @@ def joseph3d_fwd_tof_lm(xstart: XPFloat32Array, lor_dependent_tofcenter_offset)) xp.cuda.Device().synchronize() else: - if cuda_enabled: + if num_visible_cuda_devices > 0: # projection of numpy array using the cuda parallelproj lib num_voxel = ctypes.c_longlong(img_dim[0] * img_dim[1] * img_dim[2]) @@ -933,7 +963,7 @@ def joseph3d_back_tof_lm(xstart: XPFloat32Array, lor_dependent_tofcenter_offset)) xp.cuda.Device().synchronize() else: - if cuda_enabled: + if num_visible_cuda_devices > 0: # back projection of numpy array using the cuda parallelproj lib num_voxel = ctypes.c_longlong(img_dim[0] * img_dim[1] * img_dim[2]) @@ -982,7 +1012,28 @@ def joseph3d_back_tof_lm(xstart: XPFloat32Array, else: # back projection of numpy array using the openmp parallelproj lib lib_parallelproj_c.joseph3d_back_tof_lm( - xstart.ravel(), xend.ravel(), back_img.ravel(), img_origin, - voxsize, lst, np.int64(nLORs), img_dim, tofbin_width, sigma_tof, + xstart.ravel(), + xend.ravel(), back_img.ravel(), img_origin, voxsize, lst, + np.int64(nLORs), img_dim, tofbin_width, sigma_tof, tofcenter_offset, nsigmas, tofbin, lor_dependent_sigma_tof, - lor_dependent_tofcenter_offset) \ No newline at end of file + lor_dependent_tofcenter_offset) + + +#----------------------------------------------------------------------------- + + +def get_array_module(array) -> ModuleType: + """return module of a cupy or numpy array + + Parameters + ---------- + array : cupy or numpy array + + Returns + ------- + cupy or numpy module + """ + if cupy_enabled: + return cp.get_array_module(array) + else: + return np \ No newline at end of file diff --git a/python/parallelproj/config.py b/python/parallelproj/config.py deleted file mode 100644 index 285fc403..00000000 --- a/python/parallelproj/config.py +++ /dev/null @@ -1,43 +0,0 @@ -"""package configurations""" -import importlib -import GPUtil -import numpy as np -import numpy.typing as npt - -from typing import Union -from types import ModuleType - -# number of available CUDA devices -num_available_cuda_devices = len(GPUtil.getGPUs()) -cuda_enabled = (num_available_cuda_devices > 0) -# check if cupy is available -cupy_enabled = (importlib.util.find_spec('cupy') is not None) - -# define type for cupy or numpy array -if cuda_enabled: - import cupy as cp - import cupy.typing as cpt - XPArray = Union[npt.NDArray, cpt.NDArray] - XPFloat32Array = Union[npt.NDArray[np.float32], cpt.NDArray[np.float32]] - XPShortArray = Union[npt.NDArray[np.int16], cpt.NDArray[np.int16]] -else: - XPArray = npt.NDArray - XPFloat32Array = npt.NDArray[np.float32] - XPShortArray = npt.NDArray[np.int16] - - -def get_array_module(array) -> ModuleType: - """return module of a cupy or numpy array - - Parameters - ---------- - array : cupy or numpy array - - Returns - ------- - cupy or numpy module - """ - if cupy_enabled: - return cp.get_array_module(array) - else: - return np \ No newline at end of file diff --git a/python/test/test_nontof_joseph.py b/python/test/test_nontof_joseph.py index fac79886..de09d710 100644 --- a/python/test/test_nontof_joseph.py +++ b/python/test/test_nontof_joseph.py @@ -64,7 +64,7 @@ def fwd_test(xp: ModuleType, verbose=True) -> bool: if verbose: print( - f'module = {xp.__name__} - cuda_enabled {parallelproj.cuda_enabled}' + f'module = {xp.__name__} - cuda_enabled {parallelproj.num_visible_cuda_devices > 0}' ) print('calculated projection = ', img_fwd) print('expected projection = ', expected_projections) @@ -134,7 +134,7 @@ def adjointness_test(xp: ModuleType, if verbose: print( - f'module = {xp.__name__} - cuda_enabled {parallelproj.cuda_enabled}' + f'module = {xp.__name__} - cuda_enabled {parallelproj.num_visible_cuda_devices > 0}' ) print('ip_a = ', ip_a) print('ip_b = ', ip_b) @@ -157,11 +157,6 @@ def test_fwd(self): import cupy as cp self.assertTrue(fwd_test(cp)) - if parallelproj.cuda_enabled: - parallelproj.cuda_enabled = False - self.assertTrue(fwd_test(np)) - parallelproj.cuda_enabled = True - def test_adjoint(self): """test non TOF joseph forward projection using different backends""" self.assertTrue(adjointness_test(np)) @@ -170,11 +165,6 @@ def test_adjoint(self): import cupy as cp self.assertTrue(adjointness_test(cp)) - if parallelproj.cuda_enabled: - parallelproj.cuda_enabled = False - self.assertTrue(adjointness_test(np)) - parallelproj.cuda_enabled = True - #-------------------------------------------------------------------------- diff --git a/python/test/test_toflm_joseph.py b/python/test/test_toflm_joseph.py index be825833..8536785e 100644 --- a/python/test/test_toflm_joseph.py +++ b/python/test/test_toflm_joseph.py @@ -69,7 +69,7 @@ def tof_lm_fwd_test(xp: ModuleType, verbose: bool = True) -> None: if verbose: print( - f'module = {xp.__name__} - cuda_enabled {parallelproj.cuda_enabled}' + f'module = {xp.__name__} - cuda_enabled {parallelproj.num_visible_cuda_devices > 0}' ) print( f'sum of TOF profile / expected: {float(img_fwd.sum()):.4E} / {voxsize:.4E}' @@ -147,7 +147,7 @@ def adjointness_test(xp: ModuleType, if verbose: print( - f'module = {xp.__name__} - cuda_enabled {parallelproj.cuda_enabled}' + f'module = {xp.__name__} - cuda_enabled {parallelproj.num_visible_cuda_devices > 0}' ) print('ip_a = ', ip_a) print('ip_b = ', ip_b) @@ -170,11 +170,6 @@ def test_adjoint(self): import cupy as cp self.assertTrue(adjointness_test(cp)) - #if parallelproj.cuda_enabled: - # parallelproj.cuda_enabled = False - # self.assertTrue(adjointness_test(np)) - # parallelproj.cuda_enabled = True - def test_forward(self): """test TOF joseph forward projection using different backends""" self.assertTrue(tof_lm_fwd_test(np)) @@ -183,11 +178,6 @@ def test_forward(self): import cupy as cp self.assertTrue(tof_lm_fwd_test(cp)) - #if parallelproj.cuda_enabled: - # parallelproj.cuda_enabled = False - # self.assertTrue(tof_sino_fwd_test(np)) - # parallelproj.cuda_enabled = True - #-------------------------------------------------------------------------- diff --git a/python/test/test_tofsino_joseph.py b/python/test/test_tofsino_joseph.py index f6c9d47e..471d87bd 100644 --- a/python/test/test_tofsino_joseph.py +++ b/python/test/test_tofsino_joseph.py @@ -67,7 +67,7 @@ def tof_sino_fwd_test(xp: ModuleType, verbose: bool = True) -> None: if verbose: print( - f'module = {xp.__name__} - cuda_enabled {parallelproj.cuda_enabled}' + f'module = {xp.__name__} - cuda_enabled {parallelproj.num_visible_cuda_devices > 0}' ) print( f'sum of TOF profile / expected: {float(img_fwd.sum()):.4E} / {voxsize:.4E}' @@ -145,7 +145,7 @@ def adjointness_test(xp: ModuleType, if verbose: print( - f'module = {xp.__name__} - cuda_enabled {parallelproj.cuda_enabled}' + f'module = {xp.__name__} - cuda_enabled {parallelproj.num_visible_cuda_devices > 0}' ) print('ip_a = ', ip_a) print('ip_b = ', ip_b) @@ -168,11 +168,6 @@ def test_adjoint(self): import cupy as cp self.assertTrue(adjointness_test(cp)) - #if parallelproj.cuda_enabled: - # parallelproj.cuda_enabled = False - # self.assertTrue(adjointness_test(np)) - # parallelproj.cuda_enabled = True - def test_forward(self): """test TOF joseph forward projection using different backends""" self.assertTrue(tof_sino_fwd_test(np)) @@ -181,11 +176,6 @@ def test_forward(self): import cupy as cp self.assertTrue(tof_sino_fwd_test(cp)) - #if parallelproj.cuda_enabled: - # parallelproj.cuda_enabled = False - # self.assertTrue(tof_sino_fwd_test(np)) - # parallelproj.cuda_enabled = True - #--------------------------------------------------------------------------