diff options
Diffstat (limited to 'cuda')
-rw-r--r-- | cuda/3d/cone_bp.cu | 23 | ||||
-rw-r--r-- | cuda/3d/par3d_bp.cu | 14 |
2 files changed, 24 insertions, 13 deletions
diff --git a/cuda/3d/cone_bp.cu b/cuda/3d/cone_bp.cu index 4f01d25..7c3fc8d 100644 --- a/cuda/3d/cone_bp.cu +++ b/cuda/3d/cone_bp.cu @@ -41,8 +41,7 @@ static texture3D gT_coneProjTexture; namespace astraCUDA3d { -#define ZSIZE 6 -static const unsigned int g_volBlockZ = ZSIZE; +static const unsigned int g_volBlockZ = 6; static const unsigned int g_anglesPerBlock = 32; static const unsigned int g_volBlockX = 16; @@ -77,7 +76,7 @@ bool bindProjDataTexture(const cudaArray* array) //__launch_bounds__(32*16, 4) -template<bool FDKWEIGHT> +template<bool FDKWEIGHT, unsigned int ZSIZE> __global__ void dev_cone_BP(void* D_volData, unsigned int volPitch, int startAngle, int angleOffset, const astraCUDA3d::SDimensions3D dims, float fOutputScale) @@ -342,11 +341,19 @@ bool ConeBP_Array(cudaPitchedPtr D_volumeData, for (unsigned int i = 0; i < angleCount; i += g_anglesPerBlock) { // printf("Calling BP: %d, %dx%d, %dx%d to %p\n", i, dimBlock.x, dimBlock.y, dimGrid.x, dimGrid.y, (void*)D_volumeData.ptr); - if (params.bFDKWeighting) - dev_cone_BP<true><<<dimGrid, dimBlock>>>(D_volumeData.ptr, D_volumeData.pitch/sizeof(float), i, th, dims, fOutputScale); - else if (params.iRaysPerVoxelDim == 1) - dev_cone_BP<false><<<dimGrid, dimBlock>>>(D_volumeData.ptr, D_volumeData.pitch/sizeof(float), i, th, dims, fOutputScale); - else + if (params.bFDKWeighting) { + if (dims.iVolZ == 1) { + dev_cone_BP<true, 1><<<dimGrid, dimBlock>>>(D_volumeData.ptr, D_volumeData.pitch/sizeof(float), i, th, dims, fOutputScale); + } else { + dev_cone_BP<true, g_volBlockZ><<<dimGrid, dimBlock>>>(D_volumeData.ptr, D_volumeData.pitch/sizeof(float), i, th, dims, fOutputScale); + } + } else if (params.iRaysPerVoxelDim == 1) { + if (dims.iVolZ == 1) { + dev_cone_BP<false, 1><<<dimGrid, dimBlock>>>(D_volumeData.ptr, D_volumeData.pitch/sizeof(float), i, th, dims, fOutputScale); + } else { + dev_cone_BP<false, g_volBlockZ><<<dimGrid, dimBlock>>>(D_volumeData.ptr, D_volumeData.pitch/sizeof(float), i, th, dims, fOutputScale); + } + } else dev_cone_BP_SS<<<dimGrid, dimBlock>>>(D_volumeData.ptr, D_volumeData.pitch/sizeof(float), i, th, dims, params.iRaysPerVoxelDim, fOutputScale); } diff --git a/cuda/3d/par3d_bp.cu b/cuda/3d/par3d_bp.cu index 004fc99..d356b9f 100644 --- a/cuda/3d/par3d_bp.cu +++ b/cuda/3d/par3d_bp.cu @@ -41,8 +41,7 @@ static texture3D gT_par3DProjTexture; namespace astraCUDA3d { -#define ZSIZE 6 -static const unsigned int g_volBlockZ = ZSIZE; +static const unsigned int g_volBlockZ = 6; static const unsigned int g_anglesPerBlock = 32; static const unsigned int g_volBlockX = 16; @@ -77,6 +76,7 @@ static bool bindProjDataTexture(const cudaArray* array) } +template<unsigned int ZSIZE> __global__ void dev_par3D_BP(void* D_volData, unsigned int volPitch, int startAngle, int angleOffset, const SDimensions3D dims, float fOutputScale) { float* volData = (float*)D_volData; @@ -281,9 +281,13 @@ bool Par3DBP_Array(cudaPitchedPtr D_volumeData, for (unsigned int i = 0; i < angleCount; i += g_anglesPerBlock) { // printf("Calling BP: %d, %dx%d, %dx%d to %p\n", i, dimBlock.x, dimBlock.y, dimGrid.x, dimGrid.y, (void*)D_volumeData.ptr); - if (params.iRaysPerVoxelDim == 1) - dev_par3D_BP<<<dimGrid, dimBlock>>>(D_volumeData.ptr, D_volumeData.pitch/sizeof(float), i, th, dims, fOutputScale); - else + if (params.iRaysPerVoxelDim == 1) { + if (dims.iVolZ == 1) { + dev_par3D_BP<1><<<dimGrid, dimBlock>>>(D_volumeData.ptr, D_volumeData.pitch/sizeof(float), i, th, dims, fOutputScale); + } else { + dev_par3D_BP<g_volBlockZ><<<dimGrid, dimBlock>>>(D_volumeData.ptr, D_volumeData.pitch/sizeof(float), i, th, dims, fOutputScale); + } + } else dev_par3D_BP_SS<<<dimGrid, dimBlock>>>(D_volumeData.ptr, D_volumeData.pitch/sizeof(float), i, th, dims, params.iRaysPerVoxelDim, fOutputScale); } |