diff options
-rw-r--r-- | cuda/3d/cone_fp.cu | 4 | ||||
-rw-r--r-- | cuda/3d/par3d_fp.cu | 7 |
2 files changed, 11 insertions, 0 deletions
diff --git a/cuda/3d/cone_fp.cu b/cuda/3d/cone_fp.cu index fede53b..2c3d1f6 100644 --- a/cuda/3d/cone_fp.cu +++ b/cuda/3d/cone_fp.cu @@ -169,6 +169,8 @@ __global__ void cone_FP_t(float* D_projData, unsigned int projPitch, const float fDetSZ = gC_DetSZ[angle] + 0.5f * fDetUZ + 0.5f * fDetVZ; const int detectorU = (blockIdx.x%((dims.iProjU+g_detBlockU-1)/g_detBlockU)) * g_detBlockU + threadIdx.x; + if (detectorU >= dims.iProjU) + return; const int startDetectorV = (blockIdx.x/((dims.iProjU+g_detBlockU-1)/g_detBlockU)) * g_detBlockV; int endDetectorV = startDetectorV + g_detBlockV; if (endDetectorV > dims.iProjV) @@ -245,6 +247,8 @@ __global__ void cone_FP_SS_t(float* D_projData, unsigned int projPitch, const float fDetSZ = gC_DetSZ[angle] + 0.5f * fDetUZ + 0.5f * fDetVZ; const int detectorU = (blockIdx.x%((dims.iProjU+g_detBlockU-1)/g_detBlockU)) * g_detBlockU + threadIdx.x; + if (detectorU >= dims.iProjU) + return; const int startDetectorV = (blockIdx.x/((dims.iProjU+g_detBlockU-1)/g_detBlockU)) * g_detBlockV; int endDetectorV = startDetectorV + g_detBlockV; if (endDetectorV > dims.iProjV) diff --git a/cuda/3d/par3d_fp.cu b/cuda/3d/par3d_fp.cu index cf8336c..e1c82c3 100644 --- a/cuda/3d/par3d_fp.cu +++ b/cuda/3d/par3d_fp.cu @@ -175,6 +175,8 @@ __global__ void par3D_FP_t(float* D_projData, unsigned int projPitch, const int detectorU = (blockIdx.x%((dims.iProjU+g_detBlockU-1)/g_detBlockU)) * g_detBlockU + threadIdx.x; + if (detectorU >= dims.iProjU) + return; const int startDetectorV = (blockIdx.x/((dims.iProjU+g_detBlockU-1)/g_detBlockU)) * g_detBlockV; int endDetectorV = startDetectorV + g_detBlockV; if (endDetectorV > dims.iProjV) @@ -251,7 +253,10 @@ __global__ void par3D_FP_SS_t(float* D_projData, unsigned int projPitch, const float a2 = c.c2(fRayX,fRayY,fRayZ) / c.c0(fRayX,fRayY,fRayZ); const float fDistCorr = sc.scale(a1, a2); + const int detectorU = (blockIdx.x%((dims.iProjU+g_detBlockU-1)/g_detBlockU)) * g_detBlockU + threadIdx.x; + if (detectorU >= dims.iProjU) + return; const int startDetectorV = (blockIdx.x/((dims.iProjU+g_detBlockU-1)/g_detBlockU)) * g_detBlockV; int endDetectorV = startDetectorV + g_detBlockV; if (endDetectorV > dims.iProjV) @@ -359,6 +364,8 @@ __global__ void par3D_FP_SumSqW_t(float* D_projData, unsigned int projPitch, const int detectorU = (blockIdx.x%((dims.iProjU+g_detBlockU-1)/g_detBlockU)) * g_detBlockU + threadIdx.x; + if (detectorU >= dims.iProjU) + return; const int startDetectorV = (blockIdx.x/((dims.iProjU+g_detBlockU-1)/g_detBlockU)) * g_detBlockV; int endDetectorV = startDetectorV + g_detBlockV; if (endDetectorV > dims.iProjV) |