summary | refs | log | tree | commit | diff | stats
path: root/cuda
diff options
context:
space:
mode:
Diffstat (limited to 'cuda')
-rw-r--r-- cuda/3d/cone_fp.cu | 4
-rw-r--r-- cuda/3d/par3d_fp.cu | 7
2 files changed, 11 insertions, 0 deletions
diff --git a/cuda/3d/cone_fp.cu b/cuda/3d/cone_fp.cu
index fede53b..2c3d1f6 100644
--- a/cuda/3d/cone_fp.cu
+++ b/cuda/3d/cone_fp.cu
@@ -169,6 +169,8 @@ __global__ void cone_FP_t(float* D_projData, unsigned int projPitch,
const float fDetSZ = gC_DetSZ[angle] + 0.5f * fDetUZ + 0.5f * fDetVZ;
const int detectorU = (blockIdx.x%((dims.iProjU+g_detBlockU-1)/g_detBlockU)) * g_detBlockU + threadIdx.x;
+ if (detectorU >= dims.iProjU)
+ return;
const int startDetectorV = (blockIdx.x/((dims.iProjU+g_detBlockU-1)/g_detBlockU)) * g_detBlockV;
int endDetectorV = startDetectorV + g_detBlockV;
if (endDetectorV > dims.iProjV)
@@ -245,6 +247,8 @@ __global__ void cone_FP_SS_t(float* D_projData, unsigned int projPitch,
const float fDetSZ = gC_DetSZ[angle] + 0.5f * fDetUZ + 0.5f * fDetVZ;
const int detectorU = (blockIdx.x%((dims.iProjU+g_detBlockU-1)/g_detBlockU)) * g_detBlockU + threadIdx.x;
+ if (detectorU >= dims.iProjU)
+ return;
const int startDetectorV = (blockIdx.x/((dims.iProjU+g_detBlockU-1)/g_detBlockU)) * g_detBlockV;
int endDetectorV = startDetectorV + g_detBlockV;
if (endDetectorV > dims.iProjV)
diff --git a/cuda/3d/par3d_fp.cu b/cuda/3d/par3d_fp.cu
index cf8336c..e1c82c3 100644
--- a/cuda/3d/par3d_fp.cu
+++ b/cuda/3d/par3d_fp.cu
@@ -175,6 +175,8 @@ __global__ void par3D_FP_t(float* D_projData, unsigned int projPitch,
const int detectorU = (blockIdx.x%((dims.iProjU+g_detBlockU-1)/g_detBlockU)) * g_detBlockU + threadIdx.x;
+ if (detectorU >= dims.iProjU)
+ return;
const int startDetectorV = (blockIdx.x/((dims.iProjU+g_detBlockU-1)/g_detBlockU)) * g_detBlockV;
int endDetectorV = startDetectorV + g_detBlockV;
if (endDetectorV > dims.iProjV)
@@ -251,7 +253,10 @@ __global__ void par3D_FP_SS_t(float* D_projData, unsigned int projPitch,
const float a2 = c.c2(fRayX,fRayY,fRayZ) / c.c0(fRayX,fRayY,fRayZ);
const float fDistCorr = sc.scale(a1, a2);
+
const int detectorU = (blockIdx.x%((dims.iProjU+g_detBlockU-1)/g_detBlockU)) * g_detBlockU + threadIdx.x;
+ if (detectorU >= dims.iProjU)
+ return;
const int startDetectorV = (blockIdx.x/((dims.iProjU+g_detBlockU-1)/g_detBlockU)) * g_detBlockV;
int endDetectorV = startDetectorV + g_detBlockV;
if (endDetectorV > dims.iProjV)
@@ -359,6 +364,8 @@ __global__ void par3D_FP_SumSqW_t(float* D_projData, unsigned int projPitch,
const int detectorU = (blockIdx.x%((dims.iProjU+g_detBlockU-1)/g_detBlockU)) * g_detBlockU + threadIdx.x;
+ if (detectorU >= dims.iProjU)
+ return;
const int startDetectorV = (blockIdx.x/((dims.iProjU+g_detBlockU-1)/g_detBlockU)) * g_detBlockV;
int endDetectorV = startDetectorV + g_detBlockV;
if (endDetectorV > dims.iProjV)