Fix non-padded GPULink memory handling in FP3D kernels

This would fail silently if the output projection data object was not padded to a multiple of 32 pixels, potentially corrupting the start of projection rows. 3D GPU memory allocated by ASTRA itself is always padded by cudaMalloc3D and therefore not affected. GPULink allows bypassing this, possibly triggering this bug.
author: Willem Jan Palenstijn <Willem.Jan.Palenstijn@cwi.nl> 2021-06-02 11:44:01 +0200
committer: Willem Jan Palenstijn <Willem.Jan.Palenstijn@cwi.nl> 2021-11-16 17:30:12 +0100
commit: e99c6a75bada269381b247c555786dda8b390b7a (patch)
tree: ec21ba3db1dc7da5b7eeef5621b9a54e72dbf868
parent: ea9703e63e9d3976e89bc1d81bdd1ec3e76b68b6 (diff)
download: astra-e99c6a75bada269381b247c555786dda8b390b7a.tar.gz
astra-e99c6a75bada269381b247c555786dda8b390b7a.tar.bz2
astra-e99c6a75bada269381b247c555786dda8b390b7a.tar.xz
astra-e99c6a75bada269381b247c555786dda8b390b7a.zip
2 files changed, 11 insertions, 0 deletions
diff --git a/cuda/3d/cone_fp.cu b/cuda/3d/cone_fp.cu
index fede53b..2c3d1f6 100644
--- a/cuda/3d/cone_fp.cu
+++ b/cuda/3d/cone_fp.cu
@@ -169,6 +169,8 @@ __global__ void cone_FP_t(float* D_projData, unsigned int projPitch,
 	const float fDetSZ = gC_DetSZ[angle] + 0.5f * fDetUZ + 0.5f * fDetVZ;
 
 	const int detectorU = (blockIdx.x%((dims.iProjU+g_detBlockU-1)/g_detBlockU)) * g_detBlockU + threadIdx.x;
+	if (detectorU >= dims.iProjU)
+		return;
 	const int startDetectorV = (blockIdx.x/((dims.iProjU+g_detBlockU-1)/g_detBlockU)) * g_detBlockV;
 	int endDetectorV = startDetectorV + g_detBlockV;
 	if (endDetectorV > dims.iProjV)
@@ -245,6 +247,8 @@ __global__ void cone_FP_SS_t(float* D_projData, unsigned int projPitch,
 	const float fDetSZ = gC_DetSZ[angle] + 0.5f * fDetUZ + 0.5f * fDetVZ;
 
 	const int detectorU = (blockIdx.x%((dims.iProjU+g_detBlockU-1)/g_detBlockU)) * g_detBlockU + threadIdx.x;
+	if (detectorU >= dims.iProjU)
+		return;
 	const int startDetectorV = (blockIdx.x/((dims.iProjU+g_detBlockU-1)/g_detBlockU)) * g_detBlockV;
 	int endDetectorV = startDetectorV + g_detBlockV;
 	if (endDetectorV > dims.iProjV)
diff --git a/cuda/3d/par3d_fp.cu b/cuda/3d/par3d_fp.cu
index cf8336c..e1c82c3 100644
--- a/cuda/3d/par3d_fp.cu
+++ b/cuda/3d/par3d_fp.cu
@@ -175,6 +175,8 @@ __global__ void par3D_FP_t(float* D_projData, unsigned int projPitch,
 
 
 	const int detectorU = (blockIdx.x%((dims.iProjU+g_detBlockU-1)/g_detBlockU)) * g_detBlockU + threadIdx.x;
+	if (detectorU >= dims.iProjU)
+		return;
 	const int startDetectorV = (blockIdx.x/((dims.iProjU+g_detBlockU-1)/g_detBlockU)) * g_detBlockV;
 	int endDetectorV = startDetectorV + g_detBlockV;
 	if (endDetectorV > dims.iProjV)
@@ -251,7 +253,10 @@ __global__ void par3D_FP_SS_t(float* D_projData, unsigned int projPitch,
 	const float a2 = c.c2(fRayX,fRayY,fRayZ) / c.c0(fRayX,fRayY,fRayZ);
 	const float fDistCorr = sc.scale(a1, a2);
 
+
 	const int detectorU = (blockIdx.x%((dims.iProjU+g_detBlockU-1)/g_detBlockU)) * g_detBlockU + threadIdx.x;
+	if (detectorU >= dims.iProjU)
+		return;
 	const int startDetectorV = (blockIdx.x/((dims.iProjU+g_detBlockU-1)/g_detBlockU)) * g_detBlockV;
 	int endDetectorV = startDetectorV + g_detBlockV;
 	if (endDetectorV > dims.iProjV)
@@ -359,6 +364,8 @@ __global__ void par3D_FP_SumSqW_t(float* D_projData, unsigned int projPitch,
 
 
 	const int detectorU = (blockIdx.x%((dims.iProjU+g_detBlockU-1)/g_detBlockU)) * g_detBlockU + threadIdx.x;
+	if (detectorU >= dims.iProjU)
+		return;
 	const int startDetectorV = (blockIdx.x/((dims.iProjU+g_detBlockU-1)/g_detBlockU)) * g_detBlockV;
 	int endDetectorV = startDetectorV + g_detBlockV;
 	if (endDetectorV > dims.iProjV)
author	Willem Jan Palenstijn <Willem.Jan.Palenstijn@cwi.nl>	2021-06-02 11:44:01 +0200
committer	Willem Jan Palenstijn <Willem.Jan.Palenstijn@cwi.nl>	2021-11-16 17:30:12 +0100
commit	e99c6a75bada269381b247c555786dda8b390b7a (patch)
tree	ec21ba3db1dc7da5b7eeef5621b9a54e72dbf868
parent	ea9703e63e9d3976e89bc1d81bdd1ec3e76b68b6 (diff)
download	astra-e99c6a75bada269381b247c555786dda8b390b7a.tar.gz astra-e99c6a75bada269381b247c555786dda8b390b7a.tar.bz2 astra-e99c6a75bada269381b247c555786dda8b390b7a.tar.xz astra-e99c6a75bada269381b247c555786dda8b390b7a.zip