diff options
author | Willem Jan Palenstijn <Willem.Jan.Palenstijn@cwi.nl> | 2016-02-05 17:17:17 +0100 |
---|---|---|
committer | Willem Jan Palenstijn <Willem.Jan.Palenstijn@cwi.nl> | 2016-04-18 11:49:16 +0200 |
commit | 5768229672bffc93b5158da4bef893f40fe76ea1 (patch) | |
tree | 6cd677e2335511b88331bc87367a1c155759126c /cuda/3d | |
parent | 9479de2c72d147adf7e57e84fe181408b9582bcd (diff) | |
download | astra-5768229672bffc93b5158da4bef893f40fe76ea1.tar.gz astra-5768229672bffc93b5158da4bef893f40fe76ea1.tar.bz2 astra-5768229672bffc93b5158da4bef893f40fe76ea1.tar.xz astra-5768229672bffc93b5158da4bef893f40fe76ea1.zip |
Move detector-independent vars out of loop
Diffstat (limited to 'cuda/3d')
-rw-r--r-- | cuda/3d/par3d_fp.cu | 26 |
1 files changed, 12 insertions, 14 deletions
diff --git a/cuda/3d/par3d_fp.cu b/cuda/3d/par3d_fp.cu index c6c4fda..6f9ce40 100644 --- a/cuda/3d/par3d_fp.cu +++ b/cuda/3d/par3d_fp.cu @@ -174,6 +174,9 @@ __global__ void par3D_FP_t(float* D_projData, unsigned int projPitch, const float fDetSY = gC_DetSY[angle] + 0.5f * fDetUY + 0.5f * fDetVY; const float fDetSZ = gC_DetSZ[angle] + 0.5f * fDetUZ + 0.5f * fDetVZ; + const float a1 = c.c1(fRayX,fRayY,fRayZ) / c.c0(fRayX,fRayY,fRayZ); + const float a2 = c.c2(fRayX,fRayY,fRayZ) / c.c0(fRayX,fRayY,fRayZ); + const float fDistCorr = sc.scale(a1, a2); const int detectorU = (blockIdx.x%((dims.iProjU+g_detBlockU-1)/g_detBlockU)) * g_detBlockU + threadIdx.x; @@ -199,13 +202,9 @@ __global__ void par3D_FP_t(float* D_projData, unsigned int projPitch, /* ray: (y) = (ay) * x + (by) */ /* (z) (az) (bz) */ - const float a1 = c.c1(fRayX,fRayY,fRayZ) / c.c0(fRayX,fRayY,fRayZ); - const float a2 = c.c2(fRayX,fRayY,fRayZ) / c.c0(fRayX,fRayY,fRayZ); const float b1 = c.c1(fDetX,fDetY,fDetZ) - a1 * c.c0(fDetX,fDetY,fDetZ); const float b2 = c.c2(fDetX,fDetY,fDetZ) - a2 * c.c0(fDetX,fDetY,fDetZ); - const float fDistCorr = sc.scale(a1, a2); - float fVal = 0.0f; float f0 = startSlice + 0.5f; @@ -253,7 +252,9 @@ __global__ void par3D_FP_SS_t(float* D_projData, unsigned int projPitch, const float fDetSY = gC_DetSY[angle] + 0.5f * fDetUY + 0.5f * fDetVY; const float fDetSZ = gC_DetSZ[angle] + 0.5f * fDetUZ + 0.5f * fDetVZ; - + const float a1 = c.c1(fRayX,fRayY,fRayZ) / c.c0(fRayX,fRayY,fRayZ); + const float a2 = c.c2(fRayX,fRayY,fRayZ) / c.c0(fRayX,fRayY,fRayZ); + const float fDistCorr = sc.scale(a1, a2); const int detectorU = (blockIdx.x%((dims.iProjU+g_detBlockU-1)/g_detBlockU)) * g_detBlockU + threadIdx.x; const int startDetectorV = (blockIdx.x/((dims.iProjU+g_detBlockU-1)/g_detBlockU)) * g_detBlockV; @@ -288,12 +289,9 @@ __global__ void par3D_FP_SS_t(float* D_projData, unsigned int projPitch, /* ray: (y) = (ay) * x + (by) */ /* (z) (az) (bz) */ - const float a1 = c.c1(fRayX,fRayY,fRayZ) / c.c0(fRayX,fRayY,fRayZ); - const float a2 = c.c2(fRayX,fRayY,fRayZ) / c.c0(fRayX,fRayY,fRayZ); const float b1 = c.c1(fDetX,fDetY,fDetZ) - a1 * c.c0(fDetX,fDetY,fDetZ); const float b2 = c.c2(fDetX,fDetY,fDetZ) - a2 * c.c0(fDetX,fDetY,fDetZ); - const float fDistCorr = sc.scale(a1, a2); float fVal = 0.0f; @@ -309,12 +307,12 @@ __global__ void par3D_FP_SS_t(float* D_projData, unsigned int projPitch, f2 += a2; } - fVal *= fDistCorr; fV += fVal; } } + fV *= fDistCorr; D_projData[(detectorV*dims.iProjAngles+angle)*projPitch+detectorU] += fV / (iRaysPerDetDim * iRaysPerDetDim); } } @@ -360,6 +358,9 @@ __global__ void par3D_FP_SumSqW_t(float* D_projData, unsigned int projPitch, const float fDetSY = gC_DetSY[angle] + 0.5f * fDetUY + 0.5f * fDetVY; const float fDetSZ = gC_DetSZ[angle] + 0.5f * fDetUZ + 0.5f * fDetVZ; + const float a1 = c.c1(fRayX,fRayY,fRayZ) / c.c0(fRayX,fRayY,fRayZ); + const float a2 = c.c2(fRayX,fRayY,fRayZ) / c.c0(fRayX,fRayY,fRayZ); + const float fDistCorr = sc.scale(a1, a2); const int detectorU = (blockIdx.x%((dims.iProjU+g_detBlockU-1)/g_detBlockU)) * g_detBlockU + threadIdx.x; @@ -385,13 +386,9 @@ __global__ void par3D_FP_SumSqW_t(float* D_projData, unsigned int projPitch, /* ray: (y) = (ay) * x + (by) */ /* (z) (az) (bz) */ - const float a1 = c.c1(fRayX,fRayY,fRayZ) / c.c0(fRayX,fRayY,fRayZ); - const float a2 = c.c2(fRayX,fRayY,fRayZ) / c.c0(fRayX,fRayY,fRayZ); const float b1 = c.c1(fDetX,fDetY,fDetZ) - a1 * c.c0(fDetX,fDetY,fDetZ); const float b2 = c.c2(fDetX,fDetY,fDetZ) - a2 * c.c0(fDetX,fDetY,fDetZ); - const float fDistCorr = sc.scale(a1, a2); - float fVal = 0.0f; float f0 = startSlice + 0.5f; @@ -400,12 +397,13 @@ __global__ void par3D_FP_SumSqW_t(float* D_projData, unsigned int projPitch, for (int s = startSlice; s < endSlice; ++s) { - fVal += dirWeights(f1, c.nDim1(dims)) * dirWeights(f2, c.nDim2(dims)) * fDistCorr * fDistCorr; + fVal += dirWeights(f1, c.nDim1(dims)) * dirWeights(f2, c.nDim2(dims)); f0 += 1.0f; f1 += a1; f2 += a2; } + fVal *= fDistCorr * fDistCorr; D_projData[(detectorV*dims.iProjAngles+angle)*projPitch+detectorU] += fVal; } } |