From 559d3e599b7306e2de64f2a584d72bc5c98b692b Mon Sep 17 00:00:00 2001 From: Willem Jan Palenstijn Date: Thu, 4 Feb 2016 13:56:06 +0100 Subject: Refactor CUDA projector params into struct --- cuda/3d/par3d_fp.cu | 48 ++++++++++++++++++++++++------------------------ 1 file changed, 24 insertions(+), 24 deletions(-) (limited to 'cuda/3d/par3d_fp.cu') diff --git a/cuda/3d/par3d_fp.cu b/cuda/3d/par3d_fp.cu index 3ce3d42..fde9c2c 100644 --- a/cuda/3d/par3d_fp.cu +++ b/cuda/3d/par3d_fp.cu @@ -218,7 +218,7 @@ template __global__ void par3D_FP_SS_t(float* D_projData, unsigned int projPitch, unsigned int startSlice, unsigned int startAngle, unsigned int endAngle, - const SDimensions3D dims, float fOutputScale) + const SDimensions3D dims, int iRaysPerDetDim, float fOutputScale) { COORD c; @@ -251,7 +251,7 @@ __global__ void par3D_FP_SS_t(float* D_projData, unsigned int projPitch, if (endSlice > c.nSlices(dims)) endSlice = c.nSlices(dims); - const float fSubStep = 1.0f/dims.iRaysPerDetDim; + const float fSubStep = 1.0f/iRaysPerDetDim; for (int detectorV = startDetectorV; detectorV < endDetectorV; ++detectorV) { @@ -259,9 +259,9 @@ __global__ void par3D_FP_SS_t(float* D_projData, unsigned int projPitch, float fV = 0.0f; float fdU = detectorU - 0.5f + 0.5f*fSubStep; - for (int iSubU = 0; iSubU < dims.iRaysPerDetDim; ++iSubU, fdU+=fSubStep) { + for (int iSubU = 0; iSubU < iRaysPerDetDim; ++iSubU, fdU+=fSubStep) { float fdV = detectorV - 0.5f + 0.5f*fSubStep; - for (int iSubV = 0; iSubV < dims.iRaysPerDetDim; ++iSubV, fdV+=fSubStep) { + for (int iSubV = 0; iSubV < iRaysPerDetDim; ++iSubV, fdV+=fSubStep) { /* Trace ray in direction Ray to (detectorU,detectorV) from */ /* X = startSlice to X = endSlice */ @@ -301,7 +301,7 @@ __global__ void par3D_FP_SS_t(float* D_projData, unsigned int projPitch, } } - D_projData[(detectorV*dims.iProjAngles+angle)*projPitch+detectorU] += fV / (dims.iRaysPerDetDim * dims.iRaysPerDetDim); + D_projData[(detectorV*dims.iProjAngles+angle)*projPitch+detectorU] += fV / (iRaysPerDetDim * iRaysPerDetDim); } } @@ -401,7 +401,7 @@ __global__ void par3D_FP_SumSqW_t(float* D_projData, unsigned int projPitch, bool Par3DFP_Array_internal(cudaPitchedPtr D_projData, const SDimensions3D& dims, unsigned int angleCount, const SPar3DProjection* angles, - float fOutputScale) + const SProjectorParams3D& params) { // transfer angles to constant memory float* tmp = new float[dims.iProjAngles]; @@ -473,22 +473,22 @@ bool Par3DFP_Array_internal(cudaPitchedPtr D_projData, if (blockDirection == 0) { for (unsigned int i = 0; i < dims.iVolX; i += g_blockSlices) - if (dims.iRaysPerDetDim == 1) - par3D_FP_t<<>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, fOutputScale); + if (params.iRaysPerDetDim == 1) + par3D_FP_t<<>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, params.fOutputScale); else - par3D_FP_SS_t<<>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, fOutputScale); + par3D_FP_SS_t<<>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, params.iRaysPerDetDim, params.fOutputScale); } else if (blockDirection == 1) { for (unsigned int i = 0; i < dims.iVolY; i += g_blockSlices) - if (dims.iRaysPerDetDim == 1) - par3D_FP_t<<>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, fOutputScale); + if (params.iRaysPerDetDim == 1) + par3D_FP_t<<>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, params.fOutputScale); else - par3D_FP_SS_t<<>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, fOutputScale); + par3D_FP_SS_t<<>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, params.iRaysPerDetDim, params.fOutputScale); } else if (blockDirection == 2) { for (unsigned int i = 0; i < dims.iVolZ; i += g_blockSlices) - if (dims.iRaysPerDetDim == 1) - par3D_FP_t<<>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, fOutputScale); + if (params.iRaysPerDetDim == 1) + par3D_FP_t<<>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, params.fOutputScale); else - par3D_FP_SS_t<<>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, fOutputScale); + par3D_FP_SS_t<<>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, params.iRaysPerDetDim, params.fOutputScale); } } @@ -514,7 +514,7 @@ bool Par3DFP_Array_internal(cudaPitchedPtr D_projData, bool Par3DFP(cudaPitchedPtr D_volumeData, cudaPitchedPtr D_projData, const SDimensions3D& dims, const SPar3DProjection* angles, - float fOutputScale) + const SProjectorParams3D& params) { // transfer volume to array cudaArray* cuArray = allocateVolumeArray(dims); @@ -533,7 +533,7 @@ bool Par3DFP(cudaPitchedPtr D_volumeData, ret = Par3DFP_Array_internal(D_subprojData, dims, iEndAngle - iAngle, angles + iAngle, - fOutputScale); + params); if (!ret) break; } @@ -548,7 +548,7 @@ bool Par3DFP(cudaPitchedPtr D_volumeData, bool Par3DFP_SumSqW(cudaPitchedPtr D_volumeData, cudaPitchedPtr D_projData, const SDimensions3D& dims, const SPar3DProjection* angles, - float fOutputScale) + const SProjectorParams3D& params) { // transfer angles to constant memory float* tmp = new float[dims.iProjAngles]; @@ -620,8 +620,8 @@ bool Par3DFP_SumSqW(cudaPitchedPtr D_volumeData, if (blockDirection == 0) { for (unsigned int i = 0; i < dims.iVolX; i += g_blockSlices) - if (dims.iRaysPerDetDim == 1) - par3D_FP_SumSqW_t<<>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, fOutputScale); + if (params.iRaysPerDetDim == 1) + par3D_FP_SumSqW_t<<>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, params.fOutputScale); else #if 0 par3D_FP_SS_SumSqW_dirX<<>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, fOutputScale); @@ -630,8 +630,8 @@ bool Par3DFP_SumSqW(cudaPitchedPtr D_volumeData, #endif } else if (blockDirection == 1) { for (unsigned int i = 0; i < dims.iVolY; i += g_blockSlices) - if (dims.iRaysPerDetDim == 1) - par3D_FP_SumSqW_t<<>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, fOutputScale); + if (params.iRaysPerDetDim == 1) + par3D_FP_SumSqW_t<<>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, params.fOutputScale); else #if 0 par3D_FP_SS_SumSqW_dirY<<>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, fOutputScale); @@ -640,8 +640,8 @@ bool Par3DFP_SumSqW(cudaPitchedPtr D_volumeData, #endif } else if (blockDirection == 2) { for (unsigned int i = 0; i < dims.iVolZ; i += g_blockSlices) - if (dims.iRaysPerDetDim == 1) - par3D_FP_SumSqW_t<<>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, fOutputScale); + if (params.iRaysPerDetDim == 1) + par3D_FP_SumSqW_t<<>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, params.fOutputScale); else #if 0 par3D_FP_SS_SumSqW_dirZ<<>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, fOutputScale); -- cgit v1.2.3 From 9479de2c72d147adf7e57e84fe181408b9582bcd Mon Sep 17 00:00:00 2001 From: Willem Jan Palenstijn Date: Fri, 5 Feb 2016 17:14:31 +0100 Subject: Add voxel-size scaling to par3d_fp --- cuda/3d/par3d_fp.cu | 108 ++++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 92 insertions(+), 16 deletions(-) (limited to 'cuda/3d/par3d_fp.cu') diff --git a/cuda/3d/par3d_fp.cu b/cuda/3d/par3d_fp.cu index fde9c2c..c6c4fda 100644 --- a/cuda/3d/par3d_fp.cu +++ b/cuda/3d/par3d_fp.cu @@ -128,6 +128,18 @@ struct DIR_Z { __device__ float z(float f0, float f1, float f2) const { return f0; } }; +struct SCALE_CUBE { + float fOutputScale; + __device__ float scale(float a1, float a2) const { return sqrt(a1*a1+a2*a2+1.0f) * fOutputScale; } +}; + +struct SCALE_NONCUBE { + float fScale1; + float fScale2; + float fOutputScale; + __device__ float scale(float a1, float a2) const { return sqrt(a1*a1*fScale1+a2*a2*fScale2+1.0f) * fOutputScale; } +}; + // threadIdx: x = u detector @@ -136,11 +148,12 @@ struct DIR_Z { // y = angle block -template +template __global__ void par3D_FP_t(float* D_projData, unsigned int projPitch, unsigned int startSlice, unsigned int startAngle, unsigned int endAngle, - const SDimensions3D dims, float fOutputScale) + const SDimensions3D dims, + SCALE sc) { COORD c; @@ -191,7 +204,7 @@ __global__ void par3D_FP_t(float* D_projData, unsigned int projPitch, const float b1 = c.c1(fDetX,fDetY,fDetZ) - a1 * c.c0(fDetX,fDetY,fDetZ); const float b2 = c.c2(fDetX,fDetY,fDetZ) - a2 * c.c0(fDetX,fDetY,fDetZ); - const float fDistCorr = sqrt(a1*a1+a2*a2+1.0f) * fOutputScale; + const float fDistCorr = sc.scale(a1, a2); float fVal = 0.0f; @@ -218,7 +231,8 @@ template __global__ void par3D_FP_SS_t(float* D_projData, unsigned int projPitch, unsigned int startSlice, unsigned int startAngle, unsigned int endAngle, - const SDimensions3D dims, int iRaysPerDetDim, float fOutputScale) + const SDimensions3D dims, int iRaysPerDetDim, + SCALE_NONCUBE sc) { COORD c; @@ -279,7 +293,7 @@ __global__ void par3D_FP_SS_t(float* D_projData, unsigned int projPitch, const float b1 = c.c1(fDetX,fDetY,fDetZ) - a1 * c.c0(fDetX,fDetY,fDetZ); const float b2 = c.c2(fDetX,fDetY,fDetZ) - a2 * c.c0(fDetX,fDetY,fDetZ); - const float fDistCorr = sqrt(a1*a1+a2*a2+1.0f) * fOutputScale; + const float fDistCorr = sc.scale(a1, a2); float fVal = 0.0f; @@ -324,7 +338,8 @@ template __global__ void par3D_FP_SumSqW_t(float* D_projData, unsigned int projPitch, unsigned int startSlice, unsigned int startAngle, unsigned int endAngle, - const SDimensions3D dims, float fOutputScale) + const SDimensions3D dims, + SCALE_NONCUBE sc) { COORD c; @@ -375,7 +390,7 @@ __global__ void par3D_FP_SumSqW_t(float* D_projData, unsigned int projPitch, const float b1 = c.c1(fDetX,fDetY,fDetZ) - a1 * c.c0(fDetX,fDetY,fDetZ); const float b2 = c.c2(fDetX,fDetY,fDetZ) - a2 * c.c0(fDetX,fDetY,fDetZ); - const float fDistCorr = sqrt(a1*a1+a2*a2+1.0f) * fOutputScale; + const float fDistCorr = sc.scale(a1, a2); float fVal = 0.0f; @@ -436,6 +451,36 @@ bool Par3DFP_Array_internal(cudaPitchedPtr D_projData, unsigned int blockEnd = 0; int blockDirection = 0; + bool cube = true; + if (abs(params.fVolScaleX / params.fVolScaleY - 1.0) > 0.00001) + cube = false; + if (abs(params.fVolScaleX / params.fVolScaleZ - 1.0) > 0.00001) + cube = false; + + SCALE_CUBE scube; + scube.fOutputScale = params.fOutputScale * params.fVolScaleX; + + SCALE_NONCUBE snoncubeX; + float fS1 = params.fVolScaleY / params.fVolScaleX; + snoncubeX.fScale1 = fS1 * fS1; + float fS2 = params.fVolScaleZ / params.fVolScaleX; + snoncubeX.fScale2 = fS2 * fS2; + snoncubeX.fOutputScale = params.fOutputScale * params.fVolScaleX; + + SCALE_NONCUBE snoncubeY; + fS1 = params.fVolScaleX / params.fVolScaleY; + snoncubeY.fScale1 = fS1 * fS1; + fS2 = params.fVolScaleY / params.fVolScaleY; + snoncubeY.fScale2 = fS2 * fS2; + snoncubeY.fOutputScale = params.fOutputScale * params.fVolScaleY; + + SCALE_NONCUBE snoncubeZ; + fS1 = params.fVolScaleX / params.fVolScaleZ; + snoncubeZ.fScale1 = fS1 * fS1; + fS2 = params.fVolScaleY / params.fVolScaleZ; + snoncubeZ.fScale2 = fS2 * fS2; + snoncubeZ.fOutputScale = params.fOutputScale * params.fVolScaleZ; + // timeval t; // tic(t); @@ -474,21 +519,30 @@ bool Par3DFP_Array_internal(cudaPitchedPtr D_projData, if (blockDirection == 0) { for (unsigned int i = 0; i < dims.iVolX; i += g_blockSlices) if (params.iRaysPerDetDim == 1) - par3D_FP_t<<>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, params.fOutputScale); + if (cube) + par3D_FP_t<<>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, scube); + else + par3D_FP_t<<>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, snoncubeX); else - par3D_FP_SS_t<<>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, params.iRaysPerDetDim, params.fOutputScale); + par3D_FP_SS_t<<>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, params.iRaysPerDetDim, snoncubeX); } else if (blockDirection == 1) { for (unsigned int i = 0; i < dims.iVolY; i += g_blockSlices) if (params.iRaysPerDetDim == 1) - par3D_FP_t<<>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, params.fOutputScale); + if (cube) + par3D_FP_t<<>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, scube); + else + par3D_FP_t<<>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, snoncubeY); else - par3D_FP_SS_t<<>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, params.iRaysPerDetDim, params.fOutputScale); + par3D_FP_SS_t<<>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, params.iRaysPerDetDim, snoncubeY); } else if (blockDirection == 2) { for (unsigned int i = 0; i < dims.iVolZ; i += g_blockSlices) if (params.iRaysPerDetDim == 1) - par3D_FP_t<<>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, params.fOutputScale); + if (cube) + par3D_FP_t<<>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, scube); + else + par3D_FP_t<<>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, snoncubeZ); else - par3D_FP_SS_t<<>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, params.iRaysPerDetDim, params.fOutputScale); + par3D_FP_SS_t<<>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, params.iRaysPerDetDim, snoncubeZ); } } @@ -583,6 +637,28 @@ bool Par3DFP_SumSqW(cudaPitchedPtr D_volumeData, unsigned int blockEnd = 0; int blockDirection = 0; + SCALE_NONCUBE snoncubeX; + float fS1 = params.fVolScaleY / params.fVolScaleX; + snoncubeX.fScale1 = fS1 * fS1; + float fS2 = params.fVolScaleZ / params.fVolScaleX; + snoncubeX.fScale2 = fS2 * fS2; + snoncubeX.fOutputScale = params.fOutputScale * params.fVolScaleX; + + SCALE_NONCUBE snoncubeY; + fS1 = params.fVolScaleX / params.fVolScaleY; + snoncubeY.fScale1 = fS1 * fS1; + fS2 = params.fVolScaleY / params.fVolScaleY; + snoncubeY.fScale2 = fS2 * fS2; + snoncubeY.fOutputScale = params.fOutputScale * params.fVolScaleY; + + SCALE_NONCUBE snoncubeZ; + fS1 = params.fVolScaleX / params.fVolScaleZ; + snoncubeZ.fScale1 = fS1 * fS1; + fS2 = params.fVolScaleY / params.fVolScaleZ; + snoncubeZ.fScale2 = fS2 * fS2; + snoncubeZ.fOutputScale = params.fOutputScale * params.fVolScaleZ; + + // timeval t; // tic(t); @@ -621,7 +697,7 @@ bool Par3DFP_SumSqW(cudaPitchedPtr D_volumeData, if (blockDirection == 0) { for (unsigned int i = 0; i < dims.iVolX; i += g_blockSlices) if (params.iRaysPerDetDim == 1) - par3D_FP_SumSqW_t<<>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, params.fOutputScale); + par3D_FP_SumSqW_t<<>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, snoncubeX); else #if 0 par3D_FP_SS_SumSqW_dirX<<>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, fOutputScale); @@ -631,7 +707,7 @@ bool Par3DFP_SumSqW(cudaPitchedPtr D_volumeData, } else if (blockDirection == 1) { for (unsigned int i = 0; i < dims.iVolY; i += g_blockSlices) if (params.iRaysPerDetDim == 1) - par3D_FP_SumSqW_t<<>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, params.fOutputScale); + par3D_FP_SumSqW_t<<>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, snoncubeY); else #if 0 par3D_FP_SS_SumSqW_dirY<<>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, fOutputScale); @@ -641,7 +717,7 @@ bool Par3DFP_SumSqW(cudaPitchedPtr D_volumeData, } else if (blockDirection == 2) { for (unsigned int i = 0; i < dims.iVolZ; i += g_blockSlices) if (params.iRaysPerDetDim == 1) - par3D_FP_SumSqW_t<<>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, params.fOutputScale); + par3D_FP_SumSqW_t<<>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, snoncubeZ); else #if 0 par3D_FP_SS_SumSqW_dirZ<<>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, fOutputScale); -- cgit v1.2.3 From 5768229672bffc93b5158da4bef893f40fe76ea1 Mon Sep 17 00:00:00 2001 From: Willem Jan Palenstijn Date: Fri, 5 Feb 2016 17:17:17 +0100 Subject: Move detector-independent vars out of loop --- cuda/3d/par3d_fp.cu | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) (limited to 'cuda/3d/par3d_fp.cu') diff --git a/cuda/3d/par3d_fp.cu b/cuda/3d/par3d_fp.cu index c6c4fda..6f9ce40 100644 --- a/cuda/3d/par3d_fp.cu +++ b/cuda/3d/par3d_fp.cu @@ -174,6 +174,9 @@ __global__ void par3D_FP_t(float* D_projData, unsigned int projPitch, const float fDetSY = gC_DetSY[angle] + 0.5f * fDetUY + 0.5f * fDetVY; const float fDetSZ = gC_DetSZ[angle] + 0.5f * fDetUZ + 0.5f * fDetVZ; + const float a1 = c.c1(fRayX,fRayY,fRayZ) / c.c0(fRayX,fRayY,fRayZ); + const float a2 = c.c2(fRayX,fRayY,fRayZ) / c.c0(fRayX,fRayY,fRayZ); + const float fDistCorr = sc.scale(a1, a2); const int detectorU = (blockIdx.x%((dims.iProjU+g_detBlockU-1)/g_detBlockU)) * g_detBlockU + threadIdx.x; @@ -199,13 +202,9 @@ __global__ void par3D_FP_t(float* D_projData, unsigned int projPitch, /* ray: (y) = (ay) * x + (by) */ /* (z) (az) (bz) */ - const float a1 = c.c1(fRayX,fRayY,fRayZ) / c.c0(fRayX,fRayY,fRayZ); - const float a2 = c.c2(fRayX,fRayY,fRayZ) / c.c0(fRayX,fRayY,fRayZ); const float b1 = c.c1(fDetX,fDetY,fDetZ) - a1 * c.c0(fDetX,fDetY,fDetZ); const float b2 = c.c2(fDetX,fDetY,fDetZ) - a2 * c.c0(fDetX,fDetY,fDetZ); - const float fDistCorr = sc.scale(a1, a2); - float fVal = 0.0f; float f0 = startSlice + 0.5f; @@ -253,7 +252,9 @@ __global__ void par3D_FP_SS_t(float* D_projData, unsigned int projPitch, const float fDetSY = gC_DetSY[angle] + 0.5f * fDetUY + 0.5f * fDetVY; const float fDetSZ = gC_DetSZ[angle] + 0.5f * fDetUZ + 0.5f * fDetVZ; - + const float a1 = c.c1(fRayX,fRayY,fRayZ) / c.c0(fRayX,fRayY,fRayZ); + const float a2 = c.c2(fRayX,fRayY,fRayZ) / c.c0(fRayX,fRayY,fRayZ); + const float fDistCorr = sc.scale(a1, a2); const int detectorU = (blockIdx.x%((dims.iProjU+g_detBlockU-1)/g_detBlockU)) * g_detBlockU + threadIdx.x; const int startDetectorV = (blockIdx.x/((dims.iProjU+g_detBlockU-1)/g_detBlockU)) * g_detBlockV; @@ -288,12 +289,9 @@ __global__ void par3D_FP_SS_t(float* D_projData, unsigned int projPitch, /* ray: (y) = (ay) * x + (by) */ /* (z) (az) (bz) */ - const float a1 = c.c1(fRayX,fRayY,fRayZ) / c.c0(fRayX,fRayY,fRayZ); - const float a2 = c.c2(fRayX,fRayY,fRayZ) / c.c0(fRayX,fRayY,fRayZ); const float b1 = c.c1(fDetX,fDetY,fDetZ) - a1 * c.c0(fDetX,fDetY,fDetZ); const float b2 = c.c2(fDetX,fDetY,fDetZ) - a2 * c.c0(fDetX,fDetY,fDetZ); - const float fDistCorr = sc.scale(a1, a2); float fVal = 0.0f; @@ -309,12 +307,12 @@ __global__ void par3D_FP_SS_t(float* D_projData, unsigned int projPitch, f2 += a2; } - fVal *= fDistCorr; fV += fVal; } } + fV *= fDistCorr; D_projData[(detectorV*dims.iProjAngles+angle)*projPitch+detectorU] += fV / (iRaysPerDetDim * iRaysPerDetDim); } } @@ -360,6 +358,9 @@ __global__ void par3D_FP_SumSqW_t(float* D_projData, unsigned int projPitch, const float fDetSY = gC_DetSY[angle] + 0.5f * fDetUY + 0.5f * fDetVY; const float fDetSZ = gC_DetSZ[angle] + 0.5f * fDetUZ + 0.5f * fDetVZ; + const float a1 = c.c1(fRayX,fRayY,fRayZ) / c.c0(fRayX,fRayY,fRayZ); + const float a2 = c.c2(fRayX,fRayY,fRayZ) / c.c0(fRayX,fRayY,fRayZ); + const float fDistCorr = sc.scale(a1, a2); const int detectorU = (blockIdx.x%((dims.iProjU+g_detBlockU-1)/g_detBlockU)) * g_detBlockU + threadIdx.x; @@ -385,13 +386,9 @@ __global__ void par3D_FP_SumSqW_t(float* D_projData, unsigned int projPitch, /* ray: (y) = (ay) * x + (by) */ /* (z) (az) (bz) */ - const float a1 = c.c1(fRayX,fRayY,fRayZ) / c.c0(fRayX,fRayY,fRayZ); - const float a2 = c.c2(fRayX,fRayY,fRayZ) / c.c0(fRayX,fRayY,fRayZ); const float b1 = c.c1(fDetX,fDetY,fDetZ) - a1 * c.c0(fDetX,fDetY,fDetZ); const float b2 = c.c2(fDetX,fDetY,fDetZ) - a2 * c.c0(fDetX,fDetY,fDetZ); - const float fDistCorr = sc.scale(a1, a2); - float fVal = 0.0f; float f0 = startSlice + 0.5f; @@ -400,12 +397,13 @@ __global__ void par3D_FP_SumSqW_t(float* D_projData, unsigned int projPitch, for (int s = startSlice; s < endSlice; ++s) { - fVal += dirWeights(f1, c.nDim1(dims)) * dirWeights(f2, c.nDim2(dims)) * fDistCorr * fDistCorr; + fVal += dirWeights(f1, c.nDim1(dims)) * dirWeights(f2, c.nDim2(dims)); f0 += 1.0f; f1 += a1; f2 += a2; } + fVal *= fDistCorr * fDistCorr; D_projData[(detectorV*dims.iProjAngles+angle)*projPitch+detectorU] += fVal; } } -- cgit v1.2.3