summary refs log tree commit diff stats
path: root/cuda/3d/cone_bp.cu
diff options
context:
space:
mode:
author Willem Jan Palenstijn <Willem.Jan.Palenstijn@cwi.nl> 2016-10-07 16:36:28 +0200
committer Willem Jan Palenstijn <Willem.Jan.Palenstijn@cwi.nl> 2016-10-07 16:36:28 +0200
commit f6aa2db83dfea89f9d2cfc6fcbd3da141ee77e02 (patch)
tree 6de18fc05a9ee0cf033675aadfd8514a97d9e7d1 /cuda/3d/cone_bp.cu
parent 741675b458bf23cf437a6c56f95483c5c66af774 (diff)
parent 8f2b55a66db9747419e75dae5973281a7536b934 (diff)
download astra-f6aa2db83dfea89f9d2cfc6fcbd3da141ee77e02.tar.gz
astra-f6aa2db83dfea89f9d2cfc6fcbd3da141ee77e02.tar.bz2
astra-f6aa2db83dfea89f9d2cfc6fcbd3da141ee77e02.tar.xz
astra-f6aa2db83dfea89f9d2cfc6fcbd3da141ee77e02.zip
Merge branch 'master' into parallel_vec
Diffstat (limited to 'cuda/3d/cone_bp.cu')
-rw-r--r-- cuda/3d/cone_bp.cu 40
1 files changed, 24 insertions, 16 deletions
diff --git a/cuda/3d/cone_bp.cu b/cuda/3d/cone_bp.cu
index 4a41f6a..6bd9d16 100644
--- a/cuda/3d/cone_bp.cu
+++ b/cuda/3d/cone_bp.cu
@@ -77,6 +77,7 @@ bool bindProjDataTexture(const cudaArray* array)
//__launch_bounds__(32*16, 4)
+template<bool FDKWEIGHT>
__global__ void dev_cone_BP(void* D_volData, unsigned int volPitch, int startAngle,
int angleOffset, const astraCUDA3d::SDimensions3D dims,
float fOutputScale)
@@ -134,7 +135,10 @@ __global__ void dev_cone_BP(void* D_volData, unsigned int volPitch, int startAng
fU = fUNum * fr;
fV = fVNum * fr;
float fVal = tex3D(gT_coneProjTexture, fU, fAngle, fV);
- Z[idx] += fVal; // fr*fr*fVal;
+ if (FDKWEIGHT)
+ Z[idx] += fr*fr*fVal;
+ else
+ Z[idx] += fVal;
fUNum += fCu.z;
fVNum += fCv.z;
@@ -154,7 +158,7 @@ __global__ void dev_cone_BP(void* D_volData, unsigned int volPitch, int startAng
// supersampling version
-__global__ void dev_cone_BP_SS(void* D_volData, unsigned int volPitch, int startAngle, int angleOffset, const SDimensions3D dims, float fOutputScale)
+__global__ void dev_cone_BP_SS(void* D_volData, unsigned int volPitch, int startAngle, int angleOffset, const SDimensions3D dims, int iRaysPerVoxelDim, float fOutputScale)
{
float* volData = (float*)D_volData;
@@ -185,12 +189,12 @@ __global__ void dev_cone_BP_SS(void* D_volData, unsigned int volPitch, int start
if (endZ > dims.iVolZ)
endZ = dims.iVolZ;
- float fX = X - 0.5f*dims.iVolX + 0.5f - 0.5f + 0.5f/dims.iRaysPerVoxelDim;
- float fY = Y - 0.5f*dims.iVolY + 0.5f - 0.5f + 0.5f/dims.iRaysPerVoxelDim;
- float fZ = startZ - 0.5f*dims.iVolZ + 0.5f - 0.5f + 0.5f/dims.iRaysPerVoxelDim;
- const float fSubStep = 1.0f/dims.iRaysPerVoxelDim;
+ float fX = X - 0.5f*dims.iVolX + 0.5f - 0.5f + 0.5f/iRaysPerVoxelDim;
+ float fY = Y - 0.5f*dims.iVolY + 0.5f - 0.5f + 0.5f/iRaysPerVoxelDim;
+ float fZ = startZ - 0.5f*dims.iVolZ + 0.5f - 0.5f + 0.5f/iRaysPerVoxelDim;
+ const float fSubStep = 1.0f/iRaysPerVoxelDim;
- fOutputScale /= (dims.iRaysPerVoxelDim*dims.iRaysPerVoxelDim*dims.iRaysPerVoxelDim);
+ fOutputScale /= (iRaysPerVoxelDim*iRaysPerVoxelDim*iRaysPerVoxelDim);
for (int Z = startZ; Z < endZ; ++Z, fZ += 1.0f)
@@ -216,11 +220,11 @@ __global__ void dev_cone_BP_SS(void* D_volData, unsigned int volPitch, int start
const float fCdc = gC_C[12*angle+11];
float fXs = fX;
- for (int iSubX = 0; iSubX < dims.iRaysPerVoxelDim; ++iSubX) {
+ for (int iSubX = 0; iSubX < iRaysPerVoxelDim; ++iSubX) {
float fYs = fY;
- for (int iSubY = 0; iSubY < dims.iRaysPerVoxelDim; ++iSubY) {
+ for (int iSubY = 0; iSubY < iRaysPerVoxelDim; ++iSubY) {
float fZs = fZ;
- for (int iSubZ = 0; iSubZ < dims.iRaysPerVoxelDim; ++iSubZ) {
+ for (int iSubZ = 0; iSubZ < iRaysPerVoxelDim; ++iSubZ) {
const float fUNum = fCuc + fXs * fCux + fYs * fCuy + fZs * fCuz;
const float fVNum = fCvc + fXs * fCvx + fYs * fCvy + fZs * fCvz;
@@ -248,10 +252,12 @@ __global__ void dev_cone_BP_SS(void* D_volData, unsigned int volPitch, int start
bool ConeBP_Array(cudaPitchedPtr D_volumeData,
cudaArray *D_projArray,
const SDimensions3D& dims, const SConeProjection* angles,
- float fOutputScale)
+ const SProjectorParams3D& params)
{
bindProjDataTexture(D_projArray);
+ float fOutputScale = params.fOutputScale * params.fVolScaleX * params.fVolScaleY * params.fVolScaleZ;
+
for (unsigned int th = 0; th < dims.iProjAngles; th += g_MaxAngles) {
unsigned int angleCount = g_MaxAngles;
if (th + angleCount > dims.iProjAngles)
@@ -295,10 +301,12 @@ bool ConeBP_Array(cudaPitchedPtr D_volumeData,
for (unsigned int i = 0; i < angleCount; i += g_anglesPerBlock) {
// printf("Calling BP: %d, %dx%d, %dx%d to %p\n", i, dimBlock.x, dimBlock.y, dimGrid.x, dimGrid.y, (void*)D_volumeData.ptr);
- if (dims.iRaysPerVoxelDim == 1)
- dev_cone_BP<<<dimGrid, dimBlock>>>(D_volumeData.ptr, D_volumeData.pitch/sizeof(float), i, th, dims, fOutputScale);
+ if (params.bFDKWeighting)
+ dev_cone_BP<true><<<dimGrid, dimBlock>>>(D_volumeData.ptr, D_volumeData.pitch/sizeof(float), i, th, dims, fOutputScale);
+ else if (params.iRaysPerVoxelDim == 1)
+ dev_cone_BP<false><<<dimGrid, dimBlock>>>(D_volumeData.ptr, D_volumeData.pitch/sizeof(float), i, th, dims, fOutputScale);
else
- dev_cone_BP_SS<<<dimGrid, dimBlock>>>(D_volumeData.ptr, D_volumeData.pitch/sizeof(float), i, th, dims, fOutputScale);
+ dev_cone_BP_SS<<<dimGrid, dimBlock>>>(D_volumeData.ptr, D_volumeData.pitch/sizeof(float), i, th, dims, params.iRaysPerVoxelDim, fOutputScale);
}
cudaTextForceKernelsCompletion();
@@ -315,14 +323,14 @@ bool ConeBP_Array(cudaPitchedPtr D_volumeData,
bool ConeBP(cudaPitchedPtr D_volumeData,
cudaPitchedPtr D_projData,
const SDimensions3D& dims, const SConeProjection* angles,
- float fOutputScale)
+ const SProjectorParams3D& params)
{
// transfer projections to array
cudaArray* cuArray = allocateProjectionArray(dims);
transferProjectionsToArray(D_projData, cuArray, dims);
- bool ret = ConeBP_Array(D_volumeData, cuArray, dims, angles, fOutputScale);
+ bool ret = ConeBP_Array(D_volumeData, cuArray, dims, angles, params);
cudaFreeArray(cuArray);