diff options
author | Willem Jan Palenstijn <Willem.Jan.Palenstijn@cwi.nl> | 2014-05-02 09:20:54 +0000 |
---|---|---|
committer | wpalenst <Willem.Jan.Palenstijn@cwi.nl> | 2014-05-02 09:20:54 +0000 |
commit | 1dd79f23f783564719a52de7d9b54b17005c32d7 (patch) | |
tree | 71e3c59863680e9da29a51359786d24c68787f1a /cuda | |
parent | 9dd746071621cf854171b985afbf375f19a5b726 (diff) | |
download | astra-1dd79f23f783564719a52de7d9b54b17005c32d7.tar.gz astra-1dd79f23f783564719a52de7d9b54b17005c32d7.tar.bz2 astra-1dd79f23f783564719a52de7d9b54b17005c32d7.tar.xz astra-1dd79f23f783564719a52de7d9b54b17005c32d7.zip |
Add SIRT-Weighted BP3D (par3d-only) for use in large BP
Diffstat (limited to 'cuda')
-rw-r--r-- | cuda/3d/arith3d.cu | 6 | ||||
-rw-r--r-- | cuda/3d/arith3d.h | 1 | ||||
-rw-r--r-- | cuda/3d/astra3d.cu | 142 | ||||
-rw-r--r-- | cuda/3d/astra3d.h | 22 | ||||
-rw-r--r-- | cuda/3d/par3d_bp.cu | 18 |
5 files changed, 174 insertions, 15 deletions
diff --git a/cuda/3d/arith3d.cu b/cuda/3d/arith3d.cu index 7cb56f6..cc96da5 100644 --- a/cuda/3d/arith3d.cu +++ b/cuda/3d/arith3d.cu @@ -52,6 +52,11 @@ struct opAddMul { out += in1 * in2; } }; +struct opAdd { + __device__ void operator()(float& out, const float in) { + out += in; + } +}; struct opMul { __device__ void operator()(float& out, const float in) { out *= in; @@ -594,6 +599,7 @@ INST_DDFtoD(opAddMulScaled) INST_DDtoD(opAddMul) INST_DDtoD(opMul2) INST_DtoD(opMul) +INST_DtoD(opAdd) INST_DtoD(opDividedBy) INST_toD(opInvert) INST_FtoD(opSet) diff --git a/cuda/3d/arith3d.h b/cuda/3d/arith3d.h index 53c9b79..5635ffd 100644 --- a/cuda/3d/arith3d.h +++ b/cuda/3d/arith3d.h @@ -37,6 +37,7 @@ struct opAddScaled; struct opScaleAndAdd; struct opAddMulScaled; struct opAddMul; +struct opAdd; struct opMul; struct opMul2; struct opDividedBy; diff --git a/cuda/3d/astra3d.cu b/cuda/3d/astra3d.cu index 4447775..2efac98 100644 --- a/cuda/3d/astra3d.cu +++ b/cuda/3d/astra3d.cu @@ -1463,6 +1463,40 @@ bool astraCudaPar3DBP(float* pfVolume, const float* pfProjections, return ok; } +// This computes the column weights, divides by them, and adds the +// result to the current volume. This is both more expensive and more +// GPU memory intensive than the regular BP, but allows saving system RAM. +bool astraCudaPar3DBP_SIRTWeighted(float* pfVolume, const float* pfProjections, + unsigned int iVolX, + unsigned int iVolY, + unsigned int iVolZ, + unsigned int iProjAngles, + unsigned int iProjU, + unsigned int iProjV, + float fDetUSize, + float fDetVSize, + const float *pfAngles, + int iGPUIndex, int iVoxelSuperSampling) +{ + if (iVolX == 0 || iVolY == 0 || iVolZ == 0) + return false; + if (iProjAngles == 0 || iProjU == 0 || iProjV == 0 || pfAngles == 0) + return false; + + SPar3DProjection* p = genPar3DProjections(iProjAngles, + iProjU, iProjV, + fDetUSize, fDetVSize, + pfAngles); + + bool ok; + ok = astraCudaPar3DBP_SIRTWeighted(pfVolume, pfProjections, iVolX, iVolY, iVolZ, + iProjAngles, iProjU, iProjV, p, iGPUIndex, iVoxelSuperSampling); + + delete[] p; + + return ok; +} + bool astraCudaPar3DBP(float* pfVolume, const float* pfProjections, unsigned int iVolX, @@ -1491,9 +1525,6 @@ bool astraCudaPar3DBP(float* pfVolume, const float* pfProjections, dims.iRaysPerVoxelDim = iVoxelSuperSampling; - if (iProjAngles == 0 || iProjU == 0 || iProjV == 0 || pfAngles == 0) - return false; - if (iGPUIndex != -1) { cudaSetDevice(iGPUIndex); cudaError_t err = cudaGetLastError(); @@ -1540,6 +1571,111 @@ bool astraCudaPar3DBP(float* pfVolume, const float* pfProjections, } +// This computes the column weights, divides by them, and adds the +// result to the current volume. This is both more expensive and more +// GPU memory intensive than the regular BP, but allows saving system RAM. +bool astraCudaPar3DBP_SIRTWeighted(float* pfVolume, + const float* pfProjections, + unsigned int iVolX, + unsigned int iVolY, + unsigned int iVolZ, + unsigned int iProjAngles, + unsigned int iProjU, + unsigned int iProjV, + const SPar3DProjection *pfAngles, + int iGPUIndex, int iVoxelSuperSampling) +{ + SDimensions3D dims; + + dims.iVolX = iVolX; + dims.iVolY = iVolY; + dims.iVolZ = iVolZ; + if (iVolX == 0 || iVolY == 0 || iVolZ == 0) + return false; + + dims.iProjAngles = iProjAngles; + dims.iProjU = iProjU; + dims.iProjV = iProjV; + + if (iProjAngles == 0 || iProjU == 0 || iProjV == 0 || pfAngles == 0) + return false; + + dims.iRaysPerVoxelDim = iVoxelSuperSampling; + + if (iGPUIndex != -1) { + cudaSetDevice(iGPUIndex); + cudaError_t err = cudaGetLastError(); + + // Ignore errors caused by calling cudaSetDevice multiple times + if (err != cudaSuccess && err != cudaErrorSetOnActiveProcess) + return false; + } + + + cudaPitchedPtr D_pixelWeight = allocateVolumeData(dims); + bool ok = D_pixelWeight.ptr; + if (!ok) + return false; + + cudaPitchedPtr D_volumeData = allocateVolumeData(dims); + ok = D_volumeData.ptr; + if (!ok) { + cudaFree(D_pixelWeight.ptr); + return false; + } + + cudaPitchedPtr D_projData = allocateProjectionData(dims); + ok = D_projData.ptr; + if (!ok) { + cudaFree(D_pixelWeight.ptr); + cudaFree(D_volumeData.ptr); + return false; + } + + // Compute weights + ok &= zeroVolumeData(D_pixelWeight, dims); + processSino3D<opSet>(D_projData, 1.0f, dims); + ok &= Par3DBP(D_pixelWeight, D_projData, dims, pfAngles); + processVol3D<opInvert>(D_pixelWeight, dims); + if (!ok) { + cudaFree(D_pixelWeight.ptr); + cudaFree(D_volumeData.ptr); + cudaFree(D_projData.ptr); + return false; + } + + ok &= copyProjectionsToDevice(pfProjections, D_projData, + dims, dims.iProjU); + ok &= zeroVolumeData(D_volumeData, dims); + // Do BP into D_volumeData + ok &= Par3DBP(D_volumeData, D_projData, dims, pfAngles); + // Multiply with weights + processVol3D<opMul>(D_volumeData, D_pixelWeight, dims); + + // Upload previous iterate to D_pixelWeight... + ok &= copyVolumeToDevice(pfVolume, D_pixelWeight, dims, dims.iVolX); + if (!ok) { + cudaFree(D_pixelWeight.ptr); + cudaFree(D_volumeData.ptr); + cudaFree(D_projData.ptr); + return false; + } + // ...and add it to the weighted BP + processVol3D<opAdd>(D_volumeData, D_pixelWeight, dims); + + // Then copy the result back + ok &= copyVolumeFromDevice(pfVolume, D_volumeData, dims, dims.iVolX); + + + cudaFree(D_pixelWeight.ptr); + cudaFree(D_volumeData.ptr); + cudaFree(D_projData.ptr); + + return ok; + +} + + bool astraCudaFDK(float* pfVolume, const float* pfProjections, unsigned int iVolX, diff --git a/cuda/3d/astra3d.h b/cuda/3d/astra3d.h index 5712f89..9d87e33 100644 --- a/cuda/3d/astra3d.h +++ b/cuda/3d/astra3d.h @@ -429,6 +429,28 @@ _AstraExport bool astraCudaPar3DBP(float* pfVolume, const float* pfProjections, const SPar3DProjection *pfAngles, int iGPUIndex, int iVoxelSuperSampling); +_AstraExport bool astraCudaPar3DBP_SIRTWeighted(float* pfVolume, const float* pfProjections, + unsigned int iVolX, + unsigned int iVolY, + unsigned int iVolZ, + unsigned int iProjAngles, + unsigned int iProjU, + unsigned int iProjV, + float fDetUSize, + float fDetVSize, + const float *pfAngles, + int iGPUIndex, int iVoxelSuperSampling); + +_AstraExport bool astraCudaPar3DBP_SIRTWeighted(float* pfVolume, const float* pfProjections, + unsigned int iVolX, + unsigned int iVolY, + unsigned int iVolZ, + unsigned int iProjAngles, + unsigned int iProjU, + unsigned int iProjV, + const SPar3DProjection *pfAngles, + int iGPUIndex, int iVoxelSuperSampling); + _AstraExport bool astraCudaFDK(float* pfVolume, const float* pfProjections, unsigned int iVolX, unsigned int iVolY, diff --git a/cuda/3d/par3d_bp.cu b/cuda/3d/par3d_bp.cu index 9bf9a9f..8cb285c 100644 --- a/cuda/3d/par3d_bp.cu +++ b/cuda/3d/par3d_bp.cu @@ -136,13 +136,10 @@ __global__ void dev_par3D_BP(void* D_volData, unsigned int volPitch, int startAn const float fCvz = gC_Cvz[angle]; const float fCvc = gC_Cvc[angle]; - const float fUNum = fCuc + fX * fCux + fY * fCuy + fZ * fCuz; - const float fVNum = fCvc + fX * fCvx + fY * fCvy + fZ * fCvz; + const float fU = fCuc + fX * fCux + fY * fCuy + fZ * fCuz; + const float fV = fCvc + fX * fCvx + fY * fCvy + fZ * fCvz; - const float fU = fUNum; - const float fV = fVNum; - - fVal += tex3D(gT_par3DProjTexture, fU, fAngle, fV); // TODO: check order + fVal += tex3D(gT_par3DProjTexture, fU, fAngle, fV); } @@ -213,13 +210,10 @@ __global__ void dev_par3D_BP_SS(void* D_volData, unsigned int volPitch, int star float fZs = fZ; for (int iSubZ = 0; iSubZ < dims.iRaysPerVoxelDim; ++iSubZ) { - const float fUNum = fCuc + fXs * fCux + fYs * fCuy + fZs * fCuz; - const float fVNum = fCvc + fXs * fCvx + fYs * fCvy + fZs * fCvz; - - const float fU = fUNum; - const float fV = fVNum; + const float fU = fCuc + fXs * fCux + fYs * fCuy + fZs * fCuz; + const float fV = fCvc + fXs * fCvx + fYs * fCvy + fZs * fCvz; - fVal += tex3D(gT_par3DProjTexture, fU, fAngle, fV); // TODO: check order + fVal += tex3D(gT_par3DProjTexture, fU, fAngle, fV); fZs += fSubStep; } fYs += fSubStep; |