summaryrefslogtreecommitdiffstats
path: root/cuda
diff options
context:
space:
mode:
authorWillem Jan Palenstijn <Willem.Jan.Palenstijn@cwi.nl>2014-05-02 09:20:54 +0000
committerwpalenst <Willem.Jan.Palenstijn@cwi.nl>2014-05-02 09:20:54 +0000
commit1dd79f23f783564719a52de7d9b54b17005c32d7 (patch)
tree71e3c59863680e9da29a51359786d24c68787f1a /cuda
parent9dd746071621cf854171b985afbf375f19a5b726 (diff)
downloadastra-1dd79f23f783564719a52de7d9b54b17005c32d7.tar.gz
astra-1dd79f23f783564719a52de7d9b54b17005c32d7.tar.bz2
astra-1dd79f23f783564719a52de7d9b54b17005c32d7.tar.xz
astra-1dd79f23f783564719a52de7d9b54b17005c32d7.zip
Add SIRT-Weighted BP3D (par3d-only) for use in large BP
Diffstat (limited to 'cuda')
-rw-r--r--cuda/3d/arith3d.cu6
-rw-r--r--cuda/3d/arith3d.h1
-rw-r--r--cuda/3d/astra3d.cu142
-rw-r--r--cuda/3d/astra3d.h22
-rw-r--r--cuda/3d/par3d_bp.cu18
5 files changed, 174 insertions, 15 deletions
diff --git a/cuda/3d/arith3d.cu b/cuda/3d/arith3d.cu
index 7cb56f6..cc96da5 100644
--- a/cuda/3d/arith3d.cu
+++ b/cuda/3d/arith3d.cu
@@ -52,6 +52,11 @@ struct opAddMul {
out += in1 * in2;
}
};
+struct opAdd {
+ __device__ void operator()(float& out, const float in) {
+ out += in;
+ }
+};
struct opMul {
__device__ void operator()(float& out, const float in) {
out *= in;
@@ -594,6 +599,7 @@ INST_DDFtoD(opAddMulScaled)
INST_DDtoD(opAddMul)
INST_DDtoD(opMul2)
INST_DtoD(opMul)
+INST_DtoD(opAdd)
INST_DtoD(opDividedBy)
INST_toD(opInvert)
INST_FtoD(opSet)
diff --git a/cuda/3d/arith3d.h b/cuda/3d/arith3d.h
index 53c9b79..5635ffd 100644
--- a/cuda/3d/arith3d.h
+++ b/cuda/3d/arith3d.h
@@ -37,6 +37,7 @@ struct opAddScaled;
struct opScaleAndAdd;
struct opAddMulScaled;
struct opAddMul;
+struct opAdd;
struct opMul;
struct opMul2;
struct opDividedBy;
diff --git a/cuda/3d/astra3d.cu b/cuda/3d/astra3d.cu
index 4447775..2efac98 100644
--- a/cuda/3d/astra3d.cu
+++ b/cuda/3d/astra3d.cu
@@ -1463,6 +1463,40 @@ bool astraCudaPar3DBP(float* pfVolume, const float* pfProjections,
return ok;
}
+// This computes the column weights, divides by them, and adds the
+// result to the current volume. This is both more expensive and more
+// GPU memory intensive than the regular BP, but allows saving system RAM.
+bool astraCudaPar3DBP_SIRTWeighted(float* pfVolume, const float* pfProjections,
+ unsigned int iVolX,
+ unsigned int iVolY,
+ unsigned int iVolZ,
+ unsigned int iProjAngles,
+ unsigned int iProjU,
+ unsigned int iProjV,
+ float fDetUSize,
+ float fDetVSize,
+ const float *pfAngles,
+ int iGPUIndex, int iVoxelSuperSampling)
+{
+ if (iVolX == 0 || iVolY == 0 || iVolZ == 0)
+ return false;
+ if (iProjAngles == 0 || iProjU == 0 || iProjV == 0 || pfAngles == 0)
+ return false;
+
+ SPar3DProjection* p = genPar3DProjections(iProjAngles,
+ iProjU, iProjV,
+ fDetUSize, fDetVSize,
+ pfAngles);
+
+ bool ok;
+ ok = astraCudaPar3DBP_SIRTWeighted(pfVolume, pfProjections, iVolX, iVolY, iVolZ,
+ iProjAngles, iProjU, iProjV, p, iGPUIndex, iVoxelSuperSampling);
+
+ delete[] p;
+
+ return ok;
+}
+
bool astraCudaPar3DBP(float* pfVolume, const float* pfProjections,
unsigned int iVolX,
@@ -1491,9 +1525,6 @@ bool astraCudaPar3DBP(float* pfVolume, const float* pfProjections,
dims.iRaysPerVoxelDim = iVoxelSuperSampling;
- if (iProjAngles == 0 || iProjU == 0 || iProjV == 0 || pfAngles == 0)
- return false;
-
if (iGPUIndex != -1) {
cudaSetDevice(iGPUIndex);
cudaError_t err = cudaGetLastError();
@@ -1540,6 +1571,111 @@ bool astraCudaPar3DBP(float* pfVolume, const float* pfProjections,
}
+// This computes the column weights, divides by them, and adds the
+// result to the current volume. This is both more expensive and more
+// GPU memory intensive than the regular BP, but allows saving system RAM.
+bool astraCudaPar3DBP_SIRTWeighted(float* pfVolume,
+ const float* pfProjections,
+ unsigned int iVolX,
+ unsigned int iVolY,
+ unsigned int iVolZ,
+ unsigned int iProjAngles,
+ unsigned int iProjU,
+ unsigned int iProjV,
+ const SPar3DProjection *pfAngles,
+ int iGPUIndex, int iVoxelSuperSampling)
+{
+ SDimensions3D dims;
+
+ dims.iVolX = iVolX;
+ dims.iVolY = iVolY;
+ dims.iVolZ = iVolZ;
+ if (iVolX == 0 || iVolY == 0 || iVolZ == 0)
+ return false;
+
+ dims.iProjAngles = iProjAngles;
+ dims.iProjU = iProjU;
+ dims.iProjV = iProjV;
+
+ if (iProjAngles == 0 || iProjU == 0 || iProjV == 0 || pfAngles == 0)
+ return false;
+
+ dims.iRaysPerVoxelDim = iVoxelSuperSampling;
+
+ if (iGPUIndex != -1) {
+ cudaSetDevice(iGPUIndex);
+ cudaError_t err = cudaGetLastError();
+
+ // Ignore errors caused by calling cudaSetDevice multiple times
+ if (err != cudaSuccess && err != cudaErrorSetOnActiveProcess)
+ return false;
+ }
+
+
+ cudaPitchedPtr D_pixelWeight = allocateVolumeData(dims);
+ bool ok = D_pixelWeight.ptr;
+ if (!ok)
+ return false;
+
+ cudaPitchedPtr D_volumeData = allocateVolumeData(dims);
+ ok = D_volumeData.ptr;
+ if (!ok) {
+ cudaFree(D_pixelWeight.ptr);
+ return false;
+ }
+
+ cudaPitchedPtr D_projData = allocateProjectionData(dims);
+ ok = D_projData.ptr;
+ if (!ok) {
+ cudaFree(D_pixelWeight.ptr);
+ cudaFree(D_volumeData.ptr);
+ return false;
+ }
+
+ // Compute weights
+ ok &= zeroVolumeData(D_pixelWeight, dims);
+ processSino3D<opSet>(D_projData, 1.0f, dims);
+ ok &= Par3DBP(D_pixelWeight, D_projData, dims, pfAngles);
+ processVol3D<opInvert>(D_pixelWeight, dims);
+ if (!ok) {
+ cudaFree(D_pixelWeight.ptr);
+ cudaFree(D_volumeData.ptr);
+ cudaFree(D_projData.ptr);
+ return false;
+ }
+
+ ok &= copyProjectionsToDevice(pfProjections, D_projData,
+ dims, dims.iProjU);
+ ok &= zeroVolumeData(D_volumeData, dims);
+ // Do BP into D_volumeData
+ ok &= Par3DBP(D_volumeData, D_projData, dims, pfAngles);
+ // Multiply with weights
+ processVol3D<opMul>(D_volumeData, D_pixelWeight, dims);
+
+ // Upload previous iterate to D_pixelWeight...
+ ok &= copyVolumeToDevice(pfVolume, D_pixelWeight, dims, dims.iVolX);
+ if (!ok) {
+ cudaFree(D_pixelWeight.ptr);
+ cudaFree(D_volumeData.ptr);
+ cudaFree(D_projData.ptr);
+ return false;
+ }
+ // ...and add it to the weighted BP
+ processVol3D<opAdd>(D_volumeData, D_pixelWeight, dims);
+
+ // Then copy the result back
+ ok &= copyVolumeFromDevice(pfVolume, D_volumeData, dims, dims.iVolX);
+
+
+ cudaFree(D_pixelWeight.ptr);
+ cudaFree(D_volumeData.ptr);
+ cudaFree(D_projData.ptr);
+
+ return ok;
+
+}
+
+
bool astraCudaFDK(float* pfVolume, const float* pfProjections,
unsigned int iVolX,
diff --git a/cuda/3d/astra3d.h b/cuda/3d/astra3d.h
index 5712f89..9d87e33 100644
--- a/cuda/3d/astra3d.h
+++ b/cuda/3d/astra3d.h
@@ -429,6 +429,28 @@ _AstraExport bool astraCudaPar3DBP(float* pfVolume, const float* pfProjections,
const SPar3DProjection *pfAngles,
int iGPUIndex, int iVoxelSuperSampling);
+_AstraExport bool astraCudaPar3DBP_SIRTWeighted(float* pfVolume, const float* pfProjections,
+ unsigned int iVolX,
+ unsigned int iVolY,
+ unsigned int iVolZ,
+ unsigned int iProjAngles,
+ unsigned int iProjU,
+ unsigned int iProjV,
+ float fDetUSize,
+ float fDetVSize,
+ const float *pfAngles,
+ int iGPUIndex, int iVoxelSuperSampling);
+
+_AstraExport bool astraCudaPar3DBP_SIRTWeighted(float* pfVolume, const float* pfProjections,
+ unsigned int iVolX,
+ unsigned int iVolY,
+ unsigned int iVolZ,
+ unsigned int iProjAngles,
+ unsigned int iProjU,
+ unsigned int iProjV,
+ const SPar3DProjection *pfAngles,
+ int iGPUIndex, int iVoxelSuperSampling);
+
_AstraExport bool astraCudaFDK(float* pfVolume, const float* pfProjections,
unsigned int iVolX,
unsigned int iVolY,
diff --git a/cuda/3d/par3d_bp.cu b/cuda/3d/par3d_bp.cu
index 9bf9a9f..8cb285c 100644
--- a/cuda/3d/par3d_bp.cu
+++ b/cuda/3d/par3d_bp.cu
@@ -136,13 +136,10 @@ __global__ void dev_par3D_BP(void* D_volData, unsigned int volPitch, int startAn
const float fCvz = gC_Cvz[angle];
const float fCvc = gC_Cvc[angle];
- const float fUNum = fCuc + fX * fCux + fY * fCuy + fZ * fCuz;
- const float fVNum = fCvc + fX * fCvx + fY * fCvy + fZ * fCvz;
+ const float fU = fCuc + fX * fCux + fY * fCuy + fZ * fCuz;
+ const float fV = fCvc + fX * fCvx + fY * fCvy + fZ * fCvz;
- const float fU = fUNum;
- const float fV = fVNum;
-
- fVal += tex3D(gT_par3DProjTexture, fU, fAngle, fV); // TODO: check order
+ fVal += tex3D(gT_par3DProjTexture, fU, fAngle, fV);
}
@@ -213,13 +210,10 @@ __global__ void dev_par3D_BP_SS(void* D_volData, unsigned int volPitch, int star
float fZs = fZ;
for (int iSubZ = 0; iSubZ < dims.iRaysPerVoxelDim; ++iSubZ) {
- const float fUNum = fCuc + fXs * fCux + fYs * fCuy + fZs * fCuz;
- const float fVNum = fCvc + fXs * fCvx + fYs * fCvy + fZs * fCvz;
-
- const float fU = fUNum;
- const float fV = fVNum;
+ const float fU = fCuc + fXs * fCux + fYs * fCuy + fZs * fCuz;
+ const float fV = fCvc + fXs * fCvx + fYs * fCvy + fZs * fCvz;
- fVal += tex3D(gT_par3DProjTexture, fU, fAngle, fV); // TODO: check order
+ fVal += tex3D(gT_par3DProjTexture, fU, fAngle, fV);
fZs += fSubStep;
}
fYs += fSubStep;