From 3a6769465bee7d56d0ddff36613b886446421e07 Mon Sep 17 00:00:00 2001 From: Willem Jan Palenstijn Date: Wed, 16 Apr 2014 11:12:55 +0000 Subject: Remove padding in 2D cuda in favour of Border mode --- cuda/2d/em.cu | 44 ++++++++++++++++++++++---------------------- 1 file changed, 22 insertions(+), 22 deletions(-) (limited to 'cuda/2d/em.cu') diff --git a/cuda/2d/em.cu b/cuda/2d/em.cu index 74d1bbf..cf3f10c 100644 --- a/cuda/2d/em.cu +++ b/cuda/2d/em.cu @@ -73,14 +73,14 @@ void EM::reset() bool EM::init() { - allocateVolume(D_pixelWeight, dims.iVolWidth+2, dims.iVolHeight+2, pixelPitch); - zeroVolume(D_pixelWeight, pixelPitch, dims.iVolWidth+2, dims.iVolHeight+2); + allocateVolume(D_pixelWeight, dims.iVolWidth, dims.iVolHeight, pixelPitch); + zeroVolume(D_pixelWeight, pixelPitch, dims.iVolWidth, dims.iVolHeight); - allocateVolume(D_tmpData, dims.iVolWidth+2, dims.iVolHeight+2, tmpPitch); - zeroVolume(D_tmpData, tmpPitch, dims.iVolWidth+2, dims.iVolHeight+2); + allocateVolume(D_tmpData, dims.iVolWidth, dims.iVolHeight, tmpPitch); + zeroVolume(D_tmpData, tmpPitch, dims.iVolWidth, dims.iVolHeight); - allocateVolume(D_projData, dims.iProjDets+2, dims.iProjAngles, projPitch); - zeroVolume(D_projData, projPitch, dims.iProjDets+2, dims.iProjAngles); + allocateVolume(D_projData, dims.iProjDets, dims.iProjAngles, projPitch); + zeroVolume(D_projData, projPitch, dims.iProjDets, dims.iProjAngles); // We can't precompute pixelWeights when using a volume mask #if 0 @@ -94,22 +94,22 @@ bool EM::init() bool EM::precomputeWeights() { - zeroVolume(D_pixelWeight, pixelPitch, dims.iVolWidth+2, dims.iVolHeight+2); + zeroVolume(D_pixelWeight, pixelPitch, dims.iVolWidth, dims.iVolHeight); #if 0 if (useSinogramMask) { callBP(D_pixelWeight, pixelPitch, D_smaskData, smaskPitch); } else #endif { - processVol(D_projData, 1.0f, projPitch, dims.iProjDets, dims.iProjAngles); + processVol(D_projData, 1.0f, projPitch, dims.iProjDets, dims.iProjAngles); callBP(D_pixelWeight, pixelPitch, D_projData, projPitch); } - processVol(D_pixelWeight, pixelPitch, dims.iVolWidth, dims.iVolHeight); + processVol(D_pixelWeight, pixelPitch, dims.iVolWidth, dims.iVolHeight); #if 0 if (useVolumeMask) { // scale pixel weights with mask to zero out masked pixels - processVol(D_pixelWeight, D_maskData, pixelPitch, dims.iVolWidth, dims.iVolHeight); + processVol(D_pixelWeight, D_maskData, pixelPitch, dims.iVolWidth, dims.iVolHeight); } #endif @@ -129,18 +129,18 @@ bool EM::iterate(unsigned int iterations) for (unsigned int iter = 0; iter < iterations && !shouldAbort; ++iter) { // Do FP of volumeData - zeroVolume(D_projData, projPitch, dims.iProjDets+2, dims.iProjAngles); + zeroVolume(D_projData, projPitch, dims.iProjDets, dims.iProjAngles); callFP(D_volumeData, volumePitch, D_projData, projPitch, 1.0f); // Divide sinogram by FP (into projData) - processVol(D_projData, D_sinoData, projPitch, dims.iProjDets, dims.iProjAngles); + processVol(D_projData, D_sinoData, projPitch, dims.iProjDets, dims.iProjAngles); // Do BP of projData into tmpData - zeroVolume(D_tmpData, tmpPitch, dims.iVolWidth+2, dims.iVolHeight+2); + zeroVolume(D_tmpData, tmpPitch, dims.iVolWidth, dims.iVolHeight); callBP(D_tmpData, tmpPitch, D_projData, projPitch); // Multiply volumeData with tmpData divided by pixel weights - processVol(D_volumeData, D_tmpData, D_pixelWeight, pixelPitch, dims.iVolWidth, dims.iVolHeight); + processVol(D_volumeData, D_tmpData, D_pixelWeight, pixelPitch, dims.iVolWidth, dims.iVolHeight); } @@ -150,12 +150,12 @@ bool EM::iterate(unsigned int iterations) float EM::computeDiffNorm() { // copy sinogram to projection data - cudaMemcpy2D(D_projData, sizeof(float)*projPitch, D_sinoData, sizeof(float)*sinoPitch, sizeof(float)*(dims.iProjDets+2), dims.iProjAngles, cudaMemcpyDeviceToDevice); + cudaMemcpy2D(D_projData, sizeof(float)*projPitch, D_sinoData, sizeof(float)*sinoPitch, sizeof(float)*(dims.iProjDets), dims.iProjAngles, cudaMemcpyDeviceToDevice); // do FP, subtracting projection from sinogram if (useVolumeMask) { - cudaMemcpy2D(D_tmpData, sizeof(float)*tmpPitch, D_volumeData, sizeof(float)*volumePitch, sizeof(float)*(dims.iVolWidth+2), dims.iVolHeight+2, cudaMemcpyDeviceToDevice); - processVol(D_tmpData, D_maskData, tmpPitch, dims.iVolWidth, dims.iVolHeight); + cudaMemcpy2D(D_tmpData, sizeof(float)*tmpPitch, D_volumeData, sizeof(float)*volumePitch, sizeof(float)*(dims.iVolWidth), dims.iVolHeight, cudaMemcpyDeviceToDevice); + processVol(D_tmpData, D_maskData, tmpPitch, dims.iVolWidth, dims.iVolHeight); callFP(D_tmpData, tmpPitch, D_projData, projPitch, -1.0f); } else { callFP(D_volumeData, volumePitch, D_projData, projPitch, -1.0f); @@ -164,7 +164,7 @@ float EM::computeDiffNorm() // compute norm of D_projData - float s = dotProduct2D(D_projData, projPitch, dims.iProjDets, dims.iProjAngles, 1, 0); + float s = dotProduct2D(D_projData, projPitch, dims.iProjDets, dims.iProjAngles); return sqrt(s); } @@ -218,12 +218,12 @@ int main() dims.iRaysPerDet = 1; unsigned int volumePitch, sinoPitch; - allocateVolume(D_volumeData, dims.iVolWidth+2, dims.iVolHeight+2, volumePitch); - zeroVolume(D_volumeData, volumePitch, dims.iVolWidth+2, dims.iVolHeight+2); + allocateVolume(D_volumeData, dims.iVolWidth, dims.iVolHeight, volumePitch); + zeroVolume(D_volumeData, volumePitch, dims.iVolWidth, dims.iVolHeight); printf("pitch: %u\n", volumePitch); - allocateVolume(D_sinoData, dims.iProjDets+2, dims.iProjAngles, sinoPitch); - zeroVolume(D_sinoData, sinoPitch, dims.iProjDets+2, dims.iProjAngles); + allocateVolume(D_sinoData, dims.iProjDets, dims.iProjAngles, sinoPitch); + zeroVolume(D_sinoData, sinoPitch, dims.iProjDets, dims.iProjAngles); printf("pitch: %u\n", sinoPitch); unsigned int y, x; -- cgit v1.2.3