/* ----------------------------------------------------------------------- Copyright: 2010-2015, iMinds-Vision Lab, University of Antwerp 2014-2015, CWI, Amsterdam Contact: astra@uantwerpen.be Website: http://sf.net/projects/astra-toolbox This file is part of the ASTRA Toolbox. The ASTRA Toolbox is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. The ASTRA Toolbox is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with the ASTRA Toolbox. If not, see <http://www.gnu.org/licenses/>. ----------------------------------------------------------------------- $Id$ */ #include <cstdio> #include <cassert> #include "cgls3d.h" #include "util3d.h" #include "arith3d.h" #include "cone_fp.h" #ifdef STANDALONE #include "testutil.h" #endif namespace astraCUDA3d { CGLS::CGLS() : ReconAlgo3D() { D_maskData.ptr = 0; D_smaskData.ptr = 0; D_sinoData.ptr = 0; D_volumeData.ptr = 0; D_r.ptr = 0; D_w.ptr = 0; D_z.ptr = 0; D_p.ptr = 0; useVolumeMask = false; useSinogramMask = false; } CGLS::~CGLS() { reset(); } void CGLS::reset() { cudaFree(D_r.ptr); cudaFree(D_w.ptr); cudaFree(D_z.ptr); cudaFree(D_p.ptr); D_maskData.ptr = 0; D_smaskData.ptr = 0; D_sinoData.ptr = 0; D_volumeData.ptr = 0; D_r.ptr = 0; D_w.ptr = 0; D_z.ptr = 0; D_p.ptr = 0; useVolumeMask = false; useSinogramMask = false; sliceInitialized = false; ReconAlgo3D::reset(); } bool CGLS::enableVolumeMask() { useVolumeMask = true; return true; } bool CGLS::enableSinogramMask() { useSinogramMask = true; return true; } bool CGLS::init() { D_z = allocateVolumeData(dims); D_p = allocateVolumeData(dims); D_r = allocateProjectionData(dims); D_w = allocateProjectionData(dims); // TODO: check if allocations succeeded return true; } bool CGLS::setVolumeMask(cudaPitchedPtr& _D_maskData) { assert(useVolumeMask); D_maskData = _D_maskData; return true; } bool CGLS::setSinogramMask(cudaPitchedPtr& _D_smaskData) { return false; #if 0 // TODO: Implement this assert(useSinogramMask); D_smaskData = _D_smaskData; return true; #endif } bool CGLS::setBuffers(cudaPitchedPtr& _D_volumeData, cudaPitchedPtr& _D_projData) { D_volumeData = _D_volumeData; D_sinoData = _D_projData; sliceInitialized = false; return true; } bool CGLS::iterate(unsigned int iterations) { shouldAbort = false; if (!sliceInitialized) { // copy sinogram duplicateProjectionData(D_r, D_sinoData, dims); // r = sino - A*x if (useVolumeMask) { duplicateVolumeData(D_z, D_volumeData, dims); processVol3D<opMul>(D_z, D_maskData, dims); callFP(D_z, D_r, -1.0f); } else { callFP(D_volumeData, D_r, -1.0f); } // p = A'*r zeroVolumeData(D_p, dims); callBP(D_p, D_r); if (useVolumeMask) processVol3D<opMul>(D_p, D_maskData, dims); gamma = dotProduct3D(D_p, dims.iVolX, dims.iVolY, dims.iVolZ); sliceInitialized = true; } // iteration for (unsigned int iter = 0; iter < iterations && !shouldAbort; ++iter) { // w = A*p zeroProjectionData(D_w, dims); callFP(D_p, D_w, 1.0f); // alpha = gamma / <w,w> float ww = dotProduct3D(D_w, dims.iProjU, dims.iProjAngles, dims.iProjV); float alpha = gamma / ww; // x += alpha*p processVol3D<opAddScaled>(D_volumeData, D_p, alpha, dims); // r -= alpha*w processSino3D<opAddScaled>(D_r, D_w, -alpha, dims); // z = A'*r zeroVolumeData(D_z, dims); callBP(D_z, D_r); if (useVolumeMask) processVol3D<opMul>(D_z, D_maskData, dims); float beta = 1.0f / gamma; gamma = dotProduct3D(D_z, dims.iVolX, dims.iVolY, dims.iVolZ); beta *= gamma; // p = z + beta*p processVol3D<opScaleAndAdd>(D_p, D_z, beta, dims); } return true; } float CGLS::computeDiffNorm() { // We can use w and z as temporary storage here since they're not // used outside of iterations. // copy sinogram to w duplicateProjectionData(D_w, D_sinoData, dims); // do FP, subtracting projection from sinogram if (useVolumeMask) { duplicateVolumeData(D_z, D_volumeData, dims); processVol3D<opMul>(D_z, D_maskData, dims); callFP(D_z, D_w, -1.0f); } else { callFP(D_volumeData, D_w, -1.0f); } float s = dotProduct3D(D_w, dims.iProjU, dims.iProjAngles, dims.iProjV); return sqrt(s); } bool doCGLS(cudaPitchedPtr& D_volumeData, cudaPitchedPtr& D_sinoData, cudaPitchedPtr& D_maskData, const SDimensions3D& dims, const SConeProjection* angles, unsigned int iterations) { CGLS cgls; bool ok = true; ok &= cgls.setConeGeometry(dims, angles); if (D_maskData.ptr) ok &= cgls.enableVolumeMask(); if (!ok) return false; ok = cgls.init(); if (!ok) return false; if (D_maskData.ptr) ok &= cgls.setVolumeMask(D_maskData); ok &= cgls.setBuffers(D_volumeData, D_sinoData); if (!ok) return false; ok = cgls.iterate(iterations); return ok; } } #ifdef STANDALONE using namespace astraCUDA3d; int main() { SDimensions3D dims; dims.iVolX = 256; dims.iVolY = 256; dims.iVolZ = 256; dims.iProjAngles = 100; dims.iProjU = 512; dims.iProjV = 512; dims.iRaysPerDet = 1; SConeProjection angle[100]; angle[0].fSrcX = -2905.6; angle[0].fSrcY = 0; angle[0].fSrcZ = 0; angle[0].fDetSX = 694.4; angle[0].fDetSY = -122.4704; angle[0].fDetSZ = -122.4704; angle[0].fDetUX = 0; angle[0].fDetUY = .4784; //angle[0].fDetUY = .5; angle[0].fDetUZ = 0; angle[0].fDetVX = 0; angle[0].fDetVY = 0; angle[0].fDetVZ = .4784; #define ROTATE0(name,i,alpha) do { angle[i].f##name##X = angle[0].f##name##X * cos(alpha) - angle[0].f##name##Y * sin(alpha); angle[i].f##name##Y = angle[0].f##name##X * sin(alpha) + angle[0].f##name##Y * cos(alpha); } while(0) for (int i = 1; i < 100; ++i) { angle[i] = angle[0]; ROTATE0(Src, i, i*2*M_PI/100); ROTATE0(DetS, i, i*2*M_PI/100); ROTATE0(DetU, i, i*2*M_PI/100); ROTATE0(DetV, i, i*2*M_PI/100); } #undef ROTATE0 cudaPitchedPtr volData = allocateVolumeData(dims); cudaPitchedPtr projData = allocateProjectionData(dims); zeroProjectionData(projData, dims); float* pbuf = new float[100*512*512]; copyProjectionsFromDevice(pbuf, projData, dims); copyProjectionsToDevice(pbuf, projData, dims); delete[] pbuf; #if 0 float* slice = new float[256*256]; cudaPitchedPtr ptr; ptr.ptr = slice; ptr.pitch = 256*sizeof(float); ptr.xsize = 256*sizeof(float); ptr.ysize = 256; for (unsigned int i = 0; i < 256; ++i) { for (unsigned int y = 0; y < 256; ++y) for (unsigned int x = 0; x < 256; ++x) slice[y*256+x] = (i-127.5)*(i-127.5)+(y-127.5)*(y-127.5)+(x-127.5)*(x-127.5) < 4900 ? 1.0f : 0.0f; cudaExtent extentS; extentS.width = dims.iVolX*sizeof(float); extentS.height = dims.iVolY; extentS.depth = 1; cudaPos sp = { 0, 0, 0 }; cudaPos dp = { 0, 0, i }; cudaMemcpy3DParms p; p.srcArray = 0; p.srcPos = sp; p.srcPtr = ptr; p.dstArray = 0; p.dstPos = dp; p.dstPtr = volData; p.extent = extentS; p.kind = cudaMemcpyHostToDevice; cudaMemcpy3D(&p); } astraCUDA3d::ConeFP(volData, projData, dims, angle, 1.0f); #else for (int i = 0; i < 100; ++i) { char fname[32]; sprintf(fname, "Tiffs/%04d.png", 4*i); unsigned int w,h; float* bufp = loadImage(fname, w,h); for (int j = 0; j < 512*512; ++j) { float v = bufp[j]; if (v > 236.0f) v = 236.0f; v = logf(236.0f / v); bufp[j] = 256*v; } for (int j = 0; j < 512; ++j) { cudaMemcpy(((float*)projData.ptr)+100*512*j+512*i, bufp+512*j, 512*sizeof(float), cudaMemcpyHostToDevice); } delete[] bufp; } #endif #if 0 float* bufs = new float[100*512]; for (int i = 0; i < 512; ++i) { cudaMemcpy(bufs, ((float*)projData.ptr)+100*512*i, 100*512*sizeof(float), cudaMemcpyDeviceToHost); printf("%d %d %d\n", projData.pitch, projData.xsize, projData.ysize); char fname[20]; sprintf(fname, "sino%03d.png", i); saveImage(fname, 100, 512, bufs); } float* bufp = new float[512*512]; for (int i = 0; i < 100; ++i) { for (int j = 0; j < 512; ++j) { cudaMemcpy(bufp+512*j, ((float*)projData.ptr)+100*512*j+512*i, 512*sizeof(float), cudaMemcpyDeviceToHost); } char fname[20]; sprintf(fname, "proj%03d.png", i); saveImage(fname, 512, 512, bufp); } #endif zeroVolumeData(volData, dims); cudaPitchedPtr maskData; maskData.ptr = 0; astraCUDA3d::doCGLS(volData, projData, maskData, dims, angle, 50); #if 1 float* buf = new float[256*256]; for (int i = 0; i < 256; ++i) { cudaMemcpy(buf, ((float*)volData.ptr)+256*256*i, 256*256*sizeof(float), cudaMemcpyDeviceToHost); char fname[20]; sprintf(fname, "vol%03d.png", i); saveImage(fname, 256, 256, buf); } #endif return 0; } #endif