diff options
-rw-r--r-- | cuda/2d/astra.cu | 3 | ||||
-rw-r--r-- | cuda/2d/fft.cu | 46 | ||||
-rw-r--r-- | cuda/3d/algo3d.cu | 21 | ||||
-rw-r--r-- | cuda/3d/algo3d.h | 5 | ||||
-rw-r--r-- | cuda/3d/astra3d.cu | 215 | ||||
-rw-r--r-- | cuda/3d/astra3d.h | 18 | ||||
-rw-r--r-- | cuda/3d/cgls3d.cu | 2 | ||||
-rw-r--r-- | cuda/3d/cone_bp.cu | 40 | ||||
-rw-r--r-- | cuda/3d/cone_bp.h | 4 | ||||
-rw-r--r-- | cuda/3d/cone_fp.cu | 95 | ||||
-rw-r--r-- | cuda/3d/cone_fp.h | 4 | ||||
-rw-r--r-- | cuda/3d/dims3d.h | 24 | ||||
-rw-r--r-- | cuda/3d/fdk.cu | 289 | ||||
-rw-r--r-- | cuda/3d/fdk.h | 8 | ||||
-rw-r--r-- | cuda/3d/mem3d.cu | 55 | ||||
-rw-r--r-- | cuda/3d/mem3d.h | 1 | ||||
-rw-r--r-- | cuda/3d/par3d_bp.cu | 30 | ||||
-rw-r--r-- | cuda/3d/par3d_bp.h | 4 | ||||
-rw-r--r-- | cuda/3d/par3d_fp.cu | 156 | ||||
-rw-r--r-- | cuda/3d/par3d_fp.h | 6 | ||||
-rw-r--r-- | cuda/3d/sirt3d.cu | 2 | ||||
-rw-r--r-- | include/astra/CompositeGeometryManager.h | 11 | ||||
-rw-r--r-- | include/astra/GeometryUtil3D.h | 52 | ||||
-rw-r--r-- | include/astra/Singleton.h | 6 | ||||
-rw-r--r-- | include/astra/Utilities.h | 3 | ||||
-rw-r--r-- | matlab/mex/mexHelpFunctions.cpp | 28 | ||||
-rw-r--r-- | python/astra/src/PythonPluginAlgorithm.cpp | 10 | ||||
-rw-r--r-- | src/CompositeGeometryManager.cpp | 373 | ||||
-rw-r--r-- | src/CudaFDKAlgorithm3D.cpp | 32 | ||||
-rw-r--r-- | src/Utilities.cpp | 34 |
30 files changed, 863 insertions, 714 deletions
diff --git a/cuda/2d/astra.cu b/cuda/2d/astra.cu index 4c69628..b56ddf9 100644 --- a/cuda/2d/astra.cu +++ b/cuda/2d/astra.cu @@ -341,10 +341,9 @@ bool AstraFBP::run() dims3d.iProjAngles = pData->dims.iProjAngles; dims3d.iProjU = pData->dims.iProjDets; dims3d.iProjV = 1; - dims3d.iRaysPerDetDim = dims3d.iRaysPerVoxelDim = 1; astraCUDA3d::FDK_PreWeight(tmp, pData->fOriginSourceDistance, - pData->fOriginDetectorDistance, 0.0f, 0.0f, + pData->fOriginDetectorDistance, 0.0f, pData->dims.fDetScale, 1.0f, // TODO: Are these correct? pData->bShortScan, dims3d, pData->angles); } diff --git a/cuda/2d/fft.cu b/cuda/2d/fft.cu index 2bfd493..3dd1c22 100644 --- a/cuda/2d/fft.cu +++ b/cuda/2d/fft.cu @@ -35,6 +35,7 @@ $Id$ #include <fstream> #include "../../include/astra/Logging.h" +#include "../../include/astra/Fourier.h" using namespace astra; @@ -303,16 +304,48 @@ void genFilter(E_FBPFILTER _eFilter, float _fD, int _iProjectionCount, float * pfFilt = new float[_iFFTFourierDetectorCount]; float * pfW = new float[_iFFTFourierDetectorCount]; + // We cache one Fourier transform for repeated FBP's of the same size + static float *pfData = 0; + static int iFilterCacheSize = 0; + + if (!pfData || iFilterCacheSize != _iFFTRealDetectorCount) { + // Compute filter in spatial domain + + delete[] pfData; + pfData = new float[2*_iFFTRealDetectorCount]; + int *ip = new int[int(2+sqrt(_iFFTRealDetectorCount)+1)]; + ip[0] = 0; + float32 *w = new float32[_iFFTRealDetectorCount/2]; + + for (int i = 0; i < _iFFTRealDetectorCount; ++i) { + pfData[2*i+1] = 0.0f; + + if (i & 1) { + int j = i; + if (2*j > _iFFTRealDetectorCount) + j = _iFFTRealDetectorCount - j; + float f = M_PI * j; + pfData[2*i] = -1 / (f*f); + } else { + pfData[2*i] = 0.0f; + } + } + + pfData[0] = 0.25f; + + cdft(2*_iFFTRealDetectorCount, -1, pfData, ip, w); + delete[] ip; + delete[] w; + + iFilterCacheSize = _iFFTRealDetectorCount; + } + for(int iDetectorIndex = 0; iDetectorIndex < _iFFTFourierDetectorCount; iDetectorIndex++) { float fRelIndex = (float)iDetectorIndex / (float)_iFFTRealDetectorCount; - // filt = 2*( 0:(order/2) )./order; - pfFilt[iDetectorIndex] = 2.0f * fRelIndex; - //pfFilt[iDetectorIndex] = 1.0f; - - // w = 2*pi*(0:size(filt,2)-1)/order - pfW[iDetectorIndex] = 3.1415f * 2.0f * fRelIndex; + pfFilt[iDetectorIndex] = 2.0f * pfData[2*iDetectorIndex]; + pfW[iDetectorIndex] = M_PI * 2.0f * fRelIndex; } switch(_eFilter) @@ -866,5 +899,4 @@ void downloadDebugFilterReal(float * _pfHostSinogram, int _iProjectionCount, free(pfHostFilter); } - #endif diff --git a/cuda/3d/algo3d.cu b/cuda/3d/algo3d.cu index cc86b70..e2c1e43 100644 --- a/cuda/3d/algo3d.cu +++ b/cuda/3d/algo3d.cu @@ -41,7 +41,6 @@ ReconAlgo3D::ReconAlgo3D() coneProjs = 0; par3DProjs = 0; shouldAbort = false; - fOutputScale = 1.0f; } ReconAlgo3D::~ReconAlgo3D() @@ -58,10 +57,10 @@ void ReconAlgo3D::reset() shouldAbort = false; } -bool ReconAlgo3D::setConeGeometry(const SDimensions3D& _dims, const SConeProjection* _angles, float _outputScale) +bool ReconAlgo3D::setConeGeometry(const SDimensions3D& _dims, const SConeProjection* _angles, const SProjectorParams3D& _params) { dims = _dims; - fOutputScale = _outputScale; + params = _params; coneProjs = new SConeProjection[dims.iProjAngles]; par3DProjs = 0; @@ -71,10 +70,10 @@ bool ReconAlgo3D::setConeGeometry(const SDimensions3D& _dims, const SConeProject return true; } -bool ReconAlgo3D::setPar3DGeometry(const SDimensions3D& _dims, const SPar3DProjection* _angles, float _outputScale) +bool ReconAlgo3D::setPar3DGeometry(const SDimensions3D& _dims, const SPar3DProjection* _angles, const SProjectorParams3D& _params) { dims = _dims; - fOutputScale = _outputScale; + params = _params; par3DProjs = new SPar3DProjection[dims.iProjAngles]; coneProjs = 0; @@ -89,10 +88,12 @@ bool ReconAlgo3D::callFP(cudaPitchedPtr& D_volumeData, cudaPitchedPtr& D_projData, float outputScale) { + SProjectorParams3D p = params; + p.fOutputScale *= outputScale; if (coneProjs) { - return ConeFP(D_volumeData, D_projData, dims, coneProjs, outputScale * this->fOutputScale); + return ConeFP(D_volumeData, D_projData, dims, coneProjs, p); } else { - return Par3DFP(D_volumeData, D_projData, dims, par3DProjs, outputScale * this->fOutputScale); + return Par3DFP(D_volumeData, D_projData, dims, par3DProjs, p); } } @@ -100,10 +101,12 @@ bool ReconAlgo3D::callBP(cudaPitchedPtr& D_volumeData, cudaPitchedPtr& D_projData, float outputScale) { + SProjectorParams3D p = params; + p.fOutputScale *= outputScale; if (coneProjs) { - return ConeBP(D_volumeData, D_projData, dims, coneProjs, outputScale * this->fOutputScale); + return ConeBP(D_volumeData, D_projData, dims, coneProjs, p); } else { - return Par3DBP(D_volumeData, D_projData, dims, par3DProjs, outputScale * this->fOutputScale); + return Par3DBP(D_volumeData, D_projData, dims, par3DProjs, p); } } diff --git a/cuda/3d/algo3d.h b/cuda/3d/algo3d.h index 886b092..ce331ad 100644 --- a/cuda/3d/algo3d.h +++ b/cuda/3d/algo3d.h @@ -39,8 +39,8 @@ public: ReconAlgo3D(); ~ReconAlgo3D(); - bool setConeGeometry(const SDimensions3D& dims, const SConeProjection* projs, float fOutputScale); - bool setPar3DGeometry(const SDimensions3D& dims, const SPar3DProjection* projs, float fOutputScale); + bool setConeGeometry(const SDimensions3D& dims, const SConeProjection* projs, const SProjectorParams3D& params); + bool setPar3DGeometry(const SDimensions3D& dims, const SPar3DProjection* projs, const SProjectorParams3D& params); void signalAbort() { shouldAbort = true; } @@ -55,6 +55,7 @@ protected: float outputScale); SDimensions3D dims; + SProjectorParams3D params; SConeProjection* coneProjs; SPar3DProjection* par3DProjs; diff --git a/cuda/3d/astra3d.cu b/cuda/3d/astra3d.cu index 35e3cd4..3bd56ea 100644 --- a/cuda/3d/astra3d.cu +++ b/cuda/3d/astra3d.cu @@ -67,34 +67,41 @@ template<typename ProjectionT> static bool convertAstraGeometry_internal(const CVolumeGeometry3D* pVolGeom, unsigned int iProjectionAngleCount, ProjectionT*& pProjs, - float& fOutputScale) + SProjectorParams3D& params) { assert(pVolGeom); assert(pProjs); +#if 0 // TODO: Relative instead of absolute const float EPS = 0.00001f; if (abs(pVolGeom->getPixelLengthX() - pVolGeom->getPixelLengthY()) > EPS) return false; if (abs(pVolGeom->getPixelLengthX() - pVolGeom->getPixelLengthZ()) > EPS) return false; - +#endif // Translate float dx = -(pVolGeom->getWindowMinX() + pVolGeom->getWindowMaxX()) / 2; float dy = -(pVolGeom->getWindowMinY() + pVolGeom->getWindowMaxY()) / 2; float dz = -(pVolGeom->getWindowMinZ() + pVolGeom->getWindowMaxZ()) / 2; - float factor = 1.0f / pVolGeom->getPixelLengthX(); + float fx = 1.0f / pVolGeom->getPixelLengthX(); + float fy = 1.0f / pVolGeom->getPixelLengthY(); + float fz = 1.0f / pVolGeom->getPixelLengthZ(); for (int i = 0; i < iProjectionAngleCount; ++i) { // CHECKME: Order of scaling and translation pProjs[i].translate(dx, dy, dz); - pProjs[i].scale(factor); + pProjs[i].scale(fx, fy, fz); } + params.fVolScaleX = pVolGeom->getPixelLengthX(); + params.fVolScaleY = pVolGeom->getPixelLengthY(); + params.fVolScaleZ = pVolGeom->getPixelLengthZ(); + // CHECKME: Check factor - fOutputScale *= pVolGeom->getPixelLengthX(); + //params.fOutputScale *= pVolGeom->getPixelLengthX(); return true; } @@ -108,10 +115,8 @@ bool convertAstraGeometry_dims(const CVolumeGeometry3D* pVolGeom, dims.iVolY = pVolGeom->getGridRowCount(); dims.iVolZ = pVolGeom->getGridSliceCount(); dims.iProjAngles = pProjGeom->getProjectionCount(); - dims.iProjU = pProjGeom->getDetectorColCount(), - dims.iProjV = pProjGeom->getDetectorRowCount(), - dims.iRaysPerDetDim = 1; - dims.iRaysPerVoxelDim = 1; + dims.iProjU = pProjGeom->getDetectorColCount(); + dims.iProjV = pProjGeom->getDetectorRowCount(); if (dims.iVolX <= 0 || dims.iVolX <= 0 || dims.iVolX <= 0) return false; @@ -124,7 +129,7 @@ bool convertAstraGeometry_dims(const CVolumeGeometry3D* pVolGeom, bool convertAstraGeometry(const CVolumeGeometry3D* pVolGeom, const CParallelProjectionGeometry3D* pProjGeom, - SPar3DProjection*& pProjs, float& fOutputScale) + SPar3DProjection*& pProjs, SProjectorParams3D& params) { assert(pVolGeom); assert(pProjGeom); @@ -141,16 +146,14 @@ bool convertAstraGeometry(const CVolumeGeometry3D* pVolGeom, bool ok; - fOutputScale = 1.0f; - - ok = convertAstraGeometry_internal(pVolGeom, nth, pProjs, fOutputScale); + ok = convertAstraGeometry_internal(pVolGeom, nth, pProjs, params); return ok; } bool convertAstraGeometry(const CVolumeGeometry3D* pVolGeom, const CParallelVecProjectionGeometry3D* pProjGeom, - SPar3DProjection*& pProjs, float& fOutputScale) + SPar3DProjection*& pProjs, SProjectorParams3D& params) { assert(pVolGeom); assert(pProjGeom); @@ -164,16 +167,14 @@ bool convertAstraGeometry(const CVolumeGeometry3D* pVolGeom, bool ok; - fOutputScale = 1.0f; - - ok = convertAstraGeometry_internal(pVolGeom, nth, pProjs, fOutputScale); + ok = convertAstraGeometry_internal(pVolGeom, nth, pProjs, params); return ok; } bool convertAstraGeometry(const CVolumeGeometry3D* pVolGeom, const CConeProjectionGeometry3D* pProjGeom, - SConeProjection*& pProjs, float& fOutputScale) + SConeProjection*& pProjs, SProjectorParams3D& params) { assert(pVolGeom); assert(pProjGeom); @@ -192,16 +193,14 @@ bool convertAstraGeometry(const CVolumeGeometry3D* pVolGeom, bool ok; - fOutputScale = 1.0f; - - ok = convertAstraGeometry_internal(pVolGeom, nth, pProjs, fOutputScale); + ok = convertAstraGeometry_internal(pVolGeom, nth, pProjs, params); return ok; } bool convertAstraGeometry(const CVolumeGeometry3D* pVolGeom, const CConeVecProjectionGeometry3D* pProjGeom, - SConeProjection*& pProjs, float& fOutputScale) + SConeProjection*& pProjs, SProjectorParams3D& params) { assert(pVolGeom); assert(pProjGeom); @@ -215,9 +214,7 @@ bool convertAstraGeometry(const CVolumeGeometry3D* pVolGeom, bool ok; - fOutputScale = 1.0f; - - ok = convertAstraGeometry_internal(pVolGeom, nth, pProjs, fOutputScale); + ok = convertAstraGeometry_internal(pVolGeom, nth, pProjs, params); return ok; } @@ -227,7 +224,7 @@ bool convertAstraGeometry(const CVolumeGeometry3D* pVolGeom, const CProjectionGeometry3D* pProjGeom, SPar3DProjection*& pParProjs, SConeProjection*& pConeProjs, - float& fOutputScale) + SProjectorParams3D& params) { const CConeProjectionGeometry3D* conegeom = dynamic_cast<const CConeProjectionGeometry3D*>(pProjGeom); const CParallelProjectionGeometry3D* par3dgeom = dynamic_cast<const CParallelProjectionGeometry3D*>(pProjGeom); @@ -240,13 +237,13 @@ bool convertAstraGeometry(const CVolumeGeometry3D* pVolGeom, bool ok; if (conegeom) { - ok = convertAstraGeometry(pVolGeom, conegeom, pConeProjs, fOutputScale); + ok = convertAstraGeometry(pVolGeom, conegeom, pConeProjs, params); } else if (conevec3dgeom) { - ok = convertAstraGeometry(pVolGeom, conevec3dgeom, pConeProjs, fOutputScale); + ok = convertAstraGeometry(pVolGeom, conevec3dgeom, pConeProjs, params); } else if (par3dgeom) { - ok = convertAstraGeometry(pVolGeom, par3dgeom, pParProjs, fOutputScale); + ok = convertAstraGeometry(pVolGeom, par3dgeom, pParProjs, params); } else if (parvec3dgeom) { - ok = convertAstraGeometry(pVolGeom, parvec3dgeom, pParProjs, fOutputScale); + ok = convertAstraGeometry(pVolGeom, parvec3dgeom, pParProjs, params); } else { ok = false; } @@ -260,20 +257,17 @@ bool convertAstraGeometry(const CVolumeGeometry3D* pVolGeom, class AstraSIRT3d_internal { public: SDimensions3D dims; + SProjectorParams3D params; CUDAProjectionType3d projType; float* angles; float fOriginSourceDistance; float fOriginDetectorDistance; - float fSourceZ; - float fDetSize; float fRelaxation; SConeProjection* projs; SPar3DProjection* parprojs; - float fOutputScale; - bool initialized; bool setStartReconstruction; @@ -305,12 +299,9 @@ AstraSIRT3d::AstraSIRT3d() pData->dims.iProjAngles = 0; pData->dims.iProjU = 0; pData->dims.iProjV = 0; - pData->dims.iRaysPerDetDim = 1; - pData->dims.iRaysPerVoxelDim = 1; pData->projs = 0; pData->parprojs = 0; - pData->fOutputScale = 1.0f; pData->fRelaxation = 1.0f; @@ -361,7 +352,7 @@ bool AstraSIRT3d::setGeometry(const CVolumeGeometry3D* pVolGeom, ok = convertAstraGeometry(pVolGeom, pProjGeom, pData->parprojs, pData->projs, - pData->fOutputScale); + pData->params); if (!ok) return false; @@ -386,8 +377,8 @@ bool AstraSIRT3d::enableSuperSampling(unsigned int iVoxelSuperSampling, if (iVoxelSuperSampling == 0 || iDetectorSuperSampling == 0) return false; - pData->dims.iRaysPerVoxelDim = iVoxelSuperSampling; - pData->dims.iRaysPerDetDim = iDetectorSuperSampling; + pData->params.iRaysPerVoxelDim = iVoxelSuperSampling; + pData->params.iRaysPerDetDim = iDetectorSuperSampling; return true; } @@ -447,9 +438,9 @@ bool AstraSIRT3d::init() bool ok; if (pData->projType == PROJ_PARALLEL) { - ok = pData->sirt.setPar3DGeometry(pData->dims, pData->parprojs, pData->fOutputScale); + ok = pData->sirt.setPar3DGeometry(pData->dims, pData->parprojs, pData->params); } else { - ok = pData->sirt.setConeGeometry(pData->dims, pData->projs, pData->fOutputScale); + ok = pData->sirt.setConeGeometry(pData->dims, pData->projs, pData->params); } if (!ok) @@ -652,19 +643,16 @@ float AstraSIRT3d::computeDiffNorm() class AstraCGLS3d_internal { public: SDimensions3D dims; + SProjectorParams3D params; CUDAProjectionType3d projType; float* angles; float fOriginSourceDistance; float fOriginDetectorDistance; - float fSourceZ; - float fDetSize; SConeProjection* projs; SPar3DProjection* parprojs; - float fOutputScale; - bool initialized; bool setStartReconstruction; @@ -696,12 +684,9 @@ AstraCGLS3d::AstraCGLS3d() pData->dims.iProjAngles = 0; pData->dims.iProjU = 0; pData->dims.iProjV = 0; - pData->dims.iRaysPerDetDim = 1; - pData->dims.iRaysPerVoxelDim = 1; pData->projs = 0; pData->parprojs = 0; - pData->fOutputScale = 1.0f; pData->initialized = false; pData->setStartReconstruction = false; @@ -750,7 +735,7 @@ bool AstraCGLS3d::setGeometry(const CVolumeGeometry3D* pVolGeom, ok = convertAstraGeometry(pVolGeom, pProjGeom, pData->parprojs, pData->projs, - pData->fOutputScale); + pData->params); if (!ok) return false; @@ -774,8 +759,8 @@ bool AstraCGLS3d::enableSuperSampling(unsigned int iVoxelSuperSampling, if (iVoxelSuperSampling == 0 || iDetectorSuperSampling == 0) return false; - pData->dims.iRaysPerVoxelDim = iVoxelSuperSampling; - pData->dims.iRaysPerDetDim = iDetectorSuperSampling; + pData->params.iRaysPerVoxelDim = iVoxelSuperSampling; + pData->params.iRaysPerDetDim = iDetectorSuperSampling; return true; } @@ -829,9 +814,9 @@ bool AstraCGLS3d::init() bool ok; if (pData->projType == PROJ_PARALLEL) { - ok = pData->cgls.setPar3DGeometry(pData->dims, pData->parprojs, pData->fOutputScale); + ok = pData->cgls.setPar3DGeometry(pData->dims, pData->parprojs, pData->params); } else { - ok = pData->cgls.setConeGeometry(pData->dims, pData->projs, pData->fOutputScale); + ok = pData->cgls.setConeGeometry(pData->dims, pData->projs, pData->params); } if (!ok) @@ -1039,23 +1024,23 @@ bool astraCudaFP(const float* pfVolume, float* pfProjections, Cuda3DProjectionKernel projKernel) { SDimensions3D dims; + SProjectorParams3D params; + + params.iRaysPerDetDim = iDetectorSuperSampling; bool ok = convertAstraGeometry_dims(pVolGeom, pProjGeom, dims); if (!ok) return false; - dims.iRaysPerDetDim = iDetectorSuperSampling; if (iDetectorSuperSampling == 0) return false; SPar3DProjection* pParProjs; SConeProjection* pConeProjs; - float outputScale; - ok = convertAstraGeometry(pVolGeom, pProjGeom, pParProjs, pConeProjs, - outputScale); + params); if (iGPUIndex != -1) { @@ -1093,10 +1078,10 @@ bool astraCudaFP(const float* pfVolume, float* pfProjections, if (pParProjs) { switch (projKernel) { case ker3d_default: - ok &= Par3DFP(D_volumeData, D_projData, dims, pParProjs, outputScale); + ok &= Par3DFP(D_volumeData, D_projData, dims, pParProjs, params); break; case ker3d_sum_square_weights: - ok &= Par3DFP_SumSqW(D_volumeData, D_projData, dims, pParProjs, outputScale*outputScale); + ok &= Par3DFP_SumSqW(D_volumeData, D_projData, dims, pParProjs, params); break; default: assert(false); @@ -1104,7 +1089,7 @@ bool astraCudaFP(const float* pfVolume, float* pfProjections, } else { switch (projKernel) { case ker3d_default: - ok &= ConeFP(D_volumeData, D_projData, dims, pConeProjs, outputScale); + ok &= ConeFP(D_volumeData, D_projData, dims, pConeProjs, params); break; default: assert(false); @@ -1129,21 +1114,20 @@ bool astraCudaBP(float* pfVolume, const float* pfProjections, int iGPUIndex, int iVoxelSuperSampling) { SDimensions3D dims; + SProjectorParams3D params; + + params.iRaysPerVoxelDim = iVoxelSuperSampling; bool ok = convertAstraGeometry_dims(pVolGeom, pProjGeom, dims); if (!ok) return false; - dims.iRaysPerVoxelDim = iVoxelSuperSampling; - SPar3DProjection* pParProjs; SConeProjection* pConeProjs; - float outputScale; - ok = convertAstraGeometry(pVolGeom, pProjGeom, pParProjs, pConeProjs, - outputScale); + params); if (iGPUIndex != -1) { cudaSetDevice(iGPUIndex); @@ -1179,9 +1163,9 @@ bool astraCudaBP(float* pfVolume, const float* pfProjections, } if (pParProjs) - ok &= Par3DBP(D_volumeData, D_projData, dims, pParProjs, outputScale); + ok &= Par3DBP(D_volumeData, D_projData, dims, pParProjs, params); else - ok &= ConeBP(D_volumeData, D_projData, dims, pConeProjs, outputScale); + ok &= ConeBP(D_volumeData, D_projData, dims, pConeProjs, params); ok &= copyVolumeFromDevice(pfVolume, D_volumeData, dims, dims.iVolX); @@ -1204,21 +1188,21 @@ bool astraCudaBP_SIRTWeighted(float* pfVolume, int iGPUIndex, int iVoxelSuperSampling) { SDimensions3D dims; + SProjectorParams3D params; + + params.iRaysPerVoxelDim = iVoxelSuperSampling; bool ok = convertAstraGeometry_dims(pVolGeom, pProjGeom, dims); if (!ok) return false; - dims.iRaysPerVoxelDim = iVoxelSuperSampling; SPar3DProjection* pParProjs; SConeProjection* pConeProjs; - float outputScale; - ok = convertAstraGeometry(pVolGeom, pProjGeom, pParProjs, pConeProjs, - outputScale); + params); if (iGPUIndex != -1) { cudaSetDevice(iGPUIndex); @@ -1255,9 +1239,9 @@ bool astraCudaBP_SIRTWeighted(float* pfVolume, processSino3D<opSet>(D_projData, 1.0f, dims); if (pParProjs) - ok &= Par3DBP(D_pixelWeight, D_projData, dims, pParProjs, outputScale); + ok &= Par3DBP(D_pixelWeight, D_projData, dims, pParProjs, params); else - ok &= ConeBP(D_pixelWeight, D_projData, dims, pConeProjs, outputScale); + ok &= ConeBP(D_pixelWeight, D_projData, dims, pConeProjs, params); processVol3D<opInvert>(D_pixelWeight, dims); if (!ok) { @@ -1272,9 +1256,9 @@ bool astraCudaBP_SIRTWeighted(float* pfVolume, ok &= zeroVolumeData(D_volumeData, dims); // Do BP into D_volumeData if (pParProjs) - ok &= Par3DBP(D_volumeData, D_projData, dims, pParProjs, outputScale); + ok &= Par3DBP(D_volumeData, D_projData, dims, pParProjs, params); else - ok &= ConeBP(D_volumeData, D_projData, dims, pConeProjs, outputScale); + ok &= ConeBP(D_volumeData, D_projData, dims, pConeProjs, params); // Multiply with weights processVol3D<opMul>(D_volumeData, D_pixelWeight, dims); @@ -1305,83 +1289,4 @@ bool astraCudaBP_SIRTWeighted(float* pfVolume, } - - -bool astraCudaFDK(float* pfVolume, const float* pfProjections, - const CVolumeGeometry3D* pVolGeom, - const CConeProjectionGeometry3D* pProjGeom, - bool bShortScan, - int iGPUIndex, int iVoxelSuperSampling, const float* filter) -{ - SDimensions3D dims; - - bool ok = convertAstraGeometry_dims(pVolGeom, pProjGeom, dims); - - // TODO: Check that pVolGeom is normalized, since we don't support - // other volume geometries yet - - if (!ok) - return false; - - dims.iRaysPerVoxelDim = iVoxelSuperSampling; - - if (iVoxelSuperSampling == 0) - return false; - - if (iGPUIndex != -1) { - cudaSetDevice(iGPUIndex); - cudaError_t err = cudaGetLastError(); - - // Ignore errors caused by calling cudaSetDevice multiple times - if (err != cudaSuccess && err != cudaErrorSetOnActiveProcess) - return false; - } - - cudaPitchedPtr D_volumeData = allocateVolumeData(dims); - ok = D_volumeData.ptr; - if (!ok) - return false; - - cudaPitchedPtr D_projData = allocateProjectionData(dims); - ok = D_projData.ptr; - if (!ok) { - cudaFree(D_volumeData.ptr); - return false; - } - - ok &= copyProjectionsToDevice(pfProjections, D_projData, dims, dims.iProjU); - - ok &= zeroVolumeData(D_volumeData, dims); - - if (!ok) { - cudaFree(D_volumeData.ptr); - cudaFree(D_projData.ptr); - return false; - } - - float fOriginSourceDistance = pProjGeom->getOriginSourceDistance(); - float fOriginDetectorDistance = pProjGeom->getOriginDetectorDistance(); - float fDetUSize = pProjGeom->getDetectorSpacingX(); - float fDetVSize = pProjGeom->getDetectorSpacingY(); - const float *pfAngles = pProjGeom->getProjectionAngles(); - - - // TODO: Offer interface for SrcZ, DetZ - ok &= FDK(D_volumeData, D_projData, fOriginSourceDistance, - fOriginDetectorDistance, 0, 0, fDetUSize, fDetVSize, - dims, pfAngles, bShortScan, filter); - - ok &= copyVolumeFromDevice(pfVolume, D_volumeData, dims, dims.iVolX); - - - cudaFree(D_volumeData.ptr); - cudaFree(D_projData.ptr); - - return ok; - -} - - - - } diff --git a/cuda/3d/astra3d.h b/cuda/3d/astra3d.h index dde1347..93bf576 100644 --- a/cuda/3d/astra3d.h +++ b/cuda/3d/astra3d.h @@ -37,11 +37,6 @@ namespace astra { // TODO: Switch to a class hierarchy as with the 2D algorithms -enum Cuda3DProjectionKernel { - ker3d_default = 0, - ker3d_sum_square_weights -}; - class CProjectionGeometry3D; class CParallelProjectionGeometry3D; class CParallelVecProjectionGeometry3D; @@ -50,6 +45,10 @@ class CConeVecProjectionGeometry3D; class CVolumeGeometry3D; class AstraSIRT3d_internal; +using astraCUDA3d::Cuda3DProjectionKernel; +using astraCUDA3d::ker3d_default; +using astraCUDA3d::ker3d_sum_square_weights; + class _AstraExport AstraSIRT3d { public: @@ -291,7 +290,7 @@ bool convertAstraGeometry(const CVolumeGeometry3D* pVolGeom, const CProjectionGeometry3D* pProjGeom, SPar3DProjection*& pParProjs, SConeProjection*& pConeProjs, - float& fOutputScale); + astraCUDA3d::SProjectorParams3D& params); _AstraExport bool astraCudaFP(const float* pfVolume, float* pfProjections, const CVolumeGeometry3D* pVolGeom, @@ -310,13 +309,6 @@ _AstraExport bool astraCudaBP_SIRTWeighted(float* pfVolume, const float* pfProje const CProjectionGeometry3D* pProjGeom, int iGPUIndex, int iVoxelSuperSampling); -_AstraExport bool astraCudaFDK(float* pfVolume, const float* pfProjections, - const CVolumeGeometry3D* pVolGeom, - const CConeProjectionGeometry3D* pProjGeom, - bool bShortScan, - int iGPUIndex, int iVoxelSuperSampling, const float* filter); - - } diff --git a/cuda/3d/cgls3d.cu b/cuda/3d/cgls3d.cu index dd0e8a0..0b0331e 100644 --- a/cuda/3d/cgls3d.cu +++ b/cuda/3d/cgls3d.cu @@ -242,7 +242,7 @@ bool doCGLS(cudaPitchedPtr& D_volumeData, CGLS cgls; bool ok = true; - ok &= cgls.setConeGeometry(dims, angles, 1.0f); + ok &= cgls.setConeGeometry(dims, angles, SProjectorParams3D()); if (D_maskData.ptr) ok &= cgls.enableVolumeMask(); diff --git a/cuda/3d/cone_bp.cu b/cuda/3d/cone_bp.cu index 4a41f6a..6bd9d16 100644 --- a/cuda/3d/cone_bp.cu +++ b/cuda/3d/cone_bp.cu @@ -77,6 +77,7 @@ bool bindProjDataTexture(const cudaArray* array) //__launch_bounds__(32*16, 4) +template<bool FDKWEIGHT> __global__ void dev_cone_BP(void* D_volData, unsigned int volPitch, int startAngle, int angleOffset, const astraCUDA3d::SDimensions3D dims, float fOutputScale) @@ -134,7 +135,10 @@ __global__ void dev_cone_BP(void* D_volData, unsigned int volPitch, int startAng fU = fUNum * fr; fV = fVNum * fr; float fVal = tex3D(gT_coneProjTexture, fU, fAngle, fV); - Z[idx] += fVal; // fr*fr*fVal; + if (FDKWEIGHT) + Z[idx] += fr*fr*fVal; + else + Z[idx] += fVal; fUNum += fCu.z; fVNum += fCv.z; @@ -154,7 +158,7 @@ __global__ void dev_cone_BP(void* D_volData, unsigned int volPitch, int startAng // supersampling version -__global__ void dev_cone_BP_SS(void* D_volData, unsigned int volPitch, int startAngle, int angleOffset, const SDimensions3D dims, float fOutputScale) +__global__ void dev_cone_BP_SS(void* D_volData, unsigned int volPitch, int startAngle, int angleOffset, const SDimensions3D dims, int iRaysPerVoxelDim, float fOutputScale) { float* volData = (float*)D_volData; @@ -185,12 +189,12 @@ __global__ void dev_cone_BP_SS(void* D_volData, unsigned int volPitch, int start if (endZ > dims.iVolZ) endZ = dims.iVolZ; - float fX = X - 0.5f*dims.iVolX + 0.5f - 0.5f + 0.5f/dims.iRaysPerVoxelDim; - float fY = Y - 0.5f*dims.iVolY + 0.5f - 0.5f + 0.5f/dims.iRaysPerVoxelDim; - float fZ = startZ - 0.5f*dims.iVolZ + 0.5f - 0.5f + 0.5f/dims.iRaysPerVoxelDim; - const float fSubStep = 1.0f/dims.iRaysPerVoxelDim; + float fX = X - 0.5f*dims.iVolX + 0.5f - 0.5f + 0.5f/iRaysPerVoxelDim; + float fY = Y - 0.5f*dims.iVolY + 0.5f - 0.5f + 0.5f/iRaysPerVoxelDim; + float fZ = startZ - 0.5f*dims.iVolZ + 0.5f - 0.5f + 0.5f/iRaysPerVoxelDim; + const float fSubStep = 1.0f/iRaysPerVoxelDim; - fOutputScale /= (dims.iRaysPerVoxelDim*dims.iRaysPerVoxelDim*dims.iRaysPerVoxelDim); + fOutputScale /= (iRaysPerVoxelDim*iRaysPerVoxelDim*iRaysPerVoxelDim); for (int Z = startZ; Z < endZ; ++Z, fZ += 1.0f) @@ -216,11 +220,11 @@ __global__ void dev_cone_BP_SS(void* D_volData, unsigned int volPitch, int start const float fCdc = gC_C[12*angle+11]; float fXs = fX; - for (int iSubX = 0; iSubX < dims.iRaysPerVoxelDim; ++iSubX) { + for (int iSubX = 0; iSubX < iRaysPerVoxelDim; ++iSubX) { float fYs = fY; - for (int iSubY = 0; iSubY < dims.iRaysPerVoxelDim; ++iSubY) { + for (int iSubY = 0; iSubY < iRaysPerVoxelDim; ++iSubY) { float fZs = fZ; - for (int iSubZ = 0; iSubZ < dims.iRaysPerVoxelDim; ++iSubZ) { + for (int iSubZ = 0; iSubZ < iRaysPerVoxelDim; ++iSubZ) { const float fUNum = fCuc + fXs * fCux + fYs * fCuy + fZs * fCuz; const float fVNum = fCvc + fXs * fCvx + fYs * fCvy + fZs * fCvz; @@ -248,10 +252,12 @@ __global__ void dev_cone_BP_SS(void* D_volData, unsigned int volPitch, int start bool ConeBP_Array(cudaPitchedPtr D_volumeData, cudaArray *D_projArray, const SDimensions3D& dims, const SConeProjection* angles, - float fOutputScale) + const SProjectorParams3D& params) { bindProjDataTexture(D_projArray); + float fOutputScale = params.fOutputScale * params.fVolScaleX * params.fVolScaleY * params.fVolScaleZ; + for (unsigned int th = 0; th < dims.iProjAngles; th += g_MaxAngles) { unsigned int angleCount = g_MaxAngles; if (th + angleCount > dims.iProjAngles) @@ -295,10 +301,12 @@ bool ConeBP_Array(cudaPitchedPtr D_volumeData, for (unsigned int i = 0; i < angleCount; i += g_anglesPerBlock) { // printf("Calling BP: %d, %dx%d, %dx%d to %p\n", i, dimBlock.x, dimBlock.y, dimGrid.x, dimGrid.y, (void*)D_volumeData.ptr); - if (dims.iRaysPerVoxelDim == 1) - dev_cone_BP<<<dimGrid, dimBlock>>>(D_volumeData.ptr, D_volumeData.pitch/sizeof(float), i, th, dims, fOutputScale); + if (params.bFDKWeighting) + dev_cone_BP<true><<<dimGrid, dimBlock>>>(D_volumeData.ptr, D_volumeData.pitch/sizeof(float), i, th, dims, fOutputScale); + else if (params.iRaysPerVoxelDim == 1) + dev_cone_BP<false><<<dimGrid, dimBlock>>>(D_volumeData.ptr, D_volumeData.pitch/sizeof(float), i, th, dims, fOutputScale); else - dev_cone_BP_SS<<<dimGrid, dimBlock>>>(D_volumeData.ptr, D_volumeData.pitch/sizeof(float), i, th, dims, fOutputScale); + dev_cone_BP_SS<<<dimGrid, dimBlock>>>(D_volumeData.ptr, D_volumeData.pitch/sizeof(float), i, th, dims, params.iRaysPerVoxelDim, fOutputScale); } cudaTextForceKernelsCompletion(); @@ -315,14 +323,14 @@ bool ConeBP_Array(cudaPitchedPtr D_volumeData, bool ConeBP(cudaPitchedPtr D_volumeData, cudaPitchedPtr D_projData, const SDimensions3D& dims, const SConeProjection* angles, - float fOutputScale) + const SProjectorParams3D& params) { // transfer projections to array cudaArray* cuArray = allocateProjectionArray(dims); transferProjectionsToArray(D_projData, cuArray, dims); - bool ret = ConeBP_Array(D_volumeData, cuArray, dims, angles, fOutputScale); + bool ret = ConeBP_Array(D_volumeData, cuArray, dims, angles, params); cudaFreeArray(cuArray); diff --git a/cuda/3d/cone_bp.h b/cuda/3d/cone_bp.h index 4d3d2dd..a6e2c23 100644 --- a/cuda/3d/cone_bp.h +++ b/cuda/3d/cone_bp.h @@ -34,12 +34,12 @@ namespace astraCUDA3d { _AstraExport bool ConeBP_Array(cudaPitchedPtr D_volumeData, cudaArray *D_projArray, const SDimensions3D& dims, const SConeProjection* angles, - float fOutputScale); + const SProjectorParams3D& params); _AstraExport bool ConeBP(cudaPitchedPtr D_volumeData, cudaPitchedPtr D_projData, const SDimensions3D& dims, const SConeProjection* angles, - float fOutputScale); + const SProjectorParams3D& params); } diff --git a/cuda/3d/cone_fp.cu b/cuda/3d/cone_fp.cu index 13b184f..fefcdc1 100644 --- a/cuda/3d/cone_fp.cu +++ b/cuda/3d/cone_fp.cu @@ -128,6 +128,18 @@ struct DIR_Z { __device__ float z(float f0, float f1, float f2) const { return f0; } }; +struct SCALE_CUBE { + float fOutputScale; + __device__ float scale(float a1, float a2) const { return sqrt(a1*a1+a2*a2+1.0f) * fOutputScale; } +}; + +struct SCALE_NONCUBE { + float fScale1; + float fScale2; + float fOutputScale; + __device__ float scale(float a1, float a2) const { return sqrt(a1*a1*fScale1+a2*a2*fScale2+1.0f) * fOutputScale; } +}; + // threadIdx: x = ??? detector (u?) // y = relative angle @@ -135,11 +147,12 @@ struct DIR_Z { // blockIdx: x = ??? detector (u+v?) // y = angle block -template<class COORD> +template<class COORD, class SCALE> __global__ void cone_FP_t(float* D_projData, unsigned int projPitch, unsigned int startSlice, unsigned int startAngle, unsigned int endAngle, - const SDimensions3D dims, float fOutputScale) + const SDimensions3D dims, + SCALE sc) { COORD c; @@ -188,7 +201,7 @@ __global__ void cone_FP_t(float* D_projData, unsigned int projPitch, const float b1 = c.c1(fSrcX,fSrcY,fSrcZ) - a1 * c.c0(fSrcX,fSrcY,fSrcZ); const float b2 = c.c2(fSrcX,fSrcY,fSrcZ) - a2 * c.c0(fSrcX,fSrcY,fSrcZ); - const float fDistCorr = sqrt(a1*a1+a2*a2+1.0f) * fOutputScale; + const float fDistCorr = sc.scale(a1, a2); float fVal = 0.0f; @@ -214,7 +227,8 @@ template<class COORD> __global__ void cone_FP_SS_t(float* D_projData, unsigned int projPitch, unsigned int startSlice, unsigned int startAngle, unsigned int endAngle, - const SDimensions3D dims, float fOutputScale) + const SDimensions3D dims, int iRaysPerDetDim, + SCALE_NONCUBE sc) { COORD c; @@ -245,7 +259,7 @@ __global__ void cone_FP_SS_t(float* D_projData, unsigned int projPitch, if (endSlice > c.nSlices(dims)) endSlice = c.nSlices(dims); - const float fSubStep = 1.0f/dims.iRaysPerDetDim; + const float fSubStep = 1.0f/iRaysPerDetDim; for (int detectorV = startDetectorV; detectorV < endDetectorV; ++detectorV) { @@ -255,9 +269,9 @@ __global__ void cone_FP_SS_t(float* D_projData, unsigned int projPitch, float fV = 0.0f; float fdU = detectorU - 0.5f + 0.5f*fSubStep; - for (int iSubU = 0; iSubU < dims.iRaysPerDetDim; ++iSubU, fdU+=fSubStep) { + for (int iSubU = 0; iSubU < iRaysPerDetDim; ++iSubU, fdU+=fSubStep) { float fdV = detectorV - 0.5f + 0.5f*fSubStep; - for (int iSubV = 0; iSubV < dims.iRaysPerDetDim; ++iSubV, fdV+=fSubStep) { + for (int iSubV = 0; iSubV < iRaysPerDetDim; ++iSubV, fdV+=fSubStep) { const float fDetX = fDetSX + fdU*fDetUX + fdV*fDetVX; const float fDetY = fDetSY + fdU*fDetUY + fdV*fDetVY; @@ -272,7 +286,7 @@ __global__ void cone_FP_SS_t(float* D_projData, unsigned int projPitch, const float b1 = c.c1(fSrcX,fSrcY,fSrcZ) - a1 * c.c0(fSrcX,fSrcY,fSrcZ); const float b2 = c.c2(fSrcX,fSrcY,fSrcZ) - a2 * c.c0(fSrcX,fSrcY,fSrcZ); - const float fDistCorr = sqrt(a1*a1+a2*a2+1.0f) * fOutputScale; + const float fDistCorr = sc.scale(a1, a2); float fVal = 0.0f; @@ -294,14 +308,14 @@ __global__ void cone_FP_SS_t(float* D_projData, unsigned int projPitch, } } - D_projData[(detectorV*dims.iProjAngles+angle)*projPitch+detectorU] += fV / (dims.iRaysPerDetDim * dims.iRaysPerDetDim); + D_projData[(detectorV*dims.iProjAngles+angle)*projPitch+detectorU] += fV / (iRaysPerDetDim * iRaysPerDetDim); } } bool ConeFP_Array_internal(cudaPitchedPtr D_projData, const SDimensions3D& dims, unsigned int angleCount, const SConeProjection* angles, - float fOutputScale) + const SProjectorParams3D& params) { // transfer angles to constant memory float* tmp = new float[angleCount]; @@ -336,6 +350,36 @@ bool ConeFP_Array_internal(cudaPitchedPtr D_projData, unsigned int blockEnd = 0; int blockDirection = 0; + bool cube = true; + if (abs(params.fVolScaleX / params.fVolScaleY - 1.0) > 0.00001) + cube = false; + if (abs(params.fVolScaleX / params.fVolScaleZ - 1.0) > 0.00001) + cube = false; + + SCALE_CUBE scube; + scube.fOutputScale = params.fOutputScale * params.fVolScaleX; + + SCALE_NONCUBE snoncubeX; + float fS1 = params.fVolScaleY / params.fVolScaleX; + snoncubeX.fScale1 = fS1 * fS1; + float fS2 = params.fVolScaleZ / params.fVolScaleX; + snoncubeX.fScale2 = fS2 * fS2; + snoncubeX.fOutputScale = params.fOutputScale * params.fVolScaleX; + + SCALE_NONCUBE snoncubeY; + fS1 = params.fVolScaleX / params.fVolScaleY; + snoncubeY.fScale1 = fS1 * fS1; + fS2 = params.fVolScaleY / params.fVolScaleY; + snoncubeY.fScale2 = fS2 * fS2; + snoncubeY.fOutputScale = params.fOutputScale * params.fVolScaleY; + + SCALE_NONCUBE snoncubeZ; + fS1 = params.fVolScaleX / params.fVolScaleZ; + snoncubeZ.fScale1 = fS1 * fS1; + fS2 = params.fVolScaleY / params.fVolScaleZ; + snoncubeZ.fScale2 = fS2 * fS2; + snoncubeZ.fOutputScale = params.fOutputScale * params.fVolScaleZ; + // timeval t; // tic(t); @@ -373,22 +417,31 @@ bool ConeFP_Array_internal(cudaPitchedPtr D_projData, if (blockDirection == 0) { for (unsigned int i = 0; i < dims.iVolX; i += g_blockSlices) - if (dims.iRaysPerDetDim == 1) - cone_FP_t<DIR_X><<<dimGrid, dimBlock, 0, stream>>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, fOutputScale); + if (params.iRaysPerDetDim == 1) + if (cube) + cone_FP_t<DIR_X><<<dimGrid, dimBlock, 0, stream>>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, scube); + else + cone_FP_t<DIR_X><<<dimGrid, dimBlock, 0, stream>>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, snoncubeX); else - cone_FP_SS_t<DIR_X><<<dimGrid, dimBlock, 0, stream>>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, fOutputScale); + cone_FP_SS_t<DIR_X><<<dimGrid, dimBlock, 0, stream>>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, params.iRaysPerDetDim, snoncubeX); } else if (blockDirection == 1) { for (unsigned int i = 0; i < dims.iVolY; i += g_blockSlices) - if (dims.iRaysPerDetDim == 1) - cone_FP_t<DIR_Y><<<dimGrid, dimBlock, 0, stream>>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, fOutputScale); + if (params.iRaysPerDetDim == 1) + if (cube) + cone_FP_t<DIR_Y><<<dimGrid, dimBlock, 0, stream>>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, scube); + else + cone_FP_t<DIR_Y><<<dimGrid, dimBlock, 0, stream>>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, snoncubeY); else - cone_FP_SS_t<DIR_Y><<<dimGrid, dimBlock, 0, stream>>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, fOutputScale); + cone_FP_SS_t<DIR_Y><<<dimGrid, dimBlock, 0, stream>>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, params.iRaysPerDetDim, snoncubeY); } else if (blockDirection == 2) { for (unsigned int i = 0; i < dims.iVolZ; i += g_blockSlices) - if (dims.iRaysPerDetDim == 1) - cone_FP_t<DIR_Z><<<dimGrid, dimBlock, 0, stream>>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, fOutputScale); + if (params.iRaysPerDetDim == 1) + if (cube) + cone_FP_t<DIR_Z><<<dimGrid, dimBlock, 0, stream>>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, scube); + else + cone_FP_t<DIR_Z><<<dimGrid, dimBlock, 0, stream>>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, snoncubeZ); else - cone_FP_SS_t<DIR_Z><<<dimGrid, dimBlock, 0, stream>>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, fOutputScale); + cone_FP_SS_t<DIR_Z><<<dimGrid, dimBlock, 0, stream>>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, params.iRaysPerDetDim, snoncubeZ); } } @@ -414,7 +467,7 @@ bool ConeFP_Array_internal(cudaPitchedPtr D_projData, bool ConeFP(cudaPitchedPtr D_volumeData, cudaPitchedPtr D_projData, const SDimensions3D& dims, const SConeProjection* angles, - float fOutputScale) + const SProjectorParams3D& params) { // transfer volume to array @@ -434,7 +487,7 @@ bool ConeFP(cudaPitchedPtr D_volumeData, ret = ConeFP_Array_internal(D_subprojData, dims, iEndAngle - iAngle, angles + iAngle, - fOutputScale); + params); if (!ret) break; } diff --git a/cuda/3d/cone_fp.h b/cuda/3d/cone_fp.h index 969a6d8..62c9caa 100644 --- a/cuda/3d/cone_fp.h +++ b/cuda/3d/cone_fp.h @@ -34,12 +34,12 @@ namespace astraCUDA3d { _AstraExport bool ConeFP_Array(cudaArray *D_volArray, cudaPitchedPtr D_projData, const SDimensions3D& dims, const SConeProjection* angles, - float fOutputScale); + const SProjectorParams3D& params); _AstraExport bool ConeFP(cudaPitchedPtr D_volumeData, cudaPitchedPtr D_projData, const SDimensions3D& dims, const SConeProjection* angles, - float fOutputScale); + const SProjectorParams3D& params); } diff --git a/cuda/3d/dims3d.h b/cuda/3d/dims3d.h index 5437a85..eb7045f 100644 --- a/cuda/3d/dims3d.h +++ b/cuda/3d/dims3d.h @@ -37,6 +37,13 @@ namespace astraCUDA3d { using astra::SConeProjection; using astra::SPar3DProjection; + +enum Cuda3DProjectionKernel { + ker3d_default = 0, + ker3d_sum_square_weights +}; + + struct SDimensions3D { unsigned int iVolX; unsigned int iVolY; @@ -44,8 +51,25 @@ struct SDimensions3D { unsigned int iProjAngles; unsigned int iProjU; // number of detectors in the U direction unsigned int iProjV; // number of detectors in the V direction +}; + +struct SProjectorParams3D { + SProjectorParams3D() : + iRaysPerDetDim(1), iRaysPerVoxelDim(1), + fOutputScale(1.0f), + fVolScaleX(1.0f), fVolScaleY(1.0f), fVolScaleZ(1.0f), + ker(ker3d_default), + bFDKWeighting(false) + { } + unsigned int iRaysPerDetDim; unsigned int iRaysPerVoxelDim; + float fOutputScale; + float fVolScaleX; + float fVolScaleY; + float fVolScaleZ; + Cuda3DProjectionKernel ker; + bool bFDKWeighting; }; } diff --git a/cuda/3d/fdk.cu b/cuda/3d/fdk.cu index 4899ad1..bbac0be 100644 --- a/cuda/3d/fdk.cu +++ b/cuda/3d/fdk.cu @@ -41,174 +41,26 @@ $Id$ #include "dims3d.h" #include "arith3d.h" +#include "cone_bp.h" #include "../2d/fft.h" -typedef texture<float, 3, cudaReadModeElementType> texture3D; - -static texture3D gT_coneProjTexture; +#include "../../include/astra/Logging.h" namespace astraCUDA3d { -static const unsigned int g_volBlockZ = 16; - -static const unsigned int g_anglesPerBlock = 64; -static const unsigned int g_volBlockX = 32; -static const unsigned int g_volBlockY = 16; - static const unsigned int g_anglesPerWeightBlock = 16; static const unsigned int g_detBlockU = 32; static const unsigned int g_detBlockV = 32; -static const unsigned g_MaxAngles = 2048; +static const unsigned g_MaxAngles = 12000; -__constant__ float gC_angle_sin[g_MaxAngles]; -__constant__ float gC_angle_cos[g_MaxAngles]; __constant__ float gC_angle[g_MaxAngles]; // per-detector u/v shifts? -static bool bindProjDataTexture(const cudaArray* array) -{ - cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<float>(); - - gT_coneProjTexture.addressMode[0] = cudaAddressModeBorder; - gT_coneProjTexture.addressMode[1] = cudaAddressModeBorder; - gT_coneProjTexture.addressMode[2] = cudaAddressModeBorder; - gT_coneProjTexture.filterMode = cudaFilterModeLinear; - gT_coneProjTexture.normalized = false; - - cudaBindTextureToArray(gT_coneProjTexture, array, channelDesc); - // TODO: error value? - - return true; -} - - -__global__ void devBP_FDK(void* D_volData, unsigned int volPitch, int startAngle, float fSrcOrigin, float fDetOrigin, float fSrcZ, float fDetZ, float fInvDetUSize, float fInvDetVSize, const SDimensions3D dims) -{ - float* volData = (float*)D_volData; - - int endAngle = startAngle + g_anglesPerBlock; - if (endAngle > dims.iProjAngles) - endAngle = dims.iProjAngles; - - // threadIdx: x = rel x - // y = rel y - - // blockIdx: x = x + y - // y = z - - - // TO TRY: precompute part of detector intersection formulas in shared mem? - // TO TRY: inner loop over z, gather ray values in shared mem - - const int X = blockIdx.x % ((dims.iVolX+g_volBlockX-1)/g_volBlockX) * g_volBlockX + threadIdx.x; - const int Y = blockIdx.x / ((dims.iVolX+g_volBlockX-1)/g_volBlockX) * g_volBlockY + threadIdx.y; - - if (X > dims.iVolX) - return; - if (Y > dims.iVolY) - return; - - const int startZ = blockIdx.y * g_volBlockZ; - int endZ = startZ + g_volBlockZ; - if (endZ > dims.iVolZ) - endZ = dims.iVolZ; - - float fX = X - 0.5f*dims.iVolX + 0.5f; - float fY = Y - 0.5f*dims.iVolY + 0.5f; - float fZ = startZ - 0.5f*dims.iVolZ + 0.5f - fSrcZ; - - const float fU_base = 0.5f*dims.iProjU - 0.5f + 0.5f; - const float fV_base = 0.5f*dims.iProjV - 0.5f + 0.5f + (fDetZ-fSrcZ); - - // Note re. fZ/rV_base: the computations below are all relative to the - // optical axis, so we do the Z-adjustments beforehand. - - for (int Z = startZ; Z < endZ; ++Z, fZ += 1.0f) - { - - float fVal = 0.0f; - float fAngle = startAngle + 0.5f; - - for (int angle = startAngle; angle < endAngle; ++angle, fAngle += 1.0f) - { - - const float cos_theta = gC_angle_cos[angle]; - const float sin_theta = gC_angle_sin[angle]; - - const float fR = fSrcOrigin; - const float fD = fR - fX * sin_theta + fY * cos_theta; - float fWeight = fR / fD; - fWeight *= fWeight; - - const float fScaleFactor = (fR + fDetOrigin) / fD; - const float fU = fU_base + (fX*cos_theta+fY*sin_theta) * fScaleFactor * fInvDetUSize; - const float fV = fV_base + fZ * fScaleFactor * fInvDetVSize; - - fVal += tex3D(gT_coneProjTexture, fU, fAngle, fV); - - } - - volData[(Z*dims.iVolY+Y)*volPitch+X] += fVal; -// projData[(angle*dims.iProjV+detectorV)*projPitch+detectorU] = 10.0f; -// if (threadIdx.x == 0 && threadIdx.y == 0) { printf("%d,%d,%d [%d / %d] -> %f\n", angle, detectorU, detectorV, (angle*dims.iProjV+detectorV)*projPitch+detectorU, projPitch, projData[(angle*dims.iProjV+detectorV)*projPitch+detectorU]); } - } - -} - - -bool FDK_BP(cudaPitchedPtr D_volumeData, - cudaPitchedPtr D_projData, - float fSrcOrigin, float fDetOrigin, - float fSrcZ, float fDetZ, float fDetUSize, float fDetVSize, - const SDimensions3D& dims, const float* angles) -{ - // transfer projections to array - - cudaArray* cuArray = allocateProjectionArray(dims); - transferProjectionsToArray(D_projData, cuArray, dims); - - bindProjDataTexture(cuArray); - - float* angle_sin = new float[dims.iProjAngles]; - float* angle_cos = new float[dims.iProjAngles]; - - for (unsigned int i = 0; i < dims.iProjAngles; ++i) { - angle_sin[i] = sinf(angles[i]); - angle_cos[i] = cosf(angles[i]); - } - cudaError_t e1 = cudaMemcpyToSymbol(gC_angle_sin, angle_sin, dims.iProjAngles*sizeof(float), 0, cudaMemcpyHostToDevice); - cudaError_t e2 = cudaMemcpyToSymbol(gC_angle_cos, angle_cos, dims.iProjAngles*sizeof(float), 0, cudaMemcpyHostToDevice); - assert(e1 == cudaSuccess); - assert(e2 == cudaSuccess); - - delete[] angle_sin; - delete[] angle_cos; - - dim3 dimBlock(g_volBlockX, g_volBlockY); - - dim3 dimGrid(((dims.iVolX+g_volBlockX-1)/g_volBlockX)*((dims.iVolY+g_volBlockY-1)/g_volBlockY), (dims.iVolZ+g_volBlockZ-1)/g_volBlockZ); - - // timeval t; - // tic(t); - - for (unsigned int i = 0; i < dims.iProjAngles; i += g_anglesPerBlock) { - devBP_FDK<<<dimGrid, dimBlock>>>(D_volumeData.ptr, D_volumeData.pitch/sizeof(float), i, fSrcOrigin, fDetOrigin, fSrcZ, fDetZ, 1.0f / fDetUSize, 1.0f / fDetVSize, dims); - } - - cudaTextForceKernelsCompletion(); - - cudaFreeArray(cuArray); - - // printf("%f\n", toc(t)); - - return true; -} - -__global__ void devFDK_preweight(void* D_projData, unsigned int projPitch, unsigned int startAngle, unsigned int endAngle, float fSrcOrigin, float fDetOrigin, float fSrcZ, float fDetZ, float fDetUSize, float fDetVSize, const SDimensions3D dims) +__global__ void devFDK_preweight(void* D_projData, unsigned int projPitch, unsigned int startAngle, unsigned int endAngle, float fSrcOrigin, float fDetOrigin, float fZShift, float fDetUSize, float fDetVSize, const SDimensions3D dims) { float* projData = (float*)D_projData; int angle = startAngle + blockIdx.y * g_anglesPerWeightBlock + threadIdx.y; @@ -230,21 +82,26 @@ __global__ void devFDK_preweight(void* D_projData, unsigned int projPitch, unsig const float fT = fCentralRayLength * fCentralRayLength + fU * fU; - float fV = (startDetectorV - 0.5f*dims.iProjV + 0.5f) * fDetVSize + fDetZ - fSrcZ; + float fV = (startDetectorV - 0.5f*dims.iProjV + 0.5f) * fDetVSize + fZShift; + + //const float fW = fCentralRayLength; + //const float fW = fCentralRayLength * (M_PI / 2.0f) / (float)dims.iProjAngles; + const float fW1 = fSrcOrigin * fDetUSize * fDetVSize; + const float fW = fCentralRayLength * fW1 * fW1 * (M_PI / 2.0f) / (float)dims.iProjAngles; for (int detectorV = startDetectorV; detectorV < endDetectorV; ++detectorV) { const float fRayLength = sqrtf(fT + fV * fV); - const float fWeight = fCentralRayLength / fRayLength; + const float fWeight = fW / fRayLength; projData[(detectorV*dims.iProjAngles+angle)*projPitch+detectorU] *= fWeight; - fV += 1.0f; + fV += fDetVSize; } } -__global__ void devFDK_ParkerWeight(void* D_projData, unsigned int projPitch, unsigned int startAngle, unsigned int endAngle, float fSrcOrigin, float fDetOrigin, float fSrcZ, float fDetZ, float fDetUSize, float fCentralFanAngle, const SDimensions3D dims) +__global__ void devFDK_ParkerWeight(void* D_projData, unsigned int projPitch, unsigned int startAngle, unsigned int endAngle, float fSrcOrigin, float fDetOrigin, float fDetUSize, float fCentralFanAngle, const SDimensions3D dims) { float* projData = (float*)D_projData; int angle = startAngle + blockIdx.y * g_anglesPerWeightBlock + threadIdx.y; @@ -305,7 +162,7 @@ __global__ void devFDK_ParkerWeight(void* D_projData, unsigned int projPitch, un // Perform the FDK pre-weighting and filtering bool FDK_PreWeight(cudaPitchedPtr D_projData, float fSrcOrigin, float fDetOrigin, - float fSrcZ, float fDetZ, + float fZShift, float fDetUSize, float fDetVSize, bool bShortScan, const SDimensions3D& dims, const float* angles) { @@ -318,23 +175,57 @@ bool FDK_PreWeight(cudaPitchedPtr D_projData, int projPitch = D_projData.pitch/sizeof(float); - devFDK_preweight<<<dimGrid, dimBlock>>>(D_projData.ptr, projPitch, 0, dims.iProjAngles, fSrcOrigin, fDetOrigin, fSrcZ, fDetZ, fDetUSize, fDetVSize, dims); + devFDK_preweight<<<dimGrid, dimBlock>>>(D_projData.ptr, projPitch, 0, dims.iProjAngles, fSrcOrigin, fDetOrigin, fZShift, fDetUSize, fDetVSize, dims); cudaTextForceKernelsCompletion(); - if (bShortScan) { + if (bShortScan && dims.iProjAngles > 1) { + ASTRA_DEBUG("Doing Parker weighting"); // We do short-scan Parker weighting - cudaError_t e1 = cudaMemcpyToSymbol(gC_angle, angles, + // First, determine (in a very basic way) the interval that's + // been scanned. We assume angles[0] is one of the endpoints of the + // range. + float fdA = angles[1] - angles[0]; + + while (fdA < -M_PI) + fdA += 2*M_PI; + while (fdA >= M_PI) + fdA -= 2*M_PI; + + float fAngleBase; + if (fdA >= 0.0f) { + // going up from angles[0] + fAngleBase = angles[dims.iProjAngles - 1]; + } else { + // going down from angles[0] + fAngleBase = angles[0]; + } + + // We pick the highest end of the range, and then + // move all angles so they fall in (-2pi,0] + + float *fRelAngles = new float[dims.iProjAngles]; + for (unsigned int i = 0; i < dims.iProjAngles; ++i) { + float f = angles[i] - fAngleBase; + while (f > 0) + f -= 2*M_PI; + while (f <= -2*M_PI) + f += 2*M_PI; + fRelAngles[i] = f; + + } + + cudaError_t e1 = cudaMemcpyToSymbol(gC_angle, fRelAngles, dims.iProjAngles*sizeof(float), 0, cudaMemcpyHostToDevice); assert(!e1); + delete[] fRelAngles; - // TODO: detector pixel size! - float fCentralFanAngle = atanf((dims.iProjU*0.5f) / + float fCentralFanAngle = atanf(fDetUSize * (dims.iProjU*0.5f) / (fSrcOrigin + fDetOrigin)); - devFDK_ParkerWeight<<<dimGrid, dimBlock>>>(D_projData.ptr, projPitch, 0, dims.iProjAngles, fSrcOrigin, fDetOrigin, fSrcZ, fDetZ, fDetUSize, fCentralFanAngle, dims); + devFDK_ParkerWeight<<<dimGrid, dimBlock>>>(D_projData.ptr, projPitch, 0, dims.iProjAngles, fSrcOrigin, fDetOrigin, fDetUSize, fCentralFanAngle, dims); } @@ -344,10 +235,7 @@ bool FDK_PreWeight(cudaPitchedPtr D_projData, bool FDK_Filter(cudaPitchedPtr D_projData, cufftComplex * D_filter, - float fSrcOrigin, float fDetOrigin, - float fSrcZ, float fDetZ, - float fDetUSize, float fDetVSize, bool bShortScan, - const SDimensions3D& dims, const float* angles) + const SDimensions3D& dims) { // The filtering is a regular ramp filter per detector line. @@ -392,10 +280,9 @@ bool FDK_Filter(cudaPitchedPtr D_projData, bool FDK(cudaPitchedPtr D_volumeData, cudaPitchedPtr D_projData, - float fSrcOrigin, float fDetOrigin, - float fSrcZ, float fDetZ, float fDetUSize, float fDetVSize, - const SDimensions3D& dims, const float* angles, bool bShortScan, - const float* filter) + const SConeProjection* angles, + const SDimensions3D& dims, SProjectorParams3D params, bool bShortScan, + const float* pfFilter) { bool ok; // Generate filter @@ -404,20 +291,53 @@ bool FDK(cudaPitchedPtr D_volumeData, int iPaddedDetCount = calcNextPowerOfTwo(2 * dims.iProjU); int iHalfFFTSize = calcFFTFourSize(iPaddedDetCount); + + // NB: We don't support arbitrary cone_vec geometries here. + // Only those that are vertical sub-geometries + // (cf. CompositeGeometryManager) of regular cone geometries. + assert(dims.iProjAngles > 0); + const SConeProjection& p0 = angles[0]; + + // assuming U is in the XY plane, V is parallel to Z axis + float fDetCX = p0.fDetSX + 0.5*dims.iProjU*p0.fDetUX; + float fDetCY = p0.fDetSY + 0.5*dims.iProjU*p0.fDetUY; + float fDetCZ = p0.fDetSZ + 0.5*dims.iProjV*p0.fDetVZ; + + float fSrcOrigin = sqrt(p0.fSrcX*p0.fSrcX + p0.fSrcY*p0.fSrcY); + float fDetOrigin = sqrt(fDetCX*fDetCX + fDetCY*fDetCY); + float fDetUSize = sqrt(p0.fDetUX*p0.fDetUX + p0.fDetUY*p0.fDetUY); + float fDetVSize = abs(p0.fDetVZ); + + float fZShift = fDetCZ - p0.fSrcZ; + + float *pfAngles = new float[dims.iProjAngles]; + for (unsigned int i = 0; i < dims.iProjAngles; ++i) { + // FIXME: Sign/order + pfAngles[i] = -atan2(angles[i].fSrcX, angles[i].fSrcY) + M_PI; + } + + +#if 1 ok = FDK_PreWeight(D_projData, fSrcOrigin, fDetOrigin, - fSrcZ, fDetZ, fDetUSize, fDetVSize, - bShortScan, dims, angles); + fZShift, fDetUSize, fDetVSize, + bShortScan, dims, pfAngles); +#else + ok = true; +#endif + delete[] pfAngles; + if (!ok) return false; +#if 1 cufftComplex *pHostFilter = new cufftComplex[dims.iProjAngles * iHalfFFTSize]; memset(pHostFilter, 0, sizeof(cufftComplex) * dims.iProjAngles * iHalfFFTSize); - if (filter==NULL){ + if (pfFilter == 0){ genFilter(FILTER_RAMLAK, 1.0f, dims.iProjAngles, pHostFilter, iPaddedDetCount, iHalfFFTSize); - }else{ - for(int i=0;i<dims.iProjAngles * iHalfFFTSize;i++){ - pHostFilter[i].x = filter[i]; + } else { + for (int i = 0; i < dims.iProjAngles * iHalfFFTSize; i++) { + pHostFilter[i].x = pfFilter[i]; pHostFilter[i].y = 0; } } @@ -433,28 +353,25 @@ bool FDK(cudaPitchedPtr D_volumeData, - ok = FDK_Filter(D_projData, D_filter, fSrcOrigin, fDetOrigin, - fSrcZ, fDetZ, fDetUSize, fDetVSize, - bShortScan, dims, angles); + ok = FDK_Filter(D_projData, D_filter, dims); // Clean up filter freeComplexOnDevice(D_filter); - +#endif if (!ok) return false; // Perform BP - ok = FDK_BP(D_volumeData, D_projData, fSrcOrigin, fDetOrigin, fSrcZ, fDetZ, - fDetUSize, fDetVSize, dims, angles); + params.bFDKWeighting = true; + + //ok = FDK_BP(D_volumeData, D_projData, fSrcOrigin, fDetOrigin, 0.0f, 0.0f, fDetUSize, fDetVSize, dims, pfAngles); + ok = ConeBP(D_volumeData, D_projData, dims, angles, params); if (!ok) return false; - processVol3D<opMul>(D_volumeData, - (M_PI / 2.0f) / (float)dims.iProjAngles, dims); - return true; } diff --git a/cuda/3d/fdk.h b/cuda/3d/fdk.h index 7a9d318..fc0df20 100644 --- a/cuda/3d/fdk.h +++ b/cuda/3d/fdk.h @@ -35,18 +35,16 @@ namespace astraCUDA3d { bool FDK_PreWeight(cudaPitchedPtr D_projData, float fSrcOrigin, float fDetOrigin, - float fSrcZ, float fDetZ, + float fZShift, float fDetUSize, float fDetVSize, bool bShortScan, const SDimensions3D& dims, const float* angles); bool FDK(cudaPitchedPtr D_volumeData, cudaPitchedPtr D_projData, - float fSrcOrigin, float fDetOrigin, - float fSrcZ, float fDetZ, float fDetUSize, float fDetVSize, - const SDimensions3D& dims, const float* angles, bool bShortScan, + const SConeProjection* angles, + const SDimensions3D& dims, SProjectorParams3D params, bool bShortScan, const float* filter); - } #endif diff --git a/cuda/3d/mem3d.cu b/cuda/3d/mem3d.cu index 0320117..cb15ab9 100644 --- a/cuda/3d/mem3d.cu +++ b/cuda/3d/mem3d.cu @@ -38,6 +38,7 @@ $Id$ #include "cone_bp.h" #include "par3d_fp.h" #include "par3d_bp.h" +#include "fdk.h" #include "astra/Logging.h" @@ -193,17 +194,17 @@ bool copyFromGPUMemory(float *dst, MemHandle3D src, const SSubDimensions3D &pos) bool FP(const astra::CProjectionGeometry3D* pProjGeom, MemHandle3D projData, const astra::CVolumeGeometry3D* pVolGeom, MemHandle3D volData, int iDetectorSuperSampling, astra::Cuda3DProjectionKernel projKernel) { SDimensions3D dims; + SProjectorParams3D params; bool ok = convertAstraGeometry_dims(pVolGeom, pProjGeom, dims); if (!ok) return false; #if 1 - dims.iRaysPerDetDim = iDetectorSuperSampling; + params.iRaysPerDetDim = iDetectorSuperSampling; if (iDetectorSuperSampling == 0) return false; #else - dims.iRaysPerDetDim = 1; astra::Cuda3DProjectionKernel projKernel = astra::ker3d_default; #endif @@ -211,11 +212,9 @@ bool FP(const astra::CProjectionGeometry3D* pProjGeom, MemHandle3D projData, con SPar3DProjection* pParProjs; SConeProjection* pConeProjs; - float outputScale = 1.0f; - ok = convertAstraGeometry(pVolGeom, pProjGeom, pParProjs, pConeProjs, - outputScale); + params); if (pParProjs) { #if 0 @@ -230,10 +229,10 @@ bool FP(const astra::CProjectionGeometry3D* pProjGeom, MemHandle3D projData, con switch (projKernel) { case astra::ker3d_default: - ok &= Par3DFP(volData.d->ptr, projData.d->ptr, dims, pParProjs, outputScale); + ok &= Par3DFP(volData.d->ptr, projData.d->ptr, dims, pParProjs, params); break; case astra::ker3d_sum_square_weights: - ok &= Par3DFP_SumSqW(volData.d->ptr, projData.d->ptr, dims, pParProjs, outputScale*outputScale); + ok &= Par3DFP_SumSqW(volData.d->ptr, projData.d->ptr, dims, pParProjs, params); break; default: ok = false; @@ -241,7 +240,7 @@ bool FP(const astra::CProjectionGeometry3D* pProjGeom, MemHandle3D projData, con } else { switch (projKernel) { case astra::ker3d_default: - ok &= ConeFP(volData.d->ptr, projData.d->ptr, dims, pConeProjs, outputScale); + ok &= ConeFP(volData.d->ptr, projData.d->ptr, dims, pConeProjs, params); break; default: ok = false; @@ -254,35 +253,59 @@ bool FP(const astra::CProjectionGeometry3D* pProjGeom, MemHandle3D projData, con bool BP(const astra::CProjectionGeometry3D* pProjGeom, MemHandle3D projData, const astra::CVolumeGeometry3D* pVolGeom, MemHandle3D volData, int iVoxelSuperSampling) { SDimensions3D dims; + SProjectorParams3D params; bool ok = convertAstraGeometry_dims(pVolGeom, pProjGeom, dims); if (!ok) return false; #if 1 - dims.iRaysPerVoxelDim = iVoxelSuperSampling; -#else - dims.iRaysPerVoxelDim = 1; + params.iRaysPerVoxelDim = iVoxelSuperSampling; #endif SPar3DProjection* pParProjs; SConeProjection* pConeProjs; - float outputScale = 1.0f; - ok = convertAstraGeometry(pVolGeom, pProjGeom, pParProjs, pConeProjs, - outputScale); + params); if (pParProjs) - ok &= Par3DBP(volData.d->ptr, projData.d->ptr, dims, pParProjs, outputScale); + ok &= Par3DBP(volData.d->ptr, projData.d->ptr, dims, pParProjs, params); else - ok &= ConeBP(volData.d->ptr, projData.d->ptr, dims, pConeProjs, outputScale); + ok &= ConeBP(volData.d->ptr, projData.d->ptr, dims, pConeProjs, params); return ok; } +bool FDK(const astra::CProjectionGeometry3D* pProjGeom, MemHandle3D projData, const astra::CVolumeGeometry3D* pVolGeom, MemHandle3D volData, bool bShortScan, const float *pfFilter) +{ + SDimensions3D dims; + SProjectorParams3D params; + + bool ok = convertAstraGeometry_dims(pVolGeom, pProjGeom, dims); + if (!ok) + return false; + + SPar3DProjection* pParProjs; + SConeProjection* pConeProjs; + + ok = convertAstraGeometry(pVolGeom, pProjGeom, + pParProjs, pConeProjs, + params); + + if (!ok || !pConeProjs) + return false; + + ok &= FDK(volData.d->ptr, projData.d->ptr, pConeProjs, dims, params, bShortScan, pfFilter); + + return ok; + + + +} + diff --git a/cuda/3d/mem3d.h b/cuda/3d/mem3d.h index 6fff80b..bb8b091 100644 --- a/cuda/3d/mem3d.h +++ b/cuda/3d/mem3d.h @@ -95,6 +95,7 @@ bool FP(const astra::CProjectionGeometry3D* pProjGeom, MemHandle3D projData, con bool BP(const astra::CProjectionGeometry3D* pProjGeom, MemHandle3D projData, const astra::CVolumeGeometry3D* pVolGeom, MemHandle3D volData, int iVoxelSuperSampling); +bool FDK(const astra::CProjectionGeometry3D* pProjGeom, MemHandle3D projData, const astra::CVolumeGeometry3D* pVolGeom, MemHandle3D volData, bool bShortScan, const float *pfFilter = 0); } diff --git a/cuda/3d/par3d_bp.cu b/cuda/3d/par3d_bp.cu index cafab46..491ee2f 100644 --- a/cuda/3d/par3d_bp.cu +++ b/cuda/3d/par3d_bp.cu @@ -143,7 +143,7 @@ __global__ void dev_par3D_BP(void* D_volData, unsigned int volPitch, int startAn } // supersampling version -__global__ void dev_par3D_BP_SS(void* D_volData, unsigned int volPitch, int startAngle, int angleOffset, const SDimensions3D dims, float fOutputScale) +__global__ void dev_par3D_BP_SS(void* D_volData, unsigned int volPitch, int startAngle, int angleOffset, const SDimensions3D dims, int iRaysPerVoxelDim, float fOutputScale) { float* volData = (float*)D_volData; @@ -174,13 +174,13 @@ __global__ void dev_par3D_BP_SS(void* D_volData, unsigned int volPitch, int star if (endZ > dims.iVolZ) endZ = dims.iVolZ; - float fX = X - 0.5f*dims.iVolX + 0.5f - 0.5f + 0.5f/dims.iRaysPerVoxelDim; - float fY = Y - 0.5f*dims.iVolY + 0.5f - 0.5f + 0.5f/dims.iRaysPerVoxelDim; - float fZ = startZ - 0.5f*dims.iVolZ + 0.5f - 0.5f + 0.5f/dims.iRaysPerVoxelDim; + float fX = X - 0.5f*dims.iVolX + 0.5f - 0.5f + 0.5f/iRaysPerVoxelDim; + float fY = Y - 0.5f*dims.iVolY + 0.5f - 0.5f + 0.5f/iRaysPerVoxelDim; + float fZ = startZ - 0.5f*dims.iVolZ + 0.5f - 0.5f + 0.5f/iRaysPerVoxelDim; - const float fSubStep = 1.0f/dims.iRaysPerVoxelDim; + const float fSubStep = 1.0f/iRaysPerVoxelDim; - fOutputScale /= (dims.iRaysPerVoxelDim*dims.iRaysPerVoxelDim*dims.iRaysPerVoxelDim); + fOutputScale /= (iRaysPerVoxelDim*iRaysPerVoxelDim*iRaysPerVoxelDim); for (int Z = startZ; Z < endZ; ++Z, fZ += 1.0f) @@ -201,11 +201,11 @@ __global__ void dev_par3D_BP_SS(void* D_volData, unsigned int volPitch, int star const float fCvc = gC_C[8*angle+7]; float fXs = fX; - for (int iSubX = 0; iSubX < dims.iRaysPerVoxelDim; ++iSubX) { + for (int iSubX = 0; iSubX < iRaysPerVoxelDim; ++iSubX) { float fYs = fY; - for (int iSubY = 0; iSubY < dims.iRaysPerVoxelDim; ++iSubY) { + for (int iSubY = 0; iSubY < iRaysPerVoxelDim; ++iSubY) { float fZs = fZ; - for (int iSubZ = 0; iSubZ < dims.iRaysPerVoxelDim; ++iSubZ) { + for (int iSubZ = 0; iSubZ < iRaysPerVoxelDim; ++iSubZ) { const float fU = fCuc + fXs * fCux + fYs * fCuy + fZs * fCuz; const float fV = fCvc + fXs * fCvx + fYs * fCvy + fZs * fCvz; @@ -228,10 +228,12 @@ __global__ void dev_par3D_BP_SS(void* D_volData, unsigned int volPitch, int star bool Par3DBP_Array(cudaPitchedPtr D_volumeData, cudaArray *D_projArray, const SDimensions3D& dims, const SPar3DProjection* angles, - float fOutputScale) + const SProjectorParams3D& params) { bindProjDataTexture(D_projArray); + float fOutputScale = params.fOutputScale * params.fVolScaleX * params.fVolScaleY * params.fVolScaleZ; + for (unsigned int th = 0; th < dims.iProjAngles; th += g_MaxAngles) { unsigned int angleCount = g_MaxAngles; if (th + angleCount > dims.iProjAngles) @@ -274,10 +276,10 @@ bool Par3DBP_Array(cudaPitchedPtr D_volumeData, for (unsigned int i = 0; i < angleCount; i += g_anglesPerBlock) { // printf("Calling BP: %d, %dx%d, %dx%d to %p\n", i, dimBlock.x, dimBlock.y, dimGrid.x, dimGrid.y, (void*)D_volumeData.ptr); - if (dims.iRaysPerVoxelDim == 1) + if (params.iRaysPerVoxelDim == 1) dev_par3D_BP<<<dimGrid, dimBlock>>>(D_volumeData.ptr, D_volumeData.pitch/sizeof(float), i, th, dims, fOutputScale); else - dev_par3D_BP_SS<<<dimGrid, dimBlock>>>(D_volumeData.ptr, D_volumeData.pitch/sizeof(float), i, th, dims, fOutputScale); + dev_par3D_BP_SS<<<dimGrid, dimBlock>>>(D_volumeData.ptr, D_volumeData.pitch/sizeof(float), i, th, dims, params.iRaysPerVoxelDim, fOutputScale); } cudaTextForceKernelsCompletion(); @@ -293,14 +295,14 @@ bool Par3DBP_Array(cudaPitchedPtr D_volumeData, bool Par3DBP(cudaPitchedPtr D_volumeData, cudaPitchedPtr D_projData, const SDimensions3D& dims, const SPar3DProjection* angles, - float fOutputScale) + const SProjectorParams3D& params) { // transfer projections to array cudaArray* cuArray = allocateProjectionArray(dims); transferProjectionsToArray(D_projData, cuArray, dims); - bool ret = Par3DBP_Array(D_volumeData, cuArray, dims, angles, fOutputScale); + bool ret = Par3DBP_Array(D_volumeData, cuArray, dims, angles, params); cudaFreeArray(cuArray); diff --git a/cuda/3d/par3d_bp.h b/cuda/3d/par3d_bp.h index f1fc62d..3efad19 100644 --- a/cuda/3d/par3d_bp.h +++ b/cuda/3d/par3d_bp.h @@ -34,12 +34,12 @@ namespace astraCUDA3d { _AstraExport bool Par3DBP_Array(cudaPitchedPtr D_volumeData, cudaArray *D_projArray, const SDimensions3D& dims, const SPar3DProjection* angles, - float fOutputScale); + const SProjectorParams3D& params); _AstraExport bool Par3DBP(cudaPitchedPtr D_volumeData, cudaPitchedPtr D_projData, const SDimensions3D& dims, const SPar3DProjection* angles, - float fOutputScale); + const SProjectorParams3D& params); } diff --git a/cuda/3d/par3d_fp.cu b/cuda/3d/par3d_fp.cu index 3ce3d42..6f9ce40 100644 --- a/cuda/3d/par3d_fp.cu +++ b/cuda/3d/par3d_fp.cu @@ -128,6 +128,18 @@ struct DIR_Z { __device__ float z(float f0, float f1, float f2) const { return f0; } }; +struct SCALE_CUBE { + float fOutputScale; + __device__ float scale(float a1, float a2) const { return sqrt(a1*a1+a2*a2+1.0f) * fOutputScale; } +}; + +struct SCALE_NONCUBE { + float fScale1; + float fScale2; + float fOutputScale; + __device__ float scale(float a1, float a2) const { return sqrt(a1*a1*fScale1+a2*a2*fScale2+1.0f) * fOutputScale; } +}; + // threadIdx: x = u detector @@ -136,11 +148,12 @@ struct DIR_Z { // y = angle block -template<class COORD> +template<class COORD, class SCALE> __global__ void par3D_FP_t(float* D_projData, unsigned int projPitch, unsigned int startSlice, unsigned int startAngle, unsigned int endAngle, - const SDimensions3D dims, float fOutputScale) + const SDimensions3D dims, + SCALE sc) { COORD c; @@ -161,6 +174,9 @@ __global__ void par3D_FP_t(float* D_projData, unsigned int projPitch, const float fDetSY = gC_DetSY[angle] + 0.5f * fDetUY + 0.5f * fDetVY; const float fDetSZ = gC_DetSZ[angle] + 0.5f * fDetUZ + 0.5f * fDetVZ; + const float a1 = c.c1(fRayX,fRayY,fRayZ) / c.c0(fRayX,fRayY,fRayZ); + const float a2 = c.c2(fRayX,fRayY,fRayZ) / c.c0(fRayX,fRayY,fRayZ); + const float fDistCorr = sc.scale(a1, a2); const int detectorU = (blockIdx.x%((dims.iProjU+g_detBlockU-1)/g_detBlockU)) * g_detBlockU + threadIdx.x; @@ -186,13 +202,9 @@ __global__ void par3D_FP_t(float* D_projData, unsigned int projPitch, /* ray: (y) = (ay) * x + (by) */ /* (z) (az) (bz) */ - const float a1 = c.c1(fRayX,fRayY,fRayZ) / c.c0(fRayX,fRayY,fRayZ); - const float a2 = c.c2(fRayX,fRayY,fRayZ) / c.c0(fRayX,fRayY,fRayZ); const float b1 = c.c1(fDetX,fDetY,fDetZ) - a1 * c.c0(fDetX,fDetY,fDetZ); const float b2 = c.c2(fDetX,fDetY,fDetZ) - a2 * c.c0(fDetX,fDetY,fDetZ); - const float fDistCorr = sqrt(a1*a1+a2*a2+1.0f) * fOutputScale; - float fVal = 0.0f; float f0 = startSlice + 0.5f; @@ -218,7 +230,8 @@ template<class COORD> __global__ void par3D_FP_SS_t(float* D_projData, unsigned int projPitch, unsigned int startSlice, unsigned int startAngle, unsigned int endAngle, - const SDimensions3D dims, float fOutputScale) + const SDimensions3D dims, int iRaysPerDetDim, + SCALE_NONCUBE sc) { COORD c; @@ -239,7 +252,9 @@ __global__ void par3D_FP_SS_t(float* D_projData, unsigned int projPitch, const float fDetSY = gC_DetSY[angle] + 0.5f * fDetUY + 0.5f * fDetVY; const float fDetSZ = gC_DetSZ[angle] + 0.5f * fDetUZ + 0.5f * fDetVZ; - + const float a1 = c.c1(fRayX,fRayY,fRayZ) / c.c0(fRayX,fRayY,fRayZ); + const float a2 = c.c2(fRayX,fRayY,fRayZ) / c.c0(fRayX,fRayY,fRayZ); + const float fDistCorr = sc.scale(a1, a2); const int detectorU = (blockIdx.x%((dims.iProjU+g_detBlockU-1)/g_detBlockU)) * g_detBlockU + threadIdx.x; const int startDetectorV = (blockIdx.x/((dims.iProjU+g_detBlockU-1)/g_detBlockU)) * g_detBlockV; @@ -251,7 +266,7 @@ __global__ void par3D_FP_SS_t(float* D_projData, unsigned int projPitch, if (endSlice > c.nSlices(dims)) endSlice = c.nSlices(dims); - const float fSubStep = 1.0f/dims.iRaysPerDetDim; + const float fSubStep = 1.0f/iRaysPerDetDim; for (int detectorV = startDetectorV; detectorV < endDetectorV; ++detectorV) { @@ -259,9 +274,9 @@ __global__ void par3D_FP_SS_t(float* D_projData, unsigned int projPitch, float fV = 0.0f; float fdU = detectorU - 0.5f + 0.5f*fSubStep; - for (int iSubU = 0; iSubU < dims.iRaysPerDetDim; ++iSubU, fdU+=fSubStep) { + for (int iSubU = 0; iSubU < iRaysPerDetDim; ++iSubU, fdU+=fSubStep) { float fdV = detectorV - 0.5f + 0.5f*fSubStep; - for (int iSubV = 0; iSubV < dims.iRaysPerDetDim; ++iSubV, fdV+=fSubStep) { + for (int iSubV = 0; iSubV < iRaysPerDetDim; ++iSubV, fdV+=fSubStep) { /* Trace ray in direction Ray to (detectorU,detectorV) from */ /* X = startSlice to X = endSlice */ @@ -274,12 +289,9 @@ __global__ void par3D_FP_SS_t(float* D_projData, unsigned int projPitch, /* ray: (y) = (ay) * x + (by) */ /* (z) (az) (bz) */ - const float a1 = c.c1(fRayX,fRayY,fRayZ) / c.c0(fRayX,fRayY,fRayZ); - const float a2 = c.c2(fRayX,fRayY,fRayZ) / c.c0(fRayX,fRayY,fRayZ); const float b1 = c.c1(fDetX,fDetY,fDetZ) - a1 * c.c0(fDetX,fDetY,fDetZ); const float b2 = c.c2(fDetX,fDetY,fDetZ) - a2 * c.c0(fDetX,fDetY,fDetZ); - const float fDistCorr = sqrt(a1*a1+a2*a2+1.0f) * fOutputScale; float fVal = 0.0f; @@ -295,13 +307,13 @@ __global__ void par3D_FP_SS_t(float* D_projData, unsigned int projPitch, f2 += a2; } - fVal *= fDistCorr; fV += fVal; } } - D_projData[(detectorV*dims.iProjAngles+angle)*projPitch+detectorU] += fV / (dims.iRaysPerDetDim * dims.iRaysPerDetDim); + fV *= fDistCorr; + D_projData[(detectorV*dims.iProjAngles+angle)*projPitch+detectorU] += fV / (iRaysPerDetDim * iRaysPerDetDim); } } @@ -324,7 +336,8 @@ template<class COORD> __global__ void par3D_FP_SumSqW_t(float* D_projData, unsigned int projPitch, unsigned int startSlice, unsigned int startAngle, unsigned int endAngle, - const SDimensions3D dims, float fOutputScale) + const SDimensions3D dims, + SCALE_NONCUBE sc) { COORD c; @@ -345,6 +358,9 @@ __global__ void par3D_FP_SumSqW_t(float* D_projData, unsigned int projPitch, const float fDetSY = gC_DetSY[angle] + 0.5f * fDetUY + 0.5f * fDetVY; const float fDetSZ = gC_DetSZ[angle] + 0.5f * fDetUZ + 0.5f * fDetVZ; + const float a1 = c.c1(fRayX,fRayY,fRayZ) / c.c0(fRayX,fRayY,fRayZ); + const float a2 = c.c2(fRayX,fRayY,fRayZ) / c.c0(fRayX,fRayY,fRayZ); + const float fDistCorr = sc.scale(a1, a2); const int detectorU = (blockIdx.x%((dims.iProjU+g_detBlockU-1)/g_detBlockU)) * g_detBlockU + threadIdx.x; @@ -370,13 +386,9 @@ __global__ void par3D_FP_SumSqW_t(float* D_projData, unsigned int projPitch, /* ray: (y) = (ay) * x + (by) */ /* (z) (az) (bz) */ - const float a1 = c.c1(fRayX,fRayY,fRayZ) / c.c0(fRayX,fRayY,fRayZ); - const float a2 = c.c2(fRayX,fRayY,fRayZ) / c.c0(fRayX,fRayY,fRayZ); const float b1 = c.c1(fDetX,fDetY,fDetZ) - a1 * c.c0(fDetX,fDetY,fDetZ); const float b2 = c.c2(fDetX,fDetY,fDetZ) - a2 * c.c0(fDetX,fDetY,fDetZ); - const float fDistCorr = sqrt(a1*a1+a2*a2+1.0f) * fOutputScale; - float fVal = 0.0f; float f0 = startSlice + 0.5f; @@ -385,12 +397,13 @@ __global__ void par3D_FP_SumSqW_t(float* D_projData, unsigned int projPitch, for (int s = startSlice; s < endSlice; ++s) { - fVal += dirWeights(f1, c.nDim1(dims)) * dirWeights(f2, c.nDim2(dims)) * fDistCorr * fDistCorr; + fVal += dirWeights(f1, c.nDim1(dims)) * dirWeights(f2, c.nDim2(dims)); f0 += 1.0f; f1 += a1; f2 += a2; } + fVal *= fDistCorr * fDistCorr; D_projData[(detectorV*dims.iProjAngles+angle)*projPitch+detectorU] += fVal; } } @@ -401,7 +414,7 @@ __global__ void par3D_FP_SumSqW_t(float* D_projData, unsigned int projPitch, bool Par3DFP_Array_internal(cudaPitchedPtr D_projData, const SDimensions3D& dims, unsigned int angleCount, const SPar3DProjection* angles, - float fOutputScale) + const SProjectorParams3D& params) { // transfer angles to constant memory float* tmp = new float[dims.iProjAngles]; @@ -436,6 +449,36 @@ bool Par3DFP_Array_internal(cudaPitchedPtr D_projData, unsigned int blockEnd = 0; int blockDirection = 0; + bool cube = true; + if (abs(params.fVolScaleX / params.fVolScaleY - 1.0) > 0.00001) + cube = false; + if (abs(params.fVolScaleX / params.fVolScaleZ - 1.0) > 0.00001) + cube = false; + + SCALE_CUBE scube; + scube.fOutputScale = params.fOutputScale * params.fVolScaleX; + + SCALE_NONCUBE snoncubeX; + float fS1 = params.fVolScaleY / params.fVolScaleX; + snoncubeX.fScale1 = fS1 * fS1; + float fS2 = params.fVolScaleZ / params.fVolScaleX; + snoncubeX.fScale2 = fS2 * fS2; + snoncubeX.fOutputScale = params.fOutputScale * params.fVolScaleX; + + SCALE_NONCUBE snoncubeY; + fS1 = params.fVolScaleX / params.fVolScaleY; + snoncubeY.fScale1 = fS1 * fS1; + fS2 = params.fVolScaleY / params.fVolScaleY; + snoncubeY.fScale2 = fS2 * fS2; + snoncubeY.fOutputScale = params.fOutputScale * params.fVolScaleY; + + SCALE_NONCUBE snoncubeZ; + fS1 = params.fVolScaleX / params.fVolScaleZ; + snoncubeZ.fScale1 = fS1 * fS1; + fS2 = params.fVolScaleY / params.fVolScaleZ; + snoncubeZ.fScale2 = fS2 * fS2; + snoncubeZ.fOutputScale = params.fOutputScale * params.fVolScaleZ; + // timeval t; // tic(t); @@ -473,22 +516,31 @@ bool Par3DFP_Array_internal(cudaPitchedPtr D_projData, if (blockDirection == 0) { for (unsigned int i = 0; i < dims.iVolX; i += g_blockSlices) - if (dims.iRaysPerDetDim == 1) - par3D_FP_t<DIR_X><<<dimGrid, dimBlock, 0, stream>>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, fOutputScale); + if (params.iRaysPerDetDim == 1) + if (cube) + par3D_FP_t<DIR_X><<<dimGrid, dimBlock, 0, stream>>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, scube); + else + par3D_FP_t<DIR_X><<<dimGrid, dimBlock, 0, stream>>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, snoncubeX); else - par3D_FP_SS_t<DIR_X><<<dimGrid, dimBlock, 0, stream>>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, fOutputScale); + par3D_FP_SS_t<DIR_X><<<dimGrid, dimBlock, 0, stream>>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, params.iRaysPerDetDim, snoncubeX); } else if (blockDirection == 1) { for (unsigned int i = 0; i < dims.iVolY; i += g_blockSlices) - if (dims.iRaysPerDetDim == 1) - par3D_FP_t<DIR_Y><<<dimGrid, dimBlock, 0, stream>>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, fOutputScale); + if (params.iRaysPerDetDim == 1) + if (cube) + par3D_FP_t<DIR_Y><<<dimGrid, dimBlock, 0, stream>>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, scube); + else + par3D_FP_t<DIR_Y><<<dimGrid, dimBlock, 0, stream>>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, snoncubeY); else - par3D_FP_SS_t<DIR_Y><<<dimGrid, dimBlock, 0, stream>>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, fOutputScale); + par3D_FP_SS_t<DIR_Y><<<dimGrid, dimBlock, 0, stream>>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, params.iRaysPerDetDim, snoncubeY); } else if (blockDirection == 2) { for (unsigned int i = 0; i < dims.iVolZ; i += g_blockSlices) - if (dims.iRaysPerDetDim == 1) - par3D_FP_t<DIR_Z><<<dimGrid, dimBlock, 0, stream>>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, fOutputScale); + if (params.iRaysPerDetDim == 1) + if (cube) + par3D_FP_t<DIR_Z><<<dimGrid, dimBlock, 0, stream>>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, scube); + else + par3D_FP_t<DIR_Z><<<dimGrid, dimBlock, 0, stream>>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, snoncubeZ); else - par3D_FP_SS_t<DIR_Z><<<dimGrid, dimBlock, 0, stream>>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, fOutputScale); + par3D_FP_SS_t<DIR_Z><<<dimGrid, dimBlock, 0, stream>>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, params.iRaysPerDetDim, snoncubeZ); } } @@ -514,7 +566,7 @@ bool Par3DFP_Array_internal(cudaPitchedPtr D_projData, bool Par3DFP(cudaPitchedPtr D_volumeData, cudaPitchedPtr D_projData, const SDimensions3D& dims, const SPar3DProjection* angles, - float fOutputScale) + const SProjectorParams3D& params) { // transfer volume to array cudaArray* cuArray = allocateVolumeArray(dims); @@ -533,7 +585,7 @@ bool Par3DFP(cudaPitchedPtr D_volumeData, ret = Par3DFP_Array_internal(D_subprojData, dims, iEndAngle - iAngle, angles + iAngle, - fOutputScale); + params); if (!ret) break; } @@ -548,7 +600,7 @@ bool Par3DFP(cudaPitchedPtr D_volumeData, bool Par3DFP_SumSqW(cudaPitchedPtr D_volumeData, cudaPitchedPtr D_projData, const SDimensions3D& dims, const SPar3DProjection* angles, - float fOutputScale) + const SProjectorParams3D& params) { // transfer angles to constant memory float* tmp = new float[dims.iProjAngles]; @@ -583,6 +635,28 @@ bool Par3DFP_SumSqW(cudaPitchedPtr D_volumeData, unsigned int blockEnd = 0; int blockDirection = 0; + SCALE_NONCUBE snoncubeX; + float fS1 = params.fVolScaleY / params.fVolScaleX; + snoncubeX.fScale1 = fS1 * fS1; + float fS2 = params.fVolScaleZ / params.fVolScaleX; + snoncubeX.fScale2 = fS2 * fS2; + snoncubeX.fOutputScale = params.fOutputScale * params.fVolScaleX; + + SCALE_NONCUBE snoncubeY; + fS1 = params.fVolScaleX / params.fVolScaleY; + snoncubeY.fScale1 = fS1 * fS1; + fS2 = params.fVolScaleY / params.fVolScaleY; + snoncubeY.fScale2 = fS2 * fS2; + snoncubeY.fOutputScale = params.fOutputScale * params.fVolScaleY; + + SCALE_NONCUBE snoncubeZ; + fS1 = params.fVolScaleX / params.fVolScaleZ; + snoncubeZ.fScale1 = fS1 * fS1; + fS2 = params.fVolScaleY / params.fVolScaleZ; + snoncubeZ.fScale2 = fS2 * fS2; + snoncubeZ.fOutputScale = params.fOutputScale * params.fVolScaleZ; + + // timeval t; // tic(t); @@ -620,8 +694,8 @@ bool Par3DFP_SumSqW(cudaPitchedPtr D_volumeData, if (blockDirection == 0) { for (unsigned int i = 0; i < dims.iVolX; i += g_blockSlices) - if (dims.iRaysPerDetDim == 1) - par3D_FP_SumSqW_t<DIR_X><<<dimGrid, dimBlock, 0, stream>>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, fOutputScale); + if (params.iRaysPerDetDim == 1) + par3D_FP_SumSqW_t<DIR_X><<<dimGrid, dimBlock, 0, stream>>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, snoncubeX); else #if 0 par3D_FP_SS_SumSqW_dirX<<<dimGrid, dimBlock, 0, stream>>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, fOutputScale); @@ -630,8 +704,8 @@ bool Par3DFP_SumSqW(cudaPitchedPtr D_volumeData, #endif } else if (blockDirection == 1) { for (unsigned int i = 0; i < dims.iVolY; i += g_blockSlices) - if (dims.iRaysPerDetDim == 1) - par3D_FP_SumSqW_t<DIR_Y><<<dimGrid, dimBlock, 0, stream>>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, fOutputScale); + if (params.iRaysPerDetDim == 1) + par3D_FP_SumSqW_t<DIR_Y><<<dimGrid, dimBlock, 0, stream>>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, snoncubeY); else #if 0 par3D_FP_SS_SumSqW_dirY<<<dimGrid, dimBlock, 0, stream>>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, fOutputScale); @@ -640,8 +714,8 @@ bool Par3DFP_SumSqW(cudaPitchedPtr D_volumeData, #endif } else if (blockDirection == 2) { for (unsigned int i = 0; i < dims.iVolZ; i += g_blockSlices) - if (dims.iRaysPerDetDim == 1) - par3D_FP_SumSqW_t<DIR_Z><<<dimGrid, dimBlock, 0, stream>>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, fOutputScale); + if (params.iRaysPerDetDim == 1) + par3D_FP_SumSqW_t<DIR_Z><<<dimGrid, dimBlock, 0, stream>>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, snoncubeZ); else #if 0 par3D_FP_SS_SumSqW_dirZ<<<dimGrid, dimBlock, 0, stream>>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, fOutputScale); diff --git a/cuda/3d/par3d_fp.h b/cuda/3d/par3d_fp.h index 41f204f..5f65cd9 100644 --- a/cuda/3d/par3d_fp.h +++ b/cuda/3d/par3d_fp.h @@ -34,17 +34,17 @@ namespace astraCUDA3d { _AstraExport bool Par3DFP_Array(cudaArray *D_volArray, cudaPitchedPtr D_projData, const SDimensions3D& dims, const SPar3DProjection* angles, - float fOutputScale); + const SProjectorParams3D& params); _AstraExport bool Par3DFP(cudaPitchedPtr D_volumeData, cudaPitchedPtr D_projData, const SDimensions3D& dims, const SPar3DProjection* angles, - float fOutputScale); + const SProjectorParams3D& params); _AstraExport bool Par3DFP_SumSqW(cudaPitchedPtr D_volumeData, cudaPitchedPtr D_projData, const SDimensions3D& dims, const SPar3DProjection* angles, - float fOutputScale); + const SProjectorParams3D& params); } diff --git a/cuda/3d/sirt3d.cu b/cuda/3d/sirt3d.cu index 713944b..ba6a159 100644 --- a/cuda/3d/sirt3d.cu +++ b/cuda/3d/sirt3d.cu @@ -353,7 +353,7 @@ bool doSIRT(cudaPitchedPtr& D_volumeData, SIRT sirt; bool ok = true; - ok &= sirt.setConeGeometry(dims, angles, 1.0f); + ok &= sirt.setConeGeometry(dims, angles, SProjectorParams3D()); if (D_maskData.ptr) ok &= sirt.enableVolumeMask(); diff --git a/include/astra/CompositeGeometryManager.h b/include/astra/CompositeGeometryManager.h index 18dd72f..d30ffd1 100644 --- a/include/astra/CompositeGeometryManager.h +++ b/include/astra/CompositeGeometryManager.h @@ -55,6 +55,11 @@ struct SGPUParams { size_t memory; }; +struct SFDKSettings { + bool bShortScan; + const float *pfFilter; +}; + class _AstraExport CCompositeGeometryManager { public: @@ -127,9 +132,10 @@ public: CProjector3D *pProjector; // For a `global' geometry. It will not match // the geometries of the input and output. + SFDKSettings FDKSettings; enum { - JOB_FP, JOB_BP, JOB_NOP + JOB_FP, JOB_BP, JOB_FDK, JOB_NOP } eType; enum { MODE_ADD, MODE_SET @@ -155,6 +161,9 @@ public: CFloat32ProjectionData3DMemory *pProjData); bool doBP(CProjector3D *pProjector, CFloat32VolumeData3DMemory *pVolData, CFloat32ProjectionData3DMemory *pProjData); + bool doFDK(CProjector3D *pProjector, CFloat32VolumeData3DMemory *pVolData, + CFloat32ProjectionData3DMemory *pProjData, bool bShortScan, + const float *pfFilter = 0); bool doFP(CProjector3D *pProjector, const std::vector<CFloat32VolumeData3DMemory *>& volData, const std::vector<CFloat32ProjectionData3DMemory *>& projData); bool doBP(CProjector3D *pProjector, const std::vector<CFloat32VolumeData3DMemory *>& volData, const std::vector<CFloat32ProjectionData3DMemory *>& projData); diff --git a/include/astra/GeometryUtil3D.h b/include/astra/GeometryUtil3D.h index e4d73e4..e051240 100644 --- a/include/astra/GeometryUtil3D.h +++ b/include/astra/GeometryUtil3D.h @@ -56,19 +56,19 @@ struct SConeProjection { fDetSZ += dz; } - void scale(double factor) { - fSrcX *= factor; - fSrcY *= factor; - fSrcZ *= factor; - fDetSX *= factor; - fDetSY *= factor; - fDetSZ *= factor; - fDetUX *= factor; - fDetUY *= factor; - fDetUZ *= factor; - fDetVX *= factor; - fDetVY *= factor; - fDetVZ *= factor; + void scale(double fx, double fy, double fz) { + fSrcX *= fx; + fSrcY *= fy; + fSrcZ *= fz; + fDetSX *= fx; + fDetSY *= fy; + fDetSZ *= fz; + fDetUX *= fx; + fDetUY *= fy; + fDetUZ *= fz; + fDetVX *= fx; + fDetVY *= fy; + fDetVZ *= fz; } }; @@ -93,19 +93,19 @@ struct SPar3DProjection { fDetSY += dy; fDetSZ += dz; } - void scale(double factor) { - fRayX *= factor; - fRayY *= factor; - fRayZ *= factor; - fDetSX *= factor; - fDetSY *= factor; - fDetSZ *= factor; - fDetUX *= factor; - fDetUY *= factor; - fDetUZ *= factor; - fDetVX *= factor; - fDetVY *= factor; - fDetVZ *= factor; + void scale(double fx, double fy, double fz) { + fRayX *= fx; + fRayY *= fy; + fRayZ *= fz; + fDetSX *= fx; + fDetSY *= fy; + fDetSZ *= fz; + fDetUX *= fx; + fDetUY *= fy; + fDetUZ *= fz; + fDetVX *= fx; + fDetVY *= fy; + fDetVZ *= fz; } }; diff --git a/include/astra/Singleton.h b/include/astra/Singleton.h index 784c521..9d3c088 100644 --- a/include/astra/Singleton.h +++ b/include/astra/Singleton.h @@ -45,11 +45,7 @@ class Singleton { public: // constructor - Singleton() { - assert(!m_singleton); - int offset = (uintptr_t)(T*)1 - (uintptr_t)(Singleton<T>*)(T*)1; - m_singleton = (T*)((uintptr_t)this + offset); - }; + Singleton() { } // destructor virtual ~Singleton() { diff --git a/include/astra/Utilities.h b/include/astra/Utilities.h index 8d7c44d..22adfe2 100644 --- a/include/astra/Utilities.h +++ b/include/astra/Utilities.h @@ -85,6 +85,9 @@ _AstraExport std::string doubleToString(double f); template<typename T> _AstraExport std::string toString(T f); + +_AstraExport void splitString(std::vector<std::string> &items, const std::string& s, const char *delim); + } diff --git a/matlab/mex/mexHelpFunctions.cpp b/matlab/mex/mexHelpFunctions.cpp index 13c4ade..d957aea 100644 --- a/matlab/mex/mexHelpFunctions.cpp +++ b/matlab/mex/mexHelpFunctions.cpp @@ -33,11 +33,6 @@ $Id$ #include "mexHelpFunctions.h" #include "astra/Utilities.h" -#include <algorithm> -#include <boost/algorithm/string.hpp> -#include <boost/algorithm/string/split.hpp> -#include <boost/algorithm/string/classification.hpp> - using namespace std; using namespace astra; @@ -362,8 +357,8 @@ mxArray* stringToMxArray(std::string input) // split rows std::vector<std::string> row_strings; std::vector<std::string> col_strings; - boost::split(row_strings, input, boost::is_any_of(";")); - boost::split(col_strings, row_strings[0], boost::is_any_of(",")); + StringUtil::splitString(row_strings, input, ";"); + StringUtil::splitString(col_strings, row_strings[0], ","); // get dimensions int rows = row_strings.size(); @@ -375,7 +370,7 @@ mxArray* stringToMxArray(std::string input) // loop elements for (unsigned int row = 0; row < rows; row++) { - boost::split(col_strings, row_strings[row], boost::is_any_of(",")); + StringUtil::splitString(col_strings, row_strings[row], ","); // check size for (unsigned int col = 0; col < col_strings.size(); col++) { out[col*rows + row] = StringUtil::stringToFloat(col_strings[col]); @@ -389,7 +384,7 @@ mxArray* stringToMxArray(std::string input) // split std::vector<std::string> items; - boost::split(items, input, boost::is_any_of(",")); + StringUtil::splitString(items, input, ","); // init matrix mxArray* pVector = mxCreateDoubleMatrix(1, items.size(), mxREAL); @@ -402,16 +397,13 @@ mxArray* stringToMxArray(std::string input) return pVector; } - // number - char* end; - double content = ::strtod(input.c_str(), &end); - bool isnumber = !*end; - if (isnumber) { - return mxCreateDoubleScalar(content); + try { + // number + return mxCreateDoubleScalar(StringUtil::stringToDouble(input)); + } catch (const StringUtil::bad_cast &) { + // string + return mxCreateString(input.c_str()); } - - // string - return mxCreateString(input.c_str()); } //----------------------------------------------------------------------------------------- // turn a c++ map into a matlab struct diff --git a/python/astra/src/PythonPluginAlgorithm.cpp b/python/astra/src/PythonPluginAlgorithm.cpp index 617c0f4..893db94 100644 --- a/python/astra/src/PythonPluginAlgorithm.cpp +++ b/python/astra/src/PythonPluginAlgorithm.cpp @@ -31,8 +31,6 @@ along with the ASTRA Toolbox. If not, see <http://www.gnu.org/licenses/>. #include "astra/Logging.h" #include "astra/Utilities.h" -#include <boost/algorithm/string.hpp> -#include <boost/algorithm/string/split.hpp> #include <iostream> #include <fstream> #include <string> @@ -146,7 +144,7 @@ CPythonPluginAlgorithmFactory::~CPythonPluginAlgorithmFactory(){ PyObject * getClassFromString(std::string str){ std::vector<std::string> items; - boost::split(items, str, boost::is_any_of(".")); + StringUtil::splitString(items, str, "."); PyObject *pyclass = PyImport_ImportModule(items[0].c_str()); if(pyclass==NULL){ logPythonError(); @@ -303,10 +301,10 @@ PyObject * pyStringFromString(std::string str){ PyObject* stringToPythonValue(std::string str){ if(str.find(";")!=std::string::npos){ std::vector<std::string> rows, row; - boost::split(rows, str, boost::is_any_of(";")); + StringUtil::splitString(rows, str, ";"); PyObject *mat = PyList_New(rows.size()); for(unsigned int i=0; i<rows.size(); i++){ - boost::split(row, rows[i], boost::is_any_of(",")); + StringUtil::splitString(row, rows[i], ","); PyObject *rowlist = PyList_New(row.size()); for(unsigned int j=0;j<row.size();j++){ PyList_SetItem(rowlist, j, PyFloat_FromDouble(StringUtil::stringToDouble(row[j]))); @@ -317,7 +315,7 @@ PyObject* stringToPythonValue(std::string str){ } if(str.find(",")!=std::string::npos){ std::vector<std::string> vec; - boost::split(vec, str, boost::is_any_of(",")); + StringUtil::splitString(vec, str, ","); PyObject *veclist = PyList_New(vec.size()); for(unsigned int i=0;i<vec.size();i++){ PyList_SetItem(veclist, i, PyFloat_FromDouble(StringUtil::stringToDouble(vec[i]))); diff --git a/src/CompositeGeometryManager.cpp b/src/CompositeGeometryManager.cpp index 084ba8c..5879aec 100644 --- a/src/CompositeGeometryManager.cpp +++ b/src/CompositeGeometryManager.cpp @@ -146,6 +146,7 @@ bool CCompositeGeometryManager::splitJobs(TJobSet &jobs, size_t maxSize, int div newjob.eType = j->eType; newjob.eMode = j->eMode; newjob.pProjector = j->pProjector; + newjob.FDKSettings = j->FDKSettings; CPart* input = job.pInput->reduce(outputPart.get()); @@ -196,6 +197,69 @@ bool CCompositeGeometryManager::splitJobs(TJobSet &jobs, size_t maxSize, int div return true; } + +static std::pair<double, double> reduceProjectionVertical(const CVolumeGeometry3D* pVolGeom, const CProjectionGeometry3D* pProjGeom) +{ + double vmin_g, vmax_g; + + // reduce self to only cover intersection with projection of VolumePart + // (Project corners of volume, take bounding box) + + for (int i = 0; i < pProjGeom->getProjectionCount(); ++i) { + + double vol_u[8]; + double vol_v[8]; + + double pixx = pVolGeom->getPixelLengthX(); + double pixy = pVolGeom->getPixelLengthY(); + double pixz = pVolGeom->getPixelLengthZ(); + + // TODO: Is 0.5 sufficient? + double xmin = pVolGeom->getWindowMinX() - 0.5 * pixx; + double xmax = pVolGeom->getWindowMaxX() + 0.5 * pixx; + double ymin = pVolGeom->getWindowMinY() - 0.5 * pixy; + double ymax = pVolGeom->getWindowMaxY() + 0.5 * pixy; + double zmin = pVolGeom->getWindowMinZ() - 0.5 * pixz; + double zmax = pVolGeom->getWindowMaxZ() + 0.5 * pixz; + + pProjGeom->projectPoint(xmin, ymin, zmin, i, vol_u[0], vol_v[0]); + pProjGeom->projectPoint(xmin, ymin, zmax, i, vol_u[1], vol_v[1]); + pProjGeom->projectPoint(xmin, ymax, zmin, i, vol_u[2], vol_v[2]); + pProjGeom->projectPoint(xmin, ymax, zmax, i, vol_u[3], vol_v[3]); + pProjGeom->projectPoint(xmax, ymin, zmin, i, vol_u[4], vol_v[4]); + pProjGeom->projectPoint(xmax, ymin, zmax, i, vol_u[5], vol_v[5]); + pProjGeom->projectPoint(xmax, ymax, zmin, i, vol_u[6], vol_v[6]); + pProjGeom->projectPoint(xmax, ymax, zmax, i, vol_u[7], vol_v[7]); + + double vmin = vol_v[0]; + double vmax = vol_v[0]; + + for (int j = 1; j < 8; ++j) { + if (vol_v[j] < vmin) + vmin = vol_v[j]; + if (vol_v[j] > vmax) + vmax = vol_v[j]; + } + + if (i == 0 || vmin < vmin_g) + vmin_g = vmin; + if (i == 0 || vmax > vmax_g) + vmax_g = vmax; + } + + if (vmin_g < -1.0) + vmin_g = -1.0; + if (vmax_g > pProjGeom->getDetectorRowCount()) + vmax_g = pProjGeom->getDetectorRowCount(); + + if (vmin_g >= vmax_g) + vmin_g = vmax_g = 0.0; + + return std::pair<double, double>(vmin_g, vmax_g); + +} + + CCompositeGeometryManager::CPart::CPart(const CPart& other) { eType = other.eType; @@ -236,119 +300,135 @@ size_t CCompositeGeometryManager::CPart::getSize() } +static bool testVolumeRange(const std::pair<double, double>& fullRange, + const CVolumeGeometry3D *pVolGeom, + const CProjectionGeometry3D *pProjGeom, + int zmin, int zmax) +{ + double pixz = pVolGeom->getPixelLengthZ(); + + CVolumeGeometry3D test(pVolGeom->getGridColCount(), + pVolGeom->getGridRowCount(), + zmax - zmin, + pVolGeom->getWindowMinX(), + pVolGeom->getWindowMinY(), + pVolGeom->getWindowMinZ() + zmin * pixz, + pVolGeom->getWindowMaxX(), + pVolGeom->getWindowMaxY(), + pVolGeom->getWindowMinZ() + zmax * pixz); + + + std::pair<double, double> subRange = reduceProjectionVertical(&test, pProjGeom); + + // empty + if (subRange.first == subRange.second) + return true; + + // fully outside of fullRange + if (subRange.first >= fullRange.second || subRange.second <= fullRange.first) + return true; + + return false; +} + CCompositeGeometryManager::CPart* CCompositeGeometryManager::CVolumePart::reduce(const CPart *_other) { const CProjectionPart *other = dynamic_cast<const CProjectionPart *>(_other); assert(other); - // TODO: Is 0.5 sufficient? - double umin = -0.5; - double umax = other->pGeom->getDetectorColCount() + 0.5; - double vmin = -0.5; - double vmax = other->pGeom->getDetectorRowCount() + 0.5; - - double uu[4]; - double vv[4]; - uu[0] = umin; vv[0] = vmin; - uu[1] = umin; vv[1] = vmax; - uu[2] = umax; vv[2] = vmin; - uu[3] = umax; vv[3] = vmax; - - double pixx = pGeom->getPixelLengthX(); - double pixy = pGeom->getPixelLengthY(); - double pixz = pGeom->getPixelLengthZ(); - double xmin = pGeom->getWindowMinX() - 0.5 * pixx; - double xmax = pGeom->getWindowMaxX() + 0.5 * pixx; - double ymin = pGeom->getWindowMinY() - 0.5 * pixy; - double ymax = pGeom->getWindowMaxY() + 0.5 * pixy; - - // NB: Flipped - double zmax = pGeom->getWindowMinZ() - 2.5 * pixz; - double zmin = pGeom->getWindowMaxZ() + 2.5 * pixz; - - // TODO: This isn't as tight as it could be. - // In particular it won't detect the detector being - // missed entirely on the u side. - - for (int i = 0; i < other->pGeom->getProjectionCount(); ++i) { - for (int j = 0; j < 4; ++j) { - double px, py, pz; - - other->pGeom->backprojectPointX(i, uu[j], vv[j], xmin, py, pz); - //ASTRA_DEBUG("%f %f (%f - %f)", py, pz, ymin, ymax); - if (pz < zmin) zmin = pz; - if (pz > zmax) zmax = pz; - other->pGeom->backprojectPointX(i, uu[j], vv[j], xmax, py, pz); - //ASTRA_DEBUG("%f %f (%f - %f)", py, pz, ymin, ymax); - if (pz < zmin) zmin = pz; - if (pz > zmax) zmax = pz; - - other->pGeom->backprojectPointY(i, uu[j], vv[j], ymin, px, pz); - //ASTRA_DEBUG("%f %f (%f - %f)", px, pz, xmin, xmax); - if (pz < zmin) zmin = pz; - if (pz > zmax) zmax = pz; - other->pGeom->backprojectPointY(i, uu[j], vv[j], ymax, px, pz); - //ASTRA_DEBUG("%f %f (%f - %f)", px, pz, xmin, xmax); - if (pz < zmin) zmin = pz; - if (pz > zmax) zmax = pz; + std::pair<double, double> fullRange = reduceProjectionVertical(pGeom, other->pGeom); + + int top_slice = 0, bottom_slice = 0; + + if (fullRange.first < fullRange.second) { + + + // TOP SLICE + + int zmin = 0; + int zmax = pGeom->getGridSliceCount()-1; // (Don't try empty region) + + // Setting top slice to zmin is always valid. + + while (zmin < zmax) { + int zmid = (zmin + zmax + 1) / 2; + + bool ok = testVolumeRange(fullRange, pGeom, other->pGeom, + 0, zmid); + + ASTRA_DEBUG("binsearch min: [%d,%d], %d, %s", zmin, zmax, zmid, ok ? "ok" : "removed too much"); + + if (ok) + zmin = zmid; + else + zmax = zmid - 1; } - } - //ASTRA_DEBUG("coord extent: %f - %f", zmin, zmax); + top_slice = zmin; + + + // BOTTOM SLICE + + zmin = top_slice + 1; // (Don't try empty region) + zmax = pGeom->getGridSliceCount(); + + // Setting bottom slice to zmax is always valid - // Clip both zmin and zmax to get rid of extreme (or infinite) values - // NB: When individual pz values are +/-Inf, the sign is determined - // by ray direction and on which side of the face the ray passes. - if (zmin < pGeom->getWindowMinZ() - 2*pixz) - zmin = pGeom->getWindowMinZ() - 2*pixz; - if (zmin > pGeom->getWindowMaxZ() + 2*pixz) - zmin = pGeom->getWindowMaxZ() + 2*pixz; - if (zmax < pGeom->getWindowMinZ() - 2*pixz) - zmax = pGeom->getWindowMinZ() - 2*pixz; - if (zmax > pGeom->getWindowMaxZ() + 2*pixz) - zmax = pGeom->getWindowMaxZ() + 2*pixz; + while (zmin < zmax) { + int zmid = (zmin + zmax) / 2; - zmin = (zmin - pixz - pGeom->getWindowMinZ()) / pixz; - zmax = (zmax + pixz - pGeom->getWindowMinZ()) / pixz; + bool ok = testVolumeRange(fullRange, pGeom, other->pGeom, + zmid, pGeom->getGridSliceCount()); - int _zmin = (int)floor(zmin); - int _zmax = (int)ceil(zmax); + ASTRA_DEBUG("binsearch max: [%d,%d], %d, %s", zmin, zmax, zmid, ok ? "ok" : "removed too much"); - //ASTRA_DEBUG("index extent: %d - %d", _zmin, _zmax); + if (ok) + zmax = zmid; + else + zmin = zmid + 1; - if (_zmin < 0) - _zmin = 0; - if (_zmax > pGeom->getGridSliceCount()) - _zmax = pGeom->getGridSliceCount(); + } + + bottom_slice = zmax; - if (_zmax <= _zmin) { - _zmin = _zmax = 0; } - //ASTRA_DEBUG("adjusted extent: %d - %d", _zmin, _zmax); + + ASTRA_DEBUG("found extent: %d - %d", top_slice, bottom_slice); + + top_slice -= 1; + if (top_slice < 0) + top_slice = 0; + bottom_slice += 1; + if (bottom_slice >= pGeom->getGridSliceCount()) + bottom_slice = pGeom->getGridSliceCount(); + + ASTRA_DEBUG("adjusted extent: %d - %d", top_slice, bottom_slice); + + double pixz = pGeom->getPixelLengthZ(); CVolumePart *sub = new CVolumePart(); sub->subX = this->subX; sub->subY = this->subY; - sub->subZ = this->subZ + _zmin; + sub->subZ = this->subZ + top_slice; sub->pData = pData; - if (_zmin == _zmax) { + if (top_slice == bottom_slice) { sub->pGeom = 0; } else { sub->pGeom = new CVolumeGeometry3D(pGeom->getGridColCount(), pGeom->getGridRowCount(), - _zmax - _zmin, + bottom_slice - top_slice, pGeom->getWindowMinX(), pGeom->getWindowMinY(), - pGeom->getWindowMinZ() + _zmin * pixz, + pGeom->getWindowMinZ() + top_slice * pixz, pGeom->getWindowMaxX(), pGeom->getWindowMaxY(), - pGeom->getWindowMinZ() + _zmax * pixz); + pGeom->getWindowMinZ() + bottom_slice * pixz); } - ASTRA_DEBUG("Reduce volume from %d - %d to %d - %d", this->subZ, this->subZ + pGeom->getGridSliceCount(), this->subZ + _zmin, this->subZ + _zmax); + ASTRA_DEBUG("Reduce volume from %d - %d to %d - %d ( %f - %f )", this->subZ, this->subZ + pGeom->getGridSliceCount(), this->subZ + top_slice, this->subZ + bottom_slice, pGeom->getWindowMinZ() + top_slice * pixz, pGeom->getWindowMinZ() + bottom_slice * pixz); return sub; } @@ -583,7 +663,9 @@ void CCompositeGeometryManager::CVolumePart::splitX(CCompositeGeometryManager::T size_t m = std::min(maxSize / sliceSize, maxDim); size_t blockSize = computeLinearSplit(m, div, sliceCount); - int rem = sliceCount % blockSize; + int rem = blockSize - (sliceCount % blockSize); + if (rem == blockSize) + rem = 0; ASTRA_DEBUG("From %d to %d step %d", -(rem / 2), sliceCount, blockSize); @@ -630,7 +712,9 @@ void CCompositeGeometryManager::CVolumePart::splitY(CCompositeGeometryManager::T size_t m = std::min(maxSize / sliceSize, maxDim); size_t blockSize = computeLinearSplit(m, div, sliceCount); - int rem = sliceCount % blockSize; + int rem = blockSize - (sliceCount % blockSize); + if (rem == blockSize) + rem = 0; ASTRA_DEBUG("From %d to %d step %d", -(rem / 2), sliceCount, blockSize); @@ -677,7 +761,9 @@ void CCompositeGeometryManager::CVolumePart::splitZ(CCompositeGeometryManager::T size_t m = std::min(maxSize / sliceSize, maxDim); size_t blockSize = computeLinearSplit(m, div, sliceCount); - int rem = sliceCount % blockSize; + int rem = blockSize - (sliceCount % blockSize); + if (rem == blockSize) + rem = 0; ASTRA_DEBUG("From %d to %d step %d", -(rem / 2), sliceCount, blockSize); @@ -742,62 +828,16 @@ void CCompositeGeometryManager::CProjectionPart::getDims(size_t &x, size_t &y, s } + CCompositeGeometryManager::CPart* CCompositeGeometryManager::CProjectionPart::reduce(const CPart *_other) { const CVolumePart *other = dynamic_cast<const CVolumePart *>(_other); assert(other); - double vmin_g, vmax_g; - - // reduce self to only cover intersection with projection of VolumePart - // (Project corners of volume, take bounding box) - - for (int i = 0; i < pGeom->getProjectionCount(); ++i) { - - double vol_u[8]; - double vol_v[8]; - - double pixx = other->pGeom->getPixelLengthX(); - double pixy = other->pGeom->getPixelLengthY(); - double pixz = other->pGeom->getPixelLengthZ(); - - // TODO: Is 0.5 sufficient? - double xmin = other->pGeom->getWindowMinX() - 0.5 * pixx; - double xmax = other->pGeom->getWindowMaxX() + 0.5 * pixx; - double ymin = other->pGeom->getWindowMinY() - 0.5 * pixy; - double ymax = other->pGeom->getWindowMaxY() + 0.5 * pixy; - double zmin = other->pGeom->getWindowMinZ() - 0.5 * pixz; - double zmax = other->pGeom->getWindowMaxZ() + 0.5 * pixz; - - pGeom->projectPoint(xmin, ymin, zmin, i, vol_u[0], vol_v[0]); - pGeom->projectPoint(xmin, ymin, zmax, i, vol_u[1], vol_v[1]); - pGeom->projectPoint(xmin, ymax, zmin, i, vol_u[2], vol_v[2]); - pGeom->projectPoint(xmin, ymax, zmax, i, vol_u[3], vol_v[3]); - pGeom->projectPoint(xmax, ymin, zmin, i, vol_u[4], vol_v[4]); - pGeom->projectPoint(xmax, ymin, zmax, i, vol_u[5], vol_v[5]); - pGeom->projectPoint(xmax, ymax, zmin, i, vol_u[6], vol_v[6]); - pGeom->projectPoint(xmax, ymax, zmax, i, vol_u[7], vol_v[7]); - - double vmin = vol_v[0]; - double vmax = vol_v[0]; - - for (int j = 1; j < 8; ++j) { - if (vol_v[j] < vmin) - vmin = vol_v[j]; - if (vol_v[j] > vmax) - vmax = vol_v[j]; - } - - if (i == 0 || vmin < vmin_g) - vmin_g = vmin; - if (i == 0 || vmax > vmax_g) - vmax_g = vmax; - } - - // fprintf(stderr, "v extent: %f %f\n", vmin_g, vmax_g); - - int _vmin = (int)floor(vmin_g - 1.0f); - int _vmax = (int)ceil(vmax_g + 1.0f); + std::pair<double, double> r = reduceProjectionVertical(other->pGeom, pGeom); + // fprintf(stderr, "v extent: %f %f\n", r.first, r.second); + int _vmin = (int)floor(r.first - 1.0); + int _vmax = (int)ceil(r.second + 1.0); if (_vmin < 0) _vmin = 0; if (_vmax > pGeom->getDetectorRowCount()) @@ -837,7 +877,11 @@ void CCompositeGeometryManager::CProjectionPart::splitX(CCompositeGeometryManage size_t m = std::min(maxSize / sliceSize, maxDim); size_t blockSize = computeLinearSplit(m, div, sliceCount); - int rem = sliceCount % blockSize; + int rem = blockSize - (sliceCount % blockSize); + if (rem == blockSize) + rem = 0; + + ASTRA_DEBUG("From %d to %d step %d", -(rem / 2), sliceCount, blockSize); for (int x = -(rem / 2); x < sliceCount; x += blockSize) { int newsubX = x; @@ -879,7 +923,11 @@ void CCompositeGeometryManager::CProjectionPart::splitZ(CCompositeGeometryManage size_t m = std::min(maxSize / sliceSize, maxDim); size_t blockSize = computeLinearSplit(m, div, sliceCount); - int rem = sliceCount % blockSize; + int rem = blockSize - (sliceCount % blockSize); + if (rem == blockSize) + rem = 0; + + ASTRA_DEBUG("From %d to %d step %d", -(rem / 2), sliceCount, blockSize); for (int z = -(rem / 2); z < sliceCount; z += blockSize) { int newsubZ = z; @@ -992,6 +1040,27 @@ bool CCompositeGeometryManager::doBP(CProjector3D *pProjector, CFloat32VolumeDat return doJobs(L); } + +bool CCompositeGeometryManager::doFDK(CProjector3D *pProjector, CFloat32VolumeData3DMemory *pVolData, + CFloat32ProjectionData3DMemory *pProjData, bool bShortScan, + const float *pfFilter) +{ + if (!dynamic_cast<CConeProjectionGeometry3D*>(pProjData->getGeometry())) { + ASTRA_ERROR("CCompositeGeometryManager::doFDK: cone geometry required"); + return false; + } + + SJob job = createJobBP(pProjector, pVolData, pProjData); + job.eType = SJob::JOB_FDK; + job.FDKSettings.bShortScan = bShortScan; + job.FDKSettings.pfFilter = pfFilter; + + TJobList L; + L.push_back(job); + + return doJobs(L); +} + bool CCompositeGeometryManager::doFP(CProjector3D *pProjector, const std::vector<CFloat32VolumeData3DMemory *>& volData, const std::vector<CFloat32ProjectionData3DMemory *>& projData) { ASTRA_DEBUG("CCompositeGeometryManager::doFP, multi-volume"); @@ -1185,7 +1254,9 @@ static bool doJob(const CCompositeGeometryManager::TJobSet::const_iterator& iter ok = astraCUDA3d::copyToGPUMemory(src, inputMem, srcdims); if (!ok) ASTRA_ERROR("Error copying input data to GPU"); - if (j.eType == CCompositeGeometryManager::SJob::JOB_FP) { + switch (j.eType) { + case CCompositeGeometryManager::SJob::JOB_FP: + { assert(dynamic_cast<CCompositeGeometryManager::CVolumePart*>(j.pInput.get())); assert(dynamic_cast<CCompositeGeometryManager::CProjectionPart*>(j.pOutput.get())); @@ -1194,7 +1265,10 @@ static bool doJob(const CCompositeGeometryManager::TJobSet::const_iterator& iter ok = astraCUDA3d::FP(((CCompositeGeometryManager::CProjectionPart*)j.pOutput.get())->pGeom, outputMem, ((CCompositeGeometryManager::CVolumePart*)j.pInput.get())->pGeom, inputMem, detectorSuperSampling, projKernel); if (!ok) ASTRA_ERROR("Error performing sub-FP"); ASTRA_DEBUG("CCompositeGeometryManager::doJobs: FP done"); - } else if (j.eType == CCompositeGeometryManager::SJob::JOB_BP) { + } + break; + case CCompositeGeometryManager::SJob::JOB_BP: + { assert(dynamic_cast<CCompositeGeometryManager::CVolumePart*>(j.pOutput.get())); assert(dynamic_cast<CCompositeGeometryManager::CProjectionPart*>(j.pInput.get())); @@ -1203,7 +1277,26 @@ static bool doJob(const CCompositeGeometryManager::TJobSet::const_iterator& iter ok = astraCUDA3d::BP(((CCompositeGeometryManager::CProjectionPart*)j.pInput.get())->pGeom, inputMem, ((CCompositeGeometryManager::CVolumePart*)j.pOutput.get())->pGeom, outputMem, voxelSuperSampling); if (!ok) ASTRA_ERROR("Error performing sub-BP"); ASTRA_DEBUG("CCompositeGeometryManager::doJobs: BP done"); - } else { + } + break; + case CCompositeGeometryManager::SJob::JOB_FDK: + { + assert(dynamic_cast<CCompositeGeometryManager::CVolumePart*>(j.pOutput.get())); + assert(dynamic_cast<CCompositeGeometryManager::CProjectionPart*>(j.pInput.get())); + + if (srcdims.subx || srcdims.suby) { + ASTRA_ERROR("CCompositeGeometryManager::doJobs: data too large for FDK"); + ok = false; + } else { + ASTRA_DEBUG("CCompositeGeometryManager::doJobs: doing FDK"); + + ok = astraCUDA3d::FDK(((CCompositeGeometryManager::CProjectionPart*)j.pInput.get())->pGeom, inputMem, ((CCompositeGeometryManager::CVolumePart*)j.pOutput.get())->pGeom, outputMem, j.FDKSettings.bShortScan, j.FDKSettings.pfFilter); + if (!ok) ASTRA_ERROR("Error performing sub-FDK"); + ASTRA_DEBUG("CCompositeGeometryManager::doJobs: FDK done"); + } + } + break; + default: assert(false); } diff --git a/src/CudaFDKAlgorithm3D.cpp b/src/CudaFDKAlgorithm3D.cpp index bc15a3d..15abed4 100644 --- a/src/CudaFDKAlgorithm3D.cpp +++ b/src/CudaFDKAlgorithm3D.cpp @@ -32,6 +32,7 @@ $Id$ #include "astra/CudaProjector3D.h" #include "astra/ConeProjectionGeometry3D.h" +#include "astra/CompositeGeometryManager.h" #include "astra/Logging.h" @@ -84,6 +85,17 @@ bool CCudaFDKAlgorithm3D::_check() const CProjectionGeometry3D* projgeom = m_pSinogram->getGeometry(); ASTRA_CONFIG_CHECK(dynamic_cast<const CConeProjectionGeometry3D*>(projgeom), "CUDA_FDK", "Error setting FDK geometry"); + + const CVolumeGeometry3D* volgeom = m_pReconstruction->getGeometry(); + bool cube = true; + if (abs(volgeom->getPixelLengthX() / volgeom->getPixelLengthY() - 1.0) > 0.00001) + cube = false; + if (abs(volgeom->getPixelLengthX() / volgeom->getPixelLengthZ() - 1.0) > 0.00001) + cube = false; + ASTRA_CONFIG_CHECK(cube, "CUDA_FDK", "Voxels must be cubes for FDK"); + + + return true; } @@ -219,20 +231,28 @@ void CCudaFDKAlgorithm3D::run(int _iNrIterations) CFloat32VolumeData3DMemory* pReconMem = dynamic_cast<CFloat32VolumeData3DMemory*>(m_pReconstruction); ASTRA_ASSERT(pReconMem); - - bool ok = true; - const float *filter = NULL; - if(m_iFilterDataId!=-1){ - const CFloat32ProjectionData2D * pFilterData = dynamic_cast<CFloat32ProjectionData2D*>(CData2DManager::getSingleton().get(m_iFilterDataId)); - filter = pFilterData->getDataConst(); + if (m_iFilterDataId != -1) { + const CFloat32ProjectionData2D *pFilterData = dynamic_cast<CFloat32ProjectionData2D*>(CData2DManager::getSingleton().get(m_iFilterDataId)); + if (pFilterData) + filter = pFilterData->getDataConst(); } +#if 0 + bool ok = true; + ok = astraCudaFDK(pReconMem->getData(), pSinoMem->getDataConst(), &volgeom, conegeom, m_bShortScan, m_iGPUIndex, m_iVoxelSuperSampling, filter); ASTRA_ASSERT(ok); +#endif + + CCompositeGeometryManager cgm; + + cgm.doFDK(m_pProjector, pReconMem, pSinoMem, m_bShortScan, filter); + + } //---------------------------------------------------------------------------------------- diff --git a/src/Utilities.cpp b/src/Utilities.cpp index c9740bf..8b0ca94 100644 --- a/src/Utilities.cpp +++ b/src/Utilities.cpp @@ -28,10 +28,6 @@ $Id$ #include "astra/Utilities.h" -#include <boost/algorithm/string.hpp> -#include <boost/algorithm/string/split.hpp> -#include <boost/algorithm/string/classification.hpp> - #include <sstream> #include <locale> #include <iomanip> @@ -84,18 +80,16 @@ std::vector<double> stringToDoubleVector(const std::string &s) template<typename T> std::vector<T> stringToVector(const std::string& s) { - // split - std::vector<std::string> items; - boost::split(items, s, boost::is_any_of(",;")); - - // init list std::vector<T> out; - out.resize(items.size()); + size_t current = 0; + size_t next; + do { + next = s.find_first_of(",;", current); + std::string t = s.substr(current, next - current); + out.push_back(stringTo<T>(t)); + current = next + 1; + } while (next != std::string::npos); - // loop elements - for (unsigned int i = 0; i < items.size(); i++) { - out[i] = stringTo<T>(items[i]); - } return out; } @@ -120,6 +114,18 @@ std::string doubleToString(double f) template<> std::string toString(float f) { return floatToString(f); } template<> std::string toString(double f) { return doubleToString(f); } +void splitString(std::vector<std::string> &items, const std::string& s, + const char *delim) +{ + items.clear(); + size_t current = 0; + size_t next; + do { + next = s.find_first_of(",;", current); + items.push_back(s.substr(current, next - current)); + current = next + 1; + } while (next != std::string::npos); +} } |