From 559d3e599b7306e2de64f2a584d72bc5c98b692b Mon Sep 17 00:00:00 2001 From: Willem Jan Palenstijn Date: Thu, 4 Feb 2016 13:56:06 +0100 Subject: Refactor CUDA projector params into struct --- cuda/3d/algo3d.cu | 21 +++++---- cuda/3d/algo3d.h | 5 +- cuda/3d/astra3d.cu | 128 ++++++++++++++++++++++------------------------------ cuda/3d/astra3d.h | 11 ++--- cuda/3d/cgls3d.cu | 2 +- cuda/3d/cone_bp.cu | 30 ++++++------ cuda/3d/cone_bp.h | 4 +- cuda/3d/cone_fp.cu | 34 +++++++------- cuda/3d/cone_fp.h | 4 +- cuda/3d/dims3d.h | 18 ++++++++ cuda/3d/mem3d.cu | 27 +++++------ cuda/3d/par3d_bp.cu | 30 ++++++------ cuda/3d/par3d_bp.h | 4 +- cuda/3d/par3d_fp.cu | 48 ++++++++++---------- cuda/3d/par3d_fp.h | 6 +-- cuda/3d/sirt3d.cu | 2 +- 16 files changed, 185 insertions(+), 189 deletions(-) (limited to 'cuda/3d') diff --git a/cuda/3d/algo3d.cu b/cuda/3d/algo3d.cu index cc86b70..e2c1e43 100644 --- a/cuda/3d/algo3d.cu +++ b/cuda/3d/algo3d.cu @@ -41,7 +41,6 @@ ReconAlgo3D::ReconAlgo3D() coneProjs = 0; par3DProjs = 0; shouldAbort = false; - fOutputScale = 1.0f; } ReconAlgo3D::~ReconAlgo3D() @@ -58,10 +57,10 @@ void ReconAlgo3D::reset() shouldAbort = false; } -bool ReconAlgo3D::setConeGeometry(const SDimensions3D& _dims, const SConeProjection* _angles, float _outputScale) +bool ReconAlgo3D::setConeGeometry(const SDimensions3D& _dims, const SConeProjection* _angles, const SProjectorParams3D& _params) { dims = _dims; - fOutputScale = _outputScale; + params = _params; coneProjs = new SConeProjection[dims.iProjAngles]; par3DProjs = 0; @@ -71,10 +70,10 @@ bool ReconAlgo3D::setConeGeometry(const SDimensions3D& _dims, const SConeProject return true; } -bool ReconAlgo3D::setPar3DGeometry(const SDimensions3D& _dims, const SPar3DProjection* _angles, float _outputScale) +bool ReconAlgo3D::setPar3DGeometry(const SDimensions3D& _dims, const SPar3DProjection* _angles, const SProjectorParams3D& _params) { dims = _dims; - fOutputScale = _outputScale; + params = _params; par3DProjs = new SPar3DProjection[dims.iProjAngles]; coneProjs = 0; @@ -89,10 +88,12 @@ bool ReconAlgo3D::callFP(cudaPitchedPtr& D_volumeData, cudaPitchedPtr& D_projData, float outputScale) { + SProjectorParams3D p = params; + p.fOutputScale *= outputScale; if (coneProjs) { - return ConeFP(D_volumeData, D_projData, dims, coneProjs, outputScale * this->fOutputScale); + return ConeFP(D_volumeData, D_projData, dims, coneProjs, p); } else { - return Par3DFP(D_volumeData, D_projData, dims, par3DProjs, outputScale * this->fOutputScale); + return Par3DFP(D_volumeData, D_projData, dims, par3DProjs, p); } } @@ -100,10 +101,12 @@ bool ReconAlgo3D::callBP(cudaPitchedPtr& D_volumeData, cudaPitchedPtr& D_projData, float outputScale) { + SProjectorParams3D p = params; + p.fOutputScale *= outputScale; if (coneProjs) { - return ConeBP(D_volumeData, D_projData, dims, coneProjs, outputScale * this->fOutputScale); + return ConeBP(D_volumeData, D_projData, dims, coneProjs, p); } else { - return Par3DBP(D_volumeData, D_projData, dims, par3DProjs, outputScale * this->fOutputScale); + return Par3DBP(D_volumeData, D_projData, dims, par3DProjs, p); } } diff --git a/cuda/3d/algo3d.h b/cuda/3d/algo3d.h index 886b092..ce331ad 100644 --- a/cuda/3d/algo3d.h +++ b/cuda/3d/algo3d.h @@ -39,8 +39,8 @@ public: ReconAlgo3D(); ~ReconAlgo3D(); - bool setConeGeometry(const SDimensions3D& dims, const SConeProjection* projs, float fOutputScale); - bool setPar3DGeometry(const SDimensions3D& dims, const SPar3DProjection* projs, float fOutputScale); + bool setConeGeometry(const SDimensions3D& dims, const SConeProjection* projs, const SProjectorParams3D& params); + bool setPar3DGeometry(const SDimensions3D& dims, const SPar3DProjection* projs, const SProjectorParams3D& params); void signalAbort() { shouldAbort = true; } @@ -55,6 +55,7 @@ protected: float outputScale); SDimensions3D dims; + SProjectorParams3D params; SConeProjection* coneProjs; SPar3DProjection* par3DProjs; diff --git a/cuda/3d/astra3d.cu b/cuda/3d/astra3d.cu index 8328229..454530e 100644 --- a/cuda/3d/astra3d.cu +++ b/cuda/3d/astra3d.cu @@ -67,7 +67,7 @@ template static bool convertAstraGeometry_internal(const CVolumeGeometry3D* pVolGeom, unsigned int iProjectionAngleCount, ProjectionT*& pProjs, - float& fOutputScale) + SProjectorParams3D& params) { assert(pVolGeom); assert(pProjs); @@ -94,7 +94,7 @@ static bool convertAstraGeometry_internal(const CVolumeGeometry3D* pVolGeom, } // CHECKME: Check factor - fOutputScale *= pVolGeom->getPixelLengthX(); + params.fOutputScale *= pVolGeom->getPixelLengthX(); return true; } @@ -108,10 +108,8 @@ bool convertAstraGeometry_dims(const CVolumeGeometry3D* pVolGeom, dims.iVolY = pVolGeom->getGridRowCount(); dims.iVolZ = pVolGeom->getGridSliceCount(); dims.iProjAngles = pProjGeom->getProjectionCount(); - dims.iProjU = pProjGeom->getDetectorColCount(), - dims.iProjV = pProjGeom->getDetectorRowCount(), - dims.iRaysPerDetDim = 1; - dims.iRaysPerVoxelDim = 1; + dims.iProjU = pProjGeom->getDetectorColCount(); + dims.iProjV = pProjGeom->getDetectorRowCount(); if (dims.iVolX <= 0 || dims.iVolX <= 0 || dims.iVolX <= 0) return false; @@ -124,7 +122,7 @@ bool convertAstraGeometry_dims(const CVolumeGeometry3D* pVolGeom, bool convertAstraGeometry(const CVolumeGeometry3D* pVolGeom, const CParallelProjectionGeometry3D* pProjGeom, - SPar3DProjection*& pProjs, float& fOutputScale) + SPar3DProjection*& pProjs, SProjectorParams3D& params) { assert(pVolGeom); assert(pProjGeom); @@ -141,16 +139,14 @@ bool convertAstraGeometry(const CVolumeGeometry3D* pVolGeom, bool ok; - fOutputScale = 1.0f; - - ok = convertAstraGeometry_internal(pVolGeom, nth, pProjs, fOutputScale); + ok = convertAstraGeometry_internal(pVolGeom, nth, pProjs, params); return ok; } bool convertAstraGeometry(const CVolumeGeometry3D* pVolGeom, const CParallelVecProjectionGeometry3D* pProjGeom, - SPar3DProjection*& pProjs, float& fOutputScale) + SPar3DProjection*& pProjs, SProjectorParams3D& params) { assert(pVolGeom); assert(pProjGeom); @@ -164,16 +160,14 @@ bool convertAstraGeometry(const CVolumeGeometry3D* pVolGeom, bool ok; - fOutputScale = 1.0f; - - ok = convertAstraGeometry_internal(pVolGeom, nth, pProjs, fOutputScale); + ok = convertAstraGeometry_internal(pVolGeom, nth, pProjs, params); return ok; } bool convertAstraGeometry(const CVolumeGeometry3D* pVolGeom, const CConeProjectionGeometry3D* pProjGeom, - SConeProjection*& pProjs, float& fOutputScale) + SConeProjection*& pProjs, SProjectorParams3D& params) { assert(pVolGeom); assert(pProjGeom); @@ -192,16 +186,14 @@ bool convertAstraGeometry(const CVolumeGeometry3D* pVolGeom, bool ok; - fOutputScale = 1.0f; - - ok = convertAstraGeometry_internal(pVolGeom, nth, pProjs, fOutputScale); + ok = convertAstraGeometry_internal(pVolGeom, nth, pProjs, params); return ok; } bool convertAstraGeometry(const CVolumeGeometry3D* pVolGeom, const CConeVecProjectionGeometry3D* pProjGeom, - SConeProjection*& pProjs, float& fOutputScale) + SConeProjection*& pProjs, SProjectorParams3D& params) { assert(pVolGeom); assert(pProjGeom); @@ -215,9 +207,7 @@ bool convertAstraGeometry(const CVolumeGeometry3D* pVolGeom, bool ok; - fOutputScale = 1.0f; - - ok = convertAstraGeometry_internal(pVolGeom, nth, pProjs, fOutputScale); + ok = convertAstraGeometry_internal(pVolGeom, nth, pProjs, params); return ok; } @@ -227,7 +217,7 @@ bool convertAstraGeometry(const CVolumeGeometry3D* pVolGeom, const CProjectionGeometry3D* pProjGeom, SPar3DProjection*& pParProjs, SConeProjection*& pConeProjs, - float& fOutputScale) + SProjectorParams3D& params) { const CConeProjectionGeometry3D* conegeom = dynamic_cast(pProjGeom); const CParallelProjectionGeometry3D* par3dgeom = dynamic_cast(pProjGeom); @@ -240,13 +230,13 @@ bool convertAstraGeometry(const CVolumeGeometry3D* pVolGeom, bool ok; if (conegeom) { - ok = convertAstraGeometry(pVolGeom, conegeom, pConeProjs, fOutputScale); + ok = convertAstraGeometry(pVolGeom, conegeom, pConeProjs, params); } else if (conevec3dgeom) { - ok = convertAstraGeometry(pVolGeom, conevec3dgeom, pConeProjs, fOutputScale); + ok = convertAstraGeometry(pVolGeom, conevec3dgeom, pConeProjs, params); } else if (par3dgeom) { - ok = convertAstraGeometry(pVolGeom, par3dgeom, pParProjs, fOutputScale); + ok = convertAstraGeometry(pVolGeom, par3dgeom, pParProjs, params); } else if (parvec3dgeom) { - ok = convertAstraGeometry(pVolGeom, parvec3dgeom, pParProjs, fOutputScale); + ok = convertAstraGeometry(pVolGeom, parvec3dgeom, pParProjs, params); } else { ok = false; } @@ -260,19 +250,16 @@ bool convertAstraGeometry(const CVolumeGeometry3D* pVolGeom, class AstraSIRT3d_internal { public: SDimensions3D dims; + SProjectorParams3D params; CUDAProjectionType3d projType; float* angles; float fOriginSourceDistance; float fOriginDetectorDistance; - float fSourceZ; - float fDetSize; SConeProjection* projs; SPar3DProjection* parprojs; - float fOutputScale; - bool initialized; bool setStartReconstruction; @@ -304,12 +291,9 @@ AstraSIRT3d::AstraSIRT3d() pData->dims.iProjAngles = 0; pData->dims.iProjU = 0; pData->dims.iProjV = 0; - pData->dims.iRaysPerDetDim = 1; - pData->dims.iRaysPerVoxelDim = 1; pData->projs = 0; pData->parprojs = 0; - pData->fOutputScale = 1.0f; pData->initialized = false; pData->setStartReconstruction = false; @@ -358,7 +342,7 @@ bool AstraSIRT3d::setGeometry(const CVolumeGeometry3D* pVolGeom, ok = convertAstraGeometry(pVolGeom, pProjGeom, pData->parprojs, pData->projs, - pData->fOutputScale); + pData->params); if (!ok) return false; @@ -383,8 +367,8 @@ bool AstraSIRT3d::enableSuperSampling(unsigned int iVoxelSuperSampling, if (iVoxelSuperSampling == 0 || iDetectorSuperSampling == 0) return false; - pData->dims.iRaysPerVoxelDim = iVoxelSuperSampling; - pData->dims.iRaysPerDetDim = iDetectorSuperSampling; + pData->params.iRaysPerVoxelDim = iVoxelSuperSampling; + pData->params.iRaysPerDetDim = iDetectorSuperSampling; return true; } @@ -436,9 +420,9 @@ bool AstraSIRT3d::init() bool ok; if (pData->projType == PROJ_PARALLEL) { - ok = pData->sirt.setPar3DGeometry(pData->dims, pData->parprojs, pData->fOutputScale); + ok = pData->sirt.setPar3DGeometry(pData->dims, pData->parprojs, pData->params); } else { - ok = pData->sirt.setConeGeometry(pData->dims, pData->projs, pData->fOutputScale); + ok = pData->sirt.setConeGeometry(pData->dims, pData->projs, pData->params); } if (!ok) @@ -639,19 +623,16 @@ float AstraSIRT3d::computeDiffNorm() class AstraCGLS3d_internal { public: SDimensions3D dims; + SProjectorParams3D params; CUDAProjectionType3d projType; float* angles; float fOriginSourceDistance; float fOriginDetectorDistance; - float fSourceZ; - float fDetSize; SConeProjection* projs; SPar3DProjection* parprojs; - float fOutputScale; - bool initialized; bool setStartReconstruction; @@ -683,12 +664,9 @@ AstraCGLS3d::AstraCGLS3d() pData->dims.iProjAngles = 0; pData->dims.iProjU = 0; pData->dims.iProjV = 0; - pData->dims.iRaysPerDetDim = 1; - pData->dims.iRaysPerVoxelDim = 1; pData->projs = 0; pData->parprojs = 0; - pData->fOutputScale = 1.0f; pData->initialized = false; pData->setStartReconstruction = false; @@ -737,7 +715,7 @@ bool AstraCGLS3d::setGeometry(const CVolumeGeometry3D* pVolGeom, ok = convertAstraGeometry(pVolGeom, pProjGeom, pData->parprojs, pData->projs, - pData->fOutputScale); + pData->params); if (!ok) return false; @@ -761,8 +739,8 @@ bool AstraCGLS3d::enableSuperSampling(unsigned int iVoxelSuperSampling, if (iVoxelSuperSampling == 0 || iDetectorSuperSampling == 0) return false; - pData->dims.iRaysPerVoxelDim = iVoxelSuperSampling; - pData->dims.iRaysPerDetDim = iDetectorSuperSampling; + pData->params.iRaysPerVoxelDim = iVoxelSuperSampling; + pData->params.iRaysPerDetDim = iDetectorSuperSampling; return true; } @@ -816,9 +794,9 @@ bool AstraCGLS3d::init() bool ok; if (pData->projType == PROJ_PARALLEL) { - ok = pData->cgls.setPar3DGeometry(pData->dims, pData->parprojs, pData->fOutputScale); + ok = pData->cgls.setPar3DGeometry(pData->dims, pData->parprojs, pData->params); } else { - ok = pData->cgls.setConeGeometry(pData->dims, pData->projs, pData->fOutputScale); + ok = pData->cgls.setConeGeometry(pData->dims, pData->projs, pData->params); } if (!ok) @@ -1026,23 +1004,23 @@ bool astraCudaFP(const float* pfVolume, float* pfProjections, Cuda3DProjectionKernel projKernel) { SDimensions3D dims; + SProjectorParams3D params; + + params.iRaysPerDetDim = iDetectorSuperSampling; bool ok = convertAstraGeometry_dims(pVolGeom, pProjGeom, dims); if (!ok) return false; - dims.iRaysPerDetDim = iDetectorSuperSampling; if (iDetectorSuperSampling == 0) return false; SPar3DProjection* pParProjs; SConeProjection* pConeProjs; - float outputScale; - ok = convertAstraGeometry(pVolGeom, pProjGeom, pParProjs, pConeProjs, - outputScale); + params); if (iGPUIndex != -1) { @@ -1080,10 +1058,10 @@ bool astraCudaFP(const float* pfVolume, float* pfProjections, if (pParProjs) { switch (projKernel) { case ker3d_default: - ok &= Par3DFP(D_volumeData, D_projData, dims, pParProjs, outputScale); + ok &= Par3DFP(D_volumeData, D_projData, dims, pParProjs, params); break; case ker3d_sum_square_weights: - ok &= Par3DFP_SumSqW(D_volumeData, D_projData, dims, pParProjs, outputScale*outputScale); + ok &= Par3DFP_SumSqW(D_volumeData, D_projData, dims, pParProjs, params); break; default: assert(false); @@ -1091,7 +1069,7 @@ bool astraCudaFP(const float* pfVolume, float* pfProjections, } else { switch (projKernel) { case ker3d_default: - ok &= ConeFP(D_volumeData, D_projData, dims, pConeProjs, outputScale); + ok &= ConeFP(D_volumeData, D_projData, dims, pConeProjs, params); break; default: assert(false); @@ -1116,21 +1094,20 @@ bool astraCudaBP(float* pfVolume, const float* pfProjections, int iGPUIndex, int iVoxelSuperSampling) { SDimensions3D dims; + SProjectorParams3D params; + + params.iRaysPerVoxelDim = iVoxelSuperSampling; bool ok = convertAstraGeometry_dims(pVolGeom, pProjGeom, dims); if (!ok) return false; - dims.iRaysPerVoxelDim = iVoxelSuperSampling; - SPar3DProjection* pParProjs; SConeProjection* pConeProjs; - float outputScale; - ok = convertAstraGeometry(pVolGeom, pProjGeom, pParProjs, pConeProjs, - outputScale); + params); if (iGPUIndex != -1) { cudaSetDevice(iGPUIndex); @@ -1166,9 +1143,9 @@ bool astraCudaBP(float* pfVolume, const float* pfProjections, } if (pParProjs) - ok &= Par3DBP(D_volumeData, D_projData, dims, pParProjs, outputScale); + ok &= Par3DBP(D_volumeData, D_projData, dims, pParProjs, params); else - ok &= ConeBP(D_volumeData, D_projData, dims, pConeProjs, outputScale); + ok &= ConeBP(D_volumeData, D_projData, dims, pConeProjs, params); ok &= copyVolumeFromDevice(pfVolume, D_volumeData, dims, dims.iVolX); @@ -1191,21 +1168,21 @@ bool astraCudaBP_SIRTWeighted(float* pfVolume, int iGPUIndex, int iVoxelSuperSampling) { SDimensions3D dims; + SProjectorParams3D params; + + params.iRaysPerVoxelDim = iVoxelSuperSampling; bool ok = convertAstraGeometry_dims(pVolGeom, pProjGeom, dims); if (!ok) return false; - dims.iRaysPerVoxelDim = iVoxelSuperSampling; SPar3DProjection* pParProjs; SConeProjection* pConeProjs; - float outputScale; - ok = convertAstraGeometry(pVolGeom, pProjGeom, pParProjs, pConeProjs, - outputScale); + params); if (iGPUIndex != -1) { cudaSetDevice(iGPUIndex); @@ -1242,9 +1219,9 @@ bool astraCudaBP_SIRTWeighted(float* pfVolume, processSino3D(D_projData, 1.0f, dims); if (pParProjs) - ok &= Par3DBP(D_pixelWeight, D_projData, dims, pParProjs, outputScale); + ok &= Par3DBP(D_pixelWeight, D_projData, dims, pParProjs, params); else - ok &= ConeBP(D_pixelWeight, D_projData, dims, pConeProjs, outputScale); + ok &= ConeBP(D_pixelWeight, D_projData, dims, pConeProjs, params); processVol3D(D_pixelWeight, dims); if (!ok) { @@ -1259,9 +1236,9 @@ bool astraCudaBP_SIRTWeighted(float* pfVolume, ok &= zeroVolumeData(D_volumeData, dims); // Do BP into D_volumeData if (pParProjs) - ok &= Par3DBP(D_volumeData, D_projData, dims, pParProjs, outputScale); + ok &= Par3DBP(D_volumeData, D_projData, dims, pParProjs, params); else - ok &= ConeBP(D_volumeData, D_projData, dims, pConeProjs, outputScale); + ok &= ConeBP(D_volumeData, D_projData, dims, pConeProjs, params); // Multiply with weights processVol3D(D_volumeData, D_pixelWeight, dims); @@ -1301,6 +1278,9 @@ bool astraCudaFDK(float* pfVolume, const float* pfProjections, int iGPUIndex, int iVoxelSuperSampling) { SDimensions3D dims; + SProjectorParams3D params; + + params.iRaysPerVoxelDim = iVoxelSuperSampling; bool ok = convertAstraGeometry_dims(pVolGeom, pProjGeom, dims); @@ -1310,7 +1290,6 @@ bool astraCudaFDK(float* pfVolume, const float* pfProjections, if (!ok) return false; - dims.iRaysPerVoxelDim = iVoxelSuperSampling; if (iVoxelSuperSampling == 0) return false; @@ -1354,6 +1333,7 @@ bool astraCudaFDK(float* pfVolume, const float* pfProjections, // TODO: Offer interface for SrcZ, DetZ + // TODO: Voxel scaling ok &= FDK(D_volumeData, D_projData, fOriginSourceDistance, fOriginDetectorDistance, 0, 0, fDetUSize, fDetVSize, dims, pfAngles, bShortScan); diff --git a/cuda/3d/astra3d.h b/cuda/3d/astra3d.h index 2782994..1f1cee2 100644 --- a/cuda/3d/astra3d.h +++ b/cuda/3d/astra3d.h @@ -37,11 +37,6 @@ namespace astra { // TODO: Switch to a class hierarchy as with the 2D algorithms -enum Cuda3DProjectionKernel { - ker3d_default = 0, - ker3d_sum_square_weights -}; - class CProjectionGeometry3D; class CParallelProjectionGeometry3D; class CParallelVecProjectionGeometry3D; @@ -50,6 +45,10 @@ class CConeVecProjectionGeometry3D; class CVolumeGeometry3D; class AstraSIRT3d_internal; +using astraCUDA3d::Cuda3DProjectionKernel; +using astraCUDA3d::ker3d_default; +using astraCUDA3d::ker3d_sum_square_weights; + class _AstraExport AstraSIRT3d { public: @@ -289,7 +288,7 @@ bool convertAstraGeometry(const CVolumeGeometry3D* pVolGeom, const CProjectionGeometry3D* pProjGeom, SPar3DProjection*& pParProjs, SConeProjection*& pConeProjs, - float& fOutputScale); + astraCUDA3d::SProjectorParams3D& params); _AstraExport bool astraCudaFP(const float* pfVolume, float* pfProjections, const CVolumeGeometry3D* pVolGeom, diff --git a/cuda/3d/cgls3d.cu b/cuda/3d/cgls3d.cu index dd0e8a0..0b0331e 100644 --- a/cuda/3d/cgls3d.cu +++ b/cuda/3d/cgls3d.cu @@ -242,7 +242,7 @@ bool doCGLS(cudaPitchedPtr& D_volumeData, CGLS cgls; bool ok = true; - ok &= cgls.setConeGeometry(dims, angles, 1.0f); + ok &= cgls.setConeGeometry(dims, angles, SProjectorParams3D()); if (D_maskData.ptr) ok &= cgls.enableVolumeMask(); diff --git a/cuda/3d/cone_bp.cu b/cuda/3d/cone_bp.cu index 4a41f6a..8c35125 100644 --- a/cuda/3d/cone_bp.cu +++ b/cuda/3d/cone_bp.cu @@ -154,7 +154,7 @@ __global__ void dev_cone_BP(void* D_volData, unsigned int volPitch, int startAng // supersampling version -__global__ void dev_cone_BP_SS(void* D_volData, unsigned int volPitch, int startAngle, int angleOffset, const SDimensions3D dims, float fOutputScale) +__global__ void dev_cone_BP_SS(void* D_volData, unsigned int volPitch, int startAngle, int angleOffset, const SDimensions3D dims, int iRaysPerVoxelDim, float fOutputScale) { float* volData = (float*)D_volData; @@ -185,12 +185,12 @@ __global__ void dev_cone_BP_SS(void* D_volData, unsigned int volPitch, int start if (endZ > dims.iVolZ) endZ = dims.iVolZ; - float fX = X - 0.5f*dims.iVolX + 0.5f - 0.5f + 0.5f/dims.iRaysPerVoxelDim; - float fY = Y - 0.5f*dims.iVolY + 0.5f - 0.5f + 0.5f/dims.iRaysPerVoxelDim; - float fZ = startZ - 0.5f*dims.iVolZ + 0.5f - 0.5f + 0.5f/dims.iRaysPerVoxelDim; - const float fSubStep = 1.0f/dims.iRaysPerVoxelDim; + float fX = X - 0.5f*dims.iVolX + 0.5f - 0.5f + 0.5f/iRaysPerVoxelDim; + float fY = Y - 0.5f*dims.iVolY + 0.5f - 0.5f + 0.5f/iRaysPerVoxelDim; + float fZ = startZ - 0.5f*dims.iVolZ + 0.5f - 0.5f + 0.5f/iRaysPerVoxelDim; + const float fSubStep = 1.0f/iRaysPerVoxelDim; - fOutputScale /= (dims.iRaysPerVoxelDim*dims.iRaysPerVoxelDim*dims.iRaysPerVoxelDim); + fOutputScale /= (iRaysPerVoxelDim*iRaysPerVoxelDim*iRaysPerVoxelDim); for (int Z = startZ; Z < endZ; ++Z, fZ += 1.0f) @@ -216,11 +216,11 @@ __global__ void dev_cone_BP_SS(void* D_volData, unsigned int volPitch, int start const float fCdc = gC_C[12*angle+11]; float fXs = fX; - for (int iSubX = 0; iSubX < dims.iRaysPerVoxelDim; ++iSubX) { + for (int iSubX = 0; iSubX < iRaysPerVoxelDim; ++iSubX) { float fYs = fY; - for (int iSubY = 0; iSubY < dims.iRaysPerVoxelDim; ++iSubY) { + for (int iSubY = 0; iSubY < iRaysPerVoxelDim; ++iSubY) { float fZs = fZ; - for (int iSubZ = 0; iSubZ < dims.iRaysPerVoxelDim; ++iSubZ) { + for (int iSubZ = 0; iSubZ < iRaysPerVoxelDim; ++iSubZ) { const float fUNum = fCuc + fXs * fCux + fYs * fCuy + fZs * fCuz; const float fVNum = fCvc + fXs * fCvx + fYs * fCvy + fZs * fCvz; @@ -248,7 +248,7 @@ __global__ void dev_cone_BP_SS(void* D_volData, unsigned int volPitch, int start bool ConeBP_Array(cudaPitchedPtr D_volumeData, cudaArray *D_projArray, const SDimensions3D& dims, const SConeProjection* angles, - float fOutputScale) + const SProjectorParams3D& params) { bindProjDataTexture(D_projArray); @@ -295,10 +295,10 @@ bool ConeBP_Array(cudaPitchedPtr D_volumeData, for (unsigned int i = 0; i < angleCount; i += g_anglesPerBlock) { // printf("Calling BP: %d, %dx%d, %dx%d to %p\n", i, dimBlock.x, dimBlock.y, dimGrid.x, dimGrid.y, (void*)D_volumeData.ptr); - if (dims.iRaysPerVoxelDim == 1) - dev_cone_BP<<>>(D_volumeData.ptr, D_volumeData.pitch/sizeof(float), i, th, dims, fOutputScale); + if (params.iRaysPerVoxelDim == 1) + dev_cone_BP<<>>(D_volumeData.ptr, D_volumeData.pitch/sizeof(float), i, th, dims, params.fOutputScale); else - dev_cone_BP_SS<<>>(D_volumeData.ptr, D_volumeData.pitch/sizeof(float), i, th, dims, fOutputScale); + dev_cone_BP_SS<<>>(D_volumeData.ptr, D_volumeData.pitch/sizeof(float), i, th, dims, params.iRaysPerVoxelDim, params.fOutputScale); } cudaTextForceKernelsCompletion(); @@ -315,14 +315,14 @@ bool ConeBP_Array(cudaPitchedPtr D_volumeData, bool ConeBP(cudaPitchedPtr D_volumeData, cudaPitchedPtr D_projData, const SDimensions3D& dims, const SConeProjection* angles, - float fOutputScale) + const SProjectorParams3D& params) { // transfer projections to array cudaArray* cuArray = allocateProjectionArray(dims); transferProjectionsToArray(D_projData, cuArray, dims); - bool ret = ConeBP_Array(D_volumeData, cuArray, dims, angles, fOutputScale); + bool ret = ConeBP_Array(D_volumeData, cuArray, dims, angles, params); cudaFreeArray(cuArray); diff --git a/cuda/3d/cone_bp.h b/cuda/3d/cone_bp.h index 4d3d2dd..a6e2c23 100644 --- a/cuda/3d/cone_bp.h +++ b/cuda/3d/cone_bp.h @@ -34,12 +34,12 @@ namespace astraCUDA3d { _AstraExport bool ConeBP_Array(cudaPitchedPtr D_volumeData, cudaArray *D_projArray, const SDimensions3D& dims, const SConeProjection* angles, - float fOutputScale); + const SProjectorParams3D& params); _AstraExport bool ConeBP(cudaPitchedPtr D_volumeData, cudaPitchedPtr D_projData, const SDimensions3D& dims, const SConeProjection* angles, - float fOutputScale); + const SProjectorParams3D& params); } diff --git a/cuda/3d/cone_fp.cu b/cuda/3d/cone_fp.cu index 13b184f..5a31b65 100644 --- a/cuda/3d/cone_fp.cu +++ b/cuda/3d/cone_fp.cu @@ -214,7 +214,7 @@ template __global__ void cone_FP_SS_t(float* D_projData, unsigned int projPitch, unsigned int startSlice, unsigned int startAngle, unsigned int endAngle, - const SDimensions3D dims, float fOutputScale) + const SDimensions3D dims, int iRaysPerDetDim, float fOutputScale) { COORD c; @@ -245,7 +245,7 @@ __global__ void cone_FP_SS_t(float* D_projData, unsigned int projPitch, if (endSlice > c.nSlices(dims)) endSlice = c.nSlices(dims); - const float fSubStep = 1.0f/dims.iRaysPerDetDim; + const float fSubStep = 1.0f/iRaysPerDetDim; for (int detectorV = startDetectorV; detectorV < endDetectorV; ++detectorV) { @@ -255,9 +255,9 @@ __global__ void cone_FP_SS_t(float* D_projData, unsigned int projPitch, float fV = 0.0f; float fdU = detectorU - 0.5f + 0.5f*fSubStep; - for (int iSubU = 0; iSubU < dims.iRaysPerDetDim; ++iSubU, fdU+=fSubStep) { + for (int iSubU = 0; iSubU < iRaysPerDetDim; ++iSubU, fdU+=fSubStep) { float fdV = detectorV - 0.5f + 0.5f*fSubStep; - for (int iSubV = 0; iSubV < dims.iRaysPerDetDim; ++iSubV, fdV+=fSubStep) { + for (int iSubV = 0; iSubV < iRaysPerDetDim; ++iSubV, fdV+=fSubStep) { const float fDetX = fDetSX + fdU*fDetUX + fdV*fDetVX; const float fDetY = fDetSY + fdU*fDetUY + fdV*fDetVY; @@ -294,14 +294,14 @@ __global__ void cone_FP_SS_t(float* D_projData, unsigned int projPitch, } } - D_projData[(detectorV*dims.iProjAngles+angle)*projPitch+detectorU] += fV / (dims.iRaysPerDetDim * dims.iRaysPerDetDim); + D_projData[(detectorV*dims.iProjAngles+angle)*projPitch+detectorU] += fV / (iRaysPerDetDim * iRaysPerDetDim); } } bool ConeFP_Array_internal(cudaPitchedPtr D_projData, const SDimensions3D& dims, unsigned int angleCount, const SConeProjection* angles, - float fOutputScale) + const SProjectorParams3D& params) { // transfer angles to constant memory float* tmp = new float[angleCount]; @@ -373,22 +373,22 @@ bool ConeFP_Array_internal(cudaPitchedPtr D_projData, if (blockDirection == 0) { for (unsigned int i = 0; i < dims.iVolX; i += g_blockSlices) - if (dims.iRaysPerDetDim == 1) - cone_FP_t<<>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, fOutputScale); + if (params.iRaysPerDetDim == 1) + cone_FP_t<<>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, params.fOutputScale); else - cone_FP_SS_t<<>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, fOutputScale); + cone_FP_SS_t<<>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, params.iRaysPerDetDim, params.fOutputScale); } else if (blockDirection == 1) { for (unsigned int i = 0; i < dims.iVolY; i += g_blockSlices) - if (dims.iRaysPerDetDim == 1) - cone_FP_t<<>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, fOutputScale); + if (params.iRaysPerDetDim == 1) + cone_FP_t<<>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, params.fOutputScale); else - cone_FP_SS_t<<>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, fOutputScale); + cone_FP_SS_t<<>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, params.iRaysPerDetDim, params.fOutputScale); } else if (blockDirection == 2) { for (unsigned int i = 0; i < dims.iVolZ; i += g_blockSlices) - if (dims.iRaysPerDetDim == 1) - cone_FP_t<<>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, fOutputScale); + if (params.iRaysPerDetDim == 1) + cone_FP_t<<>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, params.fOutputScale); else - cone_FP_SS_t<<>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, fOutputScale); + cone_FP_SS_t<<>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, params.iRaysPerDetDim, params.fOutputScale); } } @@ -414,7 +414,7 @@ bool ConeFP_Array_internal(cudaPitchedPtr D_projData, bool ConeFP(cudaPitchedPtr D_volumeData, cudaPitchedPtr D_projData, const SDimensions3D& dims, const SConeProjection* angles, - float fOutputScale) + const SProjectorParams3D& params) { // transfer volume to array @@ -434,7 +434,7 @@ bool ConeFP(cudaPitchedPtr D_volumeData, ret = ConeFP_Array_internal(D_subprojData, dims, iEndAngle - iAngle, angles + iAngle, - fOutputScale); + params); if (!ret) break; } diff --git a/cuda/3d/cone_fp.h b/cuda/3d/cone_fp.h index 969a6d8..62c9caa 100644 --- a/cuda/3d/cone_fp.h +++ b/cuda/3d/cone_fp.h @@ -34,12 +34,12 @@ namespace astraCUDA3d { _AstraExport bool ConeFP_Array(cudaArray *D_volArray, cudaPitchedPtr D_projData, const SDimensions3D& dims, const SConeProjection* angles, - float fOutputScale); + const SProjectorParams3D& params); _AstraExport bool ConeFP(cudaPitchedPtr D_volumeData, cudaPitchedPtr D_projData, const SDimensions3D& dims, const SConeProjection* angles, - float fOutputScale); + const SProjectorParams3D& params); } diff --git a/cuda/3d/dims3d.h b/cuda/3d/dims3d.h index 5437a85..569b395 100644 --- a/cuda/3d/dims3d.h +++ b/cuda/3d/dims3d.h @@ -37,6 +37,13 @@ namespace astraCUDA3d { using astra::SConeProjection; using astra::SPar3DProjection; + +enum Cuda3DProjectionKernel { + ker3d_default = 0, + ker3d_sum_square_weights +}; + + struct SDimensions3D { unsigned int iVolX; unsigned int iVolY; @@ -44,8 +51,19 @@ struct SDimensions3D { unsigned int iProjAngles; unsigned int iProjU; // number of detectors in the U direction unsigned int iProjV; // number of detectors in the V direction +}; + +struct SProjectorParams3D { + SProjectorParams3D() : + iRaysPerDetDim(1), iRaysPerVoxelDim(1), + fOutputScale(1.0f), + ker(ker3d_default) + { } + unsigned int iRaysPerDetDim; unsigned int iRaysPerVoxelDim; + float fOutputScale; + Cuda3DProjectionKernel ker; }; } diff --git a/cuda/3d/mem3d.cu b/cuda/3d/mem3d.cu index 6d81dc0..d09c203 100644 --- a/cuda/3d/mem3d.cu +++ b/cuda/3d/mem3d.cu @@ -174,17 +174,17 @@ bool copyFromGPUMemory(float *dst, MemHandle3D src, const SSubDimensions3D &pos) bool FP(const astra::CProjectionGeometry3D* pProjGeom, MemHandle3D projData, const astra::CVolumeGeometry3D* pVolGeom, MemHandle3D volData, int iDetectorSuperSampling, astra::Cuda3DProjectionKernel projKernel) { SDimensions3D dims; + SProjectorParams3D params; bool ok = convertAstraGeometry_dims(pVolGeom, pProjGeom, dims); if (!ok) return false; #if 1 - dims.iRaysPerDetDim = iDetectorSuperSampling; + params.iRaysPerDetDim = iDetectorSuperSampling; if (iDetectorSuperSampling == 0) return false; #else - dims.iRaysPerDetDim = 1; astra::Cuda3DProjectionKernel projKernel = astra::ker3d_default; #endif @@ -192,11 +192,9 @@ bool FP(const astra::CProjectionGeometry3D* pProjGeom, MemHandle3D projData, con SPar3DProjection* pParProjs; SConeProjection* pConeProjs; - float outputScale = 1.0f; - ok = convertAstraGeometry(pVolGeom, pProjGeom, pParProjs, pConeProjs, - outputScale); + params); if (pParProjs) { #if 0 @@ -211,10 +209,10 @@ bool FP(const astra::CProjectionGeometry3D* pProjGeom, MemHandle3D projData, con switch (projKernel) { case astra::ker3d_default: - ok &= Par3DFP(volData.d->ptr, projData.d->ptr, dims, pParProjs, outputScale); + ok &= Par3DFP(volData.d->ptr, projData.d->ptr, dims, pParProjs, params); break; case astra::ker3d_sum_square_weights: - ok &= Par3DFP_SumSqW(volData.d->ptr, projData.d->ptr, dims, pParProjs, outputScale*outputScale); + ok &= Par3DFP_SumSqW(volData.d->ptr, projData.d->ptr, dims, pParProjs, params); break; default: ok = false; @@ -222,7 +220,7 @@ bool FP(const astra::CProjectionGeometry3D* pProjGeom, MemHandle3D projData, con } else { switch (projKernel) { case astra::ker3d_default: - ok &= ConeFP(volData.d->ptr, projData.d->ptr, dims, pConeProjs, outputScale); + ok &= ConeFP(volData.d->ptr, projData.d->ptr, dims, pConeProjs, params); break; default: ok = false; @@ -235,30 +233,27 @@ bool FP(const astra::CProjectionGeometry3D* pProjGeom, MemHandle3D projData, con bool BP(const astra::CProjectionGeometry3D* pProjGeom, MemHandle3D projData, const astra::CVolumeGeometry3D* pVolGeom, MemHandle3D volData, int iVoxelSuperSampling) { SDimensions3D dims; + SProjectorParams3D params; bool ok = convertAstraGeometry_dims(pVolGeom, pProjGeom, dims); if (!ok) return false; #if 1 - dims.iRaysPerVoxelDim = iVoxelSuperSampling; -#else - dims.iRaysPerVoxelDim = 1; + params.iRaysPerVoxelDim = iVoxelSuperSampling; #endif SPar3DProjection* pParProjs; SConeProjection* pConeProjs; - float outputScale = 1.0f; - ok = convertAstraGeometry(pVolGeom, pProjGeom, pParProjs, pConeProjs, - outputScale); + params); if (pParProjs) - ok &= Par3DBP(volData.d->ptr, projData.d->ptr, dims, pParProjs, outputScale); + ok &= Par3DBP(volData.d->ptr, projData.d->ptr, dims, pParProjs, params); else - ok &= ConeBP(volData.d->ptr, projData.d->ptr, dims, pConeProjs, outputScale); + ok &= ConeBP(volData.d->ptr, projData.d->ptr, dims, pConeProjs, params); return ok; diff --git a/cuda/3d/par3d_bp.cu b/cuda/3d/par3d_bp.cu index cafab46..40f839d 100644 --- a/cuda/3d/par3d_bp.cu +++ b/cuda/3d/par3d_bp.cu @@ -143,7 +143,7 @@ __global__ void dev_par3D_BP(void* D_volData, unsigned int volPitch, int startAn } // supersampling version -__global__ void dev_par3D_BP_SS(void* D_volData, unsigned int volPitch, int startAngle, int angleOffset, const SDimensions3D dims, float fOutputScale) +__global__ void dev_par3D_BP_SS(void* D_volData, unsigned int volPitch, int startAngle, int angleOffset, const SDimensions3D dims, int iRaysPerVoxelDim, float fOutputScale) { float* volData = (float*)D_volData; @@ -174,13 +174,13 @@ __global__ void dev_par3D_BP_SS(void* D_volData, unsigned int volPitch, int star if (endZ > dims.iVolZ) endZ = dims.iVolZ; - float fX = X - 0.5f*dims.iVolX + 0.5f - 0.5f + 0.5f/dims.iRaysPerVoxelDim; - float fY = Y - 0.5f*dims.iVolY + 0.5f - 0.5f + 0.5f/dims.iRaysPerVoxelDim; - float fZ = startZ - 0.5f*dims.iVolZ + 0.5f - 0.5f + 0.5f/dims.iRaysPerVoxelDim; + float fX = X - 0.5f*dims.iVolX + 0.5f - 0.5f + 0.5f/iRaysPerVoxelDim; + float fY = Y - 0.5f*dims.iVolY + 0.5f - 0.5f + 0.5f/iRaysPerVoxelDim; + float fZ = startZ - 0.5f*dims.iVolZ + 0.5f - 0.5f + 0.5f/iRaysPerVoxelDim; - const float fSubStep = 1.0f/dims.iRaysPerVoxelDim; + const float fSubStep = 1.0f/iRaysPerVoxelDim; - fOutputScale /= (dims.iRaysPerVoxelDim*dims.iRaysPerVoxelDim*dims.iRaysPerVoxelDim); + fOutputScale /= (iRaysPerVoxelDim*iRaysPerVoxelDim*iRaysPerVoxelDim); for (int Z = startZ; Z < endZ; ++Z, fZ += 1.0f) @@ -201,11 +201,11 @@ __global__ void dev_par3D_BP_SS(void* D_volData, unsigned int volPitch, int star const float fCvc = gC_C[8*angle+7]; float fXs = fX; - for (int iSubX = 0; iSubX < dims.iRaysPerVoxelDim; ++iSubX) { + for (int iSubX = 0; iSubX < iRaysPerVoxelDim; ++iSubX) { float fYs = fY; - for (int iSubY = 0; iSubY < dims.iRaysPerVoxelDim; ++iSubY) { + for (int iSubY = 0; iSubY < iRaysPerVoxelDim; ++iSubY) { float fZs = fZ; - for (int iSubZ = 0; iSubZ < dims.iRaysPerVoxelDim; ++iSubZ) { + for (int iSubZ = 0; iSubZ < iRaysPerVoxelDim; ++iSubZ) { const float fU = fCuc + fXs * fCux + fYs * fCuy + fZs * fCuz; const float fV = fCvc + fXs * fCvx + fYs * fCvy + fZs * fCvz; @@ -228,7 +228,7 @@ __global__ void dev_par3D_BP_SS(void* D_volData, unsigned int volPitch, int star bool Par3DBP_Array(cudaPitchedPtr D_volumeData, cudaArray *D_projArray, const SDimensions3D& dims, const SPar3DProjection* angles, - float fOutputScale) + const SProjectorParams3D& params) { bindProjDataTexture(D_projArray); @@ -274,10 +274,10 @@ bool Par3DBP_Array(cudaPitchedPtr D_volumeData, for (unsigned int i = 0; i < angleCount; i += g_anglesPerBlock) { // printf("Calling BP: %d, %dx%d, %dx%d to %p\n", i, dimBlock.x, dimBlock.y, dimGrid.x, dimGrid.y, (void*)D_volumeData.ptr); - if (dims.iRaysPerVoxelDim == 1) - dev_par3D_BP<<>>(D_volumeData.ptr, D_volumeData.pitch/sizeof(float), i, th, dims, fOutputScale); + if (params.iRaysPerVoxelDim == 1) + dev_par3D_BP<<>>(D_volumeData.ptr, D_volumeData.pitch/sizeof(float), i, th, dims, params.fOutputScale); else - dev_par3D_BP_SS<<>>(D_volumeData.ptr, D_volumeData.pitch/sizeof(float), i, th, dims, fOutputScale); + dev_par3D_BP_SS<<>>(D_volumeData.ptr, D_volumeData.pitch/sizeof(float), i, th, dims, params.iRaysPerVoxelDim, params.fOutputScale); } cudaTextForceKernelsCompletion(); @@ -293,14 +293,14 @@ bool Par3DBP_Array(cudaPitchedPtr D_volumeData, bool Par3DBP(cudaPitchedPtr D_volumeData, cudaPitchedPtr D_projData, const SDimensions3D& dims, const SPar3DProjection* angles, - float fOutputScale) + const SProjectorParams3D& params) { // transfer projections to array cudaArray* cuArray = allocateProjectionArray(dims); transferProjectionsToArray(D_projData, cuArray, dims); - bool ret = Par3DBP_Array(D_volumeData, cuArray, dims, angles, fOutputScale); + bool ret = Par3DBP_Array(D_volumeData, cuArray, dims, angles, params); cudaFreeArray(cuArray); diff --git a/cuda/3d/par3d_bp.h b/cuda/3d/par3d_bp.h index f1fc62d..3efad19 100644 --- a/cuda/3d/par3d_bp.h +++ b/cuda/3d/par3d_bp.h @@ -34,12 +34,12 @@ namespace astraCUDA3d { _AstraExport bool Par3DBP_Array(cudaPitchedPtr D_volumeData, cudaArray *D_projArray, const SDimensions3D& dims, const SPar3DProjection* angles, - float fOutputScale); + const SProjectorParams3D& params); _AstraExport bool Par3DBP(cudaPitchedPtr D_volumeData, cudaPitchedPtr D_projData, const SDimensions3D& dims, const SPar3DProjection* angles, - float fOutputScale); + const SProjectorParams3D& params); } diff --git a/cuda/3d/par3d_fp.cu b/cuda/3d/par3d_fp.cu index 3ce3d42..fde9c2c 100644 --- a/cuda/3d/par3d_fp.cu +++ b/cuda/3d/par3d_fp.cu @@ -218,7 +218,7 @@ template __global__ void par3D_FP_SS_t(float* D_projData, unsigned int projPitch, unsigned int startSlice, unsigned int startAngle, unsigned int endAngle, - const SDimensions3D dims, float fOutputScale) + const SDimensions3D dims, int iRaysPerDetDim, float fOutputScale) { COORD c; @@ -251,7 +251,7 @@ __global__ void par3D_FP_SS_t(float* D_projData, unsigned int projPitch, if (endSlice > c.nSlices(dims)) endSlice = c.nSlices(dims); - const float fSubStep = 1.0f/dims.iRaysPerDetDim; + const float fSubStep = 1.0f/iRaysPerDetDim; for (int detectorV = startDetectorV; detectorV < endDetectorV; ++detectorV) { @@ -259,9 +259,9 @@ __global__ void par3D_FP_SS_t(float* D_projData, unsigned int projPitch, float fV = 0.0f; float fdU = detectorU - 0.5f + 0.5f*fSubStep; - for (int iSubU = 0; iSubU < dims.iRaysPerDetDim; ++iSubU, fdU+=fSubStep) { + for (int iSubU = 0; iSubU < iRaysPerDetDim; ++iSubU, fdU+=fSubStep) { float fdV = detectorV - 0.5f + 0.5f*fSubStep; - for (int iSubV = 0; iSubV < dims.iRaysPerDetDim; ++iSubV, fdV+=fSubStep) { + for (int iSubV = 0; iSubV < iRaysPerDetDim; ++iSubV, fdV+=fSubStep) { /* Trace ray in direction Ray to (detectorU,detectorV) from */ /* X = startSlice to X = endSlice */ @@ -301,7 +301,7 @@ __global__ void par3D_FP_SS_t(float* D_projData, unsigned int projPitch, } } - D_projData[(detectorV*dims.iProjAngles+angle)*projPitch+detectorU] += fV / (dims.iRaysPerDetDim * dims.iRaysPerDetDim); + D_projData[(detectorV*dims.iProjAngles+angle)*projPitch+detectorU] += fV / (iRaysPerDetDim * iRaysPerDetDim); } } @@ -401,7 +401,7 @@ __global__ void par3D_FP_SumSqW_t(float* D_projData, unsigned int projPitch, bool Par3DFP_Array_internal(cudaPitchedPtr D_projData, const SDimensions3D& dims, unsigned int angleCount, const SPar3DProjection* angles, - float fOutputScale) + const SProjectorParams3D& params) { // transfer angles to constant memory float* tmp = new float[dims.iProjAngles]; @@ -473,22 +473,22 @@ bool Par3DFP_Array_internal(cudaPitchedPtr D_projData, if (blockDirection == 0) { for (unsigned int i = 0; i < dims.iVolX; i += g_blockSlices) - if (dims.iRaysPerDetDim == 1) - par3D_FP_t<<>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, fOutputScale); + if (params.iRaysPerDetDim == 1) + par3D_FP_t<<>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, params.fOutputScale); else - par3D_FP_SS_t<<>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, fOutputScale); + par3D_FP_SS_t<<>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, params.iRaysPerDetDim, params.fOutputScale); } else if (blockDirection == 1) { for (unsigned int i = 0; i < dims.iVolY; i += g_blockSlices) - if (dims.iRaysPerDetDim == 1) - par3D_FP_t<<>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, fOutputScale); + if (params.iRaysPerDetDim == 1) + par3D_FP_t<<>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, params.fOutputScale); else - par3D_FP_SS_t<<>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, fOutputScale); + par3D_FP_SS_t<<>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, params.iRaysPerDetDim, params.fOutputScale); } else if (blockDirection == 2) { for (unsigned int i = 0; i < dims.iVolZ; i += g_blockSlices) - if (dims.iRaysPerDetDim == 1) - par3D_FP_t<<>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, fOutputScale); + if (params.iRaysPerDetDim == 1) + par3D_FP_t<<>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, params.fOutputScale); else - par3D_FP_SS_t<<>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, fOutputScale); + par3D_FP_SS_t<<>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, params.iRaysPerDetDim, params.fOutputScale); } } @@ -514,7 +514,7 @@ bool Par3DFP_Array_internal(cudaPitchedPtr D_projData, bool Par3DFP(cudaPitchedPtr D_volumeData, cudaPitchedPtr D_projData, const SDimensions3D& dims, const SPar3DProjection* angles, - float fOutputScale) + const SProjectorParams3D& params) { // transfer volume to array cudaArray* cuArray = allocateVolumeArray(dims); @@ -533,7 +533,7 @@ bool Par3DFP(cudaPitchedPtr D_volumeData, ret = Par3DFP_Array_internal(D_subprojData, dims, iEndAngle - iAngle, angles + iAngle, - fOutputScale); + params); if (!ret) break; } @@ -548,7 +548,7 @@ bool Par3DFP(cudaPitchedPtr D_volumeData, bool Par3DFP_SumSqW(cudaPitchedPtr D_volumeData, cudaPitchedPtr D_projData, const SDimensions3D& dims, const SPar3DProjection* angles, - float fOutputScale) + const SProjectorParams3D& params) { // transfer angles to constant memory float* tmp = new float[dims.iProjAngles]; @@ -620,8 +620,8 @@ bool Par3DFP_SumSqW(cudaPitchedPtr D_volumeData, if (blockDirection == 0) { for (unsigned int i = 0; i < dims.iVolX; i += g_blockSlices) - if (dims.iRaysPerDetDim == 1) - par3D_FP_SumSqW_t<<>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, fOutputScale); + if (params.iRaysPerDetDim == 1) + par3D_FP_SumSqW_t<<>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, params.fOutputScale); else #if 0 par3D_FP_SS_SumSqW_dirX<<>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, fOutputScale); @@ -630,8 +630,8 @@ bool Par3DFP_SumSqW(cudaPitchedPtr D_volumeData, #endif } else if (blockDirection == 1) { for (unsigned int i = 0; i < dims.iVolY; i += g_blockSlices) - if (dims.iRaysPerDetDim == 1) - par3D_FP_SumSqW_t<<>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, fOutputScale); + if (params.iRaysPerDetDim == 1) + par3D_FP_SumSqW_t<<>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, params.fOutputScale); else #if 0 par3D_FP_SS_SumSqW_dirY<<>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, fOutputScale); @@ -640,8 +640,8 @@ bool Par3DFP_SumSqW(cudaPitchedPtr D_volumeData, #endif } else if (blockDirection == 2) { for (unsigned int i = 0; i < dims.iVolZ; i += g_blockSlices) - if (dims.iRaysPerDetDim == 1) - par3D_FP_SumSqW_t<<>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, fOutputScale); + if (params.iRaysPerDetDim == 1) + par3D_FP_SumSqW_t<<>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, params.fOutputScale); else #if 0 par3D_FP_SS_SumSqW_dirZ<<>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, fOutputScale); diff --git a/cuda/3d/par3d_fp.h b/cuda/3d/par3d_fp.h index 41f204f..5f65cd9 100644 --- a/cuda/3d/par3d_fp.h +++ b/cuda/3d/par3d_fp.h @@ -34,17 +34,17 @@ namespace astraCUDA3d { _AstraExport bool Par3DFP_Array(cudaArray *D_volArray, cudaPitchedPtr D_projData, const SDimensions3D& dims, const SPar3DProjection* angles, - float fOutputScale); + const SProjectorParams3D& params); _AstraExport bool Par3DFP(cudaPitchedPtr D_volumeData, cudaPitchedPtr D_projData, const SDimensions3D& dims, const SPar3DProjection* angles, - float fOutputScale); + const SProjectorParams3D& params); _AstraExport bool Par3DFP_SumSqW(cudaPitchedPtr D_volumeData, cudaPitchedPtr D_projData, const SDimensions3D& dims, const SPar3DProjection* angles, - float fOutputScale); + const SProjectorParams3D& params); } diff --git a/cuda/3d/sirt3d.cu b/cuda/3d/sirt3d.cu index 484521e..4cb35c3 100644 --- a/cuda/3d/sirt3d.cu +++ b/cuda/3d/sirt3d.cu @@ -347,7 +347,7 @@ bool doSIRT(cudaPitchedPtr& D_volumeData, SIRT sirt; bool ok = true; - ok &= sirt.setConeGeometry(dims, angles, 1.0f); + ok &= sirt.setConeGeometry(dims, angles, SProjectorParams3D()); if (D_maskData.ptr) ok &= sirt.enableVolumeMask(); -- cgit v1.2.3 From e38ff1723306b30a677d21bb2ea29436b763dfd6 Mon Sep 17 00:00:00 2001 From: Willem Jan Palenstijn Date: Fri, 5 Feb 2016 14:46:59 +0100 Subject: Add cone_fp kernel support for anisotropic voxels --- cuda/3d/cone_fp.cu | 37 +++++++++++++++++++++++++++---------- cuda/3d/dims3d.h | 4 ++++ 2 files changed, 31 insertions(+), 10 deletions(-) (limited to 'cuda/3d') diff --git a/cuda/3d/cone_fp.cu b/cuda/3d/cone_fp.cu index 5a31b65..2feec06 100644 --- a/cuda/3d/cone_fp.cu +++ b/cuda/3d/cone_fp.cu @@ -139,7 +139,8 @@ template __global__ void cone_FP_t(float* D_projData, unsigned int projPitch, unsigned int startSlice, unsigned int startAngle, unsigned int endAngle, - const SDimensions3D dims, float fOutputScale) + const SDimensions3D dims, + float fScale1, float fScale2, float fOutputScale) { COORD c; @@ -188,7 +189,7 @@ __global__ void cone_FP_t(float* D_projData, unsigned int projPitch, const float b1 = c.c1(fSrcX,fSrcY,fSrcZ) - a1 * c.c0(fSrcX,fSrcY,fSrcZ); const float b2 = c.c2(fSrcX,fSrcY,fSrcZ) - a2 * c.c0(fSrcX,fSrcY,fSrcZ); - const float fDistCorr = sqrt(a1*a1+a2*a2+1.0f) * fOutputScale; + const float fDistCorr = sqrt(a1*a1*fScale1+a2*a2*fScale2+1.0f) * fOutputScale; float fVal = 0.0f; @@ -214,7 +215,8 @@ template __global__ void cone_FP_SS_t(float* D_projData, unsigned int projPitch, unsigned int startSlice, unsigned int startAngle, unsigned int endAngle, - const SDimensions3D dims, int iRaysPerDetDim, float fOutputScale) + const SDimensions3D dims, int iRaysPerDetDim, + float fScale1, float fScale2, float fOutputScale) { COORD c; @@ -272,7 +274,7 @@ __global__ void cone_FP_SS_t(float* D_projData, unsigned int projPitch, const float b1 = c.c1(fSrcX,fSrcY,fSrcZ) - a1 * c.c0(fSrcX,fSrcY,fSrcZ); const float b2 = c.c2(fSrcX,fSrcY,fSrcZ) - a2 * c.c0(fSrcX,fSrcY,fSrcZ); - const float fDistCorr = sqrt(a1*a1+a2*a2+1.0f) * fOutputScale; + const float fDistCorr = sqrt(a1*a1*fScale1+a2*a2*fScale2+1.0f) * fOutputScale; float fVal = 0.0f; @@ -372,23 +374,38 @@ bool ConeFP_Array_internal(cudaPitchedPtr D_projData, // printf("angle block: %d to %d, %d (%dx%d, %dx%d)\n", blockStart, blockEnd, blockDirection, dimGrid.x, dimGrid.y, dimBlock.x, dimBlock.y); if (blockDirection == 0) { + float fS1 = params.fVolScaleY / params.fVolScaleX; + fS1 *= fS1; + float fS2 = params.fVolScaleZ / params.fVolScaleX; + fS2 *= fS2; + float fS0 = params.fOutputScale * params.fVolScaleX; for (unsigned int i = 0; i < dims.iVolX; i += g_blockSlices) if (params.iRaysPerDetDim == 1) - cone_FP_t<<>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, params.fOutputScale); + cone_FP_t<<>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, fS1, fS2, fS0); else - cone_FP_SS_t<<>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, params.iRaysPerDetDim, params.fOutputScale); + cone_FP_SS_t<<>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, params.iRaysPerDetDim, fS1, fS2, fS0); } else if (blockDirection == 1) { + float fS1 = params.fVolScaleX / params.fVolScaleY; + fS1 *= fS1; + float fS2 = params.fVolScaleZ / params.fVolScaleY; + fS2 *= fS2; + float fS0 = params.fOutputScale * params.fVolScaleY; for (unsigned int i = 0; i < dims.iVolY; i += g_blockSlices) if (params.iRaysPerDetDim == 1) - cone_FP_t<<>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, params.fOutputScale); + cone_FP_t<<>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, fS1, fS2, fS0); else - cone_FP_SS_t<<>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, params.iRaysPerDetDim, params.fOutputScale); + cone_FP_SS_t<<>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, params.iRaysPerDetDim, fS1, fS2, fS0); } else if (blockDirection == 2) { + float fS1 = params.fVolScaleX / params.fVolScaleZ; + fS1 *= fS1; + float fS2 = params.fVolScaleY / params.fVolScaleZ; + fS2 *= fS2; + float fS0 = params.fOutputScale * params.fVolScaleZ; for (unsigned int i = 0; i < dims.iVolZ; i += g_blockSlices) if (params.iRaysPerDetDim == 1) - cone_FP_t<<>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, params.fOutputScale); + cone_FP_t<<>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, fS1, fS2, fS0); else - cone_FP_SS_t<<>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, params.iRaysPerDetDim, params.fOutputScale); + cone_FP_SS_t<<>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, params.iRaysPerDetDim, fS1, fS2, fS0); } } diff --git a/cuda/3d/dims3d.h b/cuda/3d/dims3d.h index 569b395..a15c67a 100644 --- a/cuda/3d/dims3d.h +++ b/cuda/3d/dims3d.h @@ -57,12 +57,16 @@ struct SProjectorParams3D { SProjectorParams3D() : iRaysPerDetDim(1), iRaysPerVoxelDim(1), fOutputScale(1.0f), + fVolScaleX(1.0f), fVolScaleY(1.0f), fVolScaleZ(1.0f), ker(ker3d_default) { } unsigned int iRaysPerDetDim; unsigned int iRaysPerVoxelDim; float fOutputScale; + float fVolScaleX; + float fVolScaleY; + float fVolScaleZ; Cuda3DProjectionKernel ker; }; -- cgit v1.2.3 From ae518f2954d8c6b9f1d5156595ccb1d7dc2ec581 Mon Sep 17 00:00:00 2001 From: Willem Jan Palenstijn Date: Fri, 5 Feb 2016 14:54:47 +0100 Subject: Process non-cubic-voxel astra geometries --- cuda/3d/astra3d.cu | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) (limited to 'cuda/3d') diff --git a/cuda/3d/astra3d.cu b/cuda/3d/astra3d.cu index 454530e..42cece2 100644 --- a/cuda/3d/astra3d.cu +++ b/cuda/3d/astra3d.cu @@ -72,29 +72,36 @@ static bool convertAstraGeometry_internal(const CVolumeGeometry3D* pVolGeom, assert(pVolGeom); assert(pProjs); +#if 0 // TODO: Relative instead of absolute const float EPS = 0.00001f; if (abs(pVolGeom->getPixelLengthX() - pVolGeom->getPixelLengthY()) > EPS) return false; if (abs(pVolGeom->getPixelLengthX() - pVolGeom->getPixelLengthZ()) > EPS) return false; - +#endif // Translate float dx = -(pVolGeom->getWindowMinX() + pVolGeom->getWindowMaxX()) / 2; float dy = -(pVolGeom->getWindowMinY() + pVolGeom->getWindowMaxY()) / 2; float dz = -(pVolGeom->getWindowMinZ() + pVolGeom->getWindowMaxZ()) / 2; - float factor = 1.0f / pVolGeom->getPixelLengthX(); + float fx = 1.0f / pVolGeom->getPixelLengthX(); + float fy = 1.0f / pVolGeom->getPixelLengthY(); + float fz = 1.0f / pVolGeom->getPixelLengthZ(); for (int i = 0; i < iProjectionAngleCount; ++i) { // CHECKME: Order of scaling and translation pProjs[i].translate(dx, dy, dz); - pProjs[i].scale(factor); + pProjs[i].scale(fx, fy, fz); } + params.fVolScaleX = pVolGeom->getPixelLengthX(); + params.fVolScaleY = pVolGeom->getPixelLengthY(); + params.fVolScaleZ = pVolGeom->getPixelLengthZ(); + // CHECKME: Check factor - params.fOutputScale *= pVolGeom->getPixelLengthX(); + //params.fOutputScale *= pVolGeom->getPixelLengthX(); return true; } -- cgit v1.2.3 From 407438840d749c74b3c9bd5d66bd5b81e62c8c41 Mon Sep 17 00:00:00 2001 From: Willem Jan Palenstijn Date: Fri, 5 Feb 2016 15:49:32 +0100 Subject: Refactor voxel size scaling CUDA kernel, and special-case cubes --- cuda/3d/cone_fp.cu | 88 ++++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 62 insertions(+), 26 deletions(-) (limited to 'cuda/3d') diff --git a/cuda/3d/cone_fp.cu b/cuda/3d/cone_fp.cu index 2feec06..fefcdc1 100644 --- a/cuda/3d/cone_fp.cu +++ b/cuda/3d/cone_fp.cu @@ -128,6 +128,18 @@ struct DIR_Z { __device__ float z(float f0, float f1, float f2) const { return f0; } }; +struct SCALE_CUBE { + float fOutputScale; + __device__ float scale(float a1, float a2) const { return sqrt(a1*a1+a2*a2+1.0f) * fOutputScale; } +}; + +struct SCALE_NONCUBE { + float fScale1; + float fScale2; + float fOutputScale; + __device__ float scale(float a1, float a2) const { return sqrt(a1*a1*fScale1+a2*a2*fScale2+1.0f) * fOutputScale; } +}; + // threadIdx: x = ??? detector (u?) // y = relative angle @@ -135,12 +147,12 @@ struct DIR_Z { // blockIdx: x = ??? detector (u+v?) // y = angle block -template +template __global__ void cone_FP_t(float* D_projData, unsigned int projPitch, unsigned int startSlice, unsigned int startAngle, unsigned int endAngle, const SDimensions3D dims, - float fScale1, float fScale2, float fOutputScale) + SCALE sc) { COORD c; @@ -189,7 +201,7 @@ __global__ void cone_FP_t(float* D_projData, unsigned int projPitch, const float b1 = c.c1(fSrcX,fSrcY,fSrcZ) - a1 * c.c0(fSrcX,fSrcY,fSrcZ); const float b2 = c.c2(fSrcX,fSrcY,fSrcZ) - a2 * c.c0(fSrcX,fSrcY,fSrcZ); - const float fDistCorr = sqrt(a1*a1*fScale1+a2*a2*fScale2+1.0f) * fOutputScale; + const float fDistCorr = sc.scale(a1, a2); float fVal = 0.0f; @@ -216,7 +228,7 @@ __global__ void cone_FP_SS_t(float* D_projData, unsigned int projPitch, unsigned int startSlice, unsigned int startAngle, unsigned int endAngle, const SDimensions3D dims, int iRaysPerDetDim, - float fScale1, float fScale2, float fOutputScale) + SCALE_NONCUBE sc) { COORD c; @@ -274,7 +286,7 @@ __global__ void cone_FP_SS_t(float* D_projData, unsigned int projPitch, const float b1 = c.c1(fSrcX,fSrcY,fSrcZ) - a1 * c.c0(fSrcX,fSrcY,fSrcZ); const float b2 = c.c2(fSrcX,fSrcY,fSrcZ) - a2 * c.c0(fSrcX,fSrcY,fSrcZ); - const float fDistCorr = sqrt(a1*a1*fScale1+a2*a2*fScale2+1.0f) * fOutputScale; + const float fDistCorr = sc.scale(a1, a2); float fVal = 0.0f; @@ -338,6 +350,36 @@ bool ConeFP_Array_internal(cudaPitchedPtr D_projData, unsigned int blockEnd = 0; int blockDirection = 0; + bool cube = true; + if (abs(params.fVolScaleX / params.fVolScaleY - 1.0) > 0.00001) + cube = false; + if (abs(params.fVolScaleX / params.fVolScaleZ - 1.0) > 0.00001) + cube = false; + + SCALE_CUBE scube; + scube.fOutputScale = params.fOutputScale * params.fVolScaleX; + + SCALE_NONCUBE snoncubeX; + float fS1 = params.fVolScaleY / params.fVolScaleX; + snoncubeX.fScale1 = fS1 * fS1; + float fS2 = params.fVolScaleZ / params.fVolScaleX; + snoncubeX.fScale2 = fS2 * fS2; + snoncubeX.fOutputScale = params.fOutputScale * params.fVolScaleX; + + SCALE_NONCUBE snoncubeY; + fS1 = params.fVolScaleX / params.fVolScaleY; + snoncubeY.fScale1 = fS1 * fS1; + fS2 = params.fVolScaleY / params.fVolScaleY; + snoncubeY.fScale2 = fS2 * fS2; + snoncubeY.fOutputScale = params.fOutputScale * params.fVolScaleY; + + SCALE_NONCUBE snoncubeZ; + fS1 = params.fVolScaleX / params.fVolScaleZ; + snoncubeZ.fScale1 = fS1 * fS1; + fS2 = params.fVolScaleY / params.fVolScaleZ; + snoncubeZ.fScale2 = fS2 * fS2; + snoncubeZ.fOutputScale = params.fOutputScale * params.fVolScaleZ; + // timeval t; // tic(t); @@ -374,38 +416,32 @@ bool ConeFP_Array_internal(cudaPitchedPtr D_projData, // printf("angle block: %d to %d, %d (%dx%d, %dx%d)\n", blockStart, blockEnd, blockDirection, dimGrid.x, dimGrid.y, dimBlock.x, dimBlock.y); if (blockDirection == 0) { - float fS1 = params.fVolScaleY / params.fVolScaleX; - fS1 *= fS1; - float fS2 = params.fVolScaleZ / params.fVolScaleX; - fS2 *= fS2; - float fS0 = params.fOutputScale * params.fVolScaleX; for (unsigned int i = 0; i < dims.iVolX; i += g_blockSlices) if (params.iRaysPerDetDim == 1) - cone_FP_t<<>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, fS1, fS2, fS0); + if (cube) + cone_FP_t<<>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, scube); + else + cone_FP_t<<>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, snoncubeX); else - cone_FP_SS_t<<>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, params.iRaysPerDetDim, fS1, fS2, fS0); + cone_FP_SS_t<<>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, params.iRaysPerDetDim, snoncubeX); } else if (blockDirection == 1) { - float fS1 = params.fVolScaleX / params.fVolScaleY; - fS1 *= fS1; - float fS2 = params.fVolScaleZ / params.fVolScaleY; - fS2 *= fS2; - float fS0 = params.fOutputScale * params.fVolScaleY; for (unsigned int i = 0; i < dims.iVolY; i += g_blockSlices) if (params.iRaysPerDetDim == 1) - cone_FP_t<<>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, fS1, fS2, fS0); + if (cube) + cone_FP_t<<>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, scube); + else + cone_FP_t<<>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, snoncubeY); else - cone_FP_SS_t<<>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, params.iRaysPerDetDim, fS1, fS2, fS0); + cone_FP_SS_t<<>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, params.iRaysPerDetDim, snoncubeY); } else if (blockDirection == 2) { - float fS1 = params.fVolScaleX / params.fVolScaleZ; - fS1 *= fS1; - float fS2 = params.fVolScaleY / params.fVolScaleZ; - fS2 *= fS2; - float fS0 = params.fOutputScale * params.fVolScaleZ; for (unsigned int i = 0; i < dims.iVolZ; i += g_blockSlices) if (params.iRaysPerDetDim == 1) - cone_FP_t<<>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, fS1, fS2, fS0); + if (cube) + cone_FP_t<<>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, scube); + else + cone_FP_t<<>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, snoncubeZ); else - cone_FP_SS_t<<>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, params.iRaysPerDetDim, fS1, fS2, fS0); + cone_FP_SS_t<<>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, params.iRaysPerDetDim, snoncubeZ); } } -- cgit v1.2.3 From 9479de2c72d147adf7e57e84fe181408b9582bcd Mon Sep 17 00:00:00 2001 From: Willem Jan Palenstijn Date: Fri, 5 Feb 2016 17:14:31 +0100 Subject: Add voxel-size scaling to par3d_fp --- cuda/3d/par3d_fp.cu | 108 ++++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 92 insertions(+), 16 deletions(-) (limited to 'cuda/3d') diff --git a/cuda/3d/par3d_fp.cu b/cuda/3d/par3d_fp.cu index fde9c2c..c6c4fda 100644 --- a/cuda/3d/par3d_fp.cu +++ b/cuda/3d/par3d_fp.cu @@ -128,6 +128,18 @@ struct DIR_Z { __device__ float z(float f0, float f1, float f2) const { return f0; } }; +struct SCALE_CUBE { + float fOutputScale; + __device__ float scale(float a1, float a2) const { return sqrt(a1*a1+a2*a2+1.0f) * fOutputScale; } +}; + +struct SCALE_NONCUBE { + float fScale1; + float fScale2; + float fOutputScale; + __device__ float scale(float a1, float a2) const { return sqrt(a1*a1*fScale1+a2*a2*fScale2+1.0f) * fOutputScale; } +}; + // threadIdx: x = u detector @@ -136,11 +148,12 @@ struct DIR_Z { // y = angle block -template +template __global__ void par3D_FP_t(float* D_projData, unsigned int projPitch, unsigned int startSlice, unsigned int startAngle, unsigned int endAngle, - const SDimensions3D dims, float fOutputScale) + const SDimensions3D dims, + SCALE sc) { COORD c; @@ -191,7 +204,7 @@ __global__ void par3D_FP_t(float* D_projData, unsigned int projPitch, const float b1 = c.c1(fDetX,fDetY,fDetZ) - a1 * c.c0(fDetX,fDetY,fDetZ); const float b2 = c.c2(fDetX,fDetY,fDetZ) - a2 * c.c0(fDetX,fDetY,fDetZ); - const float fDistCorr = sqrt(a1*a1+a2*a2+1.0f) * fOutputScale; + const float fDistCorr = sc.scale(a1, a2); float fVal = 0.0f; @@ -218,7 +231,8 @@ template __global__ void par3D_FP_SS_t(float* D_projData, unsigned int projPitch, unsigned int startSlice, unsigned int startAngle, unsigned int endAngle, - const SDimensions3D dims, int iRaysPerDetDim, float fOutputScale) + const SDimensions3D dims, int iRaysPerDetDim, + SCALE_NONCUBE sc) { COORD c; @@ -279,7 +293,7 @@ __global__ void par3D_FP_SS_t(float* D_projData, unsigned int projPitch, const float b1 = c.c1(fDetX,fDetY,fDetZ) - a1 * c.c0(fDetX,fDetY,fDetZ); const float b2 = c.c2(fDetX,fDetY,fDetZ) - a2 * c.c0(fDetX,fDetY,fDetZ); - const float fDistCorr = sqrt(a1*a1+a2*a2+1.0f) * fOutputScale; + const float fDistCorr = sc.scale(a1, a2); float fVal = 0.0f; @@ -324,7 +338,8 @@ template __global__ void par3D_FP_SumSqW_t(float* D_projData, unsigned int projPitch, unsigned int startSlice, unsigned int startAngle, unsigned int endAngle, - const SDimensions3D dims, float fOutputScale) + const SDimensions3D dims, + SCALE_NONCUBE sc) { COORD c; @@ -375,7 +390,7 @@ __global__ void par3D_FP_SumSqW_t(float* D_projData, unsigned int projPitch, const float b1 = c.c1(fDetX,fDetY,fDetZ) - a1 * c.c0(fDetX,fDetY,fDetZ); const float b2 = c.c2(fDetX,fDetY,fDetZ) - a2 * c.c0(fDetX,fDetY,fDetZ); - const float fDistCorr = sqrt(a1*a1+a2*a2+1.0f) * fOutputScale; + const float fDistCorr = sc.scale(a1, a2); float fVal = 0.0f; @@ -436,6 +451,36 @@ bool Par3DFP_Array_internal(cudaPitchedPtr D_projData, unsigned int blockEnd = 0; int blockDirection = 0; + bool cube = true; + if (abs(params.fVolScaleX / params.fVolScaleY - 1.0) > 0.00001) + cube = false; + if (abs(params.fVolScaleX / params.fVolScaleZ - 1.0) > 0.00001) + cube = false; + + SCALE_CUBE scube; + scube.fOutputScale = params.fOutputScale * params.fVolScaleX; + + SCALE_NONCUBE snoncubeX; + float fS1 = params.fVolScaleY / params.fVolScaleX; + snoncubeX.fScale1 = fS1 * fS1; + float fS2 = params.fVolScaleZ / params.fVolScaleX; + snoncubeX.fScale2 = fS2 * fS2; + snoncubeX.fOutputScale = params.fOutputScale * params.fVolScaleX; + + SCALE_NONCUBE snoncubeY; + fS1 = params.fVolScaleX / params.fVolScaleY; + snoncubeY.fScale1 = fS1 * fS1; + fS2 = params.fVolScaleY / params.fVolScaleY; + snoncubeY.fScale2 = fS2 * fS2; + snoncubeY.fOutputScale = params.fOutputScale * params.fVolScaleY; + + SCALE_NONCUBE snoncubeZ; + fS1 = params.fVolScaleX / params.fVolScaleZ; + snoncubeZ.fScale1 = fS1 * fS1; + fS2 = params.fVolScaleY / params.fVolScaleZ; + snoncubeZ.fScale2 = fS2 * fS2; + snoncubeZ.fOutputScale = params.fOutputScale * params.fVolScaleZ; + // timeval t; // tic(t); @@ -474,21 +519,30 @@ bool Par3DFP_Array_internal(cudaPitchedPtr D_projData, if (blockDirection == 0) { for (unsigned int i = 0; i < dims.iVolX; i += g_blockSlices) if (params.iRaysPerDetDim == 1) - par3D_FP_t<<>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, params.fOutputScale); + if (cube) + par3D_FP_t<<>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, scube); + else + par3D_FP_t<<>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, snoncubeX); else - par3D_FP_SS_t<<>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, params.iRaysPerDetDim, params.fOutputScale); + par3D_FP_SS_t<<>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, params.iRaysPerDetDim, snoncubeX); } else if (blockDirection == 1) { for (unsigned int i = 0; i < dims.iVolY; i += g_blockSlices) if (params.iRaysPerDetDim == 1) - par3D_FP_t<<>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, params.fOutputScale); + if (cube) + par3D_FP_t<<>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, scube); + else + par3D_FP_t<<>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, snoncubeY); else - par3D_FP_SS_t<<>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, params.iRaysPerDetDim, params.fOutputScale); + par3D_FP_SS_t<<>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, params.iRaysPerDetDim, snoncubeY); } else if (blockDirection == 2) { for (unsigned int i = 0; i < dims.iVolZ; i += g_blockSlices) if (params.iRaysPerDetDim == 1) - par3D_FP_t<<>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, params.fOutputScale); + if (cube) + par3D_FP_t<<>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, scube); + else + par3D_FP_t<<>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, snoncubeZ); else - par3D_FP_SS_t<<>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, params.iRaysPerDetDim, params.fOutputScale); + par3D_FP_SS_t<<>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, params.iRaysPerDetDim, snoncubeZ); } } @@ -583,6 +637,28 @@ bool Par3DFP_SumSqW(cudaPitchedPtr D_volumeData, unsigned int blockEnd = 0; int blockDirection = 0; + SCALE_NONCUBE snoncubeX; + float fS1 = params.fVolScaleY / params.fVolScaleX; + snoncubeX.fScale1 = fS1 * fS1; + float fS2 = params.fVolScaleZ / params.fVolScaleX; + snoncubeX.fScale2 = fS2 * fS2; + snoncubeX.fOutputScale = params.fOutputScale * params.fVolScaleX; + + SCALE_NONCUBE snoncubeY; + fS1 = params.fVolScaleX / params.fVolScaleY; + snoncubeY.fScale1 = fS1 * fS1; + fS2 = params.fVolScaleY / params.fVolScaleY; + snoncubeY.fScale2 = fS2 * fS2; + snoncubeY.fOutputScale = params.fOutputScale * params.fVolScaleY; + + SCALE_NONCUBE snoncubeZ; + fS1 = params.fVolScaleX / params.fVolScaleZ; + snoncubeZ.fScale1 = fS1 * fS1; + fS2 = params.fVolScaleY / params.fVolScaleZ; + snoncubeZ.fScale2 = fS2 * fS2; + snoncubeZ.fOutputScale = params.fOutputScale * params.fVolScaleZ; + + // timeval t; // tic(t); @@ -621,7 +697,7 @@ bool Par3DFP_SumSqW(cudaPitchedPtr D_volumeData, if (blockDirection == 0) { for (unsigned int i = 0; i < dims.iVolX; i += g_blockSlices) if (params.iRaysPerDetDim == 1) - par3D_FP_SumSqW_t<<>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, params.fOutputScale); + par3D_FP_SumSqW_t<<>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, snoncubeX); else #if 0 par3D_FP_SS_SumSqW_dirX<<>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, fOutputScale); @@ -631,7 +707,7 @@ bool Par3DFP_SumSqW(cudaPitchedPtr D_volumeData, } else if (blockDirection == 1) { for (unsigned int i = 0; i < dims.iVolY; i += g_blockSlices) if (params.iRaysPerDetDim == 1) - par3D_FP_SumSqW_t<<>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, params.fOutputScale); + par3D_FP_SumSqW_t<<>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, snoncubeY); else #if 0 par3D_FP_SS_SumSqW_dirY<<>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, fOutputScale); @@ -641,7 +717,7 @@ bool Par3DFP_SumSqW(cudaPitchedPtr D_volumeData, } else if (blockDirection == 2) { for (unsigned int i = 0; i < dims.iVolZ; i += g_blockSlices) if (params.iRaysPerDetDim == 1) - par3D_FP_SumSqW_t<<>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, params.fOutputScale); + par3D_FP_SumSqW_t<<>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, snoncubeZ); else #if 0 par3D_FP_SS_SumSqW_dirZ<<>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, fOutputScale); -- cgit v1.2.3 From 5768229672bffc93b5158da4bef893f40fe76ea1 Mon Sep 17 00:00:00 2001 From: Willem Jan Palenstijn Date: Fri, 5 Feb 2016 17:17:17 +0100 Subject: Move detector-independent vars out of loop --- cuda/3d/par3d_fp.cu | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) (limited to 'cuda/3d') diff --git a/cuda/3d/par3d_fp.cu b/cuda/3d/par3d_fp.cu index c6c4fda..6f9ce40 100644 --- a/cuda/3d/par3d_fp.cu +++ b/cuda/3d/par3d_fp.cu @@ -174,6 +174,9 @@ __global__ void par3D_FP_t(float* D_projData, unsigned int projPitch, const float fDetSY = gC_DetSY[angle] + 0.5f * fDetUY + 0.5f * fDetVY; const float fDetSZ = gC_DetSZ[angle] + 0.5f * fDetUZ + 0.5f * fDetVZ; + const float a1 = c.c1(fRayX,fRayY,fRayZ) / c.c0(fRayX,fRayY,fRayZ); + const float a2 = c.c2(fRayX,fRayY,fRayZ) / c.c0(fRayX,fRayY,fRayZ); + const float fDistCorr = sc.scale(a1, a2); const int detectorU = (blockIdx.x%((dims.iProjU+g_detBlockU-1)/g_detBlockU)) * g_detBlockU + threadIdx.x; @@ -199,13 +202,9 @@ __global__ void par3D_FP_t(float* D_projData, unsigned int projPitch, /* ray: (y) = (ay) * x + (by) */ /* (z) (az) (bz) */ - const float a1 = c.c1(fRayX,fRayY,fRayZ) / c.c0(fRayX,fRayY,fRayZ); - const float a2 = c.c2(fRayX,fRayY,fRayZ) / c.c0(fRayX,fRayY,fRayZ); const float b1 = c.c1(fDetX,fDetY,fDetZ) - a1 * c.c0(fDetX,fDetY,fDetZ); const float b2 = c.c2(fDetX,fDetY,fDetZ) - a2 * c.c0(fDetX,fDetY,fDetZ); - const float fDistCorr = sc.scale(a1, a2); - float fVal = 0.0f; float f0 = startSlice + 0.5f; @@ -253,7 +252,9 @@ __global__ void par3D_FP_SS_t(float* D_projData, unsigned int projPitch, const float fDetSY = gC_DetSY[angle] + 0.5f * fDetUY + 0.5f * fDetVY; const float fDetSZ = gC_DetSZ[angle] + 0.5f * fDetUZ + 0.5f * fDetVZ; - + const float a1 = c.c1(fRayX,fRayY,fRayZ) / c.c0(fRayX,fRayY,fRayZ); + const float a2 = c.c2(fRayX,fRayY,fRayZ) / c.c0(fRayX,fRayY,fRayZ); + const float fDistCorr = sc.scale(a1, a2); const int detectorU = (blockIdx.x%((dims.iProjU+g_detBlockU-1)/g_detBlockU)) * g_detBlockU + threadIdx.x; const int startDetectorV = (blockIdx.x/((dims.iProjU+g_detBlockU-1)/g_detBlockU)) * g_detBlockV; @@ -288,12 +289,9 @@ __global__ void par3D_FP_SS_t(float* D_projData, unsigned int projPitch, /* ray: (y) = (ay) * x + (by) */ /* (z) (az) (bz) */ - const float a1 = c.c1(fRayX,fRayY,fRayZ) / c.c0(fRayX,fRayY,fRayZ); - const float a2 = c.c2(fRayX,fRayY,fRayZ) / c.c0(fRayX,fRayY,fRayZ); const float b1 = c.c1(fDetX,fDetY,fDetZ) - a1 * c.c0(fDetX,fDetY,fDetZ); const float b2 = c.c2(fDetX,fDetY,fDetZ) - a2 * c.c0(fDetX,fDetY,fDetZ); - const float fDistCorr = sc.scale(a1, a2); float fVal = 0.0f; @@ -309,12 +307,12 @@ __global__ void par3D_FP_SS_t(float* D_projData, unsigned int projPitch, f2 += a2; } - fVal *= fDistCorr; fV += fVal; } } + fV *= fDistCorr; D_projData[(detectorV*dims.iProjAngles+angle)*projPitch+detectorU] += fV / (iRaysPerDetDim * iRaysPerDetDim); } } @@ -360,6 +358,9 @@ __global__ void par3D_FP_SumSqW_t(float* D_projData, unsigned int projPitch, const float fDetSY = gC_DetSY[angle] + 0.5f * fDetUY + 0.5f * fDetVY; const float fDetSZ = gC_DetSZ[angle] + 0.5f * fDetUZ + 0.5f * fDetVZ; + const float a1 = c.c1(fRayX,fRayY,fRayZ) / c.c0(fRayX,fRayY,fRayZ); + const float a2 = c.c2(fRayX,fRayY,fRayZ) / c.c0(fRayX,fRayY,fRayZ); + const float fDistCorr = sc.scale(a1, a2); const int detectorU = (blockIdx.x%((dims.iProjU+g_detBlockU-1)/g_detBlockU)) * g_detBlockU + threadIdx.x; @@ -385,13 +386,9 @@ __global__ void par3D_FP_SumSqW_t(float* D_projData, unsigned int projPitch, /* ray: (y) = (ay) * x + (by) */ /* (z) (az) (bz) */ - const float a1 = c.c1(fRayX,fRayY,fRayZ) / c.c0(fRayX,fRayY,fRayZ); - const float a2 = c.c2(fRayX,fRayY,fRayZ) / c.c0(fRayX,fRayY,fRayZ); const float b1 = c.c1(fDetX,fDetY,fDetZ) - a1 * c.c0(fDetX,fDetY,fDetZ); const float b2 = c.c2(fDetX,fDetY,fDetZ) - a2 * c.c0(fDetX,fDetY,fDetZ); - const float fDistCorr = sc.scale(a1, a2); - float fVal = 0.0f; float f0 = startSlice + 0.5f; @@ -400,12 +397,13 @@ __global__ void par3D_FP_SumSqW_t(float* D_projData, unsigned int projPitch, for (int s = startSlice; s < endSlice; ++s) { - fVal += dirWeights(f1, c.nDim1(dims)) * dirWeights(f2, c.nDim2(dims)) * fDistCorr * fDistCorr; + fVal += dirWeights(f1, c.nDim1(dims)) * dirWeights(f2, c.nDim2(dims)); f0 += 1.0f; f1 += a1; f2 += a2; } + fVal *= fDistCorr * fDistCorr; D_projData[(detectorV*dims.iProjAngles+angle)*projPitch+detectorU] += fVal; } } -- cgit v1.2.3 From ce2f07278ada5c0fdf8a5ed3cb23350a782a6161 Mon Sep 17 00:00:00 2001 From: Willem Jan Palenstijn Date: Mon, 8 Feb 2016 13:45:43 +0100 Subject: Adapt 3D BP scaling to voxel size --- cuda/3d/cone_bp.cu | 6 ++++-- cuda/3d/par3d_bp.cu | 6 ++++-- 2 files changed, 8 insertions(+), 4 deletions(-) (limited to 'cuda/3d') diff --git a/cuda/3d/cone_bp.cu b/cuda/3d/cone_bp.cu index 8c35125..f077f0d 100644 --- a/cuda/3d/cone_bp.cu +++ b/cuda/3d/cone_bp.cu @@ -252,6 +252,8 @@ bool ConeBP_Array(cudaPitchedPtr D_volumeData, { bindProjDataTexture(D_projArray); + float fOutputScale = params.fOutputScale * params.fVolScaleX * params.fVolScaleY * params.fVolScaleZ; + for (unsigned int th = 0; th < dims.iProjAngles; th += g_MaxAngles) { unsigned int angleCount = g_MaxAngles; if (th + angleCount > dims.iProjAngles) @@ -296,9 +298,9 @@ bool ConeBP_Array(cudaPitchedPtr D_volumeData, for (unsigned int i = 0; i < angleCount; i += g_anglesPerBlock) { // printf("Calling BP: %d, %dx%d, %dx%d to %p\n", i, dimBlock.x, dimBlock.y, dimGrid.x, dimGrid.y, (void*)D_volumeData.ptr); if (params.iRaysPerVoxelDim == 1) - dev_cone_BP<<>>(D_volumeData.ptr, D_volumeData.pitch/sizeof(float), i, th, dims, params.fOutputScale); + dev_cone_BP<<>>(D_volumeData.ptr, D_volumeData.pitch/sizeof(float), i, th, dims, fOutputScale); else - dev_cone_BP_SS<<>>(D_volumeData.ptr, D_volumeData.pitch/sizeof(float), i, th, dims, params.iRaysPerVoxelDim, params.fOutputScale); + dev_cone_BP_SS<<>>(D_volumeData.ptr, D_volumeData.pitch/sizeof(float), i, th, dims, params.iRaysPerVoxelDim, fOutputScale); } cudaTextForceKernelsCompletion(); diff --git a/cuda/3d/par3d_bp.cu b/cuda/3d/par3d_bp.cu index 40f839d..491ee2f 100644 --- a/cuda/3d/par3d_bp.cu +++ b/cuda/3d/par3d_bp.cu @@ -232,6 +232,8 @@ bool Par3DBP_Array(cudaPitchedPtr D_volumeData, { bindProjDataTexture(D_projArray); + float fOutputScale = params.fOutputScale * params.fVolScaleX * params.fVolScaleY * params.fVolScaleZ; + for (unsigned int th = 0; th < dims.iProjAngles; th += g_MaxAngles) { unsigned int angleCount = g_MaxAngles; if (th + angleCount > dims.iProjAngles) @@ -275,9 +277,9 @@ bool Par3DBP_Array(cudaPitchedPtr D_volumeData, for (unsigned int i = 0; i < angleCount; i += g_anglesPerBlock) { // printf("Calling BP: %d, %dx%d, %dx%d to %p\n", i, dimBlock.x, dimBlock.y, dimGrid.x, dimGrid.y, (void*)D_volumeData.ptr); if (params.iRaysPerVoxelDim == 1) - dev_par3D_BP<<>>(D_volumeData.ptr, D_volumeData.pitch/sizeof(float), i, th, dims, params.fOutputScale); + dev_par3D_BP<<>>(D_volumeData.ptr, D_volumeData.pitch/sizeof(float), i, th, dims, fOutputScale); else - dev_par3D_BP_SS<<>>(D_volumeData.ptr, D_volumeData.pitch/sizeof(float), i, th, dims, params.iRaysPerVoxelDim, params.fOutputScale); + dev_par3D_BP_SS<<>>(D_volumeData.ptr, D_volumeData.pitch/sizeof(float), i, th, dims, params.iRaysPerVoxelDim, fOutputScale); } cudaTextForceKernelsCompletion(); -- cgit v1.2.3