summaryrefslogtreecommitdiffstats
path: root/cuda/3d
diff options
context:
space:
mode:
authorWillem Jan Palenstijn <Willem.Jan.Palenstijn@cwi.nl>2016-02-04 13:56:06 +0100
committerWillem Jan Palenstijn <Willem.Jan.Palenstijn@cwi.nl>2016-02-10 17:00:42 +0100
commit559d3e599b7306e2de64f2a584d72bc5c98b692b (patch)
tree84cce1a91b3896783361a7ebbbe6c05fd1889104 /cuda/3d
parent081355b609b11faf7f2d73414de9629e78cca2c5 (diff)
downloadastra-559d3e599b7306e2de64f2a584d72bc5c98b692b.tar.gz
astra-559d3e599b7306e2de64f2a584d72bc5c98b692b.tar.bz2
astra-559d3e599b7306e2de64f2a584d72bc5c98b692b.tar.xz
astra-559d3e599b7306e2de64f2a584d72bc5c98b692b.zip
Refactor CUDA projector params into struct
Diffstat (limited to 'cuda/3d')
-rw-r--r--cuda/3d/algo3d.cu21
-rw-r--r--cuda/3d/algo3d.h5
-rw-r--r--cuda/3d/astra3d.cu128
-rw-r--r--cuda/3d/astra3d.h11
-rw-r--r--cuda/3d/cgls3d.cu2
-rw-r--r--cuda/3d/cone_bp.cu30
-rw-r--r--cuda/3d/cone_bp.h4
-rw-r--r--cuda/3d/cone_fp.cu34
-rw-r--r--cuda/3d/cone_fp.h4
-rw-r--r--cuda/3d/dims3d.h18
-rw-r--r--cuda/3d/mem3d.cu27
-rw-r--r--cuda/3d/par3d_bp.cu30
-rw-r--r--cuda/3d/par3d_bp.h4
-rw-r--r--cuda/3d/par3d_fp.cu48
-rw-r--r--cuda/3d/par3d_fp.h6
-rw-r--r--cuda/3d/sirt3d.cu2
16 files changed, 185 insertions, 189 deletions
diff --git a/cuda/3d/algo3d.cu b/cuda/3d/algo3d.cu
index cc86b70..e2c1e43 100644
--- a/cuda/3d/algo3d.cu
+++ b/cuda/3d/algo3d.cu
@@ -41,7 +41,6 @@ ReconAlgo3D::ReconAlgo3D()
coneProjs = 0;
par3DProjs = 0;
shouldAbort = false;
- fOutputScale = 1.0f;
}
ReconAlgo3D::~ReconAlgo3D()
@@ -58,10 +57,10 @@ void ReconAlgo3D::reset()
shouldAbort = false;
}
-bool ReconAlgo3D::setConeGeometry(const SDimensions3D& _dims, const SConeProjection* _angles, float _outputScale)
+bool ReconAlgo3D::setConeGeometry(const SDimensions3D& _dims, const SConeProjection* _angles, const SProjectorParams3D& _params)
{
dims = _dims;
- fOutputScale = _outputScale;
+ params = _params;
coneProjs = new SConeProjection[dims.iProjAngles];
par3DProjs = 0;
@@ -71,10 +70,10 @@ bool ReconAlgo3D::setConeGeometry(const SDimensions3D& _dims, const SConeProject
return true;
}
-bool ReconAlgo3D::setPar3DGeometry(const SDimensions3D& _dims, const SPar3DProjection* _angles, float _outputScale)
+bool ReconAlgo3D::setPar3DGeometry(const SDimensions3D& _dims, const SPar3DProjection* _angles, const SProjectorParams3D& _params)
{
dims = _dims;
- fOutputScale = _outputScale;
+ params = _params;
par3DProjs = new SPar3DProjection[dims.iProjAngles];
coneProjs = 0;
@@ -89,10 +88,12 @@ bool ReconAlgo3D::callFP(cudaPitchedPtr& D_volumeData,
cudaPitchedPtr& D_projData,
float outputScale)
{
+ SProjectorParams3D p = params;
+ p.fOutputScale *= outputScale;
if (coneProjs) {
- return ConeFP(D_volumeData, D_projData, dims, coneProjs, outputScale * this->fOutputScale);
+ return ConeFP(D_volumeData, D_projData, dims, coneProjs, p);
} else {
- return Par3DFP(D_volumeData, D_projData, dims, par3DProjs, outputScale * this->fOutputScale);
+ return Par3DFP(D_volumeData, D_projData, dims, par3DProjs, p);
}
}
@@ -100,10 +101,12 @@ bool ReconAlgo3D::callBP(cudaPitchedPtr& D_volumeData,
cudaPitchedPtr& D_projData,
float outputScale)
{
+ SProjectorParams3D p = params;
+ p.fOutputScale *= outputScale;
if (coneProjs) {
- return ConeBP(D_volumeData, D_projData, dims, coneProjs, outputScale * this->fOutputScale);
+ return ConeBP(D_volumeData, D_projData, dims, coneProjs, p);
} else {
- return Par3DBP(D_volumeData, D_projData, dims, par3DProjs, outputScale * this->fOutputScale);
+ return Par3DBP(D_volumeData, D_projData, dims, par3DProjs, p);
}
}
diff --git a/cuda/3d/algo3d.h b/cuda/3d/algo3d.h
index 886b092..ce331ad 100644
--- a/cuda/3d/algo3d.h
+++ b/cuda/3d/algo3d.h
@@ -39,8 +39,8 @@ public:
ReconAlgo3D();
~ReconAlgo3D();
- bool setConeGeometry(const SDimensions3D& dims, const SConeProjection* projs, float fOutputScale);
- bool setPar3DGeometry(const SDimensions3D& dims, const SPar3DProjection* projs, float fOutputScale);
+ bool setConeGeometry(const SDimensions3D& dims, const SConeProjection* projs, const SProjectorParams3D& params);
+ bool setPar3DGeometry(const SDimensions3D& dims, const SPar3DProjection* projs, const SProjectorParams3D& params);
void signalAbort() { shouldAbort = true; }
@@ -55,6 +55,7 @@ protected:
float outputScale);
SDimensions3D dims;
+ SProjectorParams3D params;
SConeProjection* coneProjs;
SPar3DProjection* par3DProjs;
diff --git a/cuda/3d/astra3d.cu b/cuda/3d/astra3d.cu
index 8328229..454530e 100644
--- a/cuda/3d/astra3d.cu
+++ b/cuda/3d/astra3d.cu
@@ -67,7 +67,7 @@ template<typename ProjectionT>
static bool convertAstraGeometry_internal(const CVolumeGeometry3D* pVolGeom,
unsigned int iProjectionAngleCount,
ProjectionT*& pProjs,
- float& fOutputScale)
+ SProjectorParams3D& params)
{
assert(pVolGeom);
assert(pProjs);
@@ -94,7 +94,7 @@ static bool convertAstraGeometry_internal(const CVolumeGeometry3D* pVolGeom,
}
// CHECKME: Check factor
- fOutputScale *= pVolGeom->getPixelLengthX();
+ params.fOutputScale *= pVolGeom->getPixelLengthX();
return true;
}
@@ -108,10 +108,8 @@ bool convertAstraGeometry_dims(const CVolumeGeometry3D* pVolGeom,
dims.iVolY = pVolGeom->getGridRowCount();
dims.iVolZ = pVolGeom->getGridSliceCount();
dims.iProjAngles = pProjGeom->getProjectionCount();
- dims.iProjU = pProjGeom->getDetectorColCount(),
- dims.iProjV = pProjGeom->getDetectorRowCount(),
- dims.iRaysPerDetDim = 1;
- dims.iRaysPerVoxelDim = 1;
+ dims.iProjU = pProjGeom->getDetectorColCount();
+ dims.iProjV = pProjGeom->getDetectorRowCount();
if (dims.iVolX <= 0 || dims.iVolX <= 0 || dims.iVolX <= 0)
return false;
@@ -124,7 +122,7 @@ bool convertAstraGeometry_dims(const CVolumeGeometry3D* pVolGeom,
bool convertAstraGeometry(const CVolumeGeometry3D* pVolGeom,
const CParallelProjectionGeometry3D* pProjGeom,
- SPar3DProjection*& pProjs, float& fOutputScale)
+ SPar3DProjection*& pProjs, SProjectorParams3D& params)
{
assert(pVolGeom);
assert(pProjGeom);
@@ -141,16 +139,14 @@ bool convertAstraGeometry(const CVolumeGeometry3D* pVolGeom,
bool ok;
- fOutputScale = 1.0f;
-
- ok = convertAstraGeometry_internal(pVolGeom, nth, pProjs, fOutputScale);
+ ok = convertAstraGeometry_internal(pVolGeom, nth, pProjs, params);
return ok;
}
bool convertAstraGeometry(const CVolumeGeometry3D* pVolGeom,
const CParallelVecProjectionGeometry3D* pProjGeom,
- SPar3DProjection*& pProjs, float& fOutputScale)
+ SPar3DProjection*& pProjs, SProjectorParams3D& params)
{
assert(pVolGeom);
assert(pProjGeom);
@@ -164,16 +160,14 @@ bool convertAstraGeometry(const CVolumeGeometry3D* pVolGeom,
bool ok;
- fOutputScale = 1.0f;
-
- ok = convertAstraGeometry_internal(pVolGeom, nth, pProjs, fOutputScale);
+ ok = convertAstraGeometry_internal(pVolGeom, nth, pProjs, params);
return ok;
}
bool convertAstraGeometry(const CVolumeGeometry3D* pVolGeom,
const CConeProjectionGeometry3D* pProjGeom,
- SConeProjection*& pProjs, float& fOutputScale)
+ SConeProjection*& pProjs, SProjectorParams3D& params)
{
assert(pVolGeom);
assert(pProjGeom);
@@ -192,16 +186,14 @@ bool convertAstraGeometry(const CVolumeGeometry3D* pVolGeom,
bool ok;
- fOutputScale = 1.0f;
-
- ok = convertAstraGeometry_internal(pVolGeom, nth, pProjs, fOutputScale);
+ ok = convertAstraGeometry_internal(pVolGeom, nth, pProjs, params);
return ok;
}
bool convertAstraGeometry(const CVolumeGeometry3D* pVolGeom,
const CConeVecProjectionGeometry3D* pProjGeom,
- SConeProjection*& pProjs, float& fOutputScale)
+ SConeProjection*& pProjs, SProjectorParams3D& params)
{
assert(pVolGeom);
assert(pProjGeom);
@@ -215,9 +207,7 @@ bool convertAstraGeometry(const CVolumeGeometry3D* pVolGeom,
bool ok;
- fOutputScale = 1.0f;
-
- ok = convertAstraGeometry_internal(pVolGeom, nth, pProjs, fOutputScale);
+ ok = convertAstraGeometry_internal(pVolGeom, nth, pProjs, params);
return ok;
}
@@ -227,7 +217,7 @@ bool convertAstraGeometry(const CVolumeGeometry3D* pVolGeom,
const CProjectionGeometry3D* pProjGeom,
SPar3DProjection*& pParProjs,
SConeProjection*& pConeProjs,
- float& fOutputScale)
+ SProjectorParams3D& params)
{
const CConeProjectionGeometry3D* conegeom = dynamic_cast<const CConeProjectionGeometry3D*>(pProjGeom);
const CParallelProjectionGeometry3D* par3dgeom = dynamic_cast<const CParallelProjectionGeometry3D*>(pProjGeom);
@@ -240,13 +230,13 @@ bool convertAstraGeometry(const CVolumeGeometry3D* pVolGeom,
bool ok;
if (conegeom) {
- ok = convertAstraGeometry(pVolGeom, conegeom, pConeProjs, fOutputScale);
+ ok = convertAstraGeometry(pVolGeom, conegeom, pConeProjs, params);
} else if (conevec3dgeom) {
- ok = convertAstraGeometry(pVolGeom, conevec3dgeom, pConeProjs, fOutputScale);
+ ok = convertAstraGeometry(pVolGeom, conevec3dgeom, pConeProjs, params);
} else if (par3dgeom) {
- ok = convertAstraGeometry(pVolGeom, par3dgeom, pParProjs, fOutputScale);
+ ok = convertAstraGeometry(pVolGeom, par3dgeom, pParProjs, params);
} else if (parvec3dgeom) {
- ok = convertAstraGeometry(pVolGeom, parvec3dgeom, pParProjs, fOutputScale);
+ ok = convertAstraGeometry(pVolGeom, parvec3dgeom, pParProjs, params);
} else {
ok = false;
}
@@ -260,19 +250,16 @@ bool convertAstraGeometry(const CVolumeGeometry3D* pVolGeom,
class AstraSIRT3d_internal {
public:
SDimensions3D dims;
+ SProjectorParams3D params;
CUDAProjectionType3d projType;
float* angles;
float fOriginSourceDistance;
float fOriginDetectorDistance;
- float fSourceZ;
- float fDetSize;
SConeProjection* projs;
SPar3DProjection* parprojs;
- float fOutputScale;
-
bool initialized;
bool setStartReconstruction;
@@ -304,12 +291,9 @@ AstraSIRT3d::AstraSIRT3d()
pData->dims.iProjAngles = 0;
pData->dims.iProjU = 0;
pData->dims.iProjV = 0;
- pData->dims.iRaysPerDetDim = 1;
- pData->dims.iRaysPerVoxelDim = 1;
pData->projs = 0;
pData->parprojs = 0;
- pData->fOutputScale = 1.0f;
pData->initialized = false;
pData->setStartReconstruction = false;
@@ -358,7 +342,7 @@ bool AstraSIRT3d::setGeometry(const CVolumeGeometry3D* pVolGeom,
ok = convertAstraGeometry(pVolGeom, pProjGeom,
pData->parprojs, pData->projs,
- pData->fOutputScale);
+ pData->params);
if (!ok)
return false;
@@ -383,8 +367,8 @@ bool AstraSIRT3d::enableSuperSampling(unsigned int iVoxelSuperSampling,
if (iVoxelSuperSampling == 0 || iDetectorSuperSampling == 0)
return false;
- pData->dims.iRaysPerVoxelDim = iVoxelSuperSampling;
- pData->dims.iRaysPerDetDim = iDetectorSuperSampling;
+ pData->params.iRaysPerVoxelDim = iVoxelSuperSampling;
+ pData->params.iRaysPerDetDim = iDetectorSuperSampling;
return true;
}
@@ -436,9 +420,9 @@ bool AstraSIRT3d::init()
bool ok;
if (pData->projType == PROJ_PARALLEL) {
- ok = pData->sirt.setPar3DGeometry(pData->dims, pData->parprojs, pData->fOutputScale);
+ ok = pData->sirt.setPar3DGeometry(pData->dims, pData->parprojs, pData->params);
} else {
- ok = pData->sirt.setConeGeometry(pData->dims, pData->projs, pData->fOutputScale);
+ ok = pData->sirt.setConeGeometry(pData->dims, pData->projs, pData->params);
}
if (!ok)
@@ -639,19 +623,16 @@ float AstraSIRT3d::computeDiffNorm()
class AstraCGLS3d_internal {
public:
SDimensions3D dims;
+ SProjectorParams3D params;
CUDAProjectionType3d projType;
float* angles;
float fOriginSourceDistance;
float fOriginDetectorDistance;
- float fSourceZ;
- float fDetSize;
SConeProjection* projs;
SPar3DProjection* parprojs;
- float fOutputScale;
-
bool initialized;
bool setStartReconstruction;
@@ -683,12 +664,9 @@ AstraCGLS3d::AstraCGLS3d()
pData->dims.iProjAngles = 0;
pData->dims.iProjU = 0;
pData->dims.iProjV = 0;
- pData->dims.iRaysPerDetDim = 1;
- pData->dims.iRaysPerVoxelDim = 1;
pData->projs = 0;
pData->parprojs = 0;
- pData->fOutputScale = 1.0f;
pData->initialized = false;
pData->setStartReconstruction = false;
@@ -737,7 +715,7 @@ bool AstraCGLS3d::setGeometry(const CVolumeGeometry3D* pVolGeom,
ok = convertAstraGeometry(pVolGeom, pProjGeom,
pData->parprojs, pData->projs,
- pData->fOutputScale);
+ pData->params);
if (!ok)
return false;
@@ -761,8 +739,8 @@ bool AstraCGLS3d::enableSuperSampling(unsigned int iVoxelSuperSampling,
if (iVoxelSuperSampling == 0 || iDetectorSuperSampling == 0)
return false;
- pData->dims.iRaysPerVoxelDim = iVoxelSuperSampling;
- pData->dims.iRaysPerDetDim = iDetectorSuperSampling;
+ pData->params.iRaysPerVoxelDim = iVoxelSuperSampling;
+ pData->params.iRaysPerDetDim = iDetectorSuperSampling;
return true;
}
@@ -816,9 +794,9 @@ bool AstraCGLS3d::init()
bool ok;
if (pData->projType == PROJ_PARALLEL) {
- ok = pData->cgls.setPar3DGeometry(pData->dims, pData->parprojs, pData->fOutputScale);
+ ok = pData->cgls.setPar3DGeometry(pData->dims, pData->parprojs, pData->params);
} else {
- ok = pData->cgls.setConeGeometry(pData->dims, pData->projs, pData->fOutputScale);
+ ok = pData->cgls.setConeGeometry(pData->dims, pData->projs, pData->params);
}
if (!ok)
@@ -1026,23 +1004,23 @@ bool astraCudaFP(const float* pfVolume, float* pfProjections,
Cuda3DProjectionKernel projKernel)
{
SDimensions3D dims;
+ SProjectorParams3D params;
+
+ params.iRaysPerDetDim = iDetectorSuperSampling;
bool ok = convertAstraGeometry_dims(pVolGeom, pProjGeom, dims);
if (!ok)
return false;
- dims.iRaysPerDetDim = iDetectorSuperSampling;
if (iDetectorSuperSampling == 0)
return false;
SPar3DProjection* pParProjs;
SConeProjection* pConeProjs;
- float outputScale;
-
ok = convertAstraGeometry(pVolGeom, pProjGeom,
pParProjs, pConeProjs,
- outputScale);
+ params);
if (iGPUIndex != -1) {
@@ -1080,10 +1058,10 @@ bool astraCudaFP(const float* pfVolume, float* pfProjections,
if (pParProjs) {
switch (projKernel) {
case ker3d_default:
- ok &= Par3DFP(D_volumeData, D_projData, dims, pParProjs, outputScale);
+ ok &= Par3DFP(D_volumeData, D_projData, dims, pParProjs, params);
break;
case ker3d_sum_square_weights:
- ok &= Par3DFP_SumSqW(D_volumeData, D_projData, dims, pParProjs, outputScale*outputScale);
+ ok &= Par3DFP_SumSqW(D_volumeData, D_projData, dims, pParProjs, params);
break;
default:
assert(false);
@@ -1091,7 +1069,7 @@ bool astraCudaFP(const float* pfVolume, float* pfProjections,
} else {
switch (projKernel) {
case ker3d_default:
- ok &= ConeFP(D_volumeData, D_projData, dims, pConeProjs, outputScale);
+ ok &= ConeFP(D_volumeData, D_projData, dims, pConeProjs, params);
break;
default:
assert(false);
@@ -1116,21 +1094,20 @@ bool astraCudaBP(float* pfVolume, const float* pfProjections,
int iGPUIndex, int iVoxelSuperSampling)
{
SDimensions3D dims;
+ SProjectorParams3D params;
+
+ params.iRaysPerVoxelDim = iVoxelSuperSampling;
bool ok = convertAstraGeometry_dims(pVolGeom, pProjGeom, dims);
if (!ok)
return false;
- dims.iRaysPerVoxelDim = iVoxelSuperSampling;
-
SPar3DProjection* pParProjs;
SConeProjection* pConeProjs;
- float outputScale;
-
ok = convertAstraGeometry(pVolGeom, pProjGeom,
pParProjs, pConeProjs,
- outputScale);
+ params);
if (iGPUIndex != -1) {
cudaSetDevice(iGPUIndex);
@@ -1166,9 +1143,9 @@ bool astraCudaBP(float* pfVolume, const float* pfProjections,
}
if (pParProjs)
- ok &= Par3DBP(D_volumeData, D_projData, dims, pParProjs, outputScale);
+ ok &= Par3DBP(D_volumeData, D_projData, dims, pParProjs, params);
else
- ok &= ConeBP(D_volumeData, D_projData, dims, pConeProjs, outputScale);
+ ok &= ConeBP(D_volumeData, D_projData, dims, pConeProjs, params);
ok &= copyVolumeFromDevice(pfVolume, D_volumeData, dims, dims.iVolX);
@@ -1191,21 +1168,21 @@ bool astraCudaBP_SIRTWeighted(float* pfVolume,
int iGPUIndex, int iVoxelSuperSampling)
{
SDimensions3D dims;
+ SProjectorParams3D params;
+
+ params.iRaysPerVoxelDim = iVoxelSuperSampling;
bool ok = convertAstraGeometry_dims(pVolGeom, pProjGeom, dims);
if (!ok)
return false;
- dims.iRaysPerVoxelDim = iVoxelSuperSampling;
SPar3DProjection* pParProjs;
SConeProjection* pConeProjs;
- float outputScale;
-
ok = convertAstraGeometry(pVolGeom, pProjGeom,
pParProjs, pConeProjs,
- outputScale);
+ params);
if (iGPUIndex != -1) {
cudaSetDevice(iGPUIndex);
@@ -1242,9 +1219,9 @@ bool astraCudaBP_SIRTWeighted(float* pfVolume,
processSino3D<opSet>(D_projData, 1.0f, dims);
if (pParProjs)
- ok &= Par3DBP(D_pixelWeight, D_projData, dims, pParProjs, outputScale);
+ ok &= Par3DBP(D_pixelWeight, D_projData, dims, pParProjs, params);
else
- ok &= ConeBP(D_pixelWeight, D_projData, dims, pConeProjs, outputScale);
+ ok &= ConeBP(D_pixelWeight, D_projData, dims, pConeProjs, params);
processVol3D<opInvert>(D_pixelWeight, dims);
if (!ok) {
@@ -1259,9 +1236,9 @@ bool astraCudaBP_SIRTWeighted(float* pfVolume,
ok &= zeroVolumeData(D_volumeData, dims);
// Do BP into D_volumeData
if (pParProjs)
- ok &= Par3DBP(D_volumeData, D_projData, dims, pParProjs, outputScale);
+ ok &= Par3DBP(D_volumeData, D_projData, dims, pParProjs, params);
else
- ok &= ConeBP(D_volumeData, D_projData, dims, pConeProjs, outputScale);
+ ok &= ConeBP(D_volumeData, D_projData, dims, pConeProjs, params);
// Multiply with weights
processVol3D<opMul>(D_volumeData, D_pixelWeight, dims);
@@ -1301,6 +1278,9 @@ bool astraCudaFDK(float* pfVolume, const float* pfProjections,
int iGPUIndex, int iVoxelSuperSampling)
{
SDimensions3D dims;
+ SProjectorParams3D params;
+
+ params.iRaysPerVoxelDim = iVoxelSuperSampling;
bool ok = convertAstraGeometry_dims(pVolGeom, pProjGeom, dims);
@@ -1310,7 +1290,6 @@ bool astraCudaFDK(float* pfVolume, const float* pfProjections,
if (!ok)
return false;
- dims.iRaysPerVoxelDim = iVoxelSuperSampling;
if (iVoxelSuperSampling == 0)
return false;
@@ -1354,6 +1333,7 @@ bool astraCudaFDK(float* pfVolume, const float* pfProjections,
// TODO: Offer interface for SrcZ, DetZ
+ // TODO: Voxel scaling
ok &= FDK(D_volumeData, D_projData, fOriginSourceDistance,
fOriginDetectorDistance, 0, 0, fDetUSize, fDetVSize,
dims, pfAngles, bShortScan);
diff --git a/cuda/3d/astra3d.h b/cuda/3d/astra3d.h
index 2782994..1f1cee2 100644
--- a/cuda/3d/astra3d.h
+++ b/cuda/3d/astra3d.h
@@ -37,11 +37,6 @@ namespace astra {
// TODO: Switch to a class hierarchy as with the 2D algorithms
-enum Cuda3DProjectionKernel {
- ker3d_default = 0,
- ker3d_sum_square_weights
-};
-
class CProjectionGeometry3D;
class CParallelProjectionGeometry3D;
class CParallelVecProjectionGeometry3D;
@@ -50,6 +45,10 @@ class CConeVecProjectionGeometry3D;
class CVolumeGeometry3D;
class AstraSIRT3d_internal;
+using astraCUDA3d::Cuda3DProjectionKernel;
+using astraCUDA3d::ker3d_default;
+using astraCUDA3d::ker3d_sum_square_weights;
+
class _AstraExport AstraSIRT3d {
public:
@@ -289,7 +288,7 @@ bool convertAstraGeometry(const CVolumeGeometry3D* pVolGeom,
const CProjectionGeometry3D* pProjGeom,
SPar3DProjection*& pParProjs,
SConeProjection*& pConeProjs,
- float& fOutputScale);
+ astraCUDA3d::SProjectorParams3D& params);
_AstraExport bool astraCudaFP(const float* pfVolume, float* pfProjections,
const CVolumeGeometry3D* pVolGeom,
diff --git a/cuda/3d/cgls3d.cu b/cuda/3d/cgls3d.cu
index dd0e8a0..0b0331e 100644
--- a/cuda/3d/cgls3d.cu
+++ b/cuda/3d/cgls3d.cu
@@ -242,7 +242,7 @@ bool doCGLS(cudaPitchedPtr& D_volumeData,
CGLS cgls;
bool ok = true;
- ok &= cgls.setConeGeometry(dims, angles, 1.0f);
+ ok &= cgls.setConeGeometry(dims, angles, SProjectorParams3D());
if (D_maskData.ptr)
ok &= cgls.enableVolumeMask();
diff --git a/cuda/3d/cone_bp.cu b/cuda/3d/cone_bp.cu
index 4a41f6a..8c35125 100644
--- a/cuda/3d/cone_bp.cu
+++ b/cuda/3d/cone_bp.cu
@@ -154,7 +154,7 @@ __global__ void dev_cone_BP(void* D_volData, unsigned int volPitch, int startAng
// supersampling version
-__global__ void dev_cone_BP_SS(void* D_volData, unsigned int volPitch, int startAngle, int angleOffset, const SDimensions3D dims, float fOutputScale)
+__global__ void dev_cone_BP_SS(void* D_volData, unsigned int volPitch, int startAngle, int angleOffset, const SDimensions3D dims, int iRaysPerVoxelDim, float fOutputScale)
{
float* volData = (float*)D_volData;
@@ -185,12 +185,12 @@ __global__ void dev_cone_BP_SS(void* D_volData, unsigned int volPitch, int start
if (endZ > dims.iVolZ)
endZ = dims.iVolZ;
- float fX = X - 0.5f*dims.iVolX + 0.5f - 0.5f + 0.5f/dims.iRaysPerVoxelDim;
- float fY = Y - 0.5f*dims.iVolY + 0.5f - 0.5f + 0.5f/dims.iRaysPerVoxelDim;
- float fZ = startZ - 0.5f*dims.iVolZ + 0.5f - 0.5f + 0.5f/dims.iRaysPerVoxelDim;
- const float fSubStep = 1.0f/dims.iRaysPerVoxelDim;
+ float fX = X - 0.5f*dims.iVolX + 0.5f - 0.5f + 0.5f/iRaysPerVoxelDim;
+ float fY = Y - 0.5f*dims.iVolY + 0.5f - 0.5f + 0.5f/iRaysPerVoxelDim;
+ float fZ = startZ - 0.5f*dims.iVolZ + 0.5f - 0.5f + 0.5f/iRaysPerVoxelDim;
+ const float fSubStep = 1.0f/iRaysPerVoxelDim;
- fOutputScale /= (dims.iRaysPerVoxelDim*dims.iRaysPerVoxelDim*dims.iRaysPerVoxelDim);
+ fOutputScale /= (iRaysPerVoxelDim*iRaysPerVoxelDim*iRaysPerVoxelDim);
for (int Z = startZ; Z < endZ; ++Z, fZ += 1.0f)
@@ -216,11 +216,11 @@ __global__ void dev_cone_BP_SS(void* D_volData, unsigned int volPitch, int start
const float fCdc = gC_C[12*angle+11];
float fXs = fX;
- for (int iSubX = 0; iSubX < dims.iRaysPerVoxelDim; ++iSubX) {
+ for (int iSubX = 0; iSubX < iRaysPerVoxelDim; ++iSubX) {
float fYs = fY;
- for (int iSubY = 0; iSubY < dims.iRaysPerVoxelDim; ++iSubY) {
+ for (int iSubY = 0; iSubY < iRaysPerVoxelDim; ++iSubY) {
float fZs = fZ;
- for (int iSubZ = 0; iSubZ < dims.iRaysPerVoxelDim; ++iSubZ) {
+ for (int iSubZ = 0; iSubZ < iRaysPerVoxelDim; ++iSubZ) {
const float fUNum = fCuc + fXs * fCux + fYs * fCuy + fZs * fCuz;
const float fVNum = fCvc + fXs * fCvx + fYs * fCvy + fZs * fCvz;
@@ -248,7 +248,7 @@ __global__ void dev_cone_BP_SS(void* D_volData, unsigned int volPitch, int start
bool ConeBP_Array(cudaPitchedPtr D_volumeData,
cudaArray *D_projArray,
const SDimensions3D& dims, const SConeProjection* angles,
- float fOutputScale)
+ const SProjectorParams3D& params)
{
bindProjDataTexture(D_projArray);
@@ -295,10 +295,10 @@ bool ConeBP_Array(cudaPitchedPtr D_volumeData,
for (unsigned int i = 0; i < angleCount; i += g_anglesPerBlock) {
// printf("Calling BP: %d, %dx%d, %dx%d to %p\n", i, dimBlock.x, dimBlock.y, dimGrid.x, dimGrid.y, (void*)D_volumeData.ptr);
- if (dims.iRaysPerVoxelDim == 1)
- dev_cone_BP<<<dimGrid, dimBlock>>>(D_volumeData.ptr, D_volumeData.pitch/sizeof(float), i, th, dims, fOutputScale);
+ if (params.iRaysPerVoxelDim == 1)
+ dev_cone_BP<<<dimGrid, dimBlock>>>(D_volumeData.ptr, D_volumeData.pitch/sizeof(float), i, th, dims, params.fOutputScale);
else
- dev_cone_BP_SS<<<dimGrid, dimBlock>>>(D_volumeData.ptr, D_volumeData.pitch/sizeof(float), i, th, dims, fOutputScale);
+ dev_cone_BP_SS<<<dimGrid, dimBlock>>>(D_volumeData.ptr, D_volumeData.pitch/sizeof(float), i, th, dims, params.iRaysPerVoxelDim, params.fOutputScale);
}
cudaTextForceKernelsCompletion();
@@ -315,14 +315,14 @@ bool ConeBP_Array(cudaPitchedPtr D_volumeData,
bool ConeBP(cudaPitchedPtr D_volumeData,
cudaPitchedPtr D_projData,
const SDimensions3D& dims, const SConeProjection* angles,
- float fOutputScale)
+ const SProjectorParams3D& params)
{
// transfer projections to array
cudaArray* cuArray = allocateProjectionArray(dims);
transferProjectionsToArray(D_projData, cuArray, dims);
- bool ret = ConeBP_Array(D_volumeData, cuArray, dims, angles, fOutputScale);
+ bool ret = ConeBP_Array(D_volumeData, cuArray, dims, angles, params);
cudaFreeArray(cuArray);
diff --git a/cuda/3d/cone_bp.h b/cuda/3d/cone_bp.h
index 4d3d2dd..a6e2c23 100644
--- a/cuda/3d/cone_bp.h
+++ b/cuda/3d/cone_bp.h
@@ -34,12 +34,12 @@ namespace astraCUDA3d {
_AstraExport bool ConeBP_Array(cudaPitchedPtr D_volumeData,
cudaArray *D_projArray,
const SDimensions3D& dims, const SConeProjection* angles,
- float fOutputScale);
+ const SProjectorParams3D& params);
_AstraExport bool ConeBP(cudaPitchedPtr D_volumeData,
cudaPitchedPtr D_projData,
const SDimensions3D& dims, const SConeProjection* angles,
- float fOutputScale);
+ const SProjectorParams3D& params);
}
diff --git a/cuda/3d/cone_fp.cu b/cuda/3d/cone_fp.cu
index 13b184f..5a31b65 100644
--- a/cuda/3d/cone_fp.cu
+++ b/cuda/3d/cone_fp.cu
@@ -214,7 +214,7 @@ template<class COORD>
__global__ void cone_FP_SS_t(float* D_projData, unsigned int projPitch,
unsigned int startSlice,
unsigned int startAngle, unsigned int endAngle,
- const SDimensions3D dims, float fOutputScale)
+ const SDimensions3D dims, int iRaysPerDetDim, float fOutputScale)
{
COORD c;
@@ -245,7 +245,7 @@ __global__ void cone_FP_SS_t(float* D_projData, unsigned int projPitch,
if (endSlice > c.nSlices(dims))
endSlice = c.nSlices(dims);
- const float fSubStep = 1.0f/dims.iRaysPerDetDim;
+ const float fSubStep = 1.0f/iRaysPerDetDim;
for (int detectorV = startDetectorV; detectorV < endDetectorV; ++detectorV)
{
@@ -255,9 +255,9 @@ __global__ void cone_FP_SS_t(float* D_projData, unsigned int projPitch,
float fV = 0.0f;
float fdU = detectorU - 0.5f + 0.5f*fSubStep;
- for (int iSubU = 0; iSubU < dims.iRaysPerDetDim; ++iSubU, fdU+=fSubStep) {
+ for (int iSubU = 0; iSubU < iRaysPerDetDim; ++iSubU, fdU+=fSubStep) {
float fdV = detectorV - 0.5f + 0.5f*fSubStep;
- for (int iSubV = 0; iSubV < dims.iRaysPerDetDim; ++iSubV, fdV+=fSubStep) {
+ for (int iSubV = 0; iSubV < iRaysPerDetDim; ++iSubV, fdV+=fSubStep) {
const float fDetX = fDetSX + fdU*fDetUX + fdV*fDetVX;
const float fDetY = fDetSY + fdU*fDetUY + fdV*fDetVY;
@@ -294,14 +294,14 @@ __global__ void cone_FP_SS_t(float* D_projData, unsigned int projPitch,
}
}
- D_projData[(detectorV*dims.iProjAngles+angle)*projPitch+detectorU] += fV / (dims.iRaysPerDetDim * dims.iRaysPerDetDim);
+ D_projData[(detectorV*dims.iProjAngles+angle)*projPitch+detectorU] += fV / (iRaysPerDetDim * iRaysPerDetDim);
}
}
bool ConeFP_Array_internal(cudaPitchedPtr D_projData,
const SDimensions3D& dims, unsigned int angleCount, const SConeProjection* angles,
- float fOutputScale)
+ const SProjectorParams3D& params)
{
// transfer angles to constant memory
float* tmp = new float[angleCount];
@@ -373,22 +373,22 @@ bool ConeFP_Array_internal(cudaPitchedPtr D_projData,
if (blockDirection == 0) {
for (unsigned int i = 0; i < dims.iVolX; i += g_blockSlices)
- if (dims.iRaysPerDetDim == 1)
- cone_FP_t<DIR_X><<<dimGrid, dimBlock, 0, stream>>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, fOutputScale);
+ if (params.iRaysPerDetDim == 1)
+ cone_FP_t<DIR_X><<<dimGrid, dimBlock, 0, stream>>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, params.fOutputScale);
else
- cone_FP_SS_t<DIR_X><<<dimGrid, dimBlock, 0, stream>>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, fOutputScale);
+ cone_FP_SS_t<DIR_X><<<dimGrid, dimBlock, 0, stream>>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, params.iRaysPerDetDim, params.fOutputScale);
} else if (blockDirection == 1) {
for (unsigned int i = 0; i < dims.iVolY; i += g_blockSlices)
- if (dims.iRaysPerDetDim == 1)
- cone_FP_t<DIR_Y><<<dimGrid, dimBlock, 0, stream>>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, fOutputScale);
+ if (params.iRaysPerDetDim == 1)
+ cone_FP_t<DIR_Y><<<dimGrid, dimBlock, 0, stream>>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, params.fOutputScale);
else
- cone_FP_SS_t<DIR_Y><<<dimGrid, dimBlock, 0, stream>>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, fOutputScale);
+ cone_FP_SS_t<DIR_Y><<<dimGrid, dimBlock, 0, stream>>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, params.iRaysPerDetDim, params.fOutputScale);
} else if (blockDirection == 2) {
for (unsigned int i = 0; i < dims.iVolZ; i += g_blockSlices)
- if (dims.iRaysPerDetDim == 1)
- cone_FP_t<DIR_Z><<<dimGrid, dimBlock, 0, stream>>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, fOutputScale);
+ if (params.iRaysPerDetDim == 1)
+ cone_FP_t<DIR_Z><<<dimGrid, dimBlock, 0, stream>>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, params.fOutputScale);
else
- cone_FP_SS_t<DIR_Z><<<dimGrid, dimBlock, 0, stream>>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, fOutputScale);
+ cone_FP_SS_t<DIR_Z><<<dimGrid, dimBlock, 0, stream>>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, params.iRaysPerDetDim, params.fOutputScale);
}
}
@@ -414,7 +414,7 @@ bool ConeFP_Array_internal(cudaPitchedPtr D_projData,
bool ConeFP(cudaPitchedPtr D_volumeData,
cudaPitchedPtr D_projData,
const SDimensions3D& dims, const SConeProjection* angles,
- float fOutputScale)
+ const SProjectorParams3D& params)
{
// transfer volume to array
@@ -434,7 +434,7 @@ bool ConeFP(cudaPitchedPtr D_volumeData,
ret = ConeFP_Array_internal(D_subprojData,
dims, iEndAngle - iAngle, angles + iAngle,
- fOutputScale);
+ params);
if (!ret)
break;
}
diff --git a/cuda/3d/cone_fp.h b/cuda/3d/cone_fp.h
index 969a6d8..62c9caa 100644
--- a/cuda/3d/cone_fp.h
+++ b/cuda/3d/cone_fp.h
@@ -34,12 +34,12 @@ namespace astraCUDA3d {
_AstraExport bool ConeFP_Array(cudaArray *D_volArray,
cudaPitchedPtr D_projData,
const SDimensions3D& dims, const SConeProjection* angles,
- float fOutputScale);
+ const SProjectorParams3D& params);
_AstraExport bool ConeFP(cudaPitchedPtr D_volumeData,
cudaPitchedPtr D_projData,
const SDimensions3D& dims, const SConeProjection* angles,
- float fOutputScale);
+ const SProjectorParams3D& params);
}
diff --git a/cuda/3d/dims3d.h b/cuda/3d/dims3d.h
index 5437a85..569b395 100644
--- a/cuda/3d/dims3d.h
+++ b/cuda/3d/dims3d.h
@@ -37,6 +37,13 @@ namespace astraCUDA3d {
using astra::SConeProjection;
using astra::SPar3DProjection;
+
+enum Cuda3DProjectionKernel {
+ ker3d_default = 0,
+ ker3d_sum_square_weights
+};
+
+
struct SDimensions3D {
unsigned int iVolX;
unsigned int iVolY;
@@ -44,8 +51,19 @@ struct SDimensions3D {
unsigned int iProjAngles;
unsigned int iProjU; // number of detectors in the U direction
unsigned int iProjV; // number of detectors in the V direction
+};
+
+struct SProjectorParams3D {
+ SProjectorParams3D() :
+ iRaysPerDetDim(1), iRaysPerVoxelDim(1),
+ fOutputScale(1.0f),
+ ker(ker3d_default)
+ { }
+
unsigned int iRaysPerDetDim;
unsigned int iRaysPerVoxelDim;
+ float fOutputScale;
+ Cuda3DProjectionKernel ker;
};
}
diff --git a/cuda/3d/mem3d.cu b/cuda/3d/mem3d.cu
index 6d81dc0..d09c203 100644
--- a/cuda/3d/mem3d.cu
+++ b/cuda/3d/mem3d.cu
@@ -174,17 +174,17 @@ bool copyFromGPUMemory(float *dst, MemHandle3D src, const SSubDimensions3D &pos)
bool FP(const astra::CProjectionGeometry3D* pProjGeom, MemHandle3D projData, const astra::CVolumeGeometry3D* pVolGeom, MemHandle3D volData, int iDetectorSuperSampling, astra::Cuda3DProjectionKernel projKernel)
{
SDimensions3D dims;
+ SProjectorParams3D params;
bool ok = convertAstraGeometry_dims(pVolGeom, pProjGeom, dims);
if (!ok)
return false;
#if 1
- dims.iRaysPerDetDim = iDetectorSuperSampling;
+ params.iRaysPerDetDim = iDetectorSuperSampling;
if (iDetectorSuperSampling == 0)
return false;
#else
- dims.iRaysPerDetDim = 1;
astra::Cuda3DProjectionKernel projKernel = astra::ker3d_default;
#endif
@@ -192,11 +192,9 @@ bool FP(const astra::CProjectionGeometry3D* pProjGeom, MemHandle3D projData, con
SPar3DProjection* pParProjs;
SConeProjection* pConeProjs;
- float outputScale = 1.0f;
-
ok = convertAstraGeometry(pVolGeom, pProjGeom,
pParProjs, pConeProjs,
- outputScale);
+ params);
if (pParProjs) {
#if 0
@@ -211,10 +209,10 @@ bool FP(const astra::CProjectionGeometry3D* pProjGeom, MemHandle3D projData, con
switch (projKernel) {
case astra::ker3d_default:
- ok &= Par3DFP(volData.d->ptr, projData.d->ptr, dims, pParProjs, outputScale);
+ ok &= Par3DFP(volData.d->ptr, projData.d->ptr, dims, pParProjs, params);
break;
case astra::ker3d_sum_square_weights:
- ok &= Par3DFP_SumSqW(volData.d->ptr, projData.d->ptr, dims, pParProjs, outputScale*outputScale);
+ ok &= Par3DFP_SumSqW(volData.d->ptr, projData.d->ptr, dims, pParProjs, params);
break;
default:
ok = false;
@@ -222,7 +220,7 @@ bool FP(const astra::CProjectionGeometry3D* pProjGeom, MemHandle3D projData, con
} else {
switch (projKernel) {
case astra::ker3d_default:
- ok &= ConeFP(volData.d->ptr, projData.d->ptr, dims, pConeProjs, outputScale);
+ ok &= ConeFP(volData.d->ptr, projData.d->ptr, dims, pConeProjs, params);
break;
default:
ok = false;
@@ -235,30 +233,27 @@ bool FP(const astra::CProjectionGeometry3D* pProjGeom, MemHandle3D projData, con
bool BP(const astra::CProjectionGeometry3D* pProjGeom, MemHandle3D projData, const astra::CVolumeGeometry3D* pVolGeom, MemHandle3D volData, int iVoxelSuperSampling)
{
SDimensions3D dims;
+ SProjectorParams3D params;
bool ok = convertAstraGeometry_dims(pVolGeom, pProjGeom, dims);
if (!ok)
return false;
#if 1
- dims.iRaysPerVoxelDim = iVoxelSuperSampling;
-#else
- dims.iRaysPerVoxelDim = 1;
+ params.iRaysPerVoxelDim = iVoxelSuperSampling;
#endif
SPar3DProjection* pParProjs;
SConeProjection* pConeProjs;
- float outputScale = 1.0f;
-
ok = convertAstraGeometry(pVolGeom, pProjGeom,
pParProjs, pConeProjs,
- outputScale);
+ params);
if (pParProjs)
- ok &= Par3DBP(volData.d->ptr, projData.d->ptr, dims, pParProjs, outputScale);
+ ok &= Par3DBP(volData.d->ptr, projData.d->ptr, dims, pParProjs, params);
else
- ok &= ConeBP(volData.d->ptr, projData.d->ptr, dims, pConeProjs, outputScale);
+ ok &= ConeBP(volData.d->ptr, projData.d->ptr, dims, pConeProjs, params);
return ok;
diff --git a/cuda/3d/par3d_bp.cu b/cuda/3d/par3d_bp.cu
index cafab46..40f839d 100644
--- a/cuda/3d/par3d_bp.cu
+++ b/cuda/3d/par3d_bp.cu
@@ -143,7 +143,7 @@ __global__ void dev_par3D_BP(void* D_volData, unsigned int volPitch, int startAn
}
// supersampling version
-__global__ void dev_par3D_BP_SS(void* D_volData, unsigned int volPitch, int startAngle, int angleOffset, const SDimensions3D dims, float fOutputScale)
+__global__ void dev_par3D_BP_SS(void* D_volData, unsigned int volPitch, int startAngle, int angleOffset, const SDimensions3D dims, int iRaysPerVoxelDim, float fOutputScale)
{
float* volData = (float*)D_volData;
@@ -174,13 +174,13 @@ __global__ void dev_par3D_BP_SS(void* D_volData, unsigned int volPitch, int star
if (endZ > dims.iVolZ)
endZ = dims.iVolZ;
- float fX = X - 0.5f*dims.iVolX + 0.5f - 0.5f + 0.5f/dims.iRaysPerVoxelDim;
- float fY = Y - 0.5f*dims.iVolY + 0.5f - 0.5f + 0.5f/dims.iRaysPerVoxelDim;
- float fZ = startZ - 0.5f*dims.iVolZ + 0.5f - 0.5f + 0.5f/dims.iRaysPerVoxelDim;
+ float fX = X - 0.5f*dims.iVolX + 0.5f - 0.5f + 0.5f/iRaysPerVoxelDim;
+ float fY = Y - 0.5f*dims.iVolY + 0.5f - 0.5f + 0.5f/iRaysPerVoxelDim;
+ float fZ = startZ - 0.5f*dims.iVolZ + 0.5f - 0.5f + 0.5f/iRaysPerVoxelDim;
- const float fSubStep = 1.0f/dims.iRaysPerVoxelDim;
+ const float fSubStep = 1.0f/iRaysPerVoxelDim;
- fOutputScale /= (dims.iRaysPerVoxelDim*dims.iRaysPerVoxelDim*dims.iRaysPerVoxelDim);
+ fOutputScale /= (iRaysPerVoxelDim*iRaysPerVoxelDim*iRaysPerVoxelDim);
for (int Z = startZ; Z < endZ; ++Z, fZ += 1.0f)
@@ -201,11 +201,11 @@ __global__ void dev_par3D_BP_SS(void* D_volData, unsigned int volPitch, int star
const float fCvc = gC_C[8*angle+7];
float fXs = fX;
- for (int iSubX = 0; iSubX < dims.iRaysPerVoxelDim; ++iSubX) {
+ for (int iSubX = 0; iSubX < iRaysPerVoxelDim; ++iSubX) {
float fYs = fY;
- for (int iSubY = 0; iSubY < dims.iRaysPerVoxelDim; ++iSubY) {
+ for (int iSubY = 0; iSubY < iRaysPerVoxelDim; ++iSubY) {
float fZs = fZ;
- for (int iSubZ = 0; iSubZ < dims.iRaysPerVoxelDim; ++iSubZ) {
+ for (int iSubZ = 0; iSubZ < iRaysPerVoxelDim; ++iSubZ) {
const float fU = fCuc + fXs * fCux + fYs * fCuy + fZs * fCuz;
const float fV = fCvc + fXs * fCvx + fYs * fCvy + fZs * fCvz;
@@ -228,7 +228,7 @@ __global__ void dev_par3D_BP_SS(void* D_volData, unsigned int volPitch, int star
bool Par3DBP_Array(cudaPitchedPtr D_volumeData,
cudaArray *D_projArray,
const SDimensions3D& dims, const SPar3DProjection* angles,
- float fOutputScale)
+ const SProjectorParams3D& params)
{
bindProjDataTexture(D_projArray);
@@ -274,10 +274,10 @@ bool Par3DBP_Array(cudaPitchedPtr D_volumeData,
for (unsigned int i = 0; i < angleCount; i += g_anglesPerBlock) {
// printf("Calling BP: %d, %dx%d, %dx%d to %p\n", i, dimBlock.x, dimBlock.y, dimGrid.x, dimGrid.y, (void*)D_volumeData.ptr);
- if (dims.iRaysPerVoxelDim == 1)
- dev_par3D_BP<<<dimGrid, dimBlock>>>(D_volumeData.ptr, D_volumeData.pitch/sizeof(float), i, th, dims, fOutputScale);
+ if (params.iRaysPerVoxelDim == 1)
+ dev_par3D_BP<<<dimGrid, dimBlock>>>(D_volumeData.ptr, D_volumeData.pitch/sizeof(float), i, th, dims, params.fOutputScale);
else
- dev_par3D_BP_SS<<<dimGrid, dimBlock>>>(D_volumeData.ptr, D_volumeData.pitch/sizeof(float), i, th, dims, fOutputScale);
+ dev_par3D_BP_SS<<<dimGrid, dimBlock>>>(D_volumeData.ptr, D_volumeData.pitch/sizeof(float), i, th, dims, params.iRaysPerVoxelDim, params.fOutputScale);
}
cudaTextForceKernelsCompletion();
@@ -293,14 +293,14 @@ bool Par3DBP_Array(cudaPitchedPtr D_volumeData,
bool Par3DBP(cudaPitchedPtr D_volumeData,
cudaPitchedPtr D_projData,
const SDimensions3D& dims, const SPar3DProjection* angles,
- float fOutputScale)
+ const SProjectorParams3D& params)
{
// transfer projections to array
cudaArray* cuArray = allocateProjectionArray(dims);
transferProjectionsToArray(D_projData, cuArray, dims);
- bool ret = Par3DBP_Array(D_volumeData, cuArray, dims, angles, fOutputScale);
+ bool ret = Par3DBP_Array(D_volumeData, cuArray, dims, angles, params);
cudaFreeArray(cuArray);
diff --git a/cuda/3d/par3d_bp.h b/cuda/3d/par3d_bp.h
index f1fc62d..3efad19 100644
--- a/cuda/3d/par3d_bp.h
+++ b/cuda/3d/par3d_bp.h
@@ -34,12 +34,12 @@ namespace astraCUDA3d {
_AstraExport bool Par3DBP_Array(cudaPitchedPtr D_volumeData,
cudaArray *D_projArray,
const SDimensions3D& dims, const SPar3DProjection* angles,
- float fOutputScale);
+ const SProjectorParams3D& params);
_AstraExport bool Par3DBP(cudaPitchedPtr D_volumeData,
cudaPitchedPtr D_projData,
const SDimensions3D& dims, const SPar3DProjection* angles,
- float fOutputScale);
+ const SProjectorParams3D& params);
}
diff --git a/cuda/3d/par3d_fp.cu b/cuda/3d/par3d_fp.cu
index 3ce3d42..fde9c2c 100644
--- a/cuda/3d/par3d_fp.cu
+++ b/cuda/3d/par3d_fp.cu
@@ -218,7 +218,7 @@ template<class COORD>
__global__ void par3D_FP_SS_t(float* D_projData, unsigned int projPitch,
unsigned int startSlice,
unsigned int startAngle, unsigned int endAngle,
- const SDimensions3D dims, float fOutputScale)
+ const SDimensions3D dims, int iRaysPerDetDim, float fOutputScale)
{
COORD c;
@@ -251,7 +251,7 @@ __global__ void par3D_FP_SS_t(float* D_projData, unsigned int projPitch,
if (endSlice > c.nSlices(dims))
endSlice = c.nSlices(dims);
- const float fSubStep = 1.0f/dims.iRaysPerDetDim;
+ const float fSubStep = 1.0f/iRaysPerDetDim;
for (int detectorV = startDetectorV; detectorV < endDetectorV; ++detectorV)
{
@@ -259,9 +259,9 @@ __global__ void par3D_FP_SS_t(float* D_projData, unsigned int projPitch,
float fV = 0.0f;
float fdU = detectorU - 0.5f + 0.5f*fSubStep;
- for (int iSubU = 0; iSubU < dims.iRaysPerDetDim; ++iSubU, fdU+=fSubStep) {
+ for (int iSubU = 0; iSubU < iRaysPerDetDim; ++iSubU, fdU+=fSubStep) {
float fdV = detectorV - 0.5f + 0.5f*fSubStep;
- for (int iSubV = 0; iSubV < dims.iRaysPerDetDim; ++iSubV, fdV+=fSubStep) {
+ for (int iSubV = 0; iSubV < iRaysPerDetDim; ++iSubV, fdV+=fSubStep) {
/* Trace ray in direction Ray to (detectorU,detectorV) from */
/* X = startSlice to X = endSlice */
@@ -301,7 +301,7 @@ __global__ void par3D_FP_SS_t(float* D_projData, unsigned int projPitch,
}
}
- D_projData[(detectorV*dims.iProjAngles+angle)*projPitch+detectorU] += fV / (dims.iRaysPerDetDim * dims.iRaysPerDetDim);
+ D_projData[(detectorV*dims.iProjAngles+angle)*projPitch+detectorU] += fV / (iRaysPerDetDim * iRaysPerDetDim);
}
}
@@ -401,7 +401,7 @@ __global__ void par3D_FP_SumSqW_t(float* D_projData, unsigned int projPitch,
bool Par3DFP_Array_internal(cudaPitchedPtr D_projData,
const SDimensions3D& dims, unsigned int angleCount, const SPar3DProjection* angles,
- float fOutputScale)
+ const SProjectorParams3D& params)
{
// transfer angles to constant memory
float* tmp = new float[dims.iProjAngles];
@@ -473,22 +473,22 @@ bool Par3DFP_Array_internal(cudaPitchedPtr D_projData,
if (blockDirection == 0) {
for (unsigned int i = 0; i < dims.iVolX; i += g_blockSlices)
- if (dims.iRaysPerDetDim == 1)
- par3D_FP_t<DIR_X><<<dimGrid, dimBlock, 0, stream>>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, fOutputScale);
+ if (params.iRaysPerDetDim == 1)
+ par3D_FP_t<DIR_X><<<dimGrid, dimBlock, 0, stream>>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, params.fOutputScale);
else
- par3D_FP_SS_t<DIR_X><<<dimGrid, dimBlock, 0, stream>>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, fOutputScale);
+ par3D_FP_SS_t<DIR_X><<<dimGrid, dimBlock, 0, stream>>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, params.iRaysPerDetDim, params.fOutputScale);
} else if (blockDirection == 1) {
for (unsigned int i = 0; i < dims.iVolY; i += g_blockSlices)
- if (dims.iRaysPerDetDim == 1)
- par3D_FP_t<DIR_Y><<<dimGrid, dimBlock, 0, stream>>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, fOutputScale);
+ if (params.iRaysPerDetDim == 1)
+ par3D_FP_t<DIR_Y><<<dimGrid, dimBlock, 0, stream>>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, params.fOutputScale);
else
- par3D_FP_SS_t<DIR_Y><<<dimGrid, dimBlock, 0, stream>>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, fOutputScale);
+ par3D_FP_SS_t<DIR_Y><<<dimGrid, dimBlock, 0, stream>>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, params.iRaysPerDetDim, params.fOutputScale);
} else if (blockDirection == 2) {
for (unsigned int i = 0; i < dims.iVolZ; i += g_blockSlices)
- if (dims.iRaysPerDetDim == 1)
- par3D_FP_t<DIR_Z><<<dimGrid, dimBlock, 0, stream>>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, fOutputScale);
+ if (params.iRaysPerDetDim == 1)
+ par3D_FP_t<DIR_Z><<<dimGrid, dimBlock, 0, stream>>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, params.fOutputScale);
else
- par3D_FP_SS_t<DIR_Z><<<dimGrid, dimBlock, 0, stream>>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, fOutputScale);
+ par3D_FP_SS_t<DIR_Z><<<dimGrid, dimBlock, 0, stream>>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, params.iRaysPerDetDim, params.fOutputScale);
}
}
@@ -514,7 +514,7 @@ bool Par3DFP_Array_internal(cudaPitchedPtr D_projData,
bool Par3DFP(cudaPitchedPtr D_volumeData,
cudaPitchedPtr D_projData,
const SDimensions3D& dims, const SPar3DProjection* angles,
- float fOutputScale)
+ const SProjectorParams3D& params)
{
// transfer volume to array
cudaArray* cuArray = allocateVolumeArray(dims);
@@ -533,7 +533,7 @@ bool Par3DFP(cudaPitchedPtr D_volumeData,
ret = Par3DFP_Array_internal(D_subprojData,
dims, iEndAngle - iAngle, angles + iAngle,
- fOutputScale);
+ params);
if (!ret)
break;
}
@@ -548,7 +548,7 @@ bool Par3DFP(cudaPitchedPtr D_volumeData,
bool Par3DFP_SumSqW(cudaPitchedPtr D_volumeData,
cudaPitchedPtr D_projData,
const SDimensions3D& dims, const SPar3DProjection* angles,
- float fOutputScale)
+ const SProjectorParams3D& params)
{
// transfer angles to constant memory
float* tmp = new float[dims.iProjAngles];
@@ -620,8 +620,8 @@ bool Par3DFP_SumSqW(cudaPitchedPtr D_volumeData,
if (blockDirection == 0) {
for (unsigned int i = 0; i < dims.iVolX; i += g_blockSlices)
- if (dims.iRaysPerDetDim == 1)
- par3D_FP_SumSqW_t<DIR_X><<<dimGrid, dimBlock, 0, stream>>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, fOutputScale);
+ if (params.iRaysPerDetDim == 1)
+ par3D_FP_SumSqW_t<DIR_X><<<dimGrid, dimBlock, 0, stream>>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, params.fOutputScale);
else
#if 0
par3D_FP_SS_SumSqW_dirX<<<dimGrid, dimBlock, 0, stream>>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, fOutputScale);
@@ -630,8 +630,8 @@ bool Par3DFP_SumSqW(cudaPitchedPtr D_volumeData,
#endif
} else if (blockDirection == 1) {
for (unsigned int i = 0; i < dims.iVolY; i += g_blockSlices)
- if (dims.iRaysPerDetDim == 1)
- par3D_FP_SumSqW_t<DIR_Y><<<dimGrid, dimBlock, 0, stream>>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, fOutputScale);
+ if (params.iRaysPerDetDim == 1)
+ par3D_FP_SumSqW_t<DIR_Y><<<dimGrid, dimBlock, 0, stream>>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, params.fOutputScale);
else
#if 0
par3D_FP_SS_SumSqW_dirY<<<dimGrid, dimBlock, 0, stream>>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, fOutputScale);
@@ -640,8 +640,8 @@ bool Par3DFP_SumSqW(cudaPitchedPtr D_volumeData,
#endif
} else if (blockDirection == 2) {
for (unsigned int i = 0; i < dims.iVolZ; i += g_blockSlices)
- if (dims.iRaysPerDetDim == 1)
- par3D_FP_SumSqW_t<DIR_Z><<<dimGrid, dimBlock, 0, stream>>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, fOutputScale);
+ if (params.iRaysPerDetDim == 1)
+ par3D_FP_SumSqW_t<DIR_Z><<<dimGrid, dimBlock, 0, stream>>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, params.fOutputScale);
else
#if 0
par3D_FP_SS_SumSqW_dirZ<<<dimGrid, dimBlock, 0, stream>>>((float*)D_projData.ptr, D_projData.pitch/sizeof(float), i, blockStart, blockEnd, dims, fOutputScale);
diff --git a/cuda/3d/par3d_fp.h b/cuda/3d/par3d_fp.h
index 41f204f..5f65cd9 100644
--- a/cuda/3d/par3d_fp.h
+++ b/cuda/3d/par3d_fp.h
@@ -34,17 +34,17 @@ namespace astraCUDA3d {
_AstraExport bool Par3DFP_Array(cudaArray *D_volArray,
cudaPitchedPtr D_projData,
const SDimensions3D& dims, const SPar3DProjection* angles,
- float fOutputScale);
+ const SProjectorParams3D& params);
_AstraExport bool Par3DFP(cudaPitchedPtr D_volumeData,
cudaPitchedPtr D_projData,
const SDimensions3D& dims, const SPar3DProjection* angles,
- float fOutputScale);
+ const SProjectorParams3D& params);
_AstraExport bool Par3DFP_SumSqW(cudaPitchedPtr D_volumeData,
cudaPitchedPtr D_projData,
const SDimensions3D& dims, const SPar3DProjection* angles,
- float fOutputScale);
+ const SProjectorParams3D& params);
}
diff --git a/cuda/3d/sirt3d.cu b/cuda/3d/sirt3d.cu
index 484521e..4cb35c3 100644
--- a/cuda/3d/sirt3d.cu
+++ b/cuda/3d/sirt3d.cu
@@ -347,7 +347,7 @@ bool doSIRT(cudaPitchedPtr& D_volumeData,
SIRT sirt;
bool ok = true;
- ok &= sirt.setConeGeometry(dims, angles, 1.0f);
+ ok &= sirt.setConeGeometry(dims, angles, SProjectorParams3D());
if (D_maskData.ptr)
ok &= sirt.enableVolumeMask();