diff options
-rw-r--r-- | cuda/2d/astra.cu | 2 | ||||
-rw-r--r-- | cuda/2d/fft.cu | 45 | ||||
-rw-r--r-- | cuda/3d/astra3d.cu | 80 | ||||
-rw-r--r-- | cuda/3d/astra3d.h | 7 | ||||
-rw-r--r-- | cuda/3d/fdk.cu | 250 | ||||
-rw-r--r-- | cuda/3d/fdk.h | 8 | ||||
-rw-r--r-- | cuda/3d/mem3d.cu | 28 | ||||
-rw-r--r-- | cuda/3d/mem3d.h | 1 | ||||
-rw-r--r-- | include/astra/CompositeGeometryManager.h | 9 | ||||
-rw-r--r-- | src/CompositeGeometryManager.cpp | 50 | ||||
-rw-r--r-- | src/CudaFDKAlgorithm3D.cpp | 9 |
11 files changed, 240 insertions, 249 deletions
diff --git a/cuda/2d/astra.cu b/cuda/2d/astra.cu index 7317d69..b56ddf9 100644 --- a/cuda/2d/astra.cu +++ b/cuda/2d/astra.cu @@ -343,7 +343,7 @@ bool AstraFBP::run() dims3d.iProjV = 1; astraCUDA3d::FDK_PreWeight(tmp, pData->fOriginSourceDistance, - pData->fOriginDetectorDistance, 0.0f, 0.0f, + pData->fOriginDetectorDistance, 0.0f, pData->dims.fDetScale, 1.0f, // TODO: Are these correct? pData->bShortScan, dims3d, pData->angles); } diff --git a/cuda/2d/fft.cu b/cuda/2d/fft.cu index 2bfd493..a84b2cc 100644 --- a/cuda/2d/fft.cu +++ b/cuda/2d/fft.cu @@ -35,6 +35,7 @@ $Id$ #include <fstream> #include "../../include/astra/Logging.h" +#include "../../include/astra/Fourier.h" using namespace astra; @@ -303,6 +304,8 @@ void genFilter(E_FBPFILTER _eFilter, float _fD, int _iProjectionCount, float * pfFilt = new float[_iFFTFourierDetectorCount]; float * pfW = new float[_iFFTFourierDetectorCount]; +#if 1 + for(int iDetectorIndex = 0; iDetectorIndex < _iFFTFourierDetectorCount; iDetectorIndex++) { float fRelIndex = (float)iDetectorIndex / (float)_iFFTRealDetectorCount; @@ -314,6 +317,48 @@ void genFilter(E_FBPFILTER _eFilter, float _fD, int _iProjectionCount, // w = 2*pi*(0:size(filt,2)-1)/order pfW[iDetectorIndex] = 3.1415f * 2.0f * fRelIndex; } +#else + + float *pfRealIn = new float[_iFFTRealDetectorCount]; + float *pfImagIn = new float[_iFFTRealDetectorCount]; + float *pfRealOut = new float[_iFFTRealDetectorCount]; + float *pfImagOut = new float[_iFFTRealDetectorCount]; + + for (int i = 0; i < _iFFTRealDetectorCount; ++i) { + pfImagIn[i] = 0.0f; + pfRealOut[i] = 0.0f; + pfImagOut[i] = 0.0f; + + if (i & 1) { + int j = i; + if (2*j > _iFFTRealDetectorCount) + j = _iFFTRealDetectorCount - j; + float f = M_PI * j; + pfRealIn[i] = -1 / (f*f); + } else { + pfRealIn[i] = 0.0f; + } + } + + pfRealIn[0] = 0.25f; + + fastTwoPowerFourierTransform1D(_iFFTRealDetectorCount, pfRealIn, pfImagIn, + pfRealOut, pfImagOut, 1, 1, false); + + for(int iDetectorIndex = 0; iDetectorIndex < _iFFTFourierDetectorCount; iDetectorIndex++) + { + float fRelIndex = (float)iDetectorIndex / (float)_iFFTRealDetectorCount; + + pfFilt[iDetectorIndex] = 2.0f * pfRealOut[iDetectorIndex]; + pfW[iDetectorIndex] = M_PI * 2.0f * fRelIndex; + } + + delete[] pfRealIn; + delete[] pfImagIn; + delete[] pfRealOut; + delete[] pfImagOut; + +#endif switch(_eFilter) { diff --git a/cuda/3d/astra3d.cu b/cuda/3d/astra3d.cu index dd6d551..91f4530 100644 --- a/cuda/3d/astra3d.cu +++ b/cuda/3d/astra3d.cu @@ -1291,84 +1291,4 @@ bool astraCudaBP_SIRTWeighted(float* pfVolume, -bool astraCudaFDK(float* pfVolume, const float* pfProjections, - const CVolumeGeometry3D* pVolGeom, - const CConeProjectionGeometry3D* pProjGeom, - bool bShortScan, - int iGPUIndex, int iVoxelSuperSampling) -{ - SDimensions3D dims; - SProjectorParams3D params; - - params.iRaysPerVoxelDim = iVoxelSuperSampling; - - bool ok = convertAstraGeometry_dims(pVolGeom, pProjGeom, dims); - - // TODO: Check that pVolGeom is normalized, since we don't support - // other volume geometries yet - - if (!ok) - return false; - - - if (iVoxelSuperSampling == 0) - return false; - - if (iGPUIndex != -1) { - cudaSetDevice(iGPUIndex); - cudaError_t err = cudaGetLastError(); - - // Ignore errors caused by calling cudaSetDevice multiple times - if (err != cudaSuccess && err != cudaErrorSetOnActiveProcess) - return false; - } - - cudaPitchedPtr D_volumeData = allocateVolumeData(dims); - ok = D_volumeData.ptr; - if (!ok) - return false; - - cudaPitchedPtr D_projData = allocateProjectionData(dims); - ok = D_projData.ptr; - if (!ok) { - cudaFree(D_volumeData.ptr); - return false; - } - - ok &= copyProjectionsToDevice(pfProjections, D_projData, dims, dims.iProjU); - - ok &= zeroVolumeData(D_volumeData, dims); - - if (!ok) { - cudaFree(D_volumeData.ptr); - cudaFree(D_projData.ptr); - return false; - } - - float fOriginSourceDistance = pProjGeom->getOriginSourceDistance(); - float fOriginDetectorDistance = pProjGeom->getOriginDetectorDistance(); - float fDetUSize = pProjGeom->getDetectorSpacingX(); - float fDetVSize = pProjGeom->getDetectorSpacingY(); - const float *pfAngles = pProjGeom->getProjectionAngles(); - - - // TODO: Offer interface for SrcZ, DetZ - // TODO: Voxel scaling - ok &= FDK(D_volumeData, D_projData, fOriginSourceDistance, - fOriginDetectorDistance, 0, 0, fDetUSize, fDetVSize, - dims, pfAngles, bShortScan); - - ok &= copyVolumeFromDevice(pfVolume, D_volumeData, dims, dims.iVolX); - - - cudaFree(D_volumeData.ptr); - cudaFree(D_projData.ptr); - - return ok; - -} - - - - } diff --git a/cuda/3d/astra3d.h b/cuda/3d/astra3d.h index 3345ab8..93bf576 100644 --- a/cuda/3d/astra3d.h +++ b/cuda/3d/astra3d.h @@ -309,13 +309,6 @@ _AstraExport bool astraCudaBP_SIRTWeighted(float* pfVolume, const float* pfProje const CProjectionGeometry3D* pProjGeom, int iGPUIndex, int iVoxelSuperSampling); -_AstraExport bool astraCudaFDK(float* pfVolume, const float* pfProjections, - const CVolumeGeometry3D* pVolGeom, - const CConeProjectionGeometry3D* pProjGeom, - bool bShortScan, - int iGPUIndex, int iVoxelSuperSampling); - - } diff --git a/cuda/3d/fdk.cu b/cuda/3d/fdk.cu index 0e13be1..d847eee 100644 --- a/cuda/3d/fdk.cu +++ b/cuda/3d/fdk.cu @@ -41,8 +41,11 @@ $Id$ #include "dims3d.h" #include "arith3d.h" +#include "cone_bp.h" #include "../2d/fft.h" +#include "../../include/astra/Logging.h" + typedef texture<float, 3, cudaReadModeElementType> texture3D; static texture3D gT_coneProjTexture; @@ -86,129 +89,7 @@ static bool bindProjDataTexture(const cudaArray* array) } -__global__ void devBP_FDK(void* D_volData, unsigned int volPitch, int startAngle, float fSrcOrigin, float fDetOrigin, float fSrcZ, float fDetZ, float fInvDetUSize, float fInvDetVSize, const SDimensions3D dims) -{ - float* volData = (float*)D_volData; - - int endAngle = startAngle + g_anglesPerBlock; - if (endAngle > dims.iProjAngles) - endAngle = dims.iProjAngles; - - // threadIdx: x = rel x - // y = rel y - - // blockIdx: x = x + y - // y = z - - - // TO TRY: precompute part of detector intersection formulas in shared mem? - // TO TRY: inner loop over z, gather ray values in shared mem - - const int X = blockIdx.x % ((dims.iVolX+g_volBlockX-1)/g_volBlockX) * g_volBlockX + threadIdx.x; - const int Y = blockIdx.x / ((dims.iVolX+g_volBlockX-1)/g_volBlockX) * g_volBlockY + threadIdx.y; - - if (X > dims.iVolX) - return; - if (Y > dims.iVolY) - return; - - const int startZ = blockIdx.y * g_volBlockZ; - int endZ = startZ + g_volBlockZ; - if (endZ > dims.iVolZ) - endZ = dims.iVolZ; - - float fX = X - 0.5f*dims.iVolX + 0.5f; - float fY = Y - 0.5f*dims.iVolY + 0.5f; - float fZ = startZ - 0.5f*dims.iVolZ + 0.5f - fSrcZ; - - const float fU_base = 0.5f*dims.iProjU - 0.5f + 0.5f; - const float fV_base = 0.5f*dims.iProjV - 0.5f + 0.5f + (fDetZ-fSrcZ); - - // Note re. fZ/rV_base: the computations below are all relative to the - // optical axis, so we do the Z-adjustments beforehand. - - for (int Z = startZ; Z < endZ; ++Z, fZ += 1.0f) - { - - float fVal = 0.0f; - float fAngle = startAngle + 0.5f; - - for (int angle = startAngle; angle < endAngle; ++angle, fAngle += 1.0f) - { - - const float cos_theta = gC_angle_cos[angle]; - const float sin_theta = gC_angle_sin[angle]; - - const float fR = fSrcOrigin; - const float fD = fR - fX * sin_theta + fY * cos_theta; - float fWeight = fR / fD; - fWeight *= fWeight; - - const float fScaleFactor = (fR + fDetOrigin) / fD; - const float fU = fU_base + (fX*cos_theta+fY*sin_theta) * fScaleFactor * fInvDetUSize; - const float fV = fV_base + fZ * fScaleFactor * fInvDetVSize; - - fVal += tex3D(gT_coneProjTexture, fU, fAngle, fV); - - } - - volData[(Z*dims.iVolY+Y)*volPitch+X] += fVal; -// projData[(angle*dims.iProjV+detectorV)*projPitch+detectorU] = 10.0f; -// if (threadIdx.x == 0 && threadIdx.y == 0) { printf("%d,%d,%d [%d / %d] -> %f\n", angle, detectorU, detectorV, (angle*dims.iProjV+detectorV)*projPitch+detectorU, projPitch, projData[(angle*dims.iProjV+detectorV)*projPitch+detectorU]); } - } - -} - - -bool FDK_BP(cudaPitchedPtr D_volumeData, - cudaPitchedPtr D_projData, - float fSrcOrigin, float fDetOrigin, - float fSrcZ, float fDetZ, float fDetUSize, float fDetVSize, - const SDimensions3D& dims, const float* angles) -{ - // transfer projections to array - - cudaArray* cuArray = allocateProjectionArray(dims); - transferProjectionsToArray(D_projData, cuArray, dims); - - bindProjDataTexture(cuArray); - - float* angle_sin = new float[dims.iProjAngles]; - float* angle_cos = new float[dims.iProjAngles]; - - for (unsigned int i = 0; i < dims.iProjAngles; ++i) { - angle_sin[i] = sinf(angles[i]); - angle_cos[i] = cosf(angles[i]); - } - cudaError_t e1 = cudaMemcpyToSymbol(gC_angle_sin, angle_sin, dims.iProjAngles*sizeof(float), 0, cudaMemcpyHostToDevice); - cudaError_t e2 = cudaMemcpyToSymbol(gC_angle_cos, angle_cos, dims.iProjAngles*sizeof(float), 0, cudaMemcpyHostToDevice); - assert(e1 == cudaSuccess); - assert(e2 == cudaSuccess); - - delete[] angle_sin; - delete[] angle_cos; - - dim3 dimBlock(g_volBlockX, g_volBlockY); - - dim3 dimGrid(((dims.iVolX+g_volBlockX-1)/g_volBlockX)*((dims.iVolY+g_volBlockY-1)/g_volBlockY), (dims.iVolZ+g_volBlockZ-1)/g_volBlockZ); - - // timeval t; - // tic(t); - - for (unsigned int i = 0; i < dims.iProjAngles; i += g_anglesPerBlock) { - devBP_FDK<<<dimGrid, dimBlock>>>(D_volumeData.ptr, D_volumeData.pitch/sizeof(float), i, fSrcOrigin, fDetOrigin, fSrcZ, fDetZ, 1.0f / fDetUSize, 1.0f / fDetVSize, dims); - } - - cudaTextForceKernelsCompletion(); - - cudaFreeArray(cuArray); - - // printf("%f\n", toc(t)); - - return true; -} - -__global__ void devFDK_preweight(void* D_projData, unsigned int projPitch, unsigned int startAngle, unsigned int endAngle, float fSrcOrigin, float fDetOrigin, float fSrcZ, float fDetZ, float fDetUSize, float fDetVSize, const SDimensions3D dims) +__global__ void devFDK_preweight(void* D_projData, unsigned int projPitch, unsigned int startAngle, unsigned int endAngle, float fSrcOrigin, float fDetOrigin, float fZShift, float fDetUSize, float fDetVSize, const SDimensions3D dims) { float* projData = (float*)D_projData; int angle = startAngle + blockIdx.y * g_anglesPerWeightBlock + threadIdx.y; @@ -230,21 +111,26 @@ __global__ void devFDK_preweight(void* D_projData, unsigned int projPitch, unsig const float fT = fCentralRayLength * fCentralRayLength + fU * fU; - float fV = (startDetectorV - 0.5f*dims.iProjV + 0.5f) * fDetVSize + fDetZ - fSrcZ; + float fV = (startDetectorV - 0.5f*dims.iProjV + 0.5f) * fDetVSize + fZShift; + + //const float fW = fCentralRayLength; + //const float fW = fCentralRayLength * (M_PI / 2.0f) / (float)dims.iProjAngles; + const float fW1 = fSrcOrigin * fDetUSize * fDetVSize; + const float fW = fCentralRayLength * fW1 * fW1 * (M_PI / 2.0f) / (float)dims.iProjAngles; for (int detectorV = startDetectorV; detectorV < endDetectorV; ++detectorV) { const float fRayLength = sqrtf(fT + fV * fV); - const float fWeight = fCentralRayLength / fRayLength; + const float fWeight = fW / fRayLength; projData[(detectorV*dims.iProjAngles+angle)*projPitch+detectorU] *= fWeight; - fV += 1.0f; + fV += fDetVSize; } } -__global__ void devFDK_ParkerWeight(void* D_projData, unsigned int projPitch, unsigned int startAngle, unsigned int endAngle, float fSrcOrigin, float fDetOrigin, float fSrcZ, float fDetZ, float fDetUSize, float fCentralFanAngle, const SDimensions3D dims) +__global__ void devFDK_ParkerWeight(void* D_projData, unsigned int projPitch, unsigned int startAngle, unsigned int endAngle, float fSrcOrigin, float fDetOrigin, float fDetUSize, float fCentralFanAngle, const SDimensions3D dims) { float* projData = (float*)D_projData; int angle = startAngle + blockIdx.y * g_anglesPerWeightBlock + threadIdx.y; @@ -305,7 +191,7 @@ __global__ void devFDK_ParkerWeight(void* D_projData, unsigned int projPitch, un // Perform the FDK pre-weighting and filtering bool FDK_PreWeight(cudaPitchedPtr D_projData, float fSrcOrigin, float fDetOrigin, - float fSrcZ, float fDetZ, + float fZShift, float fDetUSize, float fDetVSize, bool bShortScan, const SDimensions3D& dims, const float* angles) { @@ -318,23 +204,57 @@ bool FDK_PreWeight(cudaPitchedPtr D_projData, int projPitch = D_projData.pitch/sizeof(float); - devFDK_preweight<<<dimGrid, dimBlock>>>(D_projData.ptr, projPitch, 0, dims.iProjAngles, fSrcOrigin, fDetOrigin, fSrcZ, fDetZ, fDetUSize, fDetVSize, dims); + devFDK_preweight<<<dimGrid, dimBlock>>>(D_projData.ptr, projPitch, 0, dims.iProjAngles, fSrcOrigin, fDetOrigin, fZShift, fDetUSize, fDetVSize, dims); cudaTextForceKernelsCompletion(); - if (bShortScan) { + if (bShortScan && dims.iProjAngles > 1) { + ASTRA_DEBUG("Doing Parker weighting"); // We do short-scan Parker weighting - cudaError_t e1 = cudaMemcpyToSymbol(gC_angle, angles, + // First, determine (in a very basic way) the interval that's + // been scanned. We assume angles[0] is one of the endpoints of the + // range. + float fdA = angles[1] - angles[0]; + + while (fdA < -M_PI) + fdA += 2*M_PI; + while (fdA >= M_PI) + fdA -= 2*M_PI; + + float fAngleBase; + if (fdA >= 0.0f) { + // going up from angles[0] + fAngleBase = angles[dims.iProjAngles - 1]; + } else { + // going down from angles[0] + fAngleBase = angles[0]; + } + + // We pick the highest end of the range, and then + // move all angles so they fall in (-2pi,0] + + float *fRelAngles = new float[dims.iProjAngles]; + for (unsigned int i = 0; i < dims.iProjAngles; ++i) { + float f = angles[i] - fAngleBase; + while (f > 0) + f -= 2*M_PI; + while (f <= -2*M_PI) + f += 2*M_PI; + fRelAngles[i] = f; + + } + + cudaError_t e1 = cudaMemcpyToSymbol(gC_angle, fRelAngles, dims.iProjAngles*sizeof(float), 0, cudaMemcpyHostToDevice); assert(!e1); + delete[] fRelAngles; - // TODO: detector pixel size! - float fCentralFanAngle = atanf((dims.iProjU*0.5f) / + float fCentralFanAngle = atanf(fDetUSize * (dims.iProjU*0.5f) / (fSrcOrigin + fDetOrigin)); - devFDK_ParkerWeight<<<dimGrid, dimBlock>>>(D_projData.ptr, projPitch, 0, dims.iProjAngles, fSrcOrigin, fDetOrigin, fSrcZ, fDetZ, fDetUSize, fCentralFanAngle, dims); + devFDK_ParkerWeight<<<dimGrid, dimBlock>>>(D_projData.ptr, projPitch, 0, dims.iProjAngles, fSrcOrigin, fDetOrigin, fDetUSize, fCentralFanAngle, dims); } @@ -344,10 +264,7 @@ bool FDK_PreWeight(cudaPitchedPtr D_projData, bool FDK_Filter(cudaPitchedPtr D_projData, cufftComplex * D_filter, - float fSrcOrigin, float fDetOrigin, - float fSrcZ, float fDetZ, - float fDetUSize, float fDetVSize, bool bShortScan, - const SDimensions3D& dims, const float* angles) + const SDimensions3D& dims) { // The filtering is a regular ramp filter per detector line. @@ -392,9 +309,8 @@ bool FDK_Filter(cudaPitchedPtr D_projData, bool FDK(cudaPitchedPtr D_volumeData, cudaPitchedPtr D_projData, - float fSrcOrigin, float fDetOrigin, - float fSrcZ, float fDetZ, float fDetUSize, float fDetVSize, - const SDimensions3D& dims, const float* angles, bool bShortScan) + const SConeProjection* angles, + const SDimensions3D& dims, SProjectorParams3D params, bool bShortScan) { bool ok; // Generate filter @@ -403,12 +319,45 @@ bool FDK(cudaPitchedPtr D_volumeData, int iPaddedDetCount = calcNextPowerOfTwo(2 * dims.iProjU); int iHalfFFTSize = calcFFTFourSize(iPaddedDetCount); + + // NB: We don't support arbitrary cone_vec geometries here. + // Only those that are vertical sub-geometries + // (cf. CompositeGeometryManager) of regular cone geometries. + assert(dims.iProjAngles > 0); + const SConeProjection& p0 = angles[0]; + + // assuming U is in the XY plane, V is parallel to Z axis + float fDetCX = p0.fDetSX + 0.5*dims.iProjU*p0.fDetUX; + float fDetCY = p0.fDetSY + 0.5*dims.iProjU*p0.fDetUY; + float fDetCZ = p0.fDetSZ + 0.5*dims.iProjV*p0.fDetVZ; + + float fSrcOrigin = sqrt(p0.fSrcX*p0.fSrcX + p0.fSrcY*p0.fSrcY); + float fDetOrigin = sqrt(fDetCX*fDetCX + fDetCY*fDetCY); + float fDetUSize = sqrt(p0.fDetUX*p0.fDetUX + p0.fDetUY*p0.fDetUY); + float fDetVSize = abs(p0.fDetVZ); + + float fZShift = fDetCZ - p0.fSrcZ; + + float *pfAngles = new float[dims.iProjAngles]; + for (unsigned int i = 0; i < dims.iProjAngles; ++i) { + // FIXME: Sign/order + pfAngles[i] = -atan2(angles[i].fSrcX, angles[i].fSrcY) + M_PI; + } + + +#if 1 ok = FDK_PreWeight(D_projData, fSrcOrigin, fDetOrigin, - fSrcZ, fDetZ, fDetUSize, fDetVSize, - bShortScan, dims, angles); + fZShift, fDetUSize, fDetVSize, + bShortScan, dims, pfAngles); +#else + ok = true; +#endif + delete[] pfAngles; + if (!ok) return false; +#if 1 cufftComplex *pHostFilter = new cufftComplex[dims.iProjAngles * iHalfFFTSize]; memset(pHostFilter, 0, sizeof(cufftComplex) * dims.iProjAngles * iHalfFFTSize); @@ -425,28 +374,25 @@ bool FDK(cudaPitchedPtr D_volumeData, - ok = FDK_Filter(D_projData, D_filter, fSrcOrigin, fDetOrigin, - fSrcZ, fDetZ, fDetUSize, fDetVSize, - bShortScan, dims, angles); + ok = FDK_Filter(D_projData, D_filter, dims); // Clean up filter freeComplexOnDevice(D_filter); - +#endif if (!ok) return false; // Perform BP - ok = FDK_BP(D_volumeData, D_projData, fSrcOrigin, fDetOrigin, fSrcZ, fDetZ, - fDetUSize, fDetVSize, dims, angles); + params.bFDKWeighting = true; + + //ok = FDK_BP(D_volumeData, D_projData, fSrcOrigin, fDetOrigin, 0.0f, 0.0f, fDetUSize, fDetVSize, dims, pfAngles); + ok = ConeBP(D_volumeData, D_projData, dims, angles, params); if (!ok) return false; - processVol3D<opMul>(D_volumeData, - (M_PI / 2.0f) / (float)dims.iProjAngles, dims); - return true; } diff --git a/cuda/3d/fdk.h b/cuda/3d/fdk.h index de7fe53..8d2a128 100644 --- a/cuda/3d/fdk.h +++ b/cuda/3d/fdk.h @@ -35,16 +35,14 @@ namespace astraCUDA3d { bool FDK_PreWeight(cudaPitchedPtr D_projData, float fSrcOrigin, float fDetOrigin, - float fSrcZ, float fDetZ, + float fZShift, float fDetUSize, float fDetVSize, bool bShortScan, const SDimensions3D& dims, const float* angles); bool FDK(cudaPitchedPtr D_volumeData, cudaPitchedPtr D_projData, - float fSrcOrigin, float fDetOrigin, - float fSrcZ, float fDetZ, float fDetUSize, float fDetVSize, - const SDimensions3D& dims, const float* angles, bool bShortScan); - + const SConeProjection* angles, + const SDimensions3D& dims, SProjectorParams3D params, bool bShortScan); } diff --git a/cuda/3d/mem3d.cu b/cuda/3d/mem3d.cu index 9a239da..4fb30a2 100644 --- a/cuda/3d/mem3d.cu +++ b/cuda/3d/mem3d.cu @@ -38,6 +38,7 @@ $Id$ #include "cone_bp.h" #include "par3d_fp.h" #include "par3d_bp.h" +#include "fdk.h" #include "astra/Logging.h" @@ -278,6 +279,33 @@ bool BP(const astra::CProjectionGeometry3D* pProjGeom, MemHandle3D projData, con } +bool FDK(const astra::CProjectionGeometry3D* pProjGeom, MemHandle3D projData, const astra::CVolumeGeometry3D* pVolGeom, MemHandle3D volData, bool bShortScan) +{ + SDimensions3D dims; + SProjectorParams3D params; + + bool ok = convertAstraGeometry_dims(pVolGeom, pProjGeom, dims); + if (!ok) + return false; + + SPar3DProjection* pParProjs; + SConeProjection* pConeProjs; + + ok = convertAstraGeometry(pVolGeom, pProjGeom, + pParProjs, pConeProjs, + params); + + if (!ok || !pConeProjs) + return false; + + ok &= FDK(volData.d->ptr, projData.d->ptr, pConeProjs, dims, params, bShortScan); + + return ok; + + + +} + diff --git a/cuda/3d/mem3d.h b/cuda/3d/mem3d.h index 6fff80b..2d0fb27 100644 --- a/cuda/3d/mem3d.h +++ b/cuda/3d/mem3d.h @@ -95,6 +95,7 @@ bool FP(const astra::CProjectionGeometry3D* pProjGeom, MemHandle3D projData, con bool BP(const astra::CProjectionGeometry3D* pProjGeom, MemHandle3D projData, const astra::CVolumeGeometry3D* pVolGeom, MemHandle3D volData, int iVoxelSuperSampling); +bool FDK(const astra::CProjectionGeometry3D* pProjGeom, MemHandle3D projData, const astra::CVolumeGeometry3D* pVolGeom, MemHandle3D volData, bool bShortScan); } diff --git a/include/astra/CompositeGeometryManager.h b/include/astra/CompositeGeometryManager.h index 18dd72f..064370a 100644 --- a/include/astra/CompositeGeometryManager.h +++ b/include/astra/CompositeGeometryManager.h @@ -55,6 +55,10 @@ struct SGPUParams { size_t memory; }; +struct SFDKSettings { + bool bShortScan; +}; + class _AstraExport CCompositeGeometryManager { public: @@ -127,9 +131,10 @@ public: CProjector3D *pProjector; // For a `global' geometry. It will not match // the geometries of the input and output. + SFDKSettings FDKSettings; enum { - JOB_FP, JOB_BP, JOB_NOP + JOB_FP, JOB_BP, JOB_FDK, JOB_NOP } eType; enum { MODE_ADD, MODE_SET @@ -155,6 +160,8 @@ public: CFloat32ProjectionData3DMemory *pProjData); bool doBP(CProjector3D *pProjector, CFloat32VolumeData3DMemory *pVolData, CFloat32ProjectionData3DMemory *pProjData); + bool doFDK(CProjector3D *pProjector, CFloat32VolumeData3DMemory *pVolData, + CFloat32ProjectionData3DMemory *pProjData, bool bShortScan); bool doFP(CProjector3D *pProjector, const std::vector<CFloat32VolumeData3DMemory *>& volData, const std::vector<CFloat32ProjectionData3DMemory *>& projData); bool doBP(CProjector3D *pProjector, const std::vector<CFloat32VolumeData3DMemory *>& volData, const std::vector<CFloat32ProjectionData3DMemory *>& projData); diff --git a/src/CompositeGeometryManager.cpp b/src/CompositeGeometryManager.cpp index 084ba8c..c5b4d3b 100644 --- a/src/CompositeGeometryManager.cpp +++ b/src/CompositeGeometryManager.cpp @@ -146,6 +146,7 @@ bool CCompositeGeometryManager::splitJobs(TJobSet &jobs, size_t maxSize, int div newjob.eType = j->eType; newjob.eMode = j->eMode; newjob.pProjector = j->pProjector; + newjob.FDKSettings = j->FDKSettings; CPart* input = job.pInput->reduce(outputPart.get()); @@ -992,6 +993,25 @@ bool CCompositeGeometryManager::doBP(CProjector3D *pProjector, CFloat32VolumeDat return doJobs(L); } + +bool CCompositeGeometryManager::doFDK(CProjector3D *pProjector, CFloat32VolumeData3DMemory *pVolData, + CFloat32ProjectionData3DMemory *pProjData, bool bShortScan) +{ + if (!dynamic_cast<CConeProjectionGeometry3D*>(pProjData->getGeometry())) { + ASTRA_ERROR("CCompositeGeometryManager::doFDK: cone geometry required"); + return false; + } + + SJob job = createJobBP(pProjector, pVolData, pProjData); + job.eType = SJob::JOB_FDK; + job.FDKSettings.bShortScan = bShortScan; + + TJobList L; + L.push_back(job); + + return doJobs(L); +} + bool CCompositeGeometryManager::doFP(CProjector3D *pProjector, const std::vector<CFloat32VolumeData3DMemory *>& volData, const std::vector<CFloat32ProjectionData3DMemory *>& projData) { ASTRA_DEBUG("CCompositeGeometryManager::doFP, multi-volume"); @@ -1185,7 +1205,9 @@ static bool doJob(const CCompositeGeometryManager::TJobSet::const_iterator& iter ok = astraCUDA3d::copyToGPUMemory(src, inputMem, srcdims); if (!ok) ASTRA_ERROR("Error copying input data to GPU"); - if (j.eType == CCompositeGeometryManager::SJob::JOB_FP) { + switch (j.eType) { + case CCompositeGeometryManager::SJob::JOB_FP: + { assert(dynamic_cast<CCompositeGeometryManager::CVolumePart*>(j.pInput.get())); assert(dynamic_cast<CCompositeGeometryManager::CProjectionPart*>(j.pOutput.get())); @@ -1194,7 +1216,10 @@ static bool doJob(const CCompositeGeometryManager::TJobSet::const_iterator& iter ok = astraCUDA3d::FP(((CCompositeGeometryManager::CProjectionPart*)j.pOutput.get())->pGeom, outputMem, ((CCompositeGeometryManager::CVolumePart*)j.pInput.get())->pGeom, inputMem, detectorSuperSampling, projKernel); if (!ok) ASTRA_ERROR("Error performing sub-FP"); ASTRA_DEBUG("CCompositeGeometryManager::doJobs: FP done"); - } else if (j.eType == CCompositeGeometryManager::SJob::JOB_BP) { + } + break; + case CCompositeGeometryManager::SJob::JOB_BP: + { assert(dynamic_cast<CCompositeGeometryManager::CVolumePart*>(j.pOutput.get())); assert(dynamic_cast<CCompositeGeometryManager::CProjectionPart*>(j.pInput.get())); @@ -1203,7 +1228,26 @@ static bool doJob(const CCompositeGeometryManager::TJobSet::const_iterator& iter ok = astraCUDA3d::BP(((CCompositeGeometryManager::CProjectionPart*)j.pInput.get())->pGeom, inputMem, ((CCompositeGeometryManager::CVolumePart*)j.pOutput.get())->pGeom, outputMem, voxelSuperSampling); if (!ok) ASTRA_ERROR("Error performing sub-BP"); ASTRA_DEBUG("CCompositeGeometryManager::doJobs: BP done"); - } else { + } + break; + case CCompositeGeometryManager::SJob::JOB_FDK: + { + assert(dynamic_cast<CCompositeGeometryManager::CVolumePart*>(j.pOutput.get())); + assert(dynamic_cast<CCompositeGeometryManager::CProjectionPart*>(j.pInput.get())); + + if (srcdims.subx || srcdims.suby) { + ASTRA_ERROR("CCompositeGeometryManager::doJobs: data too large for FDK"); + ok = false; + } else { + ASTRA_DEBUG("CCompositeGeometryManager::doJobs: doing FDK"); + + ok = astraCUDA3d::FDK(((CCompositeGeometryManager::CProjectionPart*)j.pInput.get())->pGeom, inputMem, ((CCompositeGeometryManager::CVolumePart*)j.pOutput.get())->pGeom, outputMem, j.FDKSettings.bShortScan); + if (!ok) ASTRA_ERROR("Error performing sub-FDK"); + ASTRA_DEBUG("CCompositeGeometryManager::doJobs: FDK done"); + } + } + break; + default: assert(false); } diff --git a/src/CudaFDKAlgorithm3D.cpp b/src/CudaFDKAlgorithm3D.cpp index e101a42..c7c8ed5 100644 --- a/src/CudaFDKAlgorithm3D.cpp +++ b/src/CudaFDKAlgorithm3D.cpp @@ -32,6 +32,7 @@ $Id$ #include "astra/CudaProjector3D.h" #include "astra/ConeProjectionGeometry3D.h" +#include "astra/CompositeGeometryManager.h" #include "astra/Logging.h" @@ -206,6 +207,7 @@ void CCudaFDKAlgorithm3D::run(int _iNrIterations) ASTRA_ASSERT(pReconMem); +#if 0 bool ok = true; ok = astraCudaFDK(pReconMem->getData(), pSinoMem->getDataConst(), @@ -213,6 +215,13 @@ void CCudaFDKAlgorithm3D::run(int _iNrIterations) m_bShortScan, m_iGPUIndex, m_iVoxelSuperSampling); ASTRA_ASSERT(ok); +#endif + + CCompositeGeometryManager cgm; + + cgm.doFDK(m_pProjector, pReconMem, pSinoMem, m_bShortScan); + + } //---------------------------------------------------------------------------------------- |