diff options
author | Willem Jan Palenstijn <Willem.Jan.Palenstijn@cwi.nl> | 2016-07-29 12:03:38 +0200 |
---|---|---|
committer | Willem Jan Palenstijn <Willem.Jan.Palenstijn@cwi.nl> | 2016-07-29 12:11:52 +0200 |
commit | b1ffc11d930c19bd73af9837a08bc8dde9fe8e72 (patch) | |
tree | 40ce622ffdc8d75c885135db1ce4773c9670e2c3 | |
parent | b2611a03577c285ddf48edab0d05dafa09ab362c (diff) | |
download | astra-b1ffc11d930c19bd73af9837a08bc8dde9fe8e72.tar.gz astra-b1ffc11d930c19bd73af9837a08bc8dde9fe8e72.tar.bz2 astra-b1ffc11d930c19bd73af9837a08bc8dde9fe8e72.tar.xz astra-b1ffc11d930c19bd73af9837a08bc8dde9fe8e72.zip |
Add CUDA parvec support
36 files changed, 1179 insertions, 1715 deletions
diff --git a/astra_vc09.vcproj b/astra_vc09.vcproj index f2cf62a..428bf8d 100644 --- a/astra_vc09.vcproj +++ b/astra_vc09.vcproj @@ -1077,6 +1077,10 @@ > </File> <File + RelativePath=".\include\astra\ParallelVecProjectionGeometry2D.h" + > + </File> + <File RelativePath=".\include\astra\ParallelVecProjectionGeometry3D.h" > </File> @@ -1121,6 +1125,10 @@ > </File> <File + RelativePath=".\src\GeometryUtil2D.cpp" + > + </File> + <File RelativePath=".\src\GeometryUtil3D.cpp" > </File> @@ -1133,6 +1141,10 @@ > </File> <File + RelativePath=".\src\ParallelVecProjectionGeometry2D.cpp" + > + </File> + <File RelativePath=".\src\ParallelVecProjectionGeometry3D.cpp" > </File> @@ -2185,6 +2197,10 @@ > </File> <File + RelativePath=".\cuda\2d\fbp.h" + > + </File> + <File RelativePath=".\cuda\2d\fft.h" > </File> @@ -2557,6 +2573,42 @@ </FileConfiguration> </File> <File + RelativePath=".\cuda\2d\fbp.cu" + > + <FileConfiguration + Name="Debug|Win32" + ExcludedFromBuild="true" + > + <Tool + Name="Cudart Build Rule" + /> + </FileConfiguration> + <FileConfiguration + Name="Debug|x64" + ExcludedFromBuild="true" + > + <Tool + Name="Cudart Build Rule" + /> + </FileConfiguration> + <FileConfiguration + Name="Release|Win32" + ExcludedFromBuild="true" + > + <Tool + Name="Cudart Build Rule" + /> + </FileConfiguration> + <FileConfiguration + Name="Release|x64" + ExcludedFromBuild="true" + > + <Tool + Name="Cudart Build Rule" + /> + </FileConfiguration> + </File> + <File RelativePath=".\cuda\2d\fft.cu" > <FileConfiguration diff --git a/astra_vc11.vcxproj b/astra_vc11.vcxproj index cd73076..d48796b 100644 --- a/astra_vc11.vcxproj +++ b/astra_vc11.vcxproj @@ -529,6 +529,7 @@ <ClCompile Include="src\Float32VolumeData3DMemory.cpp" /> <ClCompile Include="src\ForwardProjectionAlgorithm.cpp" /> <ClCompile Include="src\Fourier.cpp" /> + <ClCompile Include="src\GeometryUtil2D.cpp" /> <ClCompile Include="src\GeometryUtil3D.cpp" /> <ClCompile Include="src\Globals.cpp" /> <ClCompile Include="src\Logging.cpp" /> @@ -569,6 +570,7 @@ <ClInclude Include="cuda\2d\em.h" /> <ClInclude Include="cuda\2d\fan_bp.h" /> <ClInclude Include="cuda\2d\fan_fp.h" /> + <ClInclude Include="cuda\2d\fbp.h" /> <ClInclude Include="cuda\2d\fbp_filters.h" /> <ClInclude Include="cuda\2d\fft.h" /> <ClInclude Include="cuda\2d\par_bp.h" /> @@ -652,8 +654,8 @@ <ClInclude Include="include\astra\ParallelBeamStripKernelProjector2D.h" /> <ClInclude Include="include\astra\ParallelProjectionGeometry2D.h" /> <ClInclude Include="include\astra\ParallelProjectionGeometry3D.h" /> - <ClInclude Include="include\astra\ParallelVecProjectionGeometry3D.h" /> <ClInclude Include="include\astra\ParallelVecProjectionGeometry2D.h" /> + <ClInclude Include="include\astra\ParallelVecProjectionGeometry3D.h" /> <ClInclude Include="include\astra\PlatformDepSystemCode.h" /> <ClInclude Include="include\astra\PluginAlgorithm.h" /> <ClInclude Include="include\astra\ProjectionGeometry2D.h" /> @@ -727,6 +729,12 @@ <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</ExcludedFromBuild> <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild> </CudaCompile> + <CudaCompile Include="cuda\2d\fbp.cu"> + <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</ExcludedFromBuild> + <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild> + <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</ExcludedFromBuild> + <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild> + </CudaCompile> <CudaCompile Include="cuda\2d\fft.cu"> <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</ExcludedFromBuild> <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild> diff --git a/astra_vc11.vcxproj.filters b/astra_vc11.vcxproj.filters index 4143cd9..d6748f1 100644 --- a/astra_vc11.vcxproj.filters +++ b/astra_vc11.vcxproj.filters @@ -25,6 +25,9 @@ <CudaCompile Include="cuda\2d\fan_fp.cu"> <Filter>CUDA\cuda source</Filter> </CudaCompile> + <CudaCompile Include="cuda\2d\fbp.cu"> + <Filter>CUDA\cuda source</Filter> + </CudaCompile> <CudaCompile Include="cuda\2d\fft.cu"> <Filter>CUDA\cuda source</Filter> </CudaCompile> @@ -198,6 +201,9 @@ <ClCompile Include="src\FanFlatVecProjectionGeometry2D.cpp"> <Filter>Geometries\source</Filter> </ClCompile> + <ClCompile Include="src\GeometryUtil2D.cpp"> + <Filter>Geometries\source</Filter> + </ClCompile> <ClCompile Include="src\GeometryUtil3D.cpp"> <Filter>Geometries\source</Filter> </ClCompile> @@ -207,6 +213,9 @@ <ClCompile Include="src\ParallelProjectionGeometry3D.cpp"> <Filter>Geometries\source</Filter> </ClCompile> + <ClCompile Include="src\ParallelVecProjectionGeometry2D.cpp"> + <Filter>Geometries\source</Filter> + </ClCompile> <ClCompile Include="src\ParallelVecProjectionGeometry3D.cpp"> <Filter>Geometries\source</Filter> </ClCompile> @@ -321,10 +330,6 @@ <ClCompile Include="src\CudaSirtAlgorithm3D.cpp"> <Filter>CUDA\astra source</Filter> </ClCompile> - <ClCompile Include="src\GeometryUtil3D.cpp" /> - <ClCompile Include="src\ParallelVecProjectionGeometry2D.cpp"> - <Filter>Geometries\source</Filter> - </ClCompile> </ItemGroup> <ItemGroup> <ClInclude Include="include\astra\Algorithm.h"> @@ -474,6 +479,9 @@ <ClInclude Include="include\astra\ParallelProjectionGeometry3D.h"> <Filter>Geometries\headers</Filter> </ClInclude> + <ClInclude Include="include\astra\ParallelVecProjectionGeometry2D.h"> + <Filter>Geometries\headers</Filter> + </ClInclude> <ClInclude Include="include\astra\ParallelVecProjectionGeometry3D.h"> <Filter>Geometries\headers</Filter> </ClInclude> @@ -615,6 +623,9 @@ <ClInclude Include="cuda\2d\fbp_filters.h"> <Filter>CUDA\cuda headers</Filter> </ClInclude> + <ClInclude Include="cuda\2d\fbp.h"> + <Filter>CUDA\cuda headers</Filter> + </ClInclude> <ClInclude Include="cuda\2d\fft.h"> <Filter>CUDA\cuda headers</Filter> </ClInclude> @@ -675,11 +686,6 @@ <ClInclude Include="cuda\3d\util3d.h"> <Filter>CUDA\cuda headers</Filter> </ClInclude> - <ClInclude Include="include\astra\GeometryUtil2D.h" /> - <ClInclude Include="include\astra\GeometryUtil3D.h" /> - <ClInclude Include="include\astra\ParallelVecProjectionGeometry2D.h"> - <Filter>Geometries\headers</Filter> - </ClInclude> </ItemGroup> <ItemGroup> <None Include="include\astra\DataProjectorPolicies.inl"> diff --git a/build/linux/Makefile.in b/build/linux/Makefile.in index c72e20f..2af705f 100644 --- a/build/linux/Makefile.in +++ b/build/linux/Makefile.in @@ -139,6 +139,7 @@ BASE_OBJECTS=\ src/Float32VolumeData3DMemory.lo \ src/ForwardProjectionAlgorithm.lo \ src/Fourier.lo \ + src/GeometryUtil2D.lo \ src/GeometryUtil3D.lo \ src/Globals.lo \ src/Logging.lo \ @@ -197,6 +198,7 @@ CUDA_OBJECTS=\ cuda/2d/par_bp.lo \ cuda/2d/fan_fp.lo \ cuda/2d/fan_bp.lo \ + cuda/2d/fbp.lo \ cuda/2d/sirt.lo \ cuda/2d/sart.lo \ cuda/2d/cgls.lo \ diff --git a/build/msvc/gen.py b/build/msvc/gen.py index cc69a62..57f4539 100644 --- a/build/msvc/gen.py +++ b/build/msvc/gen.py @@ -156,6 +156,7 @@ P_astra["filters"]["CUDA\\cuda source"] = [ "cuda\\2d\\em.cu", "cuda\\2d\\fan_bp.cu", "cuda\\2d\\fan_fp.cu", +"cuda\\2d\\fbp.cu", "cuda\\2d\\fft.cu", "cuda\\2d\\par_bp.cu", "cuda\\2d\\par_fp.cu", @@ -225,9 +226,11 @@ P_astra["filters"]["Geometries\\source"] = [ "src\\ConeVecProjectionGeometry3D.cpp", "src\\FanFlatProjectionGeometry2D.cpp", "src\\FanFlatVecProjectionGeometry2D.cpp", +"src\\GeometryUtil2D.cpp", "src\\GeometryUtil3D.cpp", "src\\ParallelProjectionGeometry2D.cpp", "src\\ParallelProjectionGeometry3D.cpp", +"src\\ParallelVecProjectionGeometry2D.cpp", "src\\ParallelVecProjectionGeometry3D.cpp", "src\\ProjectionGeometry2D.cpp", "src\\ProjectionGeometry3D.cpp", @@ -285,6 +288,7 @@ P_astra["filters"]["CUDA\\cuda headers"] = [ "cuda\\2d\\fan_bp.h", "cuda\\2d\\fan_fp.h", "cuda\\2d\\fbp_filters.h", +"cuda\\2d\\fbp.h", "cuda\\2d\\fft.h", "cuda\\2d\\par_bp.h", "cuda\\2d\\par_fp.h", @@ -366,6 +370,7 @@ P_astra["filters"]["Geometries\\headers"] = [ "include\\astra\\GeometryUtil3D.h", "include\\astra\\ParallelProjectionGeometry2D.h", "include\\astra\\ParallelProjectionGeometry3D.h", +"include\\astra\\ParallelVecProjectionGeometry2D.h", "include\\astra\\ParallelVecProjectionGeometry3D.h", "include\\astra\\ProjectionGeometry2D.h", "include\\astra\\ProjectionGeometry3D.h", diff --git a/cuda/2d/algo.cu b/cuda/2d/algo.cu index dc74e51..bde4f42 100644 --- a/cuda/2d/algo.cu +++ b/cuda/2d/algo.cu @@ -35,13 +35,13 @@ $Id$ #include "fan_bp.h" #include "util.h" #include "arith.h" +#include "astra.h" namespace astraCUDA { ReconAlgo::ReconAlgo() { - angles = 0; - TOffsets = 0; + parProjs = 0; fanProjs = 0; shouldAbort = false; @@ -66,8 +66,7 @@ ReconAlgo::~ReconAlgo() void ReconAlgo::reset() { - delete[] angles; - delete[] TOffsets; + delete[] parProjs; delete[] fanProjs; if (freeGPUMemory) { @@ -77,8 +76,7 @@ void ReconAlgo::reset() cudaFree(D_volumeData); } - angles = 0; - TOffsets = 0; + parProjs = 0; fanProjs = 0; shouldAbort = false; @@ -124,46 +122,40 @@ bool ReconAlgo::enableSinogramMask() } -bool ReconAlgo::setGeometry(const SDimensions& _dims, const float* _angles) +bool ReconAlgo::setGeometry(const astra::CVolumeGeometry2D* pVolGeom, + const astra::CProjectionGeometry2D* pProjGeom) { - dims = _dims; + bool ok; - angles = new float[dims.iProjAngles]; + ok = convertAstraGeometry_dims(pVolGeom, pProjGeom, dims); - memcpy(angles, _angles, sizeof(angles[0]) * dims.iProjAngles); + if (!ok) + return false; + delete[] parProjs; + parProjs = 0; delete[] fanProjs; fanProjs = 0; - return true; -} - -bool ReconAlgo::setFanGeometry(const SDimensions& _dims, - const SFanProjection* _projs) -{ - dims = _dims; - fanProjs = new SFanProjection[dims.iProjAngles]; - - memcpy(fanProjs, _projs, sizeof(fanProjs[0]) * dims.iProjAngles); - - delete[] angles; - angles = 0; + fOutputScale = 1.0f; + ok = convertAstraGeometry(pVolGeom, pProjGeom, parProjs, fanProjs, fOutputScale); + if (!ok) + return false; return true; } - -bool ReconAlgo::setTOffsets(const float* _TOffsets) +bool ReconAlgo::setSuperSampling(int raysPerDet, int raysPerPixelDim) { - // TODO: determine if they're all zero? - TOffsets = new float[dims.iProjAngles]; - memcpy(TOffsets, _TOffsets, sizeof(angles[0]) * dims.iProjAngles); + if (raysPerDet <= 0 || raysPerPixelDim <= 0) + return false; + + dims.iRaysPerDet = raysPerDet; + dims.iRaysPerPixelDim = raysPerPixelDim; return true; } - - bool ReconAlgo::setVolumeMask(float* _D_maskData, unsigned int _maskPitch) { assert(useVolumeMask); @@ -324,14 +316,14 @@ bool ReconAlgo::callFP(float* D_volumeData, unsigned int volumePitch, float* D_projData, unsigned int projPitch, float outputScale) { - if (angles) { + if (parProjs) { assert(!fanProjs); return FP(D_volumeData, volumePitch, D_projData, projPitch, - dims, angles, TOffsets, outputScale); + dims, parProjs, fOutputScale * outputScale); } else { assert(fanProjs); return FanFP(D_volumeData, volumePitch, D_projData, projPitch, - dims, fanProjs, outputScale); + dims, fanProjs, fOutputScale * outputScale); } } @@ -339,10 +331,10 @@ bool ReconAlgo::callBP(float* D_volumeData, unsigned int volumePitch, float* D_projData, unsigned int projPitch, float outputScale) { - if (angles) { + if (parProjs) { assert(!fanProjs); return BP(D_volumeData, volumePitch, D_projData, projPitch, - dims, angles, TOffsets, outputScale); + dims, parProjs, outputScale); } else { assert(fanProjs); return FanBP(D_volumeData, volumePitch, D_projData, projPitch, diff --git a/cuda/2d/algo.h b/cuda/2d/algo.h index 99959c8..0b6a066 100644 --- a/cuda/2d/algo.h +++ b/cuda/2d/algo.h @@ -31,6 +31,17 @@ $Id$ #include "util.h" +namespace astra { + +class CParallelProjectionGeometry2D; +class CParallelVecProjectionGeometry2D; +class CFanFlatProjectionGeometry2D; +class CFanFlatVecProjectionGeometry2D; +class CVolumeGeometry2D; +class CProjectionGeometry2D; + +} + namespace astraCUDA { class _AstraExport ReconAlgo { @@ -40,11 +51,10 @@ public: bool setGPUIndex(int iGPUIndex); - bool setGeometry(const SDimensions& dims, const float* angles); - bool setFanGeometry(const SDimensions& dims, const SFanProjection* projs); + bool setGeometry(const astra::CVolumeGeometry2D* pVolGeom, + const astra::CProjectionGeometry2D* pProjGeom); - // setTOffsets should (optionally) be called after setGeometry - bool setTOffsets(const float* TOffsets); + bool setSuperSampling(int raysPerDet, int raysPerPixelDim); void signalAbort() { shouldAbort = true; } @@ -123,9 +133,9 @@ protected: SDimensions dims; - float* angles; - float* TOffsets; + SParProjection* parProjs; SFanProjection* fanProjs; + float fOutputScale; volatile bool shouldAbort; diff --git a/cuda/2d/astra.cu b/cuda/2d/astra.cu index c1e6566..b39e0a9 100644 --- a/cuda/2d/astra.cu +++ b/cuda/2d/astra.cu @@ -42,8 +42,10 @@ $Id$ #include <fstream> #include <cuda.h> +#include "../../include/astra/GeometryUtil2D.h" #include "../../include/astra/VolumeGeometry2D.h" #include "../../include/astra/ParallelProjectionGeometry2D.h" +#include "../../include/astra/ParallelVecProjectionGeometry2D.h" #include "../../include/astra/FanFlatProjectionGeometry2D.h" #include "../../include/astra/FanFlatVecProjectionGeometry2D.h" @@ -64,515 +66,6 @@ enum CUDAProjectionType { }; -class AstraFBP_internal { -public: - SDimensions dims; - float* angles; - float* TOffsets; - astraCUDA::SFanProjection* fanProjections; - - float fOriginSourceDistance; - float fOriginDetectorDistance; - - float fPixelSize; - - bool bFanBeam; - bool bShortScan; - - bool initialized; - bool setStartReconstruction; - - float* D_sinoData; - unsigned int sinoPitch; - - float* D_volumeData; - unsigned int volumePitch; - - cufftComplex * m_pDevFilter; -}; - -AstraFBP::AstraFBP() -{ - pData = new AstraFBP_internal(); - - pData->angles = 0; - pData->fanProjections = 0; - pData->TOffsets = 0; - pData->D_sinoData = 0; - pData->D_volumeData = 0; - - pData->dims.iVolWidth = 0; - pData->dims.iProjAngles = 0; - pData->dims.fDetScale = 1.0f; - pData->dims.iRaysPerDet = 1; - pData->dims.iRaysPerPixelDim = 1; - - pData->initialized = false; - pData->setStartReconstruction = false; - - pData->m_pDevFilter = NULL; -} - -AstraFBP::~AstraFBP() -{ - delete[] pData->angles; - pData->angles = 0; - - delete[] pData->TOffsets; - pData->TOffsets = 0; - - delete[] pData->fanProjections; - pData->fanProjections = 0; - - cudaFree(pData->D_sinoData); - pData->D_sinoData = 0; - - cudaFree(pData->D_volumeData); - pData->D_volumeData = 0; - - if(pData->m_pDevFilter != NULL) - { - freeComplexOnDevice(pData->m_pDevFilter); - pData->m_pDevFilter = NULL; - } - - delete pData; - pData = 0; -} - -bool AstraFBP::setReconstructionGeometry(unsigned int iVolWidth, - unsigned int iVolHeight, - float fPixelSize) -{ - if (pData->initialized) - return false; - - pData->dims.iVolWidth = iVolWidth; - pData->dims.iVolHeight = iVolHeight; - - pData->fPixelSize = fPixelSize; - - return (iVolWidth > 0 && iVolHeight > 0 && fPixelSize > 0.0f); -} - -bool AstraFBP::setProjectionGeometry(unsigned int iProjAngles, - unsigned int iProjDets, - const float* pfAngles, - float fDetSize) -{ - if (pData->initialized) - return false; - - pData->dims.iProjAngles = iProjAngles; - pData->dims.iProjDets = iProjDets; - pData->dims.fDetScale = fDetSize / pData->fPixelSize; - - if (iProjAngles == 0 || iProjDets == 0 || pfAngles == 0) - return false; - - pData->angles = new float[iProjAngles]; - memcpy(pData->angles, pfAngles, iProjAngles * sizeof(pfAngles[0])); - - pData->bFanBeam = false; - - return true; -} - -bool AstraFBP::setFanGeometry(unsigned int iProjAngles, - unsigned int iProjDets, - const astraCUDA::SFanProjection *fanProjs, - const float* pfAngles, - float fOriginSourceDistance, - float fOriginDetectorDistance, - float fDetSize, - bool bShortScan) -{ - // Slightly abusing setProjectionGeometry for this... - if (!setProjectionGeometry(iProjAngles, iProjDets, pfAngles, fDetSize)) - return false; - - pData->fOriginSourceDistance = fOriginSourceDistance; - pData->fOriginDetectorDistance = fOriginDetectorDistance; - - pData->fanProjections = new astraCUDA::SFanProjection[iProjAngles]; - memcpy(pData->fanProjections, fanProjs, iProjAngles * sizeof(fanProjs[0])); - - pData->bFanBeam = true; - pData->bShortScan = bShortScan; - - return true; -} - - -bool AstraFBP::setPixelSuperSampling(unsigned int iPixelSuperSampling) -{ - if (pData->initialized) - return false; - - if (iPixelSuperSampling == 0) - return false; - - pData->dims.iRaysPerPixelDim = iPixelSuperSampling; - - return true; -} - - -bool AstraFBP::setTOffsets(const float* pfTOffsets) -{ - if (pData->initialized) - return false; - - if (pfTOffsets == 0) - return false; - - pData->TOffsets = new float[pData->dims.iProjAngles]; - memcpy(pData->TOffsets, pfTOffsets, pData->dims.iProjAngles * sizeof(pfTOffsets[0])); - - return true; -} - -bool AstraFBP::init(int iGPUIndex) -{ - if (pData->initialized) - { - return false; - } - - if (pData->dims.iProjAngles == 0 || pData->dims.iVolWidth == 0) - { - return false; - } - - if (iGPUIndex != -1) { - cudaSetDevice(iGPUIndex); - cudaError_t err = cudaGetLastError(); - - // Ignore errors caused by calling cudaSetDevice multiple times - if (err != cudaSuccess && err != cudaErrorSetOnActiveProcess) - { - return false; - } - } - - bool ok = allocateVolumeData(pData->D_volumeData, pData->volumePitch, pData->dims); - if (!ok) - { - return false; - } - - ok = allocateProjectionData(pData->D_sinoData, pData->sinoPitch, pData->dims); - if (!ok) - { - cudaFree(pData->D_volumeData); - pData->D_volumeData = 0; - return false; - } - - pData->initialized = true; - - return true; -} - -bool AstraFBP::setSinogram(const float* pfSinogram, - unsigned int iSinogramPitch) -{ - if (!pData->initialized) - return false; - if (!pfSinogram) - return false; - - bool ok = copySinogramToDevice(pfSinogram, iSinogramPitch, - pData->dims, - pData->D_sinoData, pData->sinoPitch); - if (!ok) - return false; - - // rescale sinogram to adjust for pixel size - processSino<opMul>(pData->D_sinoData, - 1.0f/(pData->fPixelSize*pData->fPixelSize), - pData->sinoPitch, pData->dims); - - pData->setStartReconstruction = false; - - return true; -} - -static int calcNextPowerOfTwo(int _iValue) -{ - int iOutput = 1; - - while(iOutput < _iValue) - { - iOutput *= 2; - } - - return iOutput; -} - -bool AstraFBP::run() -{ - if (!pData->initialized) - { - return false; - } - - zeroVolumeData(pData->D_volumeData, pData->volumePitch, pData->dims); - - bool ok = false; - - if (pData->bFanBeam) { - // Call FDK_PreWeight to handle fan beam geometry. We treat - // this as a cone beam setup of a single slice: - - // TODO: TOffsets affects this preweighting... - - // We create a fake cudaPitchedPtr - cudaPitchedPtr tmp; - tmp.ptr = pData->D_sinoData; - tmp.pitch = pData->sinoPitch * sizeof(float); - tmp.xsize = pData->dims.iProjDets; - tmp.ysize = pData->dims.iProjAngles; - // and a fake Dimensions3D - astraCUDA3d::SDimensions3D dims3d; - dims3d.iVolX = pData->dims.iVolWidth; - dims3d.iVolY = pData->dims.iVolHeight; - dims3d.iVolZ = 1; - dims3d.iProjAngles = pData->dims.iProjAngles; - dims3d.iProjU = pData->dims.iProjDets; - dims3d.iProjV = 1; - dims3d.iRaysPerDetDim = dims3d.iRaysPerVoxelDim = 1; - - astraCUDA3d::FDK_PreWeight(tmp, pData->fOriginSourceDistance, - pData->fOriginDetectorDistance, 0.0f, 0.0f, - pData->dims.fDetScale, 1.0f, // TODO: Are these correct? - pData->bShortScan, dims3d, pData->angles); - } - - if (pData->m_pDevFilter) { - - int iFFTRealDetCount = calcNextPowerOfTwo(2 * pData->dims.iProjDets); - int iFFTFourDetCount = calcFFTFourSize(iFFTRealDetCount); - - cufftComplex * pDevComplexSinogram = NULL; - - allocateComplexOnDevice(pData->dims.iProjAngles, iFFTFourDetCount, &pDevComplexSinogram); - - runCudaFFT(pData->dims.iProjAngles, pData->D_sinoData, pData->sinoPitch, pData->dims.iProjDets, iFFTRealDetCount, iFFTFourDetCount, pDevComplexSinogram); - - applyFilter(pData->dims.iProjAngles, iFFTFourDetCount, pDevComplexSinogram, pData->m_pDevFilter); - - runCudaIFFT(pData->dims.iProjAngles, pDevComplexSinogram, pData->D_sinoData, pData->sinoPitch, pData->dims.iProjDets, iFFTRealDetCount, iFFTFourDetCount); - - freeComplexOnDevice(pDevComplexSinogram); - - } - - float fOutputScale = (M_PI / 2.0f) / (float)pData->dims.iProjAngles; - - if (pData->bFanBeam) { - ok = FanBP_FBPWeighted(pData->D_volumeData, pData->volumePitch, pData->D_sinoData, pData->sinoPitch, pData->dims, pData->fanProjections, fOutputScale); - - } else { - ok = BP(pData->D_volumeData, pData->volumePitch, pData->D_sinoData, pData->sinoPitch, pData->dims, pData->angles, pData->TOffsets, fOutputScale); - } - if(!ok) - { - return false; - } - - return true; -} - -bool AstraFBP::getReconstruction(float* pfReconstruction, unsigned int iReconstructionPitch) const -{ - if (!pData->initialized) - return false; - - bool ok = copyVolumeFromDevice(pfReconstruction, iReconstructionPitch, - pData->dims, - pData->D_volumeData, pData->volumePitch); - if (!ok) - return false; - - return true; -} - -int AstraFBP::calcFourierFilterSize(int _iDetectorCount) -{ - int iFFTRealDetCount = calcNextPowerOfTwo(2 * _iDetectorCount); - int iFreqBinCount = calcFFTFourSize(iFFTRealDetCount); - - // CHECKME: Matlab makes this at least 64. Do we also need to? - return iFreqBinCount; -} - -bool AstraFBP::setFilter(E_FBPFILTER _eFilter, const float * _pfHostFilter /* = NULL */, int _iFilterWidth /* = 0 */, float _fD /* = 1.0f */, float _fFilterParameter /* = -1.0f */) -{ - if(pData->m_pDevFilter != 0) - { - freeComplexOnDevice(pData->m_pDevFilter); - pData->m_pDevFilter = 0; - } - - if (_eFilter == FILTER_NONE) - return true; // leave pData->m_pDevFilter set to 0 - - - int iFFTRealDetCount = calcNextPowerOfTwo(2 * pData->dims.iProjDets); - int iFreqBinCount = calcFFTFourSize(iFFTRealDetCount); - - cufftComplex * pHostFilter = new cufftComplex[pData->dims.iProjAngles * iFreqBinCount]; - memset(pHostFilter, 0, sizeof(cufftComplex) * pData->dims.iProjAngles * iFreqBinCount); - - allocateComplexOnDevice(pData->dims.iProjAngles, iFreqBinCount, &(pData->m_pDevFilter)); - - switch(_eFilter) - { - case FILTER_NONE: - // handled above - break; - case FILTER_RAMLAK: - case FILTER_SHEPPLOGAN: - case FILTER_COSINE: - case FILTER_HAMMING: - case FILTER_HANN: - case FILTER_TUKEY: - case FILTER_LANCZOS: - case FILTER_TRIANGULAR: - case FILTER_GAUSSIAN: - case FILTER_BARTLETTHANN: - case FILTER_BLACKMAN: - case FILTER_NUTTALL: - case FILTER_BLACKMANHARRIS: - case FILTER_BLACKMANNUTTALL: - case FILTER_FLATTOP: - case FILTER_PARZEN: - { - genFilter(_eFilter, _fD, pData->dims.iProjAngles, pHostFilter, iFFTRealDetCount, iFreqBinCount, _fFilterParameter); - uploadComplexArrayToDevice(pData->dims.iProjAngles, iFreqBinCount, pHostFilter, pData->m_pDevFilter); - - break; - } - case FILTER_PROJECTION: - { - // make sure the offered filter has the correct size - assert(_iFilterWidth == iFreqBinCount); - - for(int iFreqBinIndex = 0; iFreqBinIndex < iFreqBinCount; iFreqBinIndex++) - { - float fValue = _pfHostFilter[iFreqBinIndex]; - - for(int iProjectionIndex = 0; iProjectionIndex < (int)pData->dims.iProjAngles; iProjectionIndex++) - { - pHostFilter[iFreqBinIndex + iProjectionIndex * iFreqBinCount].x = fValue; - pHostFilter[iFreqBinIndex + iProjectionIndex * iFreqBinCount].y = 0.0f; - } - } - uploadComplexArrayToDevice(pData->dims.iProjAngles, iFreqBinCount, pHostFilter, pData->m_pDevFilter); - break; - } - case FILTER_SINOGRAM: - { - // make sure the offered filter has the correct size - assert(_iFilterWidth == iFreqBinCount); - - for(int iFreqBinIndex = 0; iFreqBinIndex < iFreqBinCount; iFreqBinIndex++) - { - for(int iProjectionIndex = 0; iProjectionIndex < (int)pData->dims.iProjAngles; iProjectionIndex++) - { - float fValue = _pfHostFilter[iFreqBinIndex + iProjectionIndex * _iFilterWidth]; - - pHostFilter[iFreqBinIndex + iProjectionIndex * iFreqBinCount].x = fValue; - pHostFilter[iFreqBinIndex + iProjectionIndex * iFreqBinCount].y = 0.0f; - } - } - uploadComplexArrayToDevice(pData->dims.iProjAngles, iFreqBinCount, pHostFilter, pData->m_pDevFilter); - break; - } - case FILTER_RPROJECTION: - { - int iProjectionCount = pData->dims.iProjAngles; - int iRealFilterElementCount = iProjectionCount * iFFTRealDetCount; - float * pfHostRealFilter = new float[iRealFilterElementCount]; - memset(pfHostRealFilter, 0, sizeof(float) * iRealFilterElementCount); - - int iUsedFilterWidth = min(_iFilterWidth, iFFTRealDetCount); - int iStartFilterIndex = (_iFilterWidth - iUsedFilterWidth) / 2; - int iMaxFilterIndex = iStartFilterIndex + iUsedFilterWidth; - - int iFilterShiftSize = _iFilterWidth / 2; - - for(int iDetectorIndex = iStartFilterIndex; iDetectorIndex < iMaxFilterIndex; iDetectorIndex++) - { - int iFFTInFilterIndex = (iDetectorIndex + iFFTRealDetCount - iFilterShiftSize) % iFFTRealDetCount; - float fValue = _pfHostFilter[iDetectorIndex]; - - for(int iProjectionIndex = 0; iProjectionIndex < (int)pData->dims.iProjAngles; iProjectionIndex++) - { - pfHostRealFilter[iFFTInFilterIndex + iProjectionIndex * iFFTRealDetCount] = fValue; - } - } - - float* pfDevRealFilter = NULL; - cudaMalloc((void **)&pfDevRealFilter, sizeof(float) * iRealFilterElementCount); // TODO: check for errors - cudaMemcpy(pfDevRealFilter, pfHostRealFilter, sizeof(float) * iRealFilterElementCount, cudaMemcpyHostToDevice); - delete[] pfHostRealFilter; - - runCudaFFT(iProjectionCount, pfDevRealFilter, iFFTRealDetCount, iFFTRealDetCount, iFFTRealDetCount, iFreqBinCount, pData->m_pDevFilter); - - cudaFree(pfDevRealFilter); - - break; - } - case FILTER_RSINOGRAM: - { - int iProjectionCount = pData->dims.iProjAngles; - int iRealFilterElementCount = iProjectionCount * iFFTRealDetCount; - float* pfHostRealFilter = new float[iRealFilterElementCount]; - memset(pfHostRealFilter, 0, sizeof(float) * iRealFilterElementCount); - - int iUsedFilterWidth = min(_iFilterWidth, iFFTRealDetCount); - int iStartFilterIndex = (_iFilterWidth - iUsedFilterWidth) / 2; - int iMaxFilterIndex = iStartFilterIndex + iUsedFilterWidth; - - int iFilterShiftSize = _iFilterWidth / 2; - - for(int iDetectorIndex = iStartFilterIndex; iDetectorIndex < iMaxFilterIndex; iDetectorIndex++) - { - int iFFTInFilterIndex = (iDetectorIndex + iFFTRealDetCount - iFilterShiftSize) % iFFTRealDetCount; - - for(int iProjectionIndex = 0; iProjectionIndex < (int)pData->dims.iProjAngles; iProjectionIndex++) - { - float fValue = _pfHostFilter[iDetectorIndex + iProjectionIndex * _iFilterWidth]; - pfHostRealFilter[iFFTInFilterIndex + iProjectionIndex * iFFTRealDetCount] = fValue; - } - } - - float* pfDevRealFilter = NULL; - cudaMalloc((void **)&pfDevRealFilter, sizeof(float) * iRealFilterElementCount); // TODO: check for errors - cudaMemcpy(pfDevRealFilter, pfHostRealFilter, sizeof(float) * iRealFilterElementCount, cudaMemcpyHostToDevice); - delete[] pfHostRealFilter; - - runCudaFFT(iProjectionCount, pfDevRealFilter, iFFTRealDetCount, iFFTRealDetCount, iFFTRealDetCount, iFreqBinCount, pData->m_pDevFilter); - - cudaFree(pfDevRealFilter); - - break; - } - default: - { - ASTRA_ERROR("AstraFBP::setFilter: Unknown filter type requested"); - delete [] pHostFilter; - return false; - } - } - - delete [] pHostFilter; - - return true; -} - BPalgo::BPalgo() { @@ -617,18 +110,17 @@ float BPalgo::computeDiffNorm() bool astraCudaFP(const float* pfVolume, float* pfSinogram, unsigned int iVolWidth, unsigned int iVolHeight, unsigned int iProjAngles, unsigned int iProjDets, - const float *pfAngles, const float *pfOffsets, - float fDetSize, unsigned int iDetSuperSampling, + const SParProjection *pAngles, + unsigned int iDetSuperSampling, float fOutputScale, int iGPUIndex) { SDimensions dims; - if (iProjAngles == 0 || iProjDets == 0 || pfAngles == 0) + if (iProjAngles == 0 || iProjDets == 0 || pAngles == 0) return false; dims.iProjAngles = iProjAngles; dims.iProjDets = iProjDets; - dims.fDetScale = fDetSize; if (iDetSuperSampling == 0) return false; @@ -678,7 +170,7 @@ bool astraCudaFP(const float* pfVolume, float* pfSinogram, } zeroProjectionData(D_sinoData, sinoPitch, dims); - ok = FP(D_volumeData, volumePitch, D_sinoData, sinoPitch, dims, pfAngles, pfOffsets, fOutputScale); + ok = FP(D_volumeData, volumePitch, D_sinoData, sinoPitch, dims, pAngles, fOutputScale); if (!ok) { cudaFree(D_volumeData); cudaFree(D_sinoData); @@ -713,7 +205,6 @@ bool astraCudaFanFP(const float* pfVolume, float* pfSinogram, dims.iProjAngles = iProjAngles; dims.iProjDets = iProjDets; - dims.fDetScale = 1.0f; // TODO? if (iDetSuperSampling == 0) return false; @@ -789,118 +280,99 @@ bool astraCudaFanFP(const float* pfVolume, float* pfSinogram, } -bool convertAstraGeometry(const CVolumeGeometry2D* pVolGeom, - const CParallelProjectionGeometry2D* pProjGeom, - float*& detectorOffsets, float*& projectionAngles, - float& detSize, float& outputScale) +// adjust pProjs to normalize volume geometry +template<typename ProjectionT> +static bool convertAstraGeometry_internal(const CVolumeGeometry2D* pVolGeom, + unsigned int iProjectionAngleCount, + ProjectionT*& pProjs, + float& fOutputScale) { - assert(pVolGeom); - assert(pProjGeom); - assert(pProjGeom->getProjectionAngles()); - + // TODO: Make EPS relative const float EPS = 0.00001f; - int nth = pProjGeom->getProjectionAngleCount(); - // Check if pixels are square if (abs(pVolGeom->getPixelLengthX() - pVolGeom->getPixelLengthY()) > EPS) return false; + float dx = -(pVolGeom->getWindowMinX() + pVolGeom->getWindowMaxX()) / 2; + float dy = -(pVolGeom->getWindowMinY() + pVolGeom->getWindowMaxY()) / 2; - // Scale volume pixels to 1x1 - detSize = pProjGeom->getDetectorWidth() / pVolGeom->getPixelLengthX(); + float factor = 1.0f / pVolGeom->getPixelLengthX(); - // Copy angles - float *angles = new float[nth]; - for (int i = 0; i < nth; ++i) - angles[i] = pProjGeom->getProjectionAngles()[i]; - projectionAngles = angles; - - // Check if we need to translate - bool offCenter = false; - if (abs(pVolGeom->getWindowMinX() + pVolGeom->getWindowMaxX()) > EPS || - abs(pVolGeom->getWindowMinY() + pVolGeom->getWindowMaxY()) > EPS) - { - offCenter = true; + for (int i = 0; i < iProjectionAngleCount; ++i) { + // CHECKME: Order of scaling and translation + pProjs[i].translate(dx, dy); + pProjs[i].scale(factor); } + // CHECKME: Check factor + fOutputScale *= pVolGeom->getPixelLengthX() * pVolGeom->getPixelLengthY(); - // If there are existing detector offsets, or if we need to translate, - // we need to return offsets - if (offCenter) - { - float* offset = new float[nth]; + return true; +} - for (int i = 0; i < nth; ++i) - offset[i] = 0.0f; - if (offCenter) { - float dx = (pVolGeom->getWindowMinX() + pVolGeom->getWindowMaxX()) / 2; - float dy = (pVolGeom->getWindowMinY() + pVolGeom->getWindowMaxY()) / 2; - // CHECKME: Is d in pixels or in units? +bool convertAstraGeometry(const CVolumeGeometry2D* pVolGeom, + const CParallelProjectionGeometry2D* pProjGeom, + SParProjection*& pProjs, + float& fOutputScale) +{ + assert(pVolGeom); + assert(pProjGeom); + assert(pProjGeom->getProjectionAngles()); - for (int i = 0; i < nth; ++i) { - float d = dx * cos(angles[i]) + dy * sin(angles[i]); - offset[i] += d; - } - } + int nth = pProjGeom->getProjectionAngleCount(); - // CHECKME: Order of scaling and translation + pProjs = genParProjections(nth, + pProjGeom->getDetectorCount(), + pProjGeom->getDetectorWidth(), + pProjGeom->getProjectionAngles(), 0); - // Scale volume pixels to 1x1 - for (int i = 0; i < nth; ++i) { - //offset[i] /= pVolGeom->getPixelLengthX(); - //offset[i] *= detSize; - } + bool ok; + fOutputScale = 1.0f; + ok = convertAstraGeometry_internal(pVolGeom, nth, pProjs, fOutputScale); - detectorOffsets = offset; - } else { - detectorOffsets = 0; + if (!ok) { + delete[] pProjs; + pProjs = 0; } - outputScale = pVolGeom->getPixelLengthX(); - outputScale *= outputScale; - - return true; + return ok; } -static void convertAstraGeometry_internal(const CVolumeGeometry2D* pVolGeom, - unsigned int iProjectionAngleCount, - astraCUDA::SFanProjection*& pProjs, - float& outputScale) +bool convertAstraGeometry(const CVolumeGeometry2D* pVolGeom, + const CParallelVecProjectionGeometry2D* pProjGeom, + SParProjection*& pProjs, + float& fOutputScale) { - // Translate - float dx = (pVolGeom->getWindowMinX() + pVolGeom->getWindowMaxX()) / 2; - float dy = (pVolGeom->getWindowMinY() + pVolGeom->getWindowMaxY()) / 2; + assert(pVolGeom); + assert(pProjGeom); + assert(pProjGeom->getProjectionVectors()); - for (int i = 0; i < iProjectionAngleCount; ++i) { - pProjs[i].fSrcX -= dx; - pProjs[i].fSrcY -= dy; - pProjs[i].fDetSX -= dx; - pProjs[i].fDetSY -= dy; + int nth = pProjGeom->getProjectionAngleCount(); + + pProjs = new SParProjection[nth]; + + for (int i = 0; i < nth; ++i) { + pProjs[i] = pProjGeom->getProjectionVectors()[i]; } - // CHECKME: Order of scaling and translation + bool ok; + fOutputScale = 1.0f; - // Scale - float factor = 1.0f / pVolGeom->getPixelLengthX(); - for (int i = 0; i < iProjectionAngleCount; ++i) { - pProjs[i].fSrcX *= factor; - pProjs[i].fSrcY *= factor; - pProjs[i].fDetSX *= factor; - pProjs[i].fDetSY *= factor; - pProjs[i].fDetUX *= factor; - pProjs[i].fDetUY *= factor; + ok = convertAstraGeometry_internal(pVolGeom, nth, pProjs, fOutputScale); + if (!ok) { + delete[] pProjs; + pProjs = 0; } - // CHECKME: Check factor - outputScale = pVolGeom->getPixelLengthX(); -// outputScale *= outputScale; + return ok; } + bool convertAstraGeometry(const CVolumeGeometry2D* pVolGeom, const CFanFlatProjectionGeometry2D* pProjGeom, astraCUDA::SFanProjection*& pProjs, @@ -910,6 +382,7 @@ bool convertAstraGeometry(const CVolumeGeometry2D* pVolGeom, assert(pProjGeom); assert(pProjGeom->getProjectionAngles()); + // TODO: Make EPS relative const float EPS = 0.00001f; int nth = pProjGeom->getProjectionAngleCount(); @@ -928,23 +401,9 @@ bool convertAstraGeometry(const CVolumeGeometry2D* pVolGeom, float fDetSize = pProjGeom->getDetectorWidth(); const float *pfAngles = pProjGeom->getProjectionAngles(); - pProjs = new SFanProjection[nth]; - - float fSrcX0 = 0.0f; - float fSrcY0 = -fOriginSourceDistance; - float fDetUX0 = fDetSize; - float fDetUY0 = 0.0f; - float fDetSX0 = pProjGeom->getDetectorCount() * fDetUX0 / -2.0f; - float fDetSY0 = fOriginDetectorDistance; - -#define ROTATE0(name,i,alpha) do { pProjs[i].f##name##X = f##name##X0 * cos(alpha) - f##name##Y0 * sin(alpha); pProjs[i].f##name##Y = f##name##X0 * sin(alpha) + f##name##Y0 * cos(alpha); } while(0) - for (int i = 0; i < nth; ++i) { - ROTATE0(Src, i, pfAngles[i]); - ROTATE0(DetS, i, pfAngles[i]); - ROTATE0(DetU, i, pfAngles[i]); - } - -#undef ROTATE0 + pProjs = genFanProjections(nth, pProjGeom->getDetectorCount(), + fOriginSourceDistance, fOriginDetectorDistance, + fDetSize, pfAngles); convertAstraGeometry_internal(pVolGeom, nth, pProjs, outputScale); @@ -961,6 +420,7 @@ bool convertAstraGeometry(const CVolumeGeometry2D* pVolGeom, assert(pProjGeom); assert(pProjGeom->getProjectionVectors()); + // TODO: Make EPS relative const float EPS = 0.00001f; int nx = pVolGeom->getGridColCount(); @@ -983,6 +443,52 @@ bool convertAstraGeometry(const CVolumeGeometry2D* pVolGeom, } +bool convertAstraGeometry(const CVolumeGeometry2D* pVolGeom, + const CProjectionGeometry2D* pProjGeom, + astraCUDA::SParProjection*& pParProjs, + astraCUDA::SFanProjection*& pFanProjs, + float& outputScale) +{ + const CParallelProjectionGeometry2D* parProjGeom = dynamic_cast<const CParallelProjectionGeometry2D*>(pProjGeom); + const CParallelVecProjectionGeometry2D* parVecProjGeom = dynamic_cast<const CParallelVecProjectionGeometry2D*>(pProjGeom); + const CFanFlatProjectionGeometry2D* fanProjGeom = dynamic_cast<const CFanFlatProjectionGeometry2D*>(pProjGeom); + const CFanFlatVecProjectionGeometry2D* fanVecProjGeom = dynamic_cast<const CFanFlatVecProjectionGeometry2D*>(pProjGeom); + + bool ok = false; + + if (parProjGeom) { + ok = convertAstraGeometry(pVolGeom, parProjGeom, pParProjs, outputScale); + } else if (parVecProjGeom) { + ok = convertAstraGeometry(pVolGeom, parVecProjGeom, pParProjs, outputScale); + } else if (fanProjGeom) { + ok = convertAstraGeometry(pVolGeom, fanProjGeom, pFanProjs, outputScale); + } else if (fanVecProjGeom) { + ok = convertAstraGeometry(pVolGeom, fanVecProjGeom, pFanProjs, outputScale); + } else { + ok = false; + } + + return ok; +} + +bool convertAstraGeometry_dims(const CVolumeGeometry2D* pVolGeom, + const CProjectionGeometry2D* pProjGeom, + SDimensions& dims) +{ + dims.iVolWidth = pVolGeom->getGridColCount(); + dims.iVolHeight = pVolGeom->getGridRowCount(); + + dims.iProjAngles = pProjGeom->getProjectionAngleCount(); + dims.iProjDets = pProjGeom->getDetectorCount(); + + dims.iRaysPerDet = 1; + dims.iRaysPerPixelDim = 1; + + return true; +} + + + } diff --git a/cuda/2d/astra.h b/cuda/2d/astra.h index 617883b..8e91feb 100644 --- a/cuda/2d/astra.h +++ b/cuda/2d/astra.h @@ -29,7 +29,6 @@ $Id$ #ifndef _CUDA_ASTRA_H #define _CUDA_ASTRA_H -#include "fft.h" #include "fbp_filters.h" #include "dims.h" #include "algo.h" @@ -43,140 +42,12 @@ enum Cuda2DProjectionKernel { }; class CParallelProjectionGeometry2D; +class CParallelVecProjectionGeometry2D; class CFanFlatProjectionGeometry2D; class CFanFlatVecProjectionGeometry2D; class CVolumeGeometry2D; +class CProjectionGeometry2D; -class AstraFBP_internal; - -class _AstraExport AstraFBP { -public: - // Constructor - AstraFBP(); - - // Destructor - ~AstraFBP(); - - // Set the size of the reconstruction rectangle. - // Volume pixels are currently assumed to be 1x1 squares. - bool setReconstructionGeometry(unsigned int iVolWidth, - unsigned int iVolHeight, - float fPixelSize = 1.0f); - - // Set the projection angles and number of detector pixels per angle. - // pfAngles must be a float array of length iProjAngles. - // fDetSize indicates the size of a detector pixel compared to a - // volume pixel edge. - // - // pfAngles will only be read from during this call. - bool setProjectionGeometry(unsigned int iProjAngles, - unsigned int iProjDets, - const float *pfAngles, - float fDetSize = 1.0f); - // Set the projection angles and number of detector pixels per angle. - // pfAngles must be a float array of length iProjAngles. - // fDetSize indicates the size of a detector pixel compared to a - // volume pixel edge. - // - // pfAngles, fanProjs will only be read from during this call. - bool setFanGeometry(unsigned int iProjAngles, - unsigned int iProjDets, - const astraCUDA::SFanProjection *fanProjs, - const float *pfAngles, - float fOriginSourceDistance, - float fOriginDetectorDistance, - float fDetSize = 1.0f, - bool bShortScan = false); - - // Set linear supersampling factor for the BP. - // (The number of rays is the square of this) - // - // This may optionally be called before init(). - bool setPixelSuperSampling(unsigned int iPixelSuperSampling); - - // Set per-detector shifts. - // - // pfTOffsets will only be read from during this call. - bool setTOffsets(const float *pfTOffsets); - - // Returns the required size of a filter in the fourier domain - // when multiplying it with the fft of the projection data. - // Its value is equal to the smallest power of two larger than - // or equal to twice the number of detectors in the spatial domain. - // - // _iDetectorCount is the number of detectors in the spatial domain. - static int calcFourierFilterSize(int _iDetectorCount); - - // Sets the filter type. Some filter types require the user to supply an - // array containing the filter. - // The number of elements in a filter in the fourier domain should be equal - // to the value returned by calcFourierFilterSize(). - // The following types require a filter: - // - // - FILTER_PROJECTION: - // The filter size should be equal to the output of - // calcFourierFilterSize(). The filtered sinogram is - // multiplied with the supplied filter. - // - // - FILTER_SINOGRAM: - // Same as FILTER_PROJECTION, but now the filter should contain a row for - // every projection direction. - // - // - FILTER_RPROJECTION: - // The filter should now contain one kernel (= ifft of filter), with the - // peak in the center. The filter width - // can be any value. If odd, the peak is assumed to be in the center, if - // even, it is assumed to be at floor(filter-width/2). - // - // - FILTER_RSINOGRAM - // Same as FILTER_RPROJECTION, but now the supplied filter should contain a - // row for every projection direction. - // - // A large number of other filters (FILTER_RAMLAK, FILTER_SHEPPLOGAN, - // FILTER_COSINE, FILTER_HAMMING, and FILTER_HANN) - // have a D variable, which gives the cutoff point in the frequency domain. - // Setting this value to 1.0 will include the whole filter - bool setFilter(E_FBPFILTER _eFilter, - const float * _pfHostFilter = NULL, - int _iFilterWidth = 0, float _fD = 1.0f, float _fFilterParameter = -1.0f); - - // Initialize CUDA, allocate GPU buffers and - // precompute geometry-specific data. - // - // CUDA is set up to use GPU number iGPUIndex. - // - // This must be called after calling setReconstructionGeometry() and - // setProjectionGeometry(). - bool init(int iGPUIndex = 0); - - // Setup input sinogram for a slice. - // pfSinogram must be a float array of size iProjAngles*iSinogramPitch. - // NB: iSinogramPitch is measured in floats, not in bytes. - // - // This must be called after init(), and before iterate(). It may be - // called again after iterate()/getReconstruction() to start a new slice. - // - // pfSinogram will only be read from during this call. - bool setSinogram(const float* pfSinogram, unsigned int iSinogramPitch); - - // Runs an FBP reconstruction. - // This must be called after setSinogram(). - // - // run can be called before setFilter, but will then use the default Ram-Lak filter - bool run(); - - // Get the reconstructed slice. - // pfReconstruction must be a float array of size - // iVolHeight*iReconstructionPitch. - // NB: iReconstructionPitch is measured in floats, not in bytes. - // - // This may be called after run(). - bool getReconstruction(float* pfReconstruction, - unsigned int iReconstructionPitch) const; - -private: - AstraFBP_internal* pData; -}; class _AstraExport BPalgo : public astraCUDA::ReconAlgo { public: @@ -199,8 +70,8 @@ public: _AstraExport bool astraCudaFP(const float* pfVolume, float* pfSinogram, unsigned int iVolWidth, unsigned int iVolHeight, unsigned int iProjAngles, unsigned int iProjDets, - const float *pfAngles, const float *pfOffsets, - float fDetSize = 1.0f, unsigned int iDetSuperSampling = 1, + const SParProjection *pAngles, + unsigned int iDetSuperSampling = 1, float fOutputScale = 1.0f, int iGPUIndex = 0); _AstraExport bool astraCudaFanFP(const float* pfVolume, float* pfSinogram, @@ -213,8 +84,14 @@ _AstraExport bool astraCudaFanFP(const float* pfVolume, float* pfSinogram, _AstraExport bool convertAstraGeometry(const CVolumeGeometry2D* pVolGeom, const CParallelProjectionGeometry2D* pProjGeom, - float*& pfDetectorOffsets, float*& pfProjectionAngles, - float& fDetSize, float& fOutputScale); + astraCUDA::SParProjection*& pProjs, + float& fOutputScale); + +_AstraExport bool convertAstraGeometry(const CVolumeGeometry2D* pVolGeom, + const CParallelVecProjectionGeometry2D* pProjGeom, + astraCUDA::SParProjection*& pProjs, + float& fOutputScale); + _AstraExport bool convertAstraGeometry(const CVolumeGeometry2D* pVolGeom, const CFanFlatProjectionGeometry2D* pProjGeom, @@ -226,6 +103,15 @@ _AstraExport bool convertAstraGeometry(const CVolumeGeometry2D* pVolGeom, astraCUDA::SFanProjection*& pProjs, float& outputScale); +_AstraExport bool convertAstraGeometry_dims(const CVolumeGeometry2D* pVolGeom, + const CProjectionGeometry2D* pProjGeom, + astraCUDA::SDimensions& dims); + +_AstraExport bool convertAstraGeometry(const CVolumeGeometry2D* pVolGeom, + const CProjectionGeometry2D* pProjGeom, + astraCUDA::SParProjection*& pParProjs, + astraCUDA::SFanProjection*& pFanProjs, + float& outputScale); } #endif diff --git a/cuda/2d/cgls.cu b/cuda/2d/cgls.cu index f402914..3f06581 100644 --- a/cuda/2d/cgls.cu +++ b/cuda/2d/cgls.cu @@ -207,42 +207,6 @@ float CGLS::computeDiffNorm() return sqrt(s); } -bool doCGLS(float* D_volumeData, unsigned int volumePitch, - float* D_sinoData, unsigned int sinoPitch, - const SDimensions& dims, /*const SAugmentedData& augs,*/ - const float* angles, const float* TOffsets, unsigned int iterations) -{ - CGLS cgls; - bool ok = true; - - ok &= cgls.setGeometry(dims, angles); -#if 0 - if (D_maskData) - ok &= cgls.enableVolumeMask(); -#endif - if (TOffsets) - ok &= cgls.setTOffsets(TOffsets); - - if (!ok) - return false; - - ok = cgls.init(); - if (!ok) - return false; - -#if 0 - if (D_maskData) - ok &= cgls.setVolumeMask(D_maskData, maskPitch); -#endif - - ok &= cgls.setBuffers(D_volumeData, volumePitch, D_sinoData, sinoPitch); - if (!ok) - return false; - - ok = cgls.iterate(iterations); - - return ok; -} } diff --git a/cuda/2d/dims.h b/cuda/2d/dims.h index e870da5..f74d0e4 100644 --- a/cuda/2d/dims.h +++ b/cuda/2d/dims.h @@ -34,9 +34,11 @@ $Id$ namespace astraCUDA { +using astra::SParProjection; using astra::SFanProjection; + struct SDimensions { // Width, height of reconstruction volume unsigned int iVolWidth; @@ -48,9 +50,6 @@ struct SDimensions { // Number of detector pixels unsigned int iProjDets; - // size of detector compared to volume pixels - float fDetScale; - // in FP, number of rays to trace per detector pixel. // This should usually be set to 1. // If fDetScale > 1, this should be set to an integer of roughly diff --git a/cuda/2d/em.cu b/cuda/2d/em.cu index 8593b08..47ec548 100644 --- a/cuda/2d/em.cu +++ b/cuda/2d/em.cu @@ -170,34 +170,6 @@ float EM::computeDiffNorm() } -bool doEM(float* D_volumeData, unsigned int volumePitch, - float* D_sinoData, unsigned int sinoPitch, - const SDimensions& dims, const float* angles, - const float* TOffsets, unsigned int iterations) -{ - EM em; - bool ok = true; - - ok &= em.setGeometry(dims, angles); - if (TOffsets) - ok &= em.setTOffsets(TOffsets); - - if (!ok) - return false; - - ok = em.init(); - if (!ok) - return false; - - ok &= em.setBuffers(D_volumeData, volumePitch, D_sinoData, sinoPitch); - if (!ok) - return false; - - ok = em.iterate(iterations); - - return ok; -} - } #ifdef STANDALONE diff --git a/cuda/2d/fbp.cu b/cuda/2d/fbp.cu new file mode 100644 index 0000000..2feac7d --- /dev/null +++ b/cuda/2d/fbp.cu @@ -0,0 +1,347 @@ +/* +----------------------------------------------------------------------- +Copyright: 2010-2015, iMinds-Vision Lab, University of Antwerp + 2014-2015, CWI, Amsterdam + +Contact: astra@uantwerpen.be +Website: http://sf.net/projects/astra-toolbox + +This file is part of the ASTRA Toolbox. + + +The ASTRA Toolbox is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +The ASTRA Toolbox is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with the ASTRA Toolbox. If not, see <http://www.gnu.org/licenses/>. + +----------------------------------------------------------------------- +$Id$ +*/ + +#include "fbp.h" +#include "fft.h" +#include "par_bp.h" +#include "fan_bp.h" + +// For fan-beam preweighting +#include "../3d/fdk.h" + +#include "astra/Logging.h" + +#include <cuda.h> + +namespace astraCUDA { + + + +static int calcNextPowerOfTwo(int n) +{ + int x = 1; + while (x < n) + x *= 2; + + return x; +} + +// static +int FBP::calcFourierFilterSize(int _iDetectorCount) +{ + int iFFTRealDetCount = calcNextPowerOfTwo(2 * _iDetectorCount); + int iFreqBinCount = calcFFTFourierSize(iFFTRealDetCount); + + // CHECKME: Matlab makes this at least 64. Do we also need to? + return iFreqBinCount; +} + + + + +FBP::FBP() : ReconAlgo() +{ + D_filter = 0; + +} + +FBP::~FBP() +{ + reset(); +} + +void FBP::reset() +{ + if (D_filter) { + freeComplexOnDevice((cufftComplex *)D_filter); + D_filter = 0; + } +} + +bool FBP::init() +{ + return true; +} + +bool FBP::setFilter(astra::E_FBPFILTER _eFilter, const float * _pfHostFilter /* = NULL */, int _iFilterWidth /* = 0 */, float _fD /* = 1.0f */, float _fFilterParameter /* = -1.0f */) +{ + if (D_filter) + { + freeComplexOnDevice((cufftComplex*)D_filter); + D_filter = 0; + } + + if (_eFilter == astra::FILTER_NONE) + return true; // leave D_filter set to 0 + + + int iFFTRealDetCount = calcNextPowerOfTwo(2 * dims.iProjDets); + int iFreqBinCount = calcFFTFourierSize(iFFTRealDetCount); + + cufftComplex * pHostFilter = new cufftComplex[dims.iProjAngles * iFreqBinCount]; + memset(pHostFilter, 0, sizeof(cufftComplex) * dims.iProjAngles * iFreqBinCount); + + allocateComplexOnDevice(dims.iProjAngles, iFreqBinCount, (cufftComplex**)&D_filter); + + switch(_eFilter) + { + case astra::FILTER_NONE: + // handled above + break; + case astra::FILTER_RAMLAK: + case astra::FILTER_SHEPPLOGAN: + case astra::FILTER_COSINE: + case astra::FILTER_HAMMING: + case astra::FILTER_HANN: + case astra::FILTER_TUKEY: + case astra::FILTER_LANCZOS: + case astra::FILTER_TRIANGULAR: + case astra::FILTER_GAUSSIAN: + case astra::FILTER_BARTLETTHANN: + case astra::FILTER_BLACKMAN: + case astra::FILTER_NUTTALL: + case astra::FILTER_BLACKMANHARRIS: + case astra::FILTER_BLACKMANNUTTALL: + case astra::FILTER_FLATTOP: + case astra::FILTER_PARZEN: + { + genFilter(_eFilter, _fD, dims.iProjAngles, pHostFilter, iFFTRealDetCount, iFreqBinCount, _fFilterParameter); + uploadComplexArrayToDevice(dims.iProjAngles, iFreqBinCount, pHostFilter, (cufftComplex*)D_filter); + + break; + } + case astra::FILTER_PROJECTION: + { + // make sure the offered filter has the correct size + assert(_iFilterWidth == iFreqBinCount); + + for(int iFreqBinIndex = 0; iFreqBinIndex < iFreqBinCount; iFreqBinIndex++) + { + float fValue = _pfHostFilter[iFreqBinIndex]; + + for(int iProjectionIndex = 0; iProjectionIndex < (int)dims.iProjAngles; iProjectionIndex++) + { + pHostFilter[iFreqBinIndex + iProjectionIndex * iFreqBinCount].x = fValue; + pHostFilter[iFreqBinIndex + iProjectionIndex * iFreqBinCount].y = 0.0f; + } + } + uploadComplexArrayToDevice(dims.iProjAngles, iFreqBinCount, pHostFilter, (cufftComplex*)D_filter); + break; + } + case astra::FILTER_SINOGRAM: + { + // make sure the offered filter has the correct size + assert(_iFilterWidth == iFreqBinCount); + + for(int iFreqBinIndex = 0; iFreqBinIndex < iFreqBinCount; iFreqBinIndex++) + { + for(int iProjectionIndex = 0; iProjectionIndex < (int)dims.iProjAngles; iProjectionIndex++) + { + float fValue = _pfHostFilter[iFreqBinIndex + iProjectionIndex * _iFilterWidth]; + + pHostFilter[iFreqBinIndex + iProjectionIndex * iFreqBinCount].x = fValue; + pHostFilter[iFreqBinIndex + iProjectionIndex * iFreqBinCount].y = 0.0f; + } + } + uploadComplexArrayToDevice(dims.iProjAngles, iFreqBinCount, pHostFilter, (cufftComplex*)D_filter); + break; + } + case astra::FILTER_RPROJECTION: + { + int iProjectionCount = dims.iProjAngles; + int iRealFilterElementCount = iProjectionCount * iFFTRealDetCount; + float * pfHostRealFilter = new float[iRealFilterElementCount]; + memset(pfHostRealFilter, 0, sizeof(float) * iRealFilterElementCount); + + int iUsedFilterWidth = min(_iFilterWidth, iFFTRealDetCount); + int iStartFilterIndex = (_iFilterWidth - iUsedFilterWidth) / 2; + int iMaxFilterIndex = iStartFilterIndex + iUsedFilterWidth; + + int iFilterShiftSize = _iFilterWidth / 2; + + for(int iDetectorIndex = iStartFilterIndex; iDetectorIndex < iMaxFilterIndex; iDetectorIndex++) + { + int iFFTInFilterIndex = (iDetectorIndex + iFFTRealDetCount - iFilterShiftSize) % iFFTRealDetCount; + float fValue = _pfHostFilter[iDetectorIndex]; + + for(int iProjectionIndex = 0; iProjectionIndex < (int)dims.iProjAngles; iProjectionIndex++) + { + pfHostRealFilter[iFFTInFilterIndex + iProjectionIndex * iFFTRealDetCount] = fValue; + } + } + + float* pfDevRealFilter = NULL; + cudaMalloc((void **)&pfDevRealFilter, sizeof(float) * iRealFilterElementCount); // TODO: check for errors + cudaMemcpy(pfDevRealFilter, pfHostRealFilter, sizeof(float) * iRealFilterElementCount, cudaMemcpyHostToDevice); + delete[] pfHostRealFilter; + + runCudaFFT(iProjectionCount, pfDevRealFilter, iFFTRealDetCount, iFFTRealDetCount, iFFTRealDetCount, iFreqBinCount, (cufftComplex*)D_filter); + + cudaFree(pfDevRealFilter); + + break; + } + case astra::FILTER_RSINOGRAM: + { + int iProjectionCount = dims.iProjAngles; + int iRealFilterElementCount = iProjectionCount * iFFTRealDetCount; + float* pfHostRealFilter = new float[iRealFilterElementCount]; + memset(pfHostRealFilter, 0, sizeof(float) * iRealFilterElementCount); + + int iUsedFilterWidth = min(_iFilterWidth, iFFTRealDetCount); + int iStartFilterIndex = (_iFilterWidth - iUsedFilterWidth) / 2; + int iMaxFilterIndex = iStartFilterIndex + iUsedFilterWidth; + + int iFilterShiftSize = _iFilterWidth / 2; + + for(int iDetectorIndex = iStartFilterIndex; iDetectorIndex < iMaxFilterIndex; iDetectorIndex++) + { + int iFFTInFilterIndex = (iDetectorIndex + iFFTRealDetCount - iFilterShiftSize) % iFFTRealDetCount; + + for(int iProjectionIndex = 0; iProjectionIndex < (int)dims.iProjAngles; iProjectionIndex++) + { + float fValue = _pfHostFilter[iDetectorIndex + iProjectionIndex * _iFilterWidth]; + pfHostRealFilter[iFFTInFilterIndex + iProjectionIndex * iFFTRealDetCount] = fValue; + } + } + + float* pfDevRealFilter = NULL; + cudaMalloc((void **)&pfDevRealFilter, sizeof(float) * iRealFilterElementCount); // TODO: check for errors + cudaMemcpy(pfDevRealFilter, pfHostRealFilter, sizeof(float) * iRealFilterElementCount, cudaMemcpyHostToDevice); + delete[] pfHostRealFilter; + + runCudaFFT(iProjectionCount, pfDevRealFilter, iFFTRealDetCount, iFFTRealDetCount, iFFTRealDetCount, iFreqBinCount, (cufftComplex*)D_filter); + + cudaFree(pfDevRealFilter); + + break; + } + default: + { + ASTRA_ERROR("FBP::setFilter: Unknown filter type requested"); + delete [] pHostFilter; + return false; + } + } + + delete [] pHostFilter; + + return true; +} + +bool FBP::iterate(unsigned int iterations) +{ + zeroVolumeData(D_volumeData, volumePitch, dims); + + bool ok = false; + + if (fanProjs) { + // Call FDK_PreWeight to handle fan beam geometry. We treat + // this as a cone beam setup of a single slice: + + // TODO: TOffsets affects this preweighting... + + // TODO: We take the fan parameters from the last projection here + // without checking if they're the same in all projections + + float *pfAngles = new float[dims.iProjAngles]; + + float fOriginSource, fOriginDetector, fDetSize, fOffset; + for (unsigned int i = 0; i < dims.iProjAngles; ++i) { + bool ok = astra::getFanParameters(fanProjs[i], dims.iProjDets, + pfAngles[i], + fOriginSource, fOriginDetector, + fDetSize, fOffset); + if (!ok) { + ASTRA_ERROR("FBP_CUDA: Failed to extract circular fan beam parameters from fan beam geometry"); + return false; + } + } + + // We create a fake cudaPitchedPtr + cudaPitchedPtr tmp; + tmp.ptr = D_sinoData; + tmp.pitch = sinoPitch * sizeof(float); + tmp.xsize = dims.iProjDets; + tmp.ysize = dims.iProjAngles; + // and a fake Dimensions3D + astraCUDA3d::SDimensions3D dims3d; + dims3d.iVolX = dims.iVolWidth; + dims3d.iVolY = dims.iVolHeight; + dims3d.iVolZ = 1; + dims3d.iProjAngles = dims.iProjAngles; + dims3d.iProjU = dims.iProjDets; + dims3d.iProjV = 1; + dims3d.iRaysPerDetDim = dims3d.iRaysPerVoxelDim = 1; + + astraCUDA3d::FDK_PreWeight(tmp, fOriginSource, + fOriginDetector, 0.0f, 0.0f, + fDetSize, 1.0f, + m_bShortScan, dims3d, pfAngles); + } else { + // TODO: How should different detector pixel size in different + // projections be handled? + } + + if (D_filter) { + + int iFFTRealDetCount = calcNextPowerOfTwo(2 * dims.iProjDets); + int iFFTFourDetCount = calcFFTFourierSize(iFFTRealDetCount); + + cufftComplex * pDevComplexSinogram = NULL; + + allocateComplexOnDevice(dims.iProjAngles, iFFTFourDetCount, &pDevComplexSinogram); + + runCudaFFT(dims.iProjAngles, D_sinoData, sinoPitch, dims.iProjDets, iFFTRealDetCount, iFFTFourDetCount, pDevComplexSinogram); + + applyFilter(dims.iProjAngles, iFFTFourDetCount, pDevComplexSinogram, (cufftComplex*)D_filter); + + runCudaIFFT(dims.iProjAngles, pDevComplexSinogram, D_sinoData, sinoPitch, dims.iProjDets, iFFTRealDetCount, iFFTFourDetCount); + + freeComplexOnDevice(pDevComplexSinogram); + + } + + float fOutputScale = (M_PI / 2.0f) / (float)dims.iProjAngles; + + if (fanProjs) { + ok = FanBP_FBPWeighted(D_volumeData, volumePitch, D_sinoData, sinoPitch, dims, fanProjs, fOutputScale); + + } else { + ok = BP(D_volumeData, volumePitch, D_sinoData, sinoPitch, dims, parProjs, fOutputScale); + } + if(!ok) + { + return false; + } + + return true; +} + + +} diff --git a/cuda/2d/fbp.h b/cuda/2d/fbp.h new file mode 100644 index 0000000..8b4d64d --- /dev/null +++ b/cuda/2d/fbp.h @@ -0,0 +1,98 @@ +/* +----------------------------------------------------------------------- +Copyright: 2010-2015, iMinds-Vision Lab, University of Antwerp + 2014-2015, CWI, Amsterdam + +Contact: astra@uantwerpen.be +Website: http://sf.net/projects/astra-toolbox + +This file is part of the ASTRA Toolbox. + + +The ASTRA Toolbox is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +The ASTRA Toolbox is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with the ASTRA Toolbox. If not, see <http://www.gnu.org/licenses/>. + +----------------------------------------------------------------------- +$Id$ +*/ + +#include "algo.h" +#include "fbp_filters.h" + +namespace astraCUDA { + +class _AstraExport FBP : public ReconAlgo { +public: + FBP(); + ~FBP(); + + virtual bool useSinogramMask() { return false; } + virtual bool useVolumeMask() { return false; } + + // Returns the required size of a filter in the fourier domain + // when multiplying it with the fft of the projection data. + // Its value is equal to the smallest power of two larger than + // or equal to twice the number of detectors in the spatial domain. + // + // _iDetectorCount is the number of detectors in the spatial domain. + static int calcFourierFilterSize(int _iDetectorCount); + + // Sets the filter type. Some filter types require the user to supply an + // array containing the filter. + // The number of elements in a filter in the fourier domain should be equal + // to the value returned by calcFourierFilterSize(). + // The following types require a filter: + // + // - FILTER_PROJECTION: + // The filter size should be equal to the output of + // calcFourierFilterSize(). The filtered sinogram is + // multiplied with the supplied filter. + // + // - FILTER_SINOGRAM: + // Same as FILTER_PROJECTION, but now the filter should contain a row for + // every projection direction. + // + // - FILTER_RPROJECTION: + // The filter should now contain one kernel (= ifft of filter), with the + // peak in the center. The filter width + // can be any value. If odd, the peak is assumed to be in the center, if + // even, it is assumed to be at floor(filter-width/2). + // + // - FILTER_RSINOGRAM + // Same as FILTER_RPROJECTION, but now the supplied filter should contain a + // row for every projection direction. + // + // A large number of other filters (FILTER_RAMLAK, FILTER_SHEPPLOGAN, + // FILTER_COSINE, FILTER_HAMMING, and FILTER_HANN) + // have a D variable, which gives the cutoff point in the frequency domain. + // Setting this value to 1.0 will include the whole filter + bool setFilter(astra::E_FBPFILTER _eFilter, + const float * _pfHostFilter = NULL, + int _iFilterWidth = 0, float _fD = 1.0f, float _fFilterParameter = -1.0f); + + bool setShortScan(bool ss) { m_bShortScan = ss; return true; } + + virtual bool init(); + + virtual bool iterate(unsigned int iterations); + + virtual float computeDiffNorm() { return 0.0f; } // TODO + +protected: + void reset(); + + void* D_filter; // cufftComplex* + bool m_bShortScan; +}; + +} diff --git a/cuda/2d/fbp_filters.h b/cuda/2d/fbp_filters.h index b206a5d..71c7572 100644 --- a/cuda/2d/fbp_filters.h +++ b/cuda/2d/fbp_filters.h @@ -29,6 +29,8 @@ $Id$ #ifndef FBP_FILTERS_H #define FBP_FILTERS_H +namespace astra { + enum E_FBPFILTER { FILTER_NONE, //< no filter (regular BP) @@ -55,4 +57,6 @@ enum E_FBPFILTER FILTER_RSINOGRAM, //< sinogram filter in real space }; +} + #endif /* FBP_FILTERS_H */ diff --git a/cuda/2d/fft.cu b/cuda/2d/fft.cu index 2d259a9..8a7cc10 100644 --- a/cuda/2d/fft.cu +++ b/cuda/2d/fft.cu @@ -64,6 +64,8 @@ using namespace astra; } } while (0) +namespace astraCUDA { + __global__ static void applyFilter_kernel(int _iProjectionCount, int _iFreqBinCount, cufftComplex * _pSinogram, @@ -276,11 +278,11 @@ bool runCudaIFFT(int _iProjectionCount, const cufftComplex* _pDevSourceComplex, // Because the input is real, the Fourier transform is symmetric. // CUFFT only outputs the first half (ignoring the redundant second half), // and expects the same as input for the IFFT. -int calcFFTFourSize(int _iFFTRealSize) +int calcFFTFourierSize(int _iFFTRealSize) { - int iFFTFourSize = _iFFTRealSize / 2 + 1; + int iFFTFourierSize = _iFFTRealSize / 2 + 1; - return iFFTFourSize; + return iFFTFourierSize; } void genIdenFilter(int _iProjectionCount, cufftComplex * _pFilter, @@ -695,6 +697,10 @@ void genFilter(E_FBPFILTER _eFilter, float _fD, int _iProjectionCount, delete[] pfW; } + +} + + #ifdef STANDALONE __global__ static void doubleFourierOutput_kernel(int _iProjectionCount, diff --git a/cuda/2d/fft.h b/cuda/2d/fft.h index e985fc6..b29f17a 100644 --- a/cuda/2d/fft.h +++ b/cuda/2d/fft.h @@ -34,6 +34,8 @@ $Id$ #include "fbp_filters.h" +namespace astraCUDA { + bool allocateComplexOnDevice(int _iProjectionCount, int _iDetectorCount, cufftComplex ** _ppDevComplex); @@ -57,13 +59,15 @@ bool runCudaIFFT(int _iProjectionCount, const cufftComplex* _pDevSourceComplex, void applyFilter(int _iProjectionCount, int _iFreqBinCount, cufftComplex * _pSinogram, cufftComplex * _pFilter); -int calcFFTFourSize(int _iFFTRealSize); +int calcFFTFourierSize(int _iFFTRealSize); -void genFilter(E_FBPFILTER _eFilter, float _fD, int _iProjectionCount, +void genFilter(astra::E_FBPFILTER _eFilter, float _fD, int _iProjectionCount, cufftComplex * _pFilter, int _iFFTRealDetectorCount, int _iFFTFourierDetectorCount, float _fParameter = -1.0f); void genIdenFilter(int _iProjectionCount, cufftComplex * _pFilter, int _iFFTRealDetectorCount, int _iFFTFourierDetectorCount); +} + #endif /* FFT_H */ diff --git a/cuda/2d/par_bp.cu b/cuda/2d/par_bp.cu index d9f7325..cf0a684 100644 --- a/cuda/2d/par_bp.cu +++ b/cuda/2d/par_bp.cu @@ -53,8 +53,8 @@ const unsigned int g_blockSlices = 16; const unsigned int g_MaxAngles = 2560; -__constant__ float gC_angle_sin[g_MaxAngles]; -__constant__ float gC_angle_cos[g_MaxAngles]; +__constant__ float gC_angle_scaled_sin[g_MaxAngles]; +__constant__ float gC_angle_scaled_cos[g_MaxAngles]; __constant__ float gC_angle_offset[g_MaxAngles]; static bool bindProjDataTexture(float* data, unsigned int pitch, unsigned int width, unsigned int height, cudaTextureAddressMode mode = cudaAddressModeBorder) @@ -73,7 +73,7 @@ static bool bindProjDataTexture(float* data, unsigned int pitch, unsigned int wi return true; } -__global__ void devBP(float* D_volData, unsigned int volPitch, unsigned int startAngle, bool offsets, const SDimensions dims, float fOutputScale) +__global__ void devBP(float* D_volData, unsigned int volPitch, unsigned int startAngle, const SDimensions dims, float fOutputScale) { const int relX = threadIdx.x; const int relY = threadIdx.y; @@ -87,47 +87,30 @@ __global__ void devBP(float* D_volData, unsigned int volPitch, unsigned int star if (X >= dims.iVolWidth || Y >= dims.iVolHeight) return; - const float fX = ( X - 0.5f*dims.iVolWidth + 0.5f ) / dims.fDetScale; - const float fY = ( Y - 0.5f*dims.iVolHeight + 0.5f ) / dims.fDetScale; + const float fX = ( X - 0.5f*dims.iVolWidth + 0.5f ); + const float fY = ( Y - 0.5f*dims.iVolHeight + 0.5f ); float* volData = (float*)D_volData; float fVal = 0.0f; float fA = startAngle + 0.5f; - const float fT_base = 0.5f*dims.iProjDets - 0.5f + 0.5f; - if (offsets) { - - for (int angle = startAngle; angle < endAngle; ++angle) - { - const float cos_theta = gC_angle_cos[angle]; - const float sin_theta = gC_angle_sin[angle]; - const float TOffset = gC_angle_offset[angle]; - - const float fT = fT_base + fX * cos_theta - fY * sin_theta + TOffset; - fVal += tex2D(gT_projTexture, fT, fA); - fA += 1.0f; - } - - } else { - - for (int angle = startAngle; angle < endAngle; ++angle) - { - const float cos_theta = gC_angle_cos[angle]; - const float sin_theta = gC_angle_sin[angle]; - - const float fT = fT_base + fX * cos_theta - fY * sin_theta; - fVal += tex2D(gT_projTexture, fT, fA); - fA += 1.0f; - } + for (int angle = startAngle; angle < endAngle; ++angle) + { + const float scaled_cos_theta = gC_angle_scaled_cos[angle]; + const float scaled_sin_theta = gC_angle_scaled_sin[angle]; + const float TOffset = gC_angle_offset[angle]; + const float fT = fX * scaled_cos_theta - fY * scaled_sin_theta + TOffset; + fVal += tex2D(gT_projTexture, fT, fA); + fA += 1.0f; } volData[Y*volPitch+X] += fVal * fOutputScale; } // supersampling version -__global__ void devBP_SS(float* D_volData, unsigned int volPitch, unsigned int startAngle, bool offsets, const SDimensions dims, float fOutputScale) +__global__ void devBP_SS(float* D_volData, unsigned int volPitch, unsigned int startAngle, const SDimensions dims, float fOutputScale) { const int relX = threadIdx.x; const int relY = threadIdx.y; @@ -141,61 +124,35 @@ __global__ void devBP_SS(float* D_volData, unsigned int volPitch, unsigned int s if (X >= dims.iVolWidth || Y >= dims.iVolHeight) return; - const float fX = ( X - 0.5f*dims.iVolWidth + 0.5f - 0.5f + 0.5f/dims.iRaysPerPixelDim) / dims.fDetScale; - const float fY = ( Y - 0.5f*dims.iVolHeight + 0.5f - 0.5f + 0.5f/dims.iRaysPerPixelDim) / dims.fDetScale; + const float fX = ( X - 0.5f*dims.iVolWidth + 0.5f - 0.5f + 0.5f/dims.iRaysPerPixelDim); + const float fY = ( Y - 0.5f*dims.iVolHeight + 0.5f - 0.5f + 0.5f/dims.iRaysPerPixelDim); - const float fSubStep = 1.0f/(dims.iRaysPerPixelDim * dims.fDetScale); + const float fSubStep = 1.0f/(dims.iRaysPerPixelDim); // * dims.fDetScale); float* volData = (float*)D_volData; float fVal = 0.0f; float fA = startAngle + 0.5f; - const float fT_base = 0.5f*dims.iProjDets - 0.5f + 0.5f; fOutputScale /= (dims.iRaysPerPixelDim * dims.iRaysPerPixelDim); - if (offsets) { - - for (int angle = startAngle; angle < endAngle; ++angle) - { - const float cos_theta = gC_angle_cos[angle]; - const float sin_theta = gC_angle_sin[angle]; - const float TOffset = gC_angle_offset[angle]; - - float fT = fT_base + fX * cos_theta - fY * sin_theta + TOffset; - - for (int iSubX = 0; iSubX < dims.iRaysPerPixelDim; ++iSubX) { - float fTy = fT; - fT += fSubStep * cos_theta; - for (int iSubY = 0; iSubY < dims.iRaysPerPixelDim; ++iSubY) { - fVal += tex2D(gT_projTexture, fTy, fA); - fTy -= fSubStep * sin_theta; - } - } - fA += 1.0f; - } - - } else { + for (int angle = startAngle; angle < endAngle; ++angle) + { + const float cos_theta = gC_angle_scaled_cos[angle]; + const float sin_theta = gC_angle_scaled_sin[angle]; + const float TOffset = gC_angle_offset[angle]; - for (int angle = startAngle; angle < endAngle; ++angle) - { - const float cos_theta = gC_angle_cos[angle]; - const float sin_theta = gC_angle_sin[angle]; + float fT = fX * cos_theta - fY * sin_theta + TOffset; - float fT = fT_base + fX * cos_theta - fY * sin_theta; - - for (int iSubX = 0; iSubX < dims.iRaysPerPixelDim; ++iSubX) { - float fTy = fT; - fT += fSubStep * cos_theta; - for (int iSubY = 0; iSubY < dims.iRaysPerPixelDim; ++iSubY) { - fVal += tex2D(gT_projTexture, fTy, fA); - fTy -= fSubStep * sin_theta; - } + for (int iSubX = 0; iSubX < dims.iRaysPerPixelDim; ++iSubX) { + float fTy = fT; + fT += fSubStep * cos_theta; + for (int iSubY = 0; iSubY < dims.iRaysPerPixelDim; ++iSubY) { + fVal += tex2D(gT_projTexture, fTy, fA); + fTy -= fSubStep * sin_theta; } - fA += 1.0f; - } - + fA += 1.0f; } volData[Y*volPitch+X] += fVal * fOutputScale; @@ -212,12 +169,10 @@ __global__ void devBP_SART(float* D_volData, unsigned int volPitch, float offset if (X >= dims.iVolWidth || Y >= dims.iVolHeight) return; - const float fX = ( X - 0.5f*dims.iVolWidth + 0.5f ) / dims.fDetScale; - const float fY = ( Y - 0.5f*dims.iVolHeight + 0.5f ) / dims.fDetScale; - - const float fT_base = 0.5f*dims.iProjDets - 0.5f + 0.5f; + const float fX = ( X - 0.5f*dims.iVolWidth + 0.5f ); + const float fY = ( Y - 0.5f*dims.iVolHeight + 0.5f ); - const float fT = fT_base + fX * angle_cos - fY * angle_sin + offset; + const float fT = fX * angle_cos - fY * angle_sin + offset; const float fVal = tex2D(gT_projTexture, fT, 0.5f); D_volData[Y*volPitch+X] += fVal * fOutputScale; @@ -226,32 +181,35 @@ __global__ void devBP_SART(float* D_volData, unsigned int volPitch, float offset bool BP_internal(float* D_volumeData, unsigned int volumePitch, float* D_projData, unsigned int projPitch, - const SDimensions& dims, const float* angles, const float* TOffsets, float fOutputScale) + const SDimensions& dims, const SParProjection* angles, + float fOutputScale) { - // TODO: process angles block by block assert(dims.iProjAngles <= g_MaxAngles); - float* angle_sin = new float[dims.iProjAngles]; - float* angle_cos = new float[dims.iProjAngles]; + float* angle_scaled_sin = new float[dims.iProjAngles]; + float* angle_scaled_cos = new float[dims.iProjAngles]; + float* angle_offset = new float[dims.iProjAngles]; bindProjDataTexture(D_projData, projPitch, dims.iProjDets, dims.iProjAngles); for (unsigned int i = 0; i < dims.iProjAngles; ++i) { - angle_sin[i] = sinf(angles[i]); - angle_cos[i] = cosf(angles[i]); + double d = angles[i].fDetUX * angles[i].fRayY - angles[i].fDetUY * angles[i].fRayX; + angle_scaled_cos[i] = angles[i].fRayY / d; + angle_scaled_sin[i] = -angles[i].fRayX / d; // TODO: Check signs + angle_offset[i] = (angles[i].fDetSY * angles[i].fRayX - angles[i].fDetSX * angles[i].fRayY) / d; } - cudaError_t e1 = cudaMemcpyToSymbol(gC_angle_sin, angle_sin, dims.iProjAngles*sizeof(float), 0, cudaMemcpyHostToDevice); - cudaError_t e2 = cudaMemcpyToSymbol(gC_angle_cos, angle_cos, dims.iProjAngles*sizeof(float), 0, cudaMemcpyHostToDevice); + + cudaError_t e1 = cudaMemcpyToSymbol(gC_angle_scaled_sin, angle_scaled_sin, dims.iProjAngles*sizeof(float), 0, cudaMemcpyHostToDevice); + cudaError_t e2 = cudaMemcpyToSymbol(gC_angle_scaled_cos, angle_scaled_cos, dims.iProjAngles*sizeof(float), 0, cudaMemcpyHostToDevice); + cudaError_t e3 = cudaMemcpyToSymbol(gC_angle_offset, angle_offset, dims.iProjAngles*sizeof(float), 0, cudaMemcpyHostToDevice); assert(e1 == cudaSuccess); assert(e2 == cudaSuccess); + assert(e3 == cudaSuccess); - if (TOffsets) { - cudaError_t e3 = cudaMemcpyToSymbol(gC_angle_offset, TOffsets, dims.iProjAngles*sizeof(float), 0, cudaMemcpyHostToDevice); - assert(e3 == cudaSuccess); - } - delete[] angle_sin; - delete[] angle_cos; + delete[] angle_scaled_sin; + delete[] angle_scaled_cos; + delete[] angle_offset; dim3 dimBlock(g_blockSlices, g_blockSliceSize); dim3 dimGrid((dims.iVolWidth+g_blockSlices-1)/g_blockSlices, @@ -263,9 +221,9 @@ bool BP_internal(float* D_volumeData, unsigned int volumePitch, for (unsigned int i = 0; i < dims.iProjAngles; i += g_anglesPerBlock) { if (dims.iRaysPerPixelDim > 1) - devBP_SS<<<dimGrid, dimBlock, 0, stream>>>(D_volumeData, volumePitch, i, (TOffsets != 0), dims, fOutputScale); + devBP_SS<<<dimGrid, dimBlock, 0, stream>>>(D_volumeData, volumePitch, i, dims, fOutputScale); else - devBP<<<dimGrid, dimBlock, 0, stream>>>(D_volumeData, volumePitch, i, (TOffsets != 0), dims, fOutputScale); + devBP<<<dimGrid, dimBlock, 0, stream>>>(D_volumeData, volumePitch, i, dims, fOutputScale); } cudaThreadSynchronize(); @@ -278,7 +236,7 @@ bool BP_internal(float* D_volumeData, unsigned int volumePitch, bool BP(float* D_volumeData, unsigned int volumePitch, float* D_projData, unsigned int projPitch, - const SDimensions& dims, const float* angles, const float* TOffsets, float fOutputScale) + const SDimensions& dims, const SParProjection* angles, float fOutputScale) { for (unsigned int iAngle = 0; iAngle < dims.iProjAngles; iAngle += g_MaxAngles) { SDimensions subdims = dims; @@ -290,9 +248,7 @@ bool BP(float* D_volumeData, unsigned int volumePitch, bool ret; ret = BP_internal(D_volumeData, volumePitch, D_projData + iAngle * projPitch, projPitch, - subdims, angles + iAngle, - TOffsets ? TOffsets + iAngle : 0, - fOutputScale); + subdims, angles + iAngle, fOutputScale); if (!ret) return false; } @@ -303,25 +259,23 @@ bool BP(float* D_volumeData, unsigned int volumePitch, bool BP_SART(float* D_volumeData, unsigned int volumePitch, float* D_projData, unsigned int projPitch, unsigned int angle, const SDimensions& dims, - const float* angles, const float* TOffsets, float fOutputScale) + const SParProjection* angles, float fOutputScale) { // Only one angle. // We need to Clamp to the border pixels instead of to zero, because // SART weights with ray length. bindProjDataTexture(D_projData, projPitch, dims.iProjDets, 1, cudaAddressModeClamp); - float angle_sin = sinf(angles[angle]); - float angle_cos = cosf(angles[angle]); - - float offset = 0.0f; - if (TOffsets) - offset = TOffsets[angle]; + double d = angles[angle].fDetUX * angles[angle].fRayY - angles[angle].fDetUY * angles[angle].fRayX; + float angle_scaled_cos = angles[angle].fRayY / d; + float angle_scaled_sin = -angles[angle].fRayX / d; // TODO: Check signs + float angle_offset = (angles[angle].fDetSY * angles[angle].fRayX - angles[angle].fDetSX * angles[angle].fRayY) / d; dim3 dimBlock(g_blockSlices, g_blockSliceSize); dim3 dimGrid((dims.iVolWidth+g_blockSlices-1)/g_blockSlices, (dims.iVolHeight+g_blockSliceSize-1)/g_blockSliceSize); - devBP_SART<<<dimGrid, dimBlock>>>(D_volumeData, volumePitch, offset, angle_sin, angle_cos, dims, fOutputScale); + devBP_SART<<<dimGrid, dimBlock>>>(D_volumeData, volumePitch, angle_offset, angle_scaled_sin, angle_scaled_cos, dims, fOutputScale); cudaThreadSynchronize(); cudaTextForceKernelsCompletion(); diff --git a/cuda/2d/par_bp.h b/cuda/2d/par_bp.h index 64bcd34..102cb2b 100644 --- a/cuda/2d/par_bp.h +++ b/cuda/2d/par_bp.h @@ -35,13 +35,13 @@ namespace astraCUDA { _AstraExport bool BP(float* D_volumeData, unsigned int volumePitch, float* D_projData, unsigned int projPitch, - const SDimensions& dims, const float* angles, - const float* TOffsets, float fOutputScale); + const SDimensions& dims, const SParProjection* angles, + float fOutputScale); _AstraExport bool BP_SART(float* D_volumeData, unsigned int volumePitch, float* D_projData, unsigned int projPitch, unsigned int angle, const SDimensions& dims, - const float* angles, const float* TOffsets, float fOutputScale); + const SParProjection* angles, float fOutputScale); } diff --git a/cuda/2d/par_fp.cu b/cuda/2d/par_fp.cu index bb8b909..3c010b3 100644 --- a/cuda/2d/par_fp.cu +++ b/cuda/2d/par_fp.cu @@ -30,6 +30,7 @@ $Id$ #include <cassert> #include <iostream> #include <list> +#include <cmath> #include "util.h" #include "arith.h" @@ -51,6 +52,7 @@ namespace astraCUDA { static const unsigned g_MaxAngles = 2560; __constant__ float gC_angle[g_MaxAngles]; __constant__ float gC_angle_offset[g_MaxAngles]; +__constant__ float gC_angle_detsize[g_MaxAngles]; // optimization parameters @@ -63,10 +65,6 @@ static const unsigned int g_blockSlices = 64; #define iPREC_FACTOR 16 -// if necessary, a buffer of zeroes of size g_MaxAngles -static float* g_pfZeroes = 0; - - static bool bindVolumeDataTexture(float* data, cudaArray*& dataArray, unsigned int pitch, unsigned int width, unsigned int height) { cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<float>(); @@ -89,9 +87,10 @@ static bool bindVolumeDataTexture(float* data, cudaArray*& dataArray, unsigned i return true; } + // projection for angles that are roughly horizontal -// theta between 45 and 135 degrees -__global__ void FPhorizontal(float* D_projData, unsigned int projPitch, unsigned int startSlice, unsigned int startAngle, unsigned int endAngle, int regionOffset, const SDimensions dims, float outputScale) +// (detector roughly vertical) +__global__ void FPhorizontal_simple(float* D_projData, unsigned int projPitch, unsigned int startSlice, unsigned int startAngle, unsigned int endAngle, const SDimensions dims, float outputScale) { const int relDet = threadIdx.x; const int relAngle = threadIdx.y; @@ -106,33 +105,7 @@ __global__ void FPhorizontal(float* D_projData, unsigned int projPitch, unsigned const float sin_theta = __sinf(theta); // compute start detector for this block/angle: - // (The same for all threadIdx.x) - // ------------------------------------- - - const int midSlice = startSlice + g_blockSlices / 2; - - // ASSUMPTION: fDetScale >= 1.0f - // problem: detector regions get skipped because slice blocks aren't large - // enough - const unsigned int g_blockSliceSize = g_detBlockSize; - - // project (midSlice,midRegion) on this thread's detector - - const float fBase = 0.5f*dims.iProjDets - 0.5f + - ( - (midSlice - 0.5f*dims.iVolWidth + 0.5f) * cos_theta - - (g_blockSliceSize/2 - 0.5f*dims.iVolHeight + 0.5f) * sin_theta - + gC_angle_offset[angle] - ) / dims.fDetScale; - int iBase = (int)floorf(fBase * fPREC_FACTOR); - int iInc = (int)floorf(g_blockSliceSize * sin_theta / dims.fDetScale * -fPREC_FACTOR); - - // ASSUMPTION: 16 > regionOffset / fDetScale - const int detRegion = (iBase + (blockIdx.y - regionOffset)*iInc + 16*iPREC_FACTOR*g_detBlockSize) / (iPREC_FACTOR * g_detBlockSize) - 16; - const int detPrevRegion = (iBase + (blockIdx.y - regionOffset - 1)*iInc + 16*iPREC_FACTOR*g_detBlockSize) / (iPREC_FACTOR * g_detBlockSize) - 16; - - if (blockIdx.y > 0 && detRegion == detPrevRegion) - return; + const int detRegion = blockIdx.y; const int detector = detRegion * g_detBlockSize + relDet; @@ -142,7 +115,7 @@ __global__ void FPhorizontal(float* D_projData, unsigned int projPitch, unsigned if (detector < 0 || detector >= dims.iProjDets) return; - const float fDetStep = -dims.fDetScale / sin_theta; + const float fDetStep = -gC_angle_detsize[angle] / sin_theta; float fSliceStep = cos_theta / sin_theta; float fDistCorr; if (sin_theta > 0.0f) @@ -192,9 +165,10 @@ __global__ void FPhorizontal(float* D_projData, unsigned int projPitch, unsigned D_projData[angle*projPitch+detector] += fVal * fDistCorr; } + // projection for angles that are roughly vertical -// theta between 0 and 45, or 135 and 180 degrees -__global__ void FPvertical(float* D_projData, unsigned int projPitch, unsigned int startSlice, unsigned int startAngle, unsigned int endAngle, int regionOffset, const SDimensions dims, float outputScale) +// (detector roughly horizontal) +__global__ void FPvertical_simple(float* D_projData, unsigned int projPitch, unsigned int startSlice, unsigned int startAngle, unsigned int endAngle, const SDimensions dims, float outputScale) { const int relDet = threadIdx.x; const int relAngle = threadIdx.y; @@ -209,33 +183,7 @@ __global__ void FPvertical(float* D_projData, unsigned int projPitch, unsigned i const float sin_theta = __sinf(theta); // compute start detector for this block/angle: - // (The same for all threadIdx.x) - // ------------------------------------- - - const int midSlice = startSlice + g_blockSlices / 2; - - // project (midSlice,midRegion) on this thread's detector - - // ASSUMPTION: fDetScale >= 1.0f - // problem: detector regions get skipped because slice blocks aren't large - // enough - const unsigned int g_blockSliceSize = g_detBlockSize; - - const float fBase = 0.5f*dims.iProjDets - 0.5f + - ( - (g_blockSliceSize/2 - 0.5f*dims.iVolWidth + 0.5f) * cos_theta - - (midSlice - 0.5f*dims.iVolHeight + 0.5f) * sin_theta - + gC_angle_offset[angle] - ) / dims.fDetScale; - int iBase = (int)floorf(fBase * fPREC_FACTOR); - int iInc = (int)floorf(g_blockSliceSize * cos_theta / dims.fDetScale * fPREC_FACTOR); - - // ASSUMPTION: 16 > regionOffset / fDetScale - const int detRegion = (iBase + (blockIdx.y - regionOffset)*iInc + 16*iPREC_FACTOR*g_detBlockSize) / (iPREC_FACTOR * g_detBlockSize) - 16; - const int detPrevRegion = (iBase + (blockIdx.y - regionOffset-1)*iInc + 16*iPREC_FACTOR*g_detBlockSize) / (iPREC_FACTOR * g_detBlockSize) - 16; - - if (blockIdx.y > 0 && detRegion == detPrevRegion) - return; + const int detRegion = blockIdx.y; const int detector = detRegion * g_detBlockSize + relDet; @@ -245,7 +193,7 @@ __global__ void FPvertical(float* D_projData, unsigned int projPitch, unsigned i if (detector < 0 || detector >= dims.iProjDets) return; - const float fDetStep = dims.fDetScale / cos_theta; + const float fDetStep = gC_angle_detsize[angle] / cos_theta; float fSliceStep = sin_theta / cos_theta; float fDistCorr; if (cos_theta < 0.0f) @@ -293,183 +241,67 @@ __global__ void FPvertical(float* D_projData, unsigned int projPitch, unsigned i D_projData[angle*projPitch+detector] += fVal * fDistCorr; } -// projection for angles that are roughly horizontal -// (detector roughly vertical) -__global__ void FPhorizontal_simple(float* D_projData, unsigned int projPitch, unsigned int startSlice, unsigned int startAngle, unsigned int endAngle, const SDimensions dims, float outputScale) -{ - const int relDet = threadIdx.x; - const int relAngle = threadIdx.y; - - int angle = startAngle + blockIdx.x * g_anglesPerBlock + relAngle; - - if (angle >= endAngle) - return; - - const float theta = gC_angle[angle]; - const float cos_theta = __cosf(theta); - const float sin_theta = __sinf(theta); - - // compute start detector for this block/angle: - const int detRegion = blockIdx.y; - - const int detector = detRegion * g_detBlockSize + relDet; - - // Now project the part of the ray to angle,detector through - // slices startSlice to startSlice+g_blockSlices-1 - - if (detector < 0 || detector >= dims.iProjDets) - return; - - const float fDetStep = -dims.fDetScale / sin_theta; - float fSliceStep = cos_theta / sin_theta; - float fDistCorr; - if (sin_theta > 0.0f) - fDistCorr = -fDetStep; - else - fDistCorr = fDetStep; - fDistCorr *= outputScale; - - float fVal = 0.0f; - // project detector on slice - float fP = (detector - 0.5f*dims.iProjDets + 0.5f - gC_angle_offset[angle]) * fDetStep + (startSlice - 0.5f*dims.iVolWidth + 0.5f) * fSliceStep + 0.5f*dims.iVolHeight - 0.5f + 0.5f; - float fS = startSlice + 0.5f; - int endSlice = startSlice + g_blockSlices; - if (endSlice > dims.iVolWidth) - endSlice = dims.iVolWidth; - if (dims.iRaysPerDet > 1) { - fP += (-0.5f*dims.iRaysPerDet + 0.5f)/dims.iRaysPerDet * fDetStep; - const float fSubDetStep = fDetStep / dims.iRaysPerDet; - fDistCorr /= dims.iRaysPerDet; - fSliceStep -= dims.iRaysPerDet * fSubDetStep; +// Coordinates of center of detector pixel number t: +// x = (t - 0.5*nDets + 0.5 - fOffset) * fSize * cos(fAngle) +// y = - (t - 0.5*nDets + 0.5 - fOffset) * fSize * sin(fAngle) - for (int slice = startSlice; slice < endSlice; ++slice) - { - for (int iSubT = 0; iSubT < dims.iRaysPerDet; ++iSubT) { - fVal += tex2D(gT_volumeTexture, fS, fP); - fP += fSubDetStep; - } - fP += fSliceStep; - fS += 1.0f; - } - - } else { - for (int slice = startSlice; slice < endSlice; ++slice) - { - fVal += tex2D(gT_volumeTexture, fS, fP); - fP += fSliceStep; - fS += 1.0f; - } - - - } - - D_projData[angle*projPitch+detector] += fVal * fDistCorr; -} - - -// projection for angles that are roughly vertical -// (detector roughly horizontal) -__global__ void FPvertical_simple(float* D_projData, unsigned int projPitch, unsigned int startSlice, unsigned int startAngle, unsigned int endAngle, const SDimensions dims, float outputScale) +static void convertAndUploadAngles(const SParProjection *projs, unsigned int nth, unsigned int ndets) { - const int relDet = threadIdx.x; - const int relAngle = threadIdx.y; - - int angle = startAngle + blockIdx.x * g_anglesPerBlock + relAngle; - - if (angle >= endAngle) - return; - - const float theta = gC_angle[angle]; - const float cos_theta = __cosf(theta); - const float sin_theta = __sinf(theta); - - // compute start detector for this block/angle: - const int detRegion = blockIdx.y; - - const int detector = detRegion * g_detBlockSize + relDet; + float *angles = new float[nth]; + float *offset = new float[nth]; + float *detsizes = new float[nth]; - // Now project the part of the ray to angle,detector through - // slices startSlice to startSlice+g_blockSlices-1 + for (int i = 0; i < nth; ++i) { - if (detector < 0 || detector >= dims.iProjDets) - return; +#warning TODO: Replace by getParParamaters - const float fDetStep = dims.fDetScale / cos_theta; - float fSliceStep = sin_theta / cos_theta; - float fDistCorr; - if (cos_theta < 0.0f) - fDistCorr = -fDetStep; - else - fDistCorr = fDetStep; - fDistCorr *= outputScale; + // Take part of DetU orthogonal to Ray + double ux = projs[i].fDetUX; + double uy = projs[i].fDetUY; - float fVal = 0.0f; - float fP = (detector - 0.5f*dims.iProjDets + 0.5f - gC_angle_offset[angle]) * fDetStep + (startSlice - 0.5f*dims.iVolHeight + 0.5f) * fSliceStep + 0.5f*dims.iVolWidth - 0.5f + 0.5f; - float fS = startSlice+0.5f; - int endSlice = startSlice + g_blockSlices; - if (endSlice > dims.iVolHeight) - endSlice = dims.iVolHeight; + double t = (ux * projs[i].fRayX + uy * projs[i].fRayY) / (projs[i].fRayX * projs[i].fRayX + projs[i].fRayY * projs[i].fRayY); - if (dims.iRaysPerDet > 1) { + ux -= t * projs[i].fRayX; + uy -= t * projs[i].fRayY; - fP += (-0.5f*dims.iRaysPerDet + 0.5f)/dims.iRaysPerDet * fDetStep; - const float fSubDetStep = fDetStep / dims.iRaysPerDet; - fDistCorr /= dims.iRaysPerDet; + double angle = atan2(uy, ux); - fSliceStep -= dims.iRaysPerDet * fSubDetStep; + angles[i] = angle; - for (int slice = startSlice; slice < endSlice; ++slice) - { - for (int iSubT = 0; iSubT < dims.iRaysPerDet; ++iSubT) { - fVal += tex2D(gT_volumeTexture, fP, fS); - fP += fSubDetStep; - } - fP += fSliceStep; - fS += 1.0f; - } + double norm2 = uy * uy + ux * ux; - } else { - - for (int slice = startSlice; slice < endSlice; ++slice) - { - fVal += tex2D(gT_volumeTexture, fP, fS); - fP += fSliceStep; - fS += 1.0f; - } + detsizes[i] = sqrt(norm2); + // CHECKME: SIGNS? + offset[i] = -0.5*ndets - (projs[i].fDetSY*uy + projs[i].fDetSX*ux) / norm2; } - D_projData[angle*projPitch+detector] += fVal * fDistCorr; + cudaMemcpyToSymbol(gC_angle, angles, nth*sizeof(float), 0, cudaMemcpyHostToDevice); + cudaMemcpyToSymbol(gC_angle_offset, offset, nth*sizeof(float), 0, cudaMemcpyHostToDevice); + cudaMemcpyToSymbol(gC_angle_detsize, detsizes, nth*sizeof(float), 0, cudaMemcpyHostToDevice); } bool FP_simple_internal(float* D_volumeData, unsigned int volumePitch, float* D_projData, unsigned int projPitch, - const SDimensions& dims, const float* angles, - const float* TOffsets, float outputScale) + const SDimensions& dims, const SParProjection* angles, + float outputScale) { - // TODO: load angles into constant memory in smaller blocks assert(dims.iProjAngles <= g_MaxAngles); + assert(angles); + cudaArray* D_dataArray; bindVolumeDataTexture(D_volumeData, D_dataArray, volumePitch, dims.iVolWidth, dims.iVolHeight); - cudaMemcpyToSymbol(gC_angle, angles, dims.iProjAngles*sizeof(float), 0, cudaMemcpyHostToDevice); - if (TOffsets) { - cudaMemcpyToSymbol(gC_angle_offset, TOffsets, dims.iProjAngles*sizeof(float), 0, cudaMemcpyHostToDevice); - } else { - if (!g_pfZeroes) { - g_pfZeroes = new float[g_MaxAngles]; - memset(g_pfZeroes, 0, g_MaxAngles * sizeof(float)); - } - cudaMemcpyToSymbol(gC_angle_offset, g_pfZeroes, dims.iProjAngles*sizeof(float), 0, cudaMemcpyHostToDevice); - } + convertAndUploadAngles(angles, dims.iProjAngles, dims.iProjDets); + dim3 dimBlock(g_detBlockSize, g_anglesPerBlock); // detector block size, angles @@ -492,7 +324,7 @@ bool FP_simple_internal(float* D_volumeData, unsigned int volumePitch, // Maybe we should detect corner cases and put them in the optimal // group of angles. if (a != dims.iProjAngles) - vertical = (fabsf(sinf(angles[a])) <= fabsf(cosf(angles[a]))); + vertical = (fabsf(angles[a].fRayX) <= fabsf(angles[a].fRayY)); if (a == dims.iProjAngles || vertical != blockVertical) { // block done @@ -536,8 +368,8 @@ bool FP_simple_internal(float* D_volumeData, unsigned int volumePitch, bool FP_simple(float* D_volumeData, unsigned int volumePitch, float* D_projData, unsigned int projPitch, - const SDimensions& dims, const float* angles, - const float* TOffsets, float outputScale) + const SDimensions& dims, const SParProjection* angles, + float outputScale) { for (unsigned int iAngle = 0; iAngle < dims.iProjAngles; iAngle += g_MaxAngles) { SDimensions subdims = dims; @@ -550,7 +382,7 @@ bool FP_simple(float* D_volumeData, unsigned int volumePitch, ret = FP_simple_internal(D_volumeData, volumePitch, D_projData + iAngle * projPitch, projPitch, subdims, angles + iAngle, - TOffsets ? TOffsets + iAngle : 0, outputScale); + outputScale); if (!ret) return false; } @@ -559,106 +391,12 @@ bool FP_simple(float* D_volumeData, unsigned int volumePitch, bool FP(float* D_volumeData, unsigned int volumePitch, float* D_projData, unsigned int projPitch, - const SDimensions& dims, const float* angles, - const float* TOffsets, float outputScale) + const SDimensions& dims, const SParProjection* angles, + float outputScale) { return FP_simple(D_volumeData, volumePitch, D_projData, projPitch, - dims, angles, TOffsets, outputScale); - - // TODO: Fix bug in this non-simple FP with large detscale and TOffsets -#if 0 - - // TODO: load angles into constant memory in smaller blocks - assert(dims.iProjAngles <= g_MaxAngles); - - // TODO: compute region size dynamically to resolve these two assumptions - // ASSUMPTION: 16 > regionOffset / fDetScale - const unsigned int g_blockSliceSize = g_detBlockSize; - assert(16 > (g_blockSlices / g_blockSliceSize) / dims.fDetScale); - // ASSUMPTION: fDetScale >= 1.0f - assert(dims.fDetScale > 0.9999f); - - cudaArray* D_dataArray; - bindVolumeDataTexture(D_volumeData, D_dataArray, volumePitch, dims.iVolWidth, dims.iVolHeight); - - cudaMemcpyToSymbol(gC_angle, angles, dims.iProjAngles*sizeof(float), 0, cudaMemcpyHostToDevice); - - if (TOffsets) { - cudaMemcpyToSymbol(gC_angle_offset, TOffsets, dims.iProjAngles*sizeof(float), 0, cudaMemcpyHostToDevice); - } else { - if (!g_pfZeroes) { - g_pfZeroes = new float[g_MaxAngles]; - memset(g_pfZeroes, 0, g_MaxAngles * sizeof(float)); - } - cudaMemcpyToSymbol(gC_angle_offset, g_pfZeroes, dims.iProjAngles*sizeof(float), 0, cudaMemcpyHostToDevice); - } - - int regionOffset = g_blockSlices / g_blockSliceSize; - - dim3 dimBlock(g_detBlockSize, g_anglesPerBlock); // region size, angles - - std::list<cudaStream_t> streams; - + dims, angles, outputScale); - // Run over all angles, grouping them into groups of the same - // orientation (roughly horizontal vs. roughly vertical). - // Start a stream of grids for each such group. - - // TODO: Check if it's worth it to store this info instead - // of recomputing it every FP. - - unsigned int blockStart = 0; - unsigned int blockEnd = 0; - bool blockVertical = false; - for (unsigned int a = 0; a <= dims.iProjAngles; ++a) { - bool vertical; - // TODO: Having <= instead of < below causes a 5% speedup. - // Maybe we should detect corner cases and put them in the optimal - // group of angles. - if (a != dims.iProjAngles) - vertical = (fabsf(sinf(angles[a])) <= fabsf(cosf(angles[a]))); - if (a == dims.iProjAngles || vertical != blockVertical) { - // block done - - blockEnd = a; - if (blockStart != blockEnd) { - unsigned int length = dims.iVolHeight; - if (blockVertical) - length = dims.iVolWidth; - dim3 dimGrid((blockEnd-blockStart+g_anglesPerBlock-1)/g_anglesPerBlock, - (length+g_blockSliceSize-1)/g_blockSliceSize+2*regionOffset); // angle blocks, regions - // TODO: check if we can't immediately - // destroy the stream after use - cudaStream_t stream; - cudaStreamCreate(&stream); - streams.push_back(stream); - //printf("angle block: %d to %d, %d\n", blockStart, blockEnd, blockVertical); - if (!blockVertical) - for (unsigned int i = 0; i < dims.iVolWidth; i += g_blockSlices) - FPhorizontal<<<dimGrid, dimBlock, 0, stream>>>(D_projData, projPitch, i, blockStart, blockEnd, regionOffset, dims, outputScale); - else - for (unsigned int i = 0; i < dims.iVolHeight; i += g_blockSlices) - FPvertical<<<dimGrid, dimBlock, 0, stream>>>(D_projData, projPitch, i, blockStart, blockEnd, regionOffset, dims, outputScale); - } - blockVertical = vertical; - blockStart = a; - } - } - - for (std::list<cudaStream_t>::iterator iter = streams.begin(); iter != streams.end(); ++iter) - cudaStreamDestroy(*iter); - - streams.clear(); - - cudaThreadSynchronize(); - - cudaTextForceKernelsCompletion(); - - cudaFreeArray(D_dataArray); - - - return true; -#endif } diff --git a/cuda/2d/par_fp.h b/cuda/2d/par_fp.h index 010d064..5fd593c 100644 --- a/cuda/2d/par_fp.h +++ b/cuda/2d/par_fp.h @@ -33,8 +33,8 @@ namespace astraCUDA { _AstraExport bool FP(float* D_volumeData, unsigned int volumePitch, float* D_projData, unsigned int projPitch, - const SDimensions& dims, const float* angles, - const float* TOffsets, float fOutputScale); + const SDimensions& dims, const SParProjection* angles, + float fOutputScale); } diff --git a/cuda/2d/sart.cu b/cuda/2d/sart.cu index c8608a3..a1085cd 100644 --- a/cuda/2d/sart.cu +++ b/cuda/2d/sart.cu @@ -254,10 +254,10 @@ bool SART::callFP_SART(float* D_volumeData, unsigned int volumePitch, { SDimensions d = dims; d.iProjAngles = 1; - if (angles) { + if (parProjs) { assert(!fanProjs); return FP(D_volumeData, volumePitch, D_projData, projPitch, - d, &angles[angle], TOffsets, outputScale); + d, &parProjs[angle], outputScale); } else { assert(fanProjs); return FanFP(D_volumeData, volumePitch, D_projData, projPitch, @@ -269,10 +269,10 @@ bool SART::callBP_SART(float* D_volumeData, unsigned int volumePitch, float* D_projData, unsigned int projPitch, unsigned int angle, float outputScale) { - if (angles) { + if (parProjs) { assert(!fanProjs); return BP_SART(D_volumeData, volumePitch, D_projData, projPitch, - angle, dims, angles, TOffsets, outputScale); + angle, dims, parProjs, outputScale); } else { assert(fanProjs); return FanBP_SART(D_volumeData, volumePitch, D_projData, projPitch, diff --git a/cuda/2d/sirt.cu b/cuda/2d/sirt.cu index 4baaccb..9dc4f64 100644 --- a/cuda/2d/sirt.cu +++ b/cuda/2d/sirt.cu @@ -152,7 +152,7 @@ bool SIRT::precomputeWeights() bool SIRT::doSlabCorrections() { // This function compensates for effectively infinitely large slab-like - // objects of finite thickness 1. + // objects of finite thickness 1 in a parallel beam geometry. // Each ray through the object has an intersection of length d/cos(alpha). // The length of the ray actually intersecting the reconstruction volume is @@ -170,6 +170,10 @@ bool SIRT::doSlabCorrections() if (useVolumeMask || useSinogramMask) return false; + // Parallel-beam only + if (!parProjs) + return false; + // multiply by line weights processSino<opDiv>(D_sinoData, D_lineWeight, projPitch, dims); @@ -181,7 +185,10 @@ bool SIRT::doSlabCorrections() float bound = cosf(1.3963f); float* t = (float*)D_sinoData; for (int i = 0; i < dims.iProjAngles; ++i) { - float f = fabs(cosf(angles[i])); + // TODO: Checkme + // TODO: Replace by getParParameters + double angle = atan2(parProjs[i].fRayX, -parProjs[i].fRayY); + float f = fabs(cosf(angle)); if (f < bound) f = bound; @@ -189,7 +196,6 @@ bool SIRT::doSlabCorrections() processSino<opMul>(t, f, sinoPitch, subdims); t += sinoPitch; } - return true; } @@ -299,40 +305,6 @@ float SIRT::computeDiffNorm() } -bool doSIRT(float* D_volumeData, unsigned int volumePitch, - float* D_sinoData, unsigned int sinoPitch, - float* D_maskData, unsigned int maskPitch, - const SDimensions& dims, const float* angles, - const float* TOffsets, unsigned int iterations) -{ - SIRT sirt; - bool ok = true; - - ok &= sirt.setGeometry(dims, angles); - if (D_maskData) - ok &= sirt.enableVolumeMask(); - if (TOffsets) - ok &= sirt.setTOffsets(TOffsets); - - if (!ok) - return false; - - ok = sirt.init(); - if (!ok) - return false; - - if (D_maskData) - ok &= sirt.setVolumeMask(D_maskData, maskPitch); - - ok &= sirt.setBuffers(D_volumeData, volumePitch, D_sinoData, sinoPitch); - if (!ok) - return false; - - ok = sirt.iterate(iterations); - - return ok; -} - } #ifdef STANDALONE diff --git a/cuda/3d/fdk.cu b/cuda/3d/fdk.cu index 0e13be1..e7418a2 100644 --- a/cuda/3d/fdk.cu +++ b/cuda/3d/fdk.cu @@ -353,7 +353,7 @@ bool FDK_Filter(cudaPitchedPtr D_projData, // The filtering is a regular ramp filter per detector line. int iPaddedDetCount = calcNextPowerOfTwo(2 * dims.iProjU); - int iHalfFFTSize = calcFFTFourSize(iPaddedDetCount); + int iHalfFFTSize = astraCUDA::calcFFTFourierSize(iPaddedDetCount); int projPitch = D_projData.pitch/sizeof(float); @@ -361,22 +361,22 @@ bool FDK_Filter(cudaPitchedPtr D_projData, float* D_sinoData = (float*)D_projData.ptr; cufftComplex * D_sinoFFT = NULL; - allocateComplexOnDevice(dims.iProjAngles, iHalfFFTSize, &D_sinoFFT); + astraCUDA::allocateComplexOnDevice(dims.iProjAngles, iHalfFFTSize, &D_sinoFFT); bool ok = true; for (int v = 0; v < dims.iProjV; ++v) { - ok = runCudaFFT(dims.iProjAngles, D_sinoData, projPitch, + ok = astraCUDA::runCudaFFT(dims.iProjAngles, D_sinoData, projPitch, dims.iProjU, iPaddedDetCount, iHalfFFTSize, D_sinoFFT); if (!ok) break; - applyFilter(dims.iProjAngles, iHalfFFTSize, D_sinoFFT, D_filter); + astraCUDA::applyFilter(dims.iProjAngles, iHalfFFTSize, D_sinoFFT, D_filter); - ok = runCudaIFFT(dims.iProjAngles, D_sinoFFT, D_sinoData, projPitch, + ok = astraCUDA::runCudaIFFT(dims.iProjAngles, D_sinoFFT, D_sinoData, projPitch, dims.iProjU, iPaddedDetCount, iHalfFFTSize); if (!ok) break; @@ -384,7 +384,7 @@ bool FDK_Filter(cudaPitchedPtr D_projData, D_sinoData += (dims.iProjAngles * projPitch); } - freeComplexOnDevice(D_sinoFFT); + astraCUDA::freeComplexOnDevice(D_sinoFFT); return ok; } @@ -401,7 +401,7 @@ bool FDK(cudaPitchedPtr D_volumeData, // TODO: Check errors cufftComplex * D_filter; int iPaddedDetCount = calcNextPowerOfTwo(2 * dims.iProjU); - int iHalfFFTSize = calcFFTFourSize(iPaddedDetCount); + int iHalfFFTSize = astraCUDA::calcFFTFourierSize(iPaddedDetCount); ok = FDK_PreWeight(D_projData, fSrcOrigin, fDetOrigin, fSrcZ, fDetZ, fDetUSize, fDetVSize, @@ -412,11 +412,11 @@ bool FDK(cudaPitchedPtr D_volumeData, cufftComplex *pHostFilter = new cufftComplex[dims.iProjAngles * iHalfFFTSize]; memset(pHostFilter, 0, sizeof(cufftComplex) * dims.iProjAngles * iHalfFFTSize); - genFilter(FILTER_RAMLAK, 1.0f, dims.iProjAngles, pHostFilter, iPaddedDetCount, iHalfFFTSize); + astraCUDA::genFilter(astra::FILTER_RAMLAK, 1.0f, dims.iProjAngles, pHostFilter, iPaddedDetCount, iHalfFFTSize); - allocateComplexOnDevice(dims.iProjAngles, iHalfFFTSize, &D_filter); - uploadComplexArrayToDevice(dims.iProjAngles, iHalfFFTSize, pHostFilter, D_filter); + astraCUDA::allocateComplexOnDevice(dims.iProjAngles, iHalfFFTSize, &D_filter); + astraCUDA::uploadComplexArrayToDevice(dims.iProjAngles, iHalfFFTSize, pHostFilter, D_filter); delete [] pHostFilter; @@ -430,7 +430,7 @@ bool FDK(cudaPitchedPtr D_volumeData, bShortScan, dims, angles); // Clean up filter - freeComplexOnDevice(D_filter); + astraCUDA::freeComplexOnDevice(D_filter); if (!ok) diff --git a/include/astra/CudaFilteredBackProjectionAlgorithm.h b/include/astra/CudaFilteredBackProjectionAlgorithm.h index cf1f19f..180e001 100644 --- a/include/astra/CudaFilteredBackProjectionAlgorithm.h +++ b/include/astra/CudaFilteredBackProjectionAlgorithm.h @@ -26,28 +26,24 @@ along with the ASTRA Toolbox. If not, see <http://www.gnu.org/licenses/>. $Id$ */ -#ifndef CUDAFILTEREDBACKPROJECTIONALGORITHM2_H -#define CUDAFILTEREDBACKPROJECTIONALGORITHM2_H +#ifndef CUDAFILTEREDBACKPROJECTIONALGORITHM_H +#define CUDAFILTEREDBACKPROJECTIONALGORITHM_H #include <astra/Float32ProjectionData2D.h> #include <astra/Float32VolumeData2D.h> -#include <astra/ReconstructionAlgorithm2D.h> +#include <astra/CudaReconstructionAlgorithm2D.h> #include "../../cuda/2d/astra.h" namespace astra { -class _AstraExport CCudaFilteredBackProjectionAlgorithm : public CReconstructionAlgorithm2D +class _AstraExport CCudaFilteredBackProjectionAlgorithm : public CCudaReconstructionAlgorithm2D { public: static std::string type; private: - CFloat32ProjectionData2D * m_pSinogram; - CFloat32VolumeData2D * m_pReconstruction; - int m_iGPUIndex; - int m_iPixelSuperSampling; E_FBPFILTER m_eFilter; float * m_pfFilter; int m_iFilterWidth; // number of elements per projection direction in filter @@ -64,15 +60,6 @@ public: virtual bool initialize(const Config& _cfg); bool initialize(CFloat32ProjectionData2D * _pSinogram, CFloat32VolumeData2D * _pReconstruction, E_FBPFILTER _eFilter, const float * _pfFilter = NULL, int _iFilterWidth = 0, int _iGPUIndex = -1, float _fFilterParameter = -1.0f); - virtual void run(int _iNrIterations = 0); - - static int calcIdealRealFilterWidth(int _iDetectorCount); - static int calcIdealFourierFilterWidth(int _iDetectorCount); - - //debug - static void testGenFilter(E_FBPFILTER _eFilter, float _fD, int _iProjectionCount, cufftComplex * _pFilter, int _iFFTRealDetectorCount, int _iFFTFourierDetectorCount); - static int getGPUCount(); - /** Get a description of the class. * * @return description string @@ -82,12 +69,7 @@ public: protected: bool check(); - AstraFBP* m_pFBP; - - bool m_bAstraFBPInit; - - void initializeFromProjector(); - virtual bool requiresProjector() const { return false; } + virtual void initCUDAAlgorithm(); }; // inline functions @@ -95,4 +77,4 @@ inline std::string CCudaFilteredBackProjectionAlgorithm::description() const { r } -#endif /* CUDAFILTEREDBACKPROJECTIONALGORITHM2_H */ +#endif /* CUDAFILTEREDBACKPROJECTIONALGORITHM_H */ diff --git a/include/astra/GeometryUtil2D.h b/include/astra/GeometryUtil2D.h index 680cecd..2f03062 100644 --- a/include/astra/GeometryUtil2D.h +++ b/include/astra/GeometryUtil2D.h @@ -32,27 +32,72 @@ $Id$ namespace astra { struct SParProjection { - // the ray direction - float fRayX, fRayY; + // the ray direction + float fRayX, fRayY; + + // the start of the (linear) detector + float fDetSX, fDetSY; + + // the length of a single detector pixel + float fDetUX, fDetUY; + + + void translate(double dx, double dy) { + fDetSX += dx; + fDetSY += dy; + } + void scale(double factor) { + fRayX *= factor; + fRayY *= factor; + fDetSX *= factor; + fDetSY *= factor; + fDetUX *= factor; + fDetUY *= factor; + } +}; - // the start of the (linear) detector - float fDetSX, fDetSY; - // the length of a single detector pixel - float fDetUX, fDetUY; +struct SFanProjection { + // the source + float fSrcX, fSrcY; + + // the start of the (linear) detector + float fDetSX, fDetSY; + + // the length of a single detector pixel + float fDetUX, fDetUY; + + void translate(double dx, double dy) { + fSrcX += dx; + fSrcY += dy; + fDetSX += dx; + fDetSY += dy; + } + void scale(double factor) { + fSrcX *= factor; + fSrcY *= factor; + fDetSX *= factor; + fDetSY *= factor; + fDetUX *= factor; + fDetUY *= factor; + } }; -struct SFanProjection { - // the source - float fSrcX, fSrcY; - // the start of the (linear) detector - float fDetSX, fDetSY; +SParProjection* genParProjections(unsigned int iProjAngles, + unsigned int iProjDets, + double fDetSize, + const float *pfAngles, + const float *pfExtraOffsets); - // the length of a single detector pixel - float fDetUX, fDetUY; -}; +SFanProjection* genFanProjections(unsigned int iProjAngles, + unsigned int iProjDets, + double fOriginSource, double fOriginDetector, + double fDetSize, + const float *pfAngles); + +bool getFanParameters(const SFanProjection &proj, unsigned int iProjDets, float &fAngle, float &fOriginSource, float &fOriginDetector, float &fDetSize, float &fOffset); } diff --git a/include/astra/ParallelProjectionGeometry2D.h b/include/astra/ParallelProjectionGeometry2D.h index 36b4b6f..ca67ba7 100644 --- a/include/astra/ParallelProjectionGeometry2D.h +++ b/include/astra/ParallelProjectionGeometry2D.h @@ -84,8 +84,7 @@ public: CParallelProjectionGeometry2D(int _iProjectionAngleCount, int _iDetectorCount, float32 _fDetectorWidth, - const float32* _pfProjectionAngles, - const float32* _pfExtraDetectorOffsets = 0); + const float32* _pfProjectionAngles); /** Copy constructor. */ @@ -117,8 +116,7 @@ public: bool initialize(int _iProjectionAngleCount, int _iDetectorCount, float32 _fDetectorWidth, - const float32* _pfProjectionAngles, - const float32* _pfExtraDetectorOffsets = 0); + const float32* _pfProjectionAngles); /** Create a hard copy. */ diff --git a/include/astra/ProjectionGeometry2D.h b/include/astra/ProjectionGeometry2D.h index d26e6a7..b656d97 100644 --- a/include/astra/ProjectionGeometry2D.h +++ b/include/astra/ProjectionGeometry2D.h @@ -90,8 +90,7 @@ protected: CProjectionGeometry2D(int _iProjectionAngleCount, int _iDetectorCount, float32 _fDetectorWidth, - const float32* _pfProjectionAngles, - const float32* _pfExtraDetectorOffsets = 0); + const float32* _pfProjectionAngles); /** Copy constructor. */ @@ -117,8 +116,7 @@ protected: bool _initialize(int _iProjectionAngleCount, int _iDetectorCount, float32 _fDetectorWidth, - const float32* _pfProjectionAngles, - const float32* _pfExtraDetectorOffsets = 0); + const float32* _pfProjectionAngles); public: diff --git a/samples/matlab/s014_FBP.m b/samples/matlab/s014_FBP.m index b73149c..faf91fb 100644 --- a/samples/matlab/s014_FBP.m +++ b/samples/matlab/s014_FBP.m @@ -9,7 +9,7 @@ % ----------------------------------------------------------------------- vol_geom = astra_create_vol_geom(256, 256); -proj_geom = astra_create_proj_geom('parallel', 1.0, 384, linspace2(0,pi,180)); +proj_geom = astra_create_proj_geom('fanflat', 1.0, 384, linspace2(0,2*pi,1800), 500, 0); % As before, create a sinogram from a phantom P = phantom(256); diff --git a/src/CudaFilteredBackProjectionAlgorithm.cpp b/src/CudaFilteredBackProjectionAlgorithm.cpp index aa97eec..9d23253 100644 --- a/src/CudaFilteredBackProjectionAlgorithm.cpp +++ b/src/CudaFilteredBackProjectionAlgorithm.cpp @@ -33,6 +33,7 @@ $Id$ #include "astra/AstraObjectManager.h" #include "astra/CudaProjector2D.h" #include "../cuda/2d/astra.h" +#include "../cuda/2d/fbp.h" #include "astra/Logging.h" @@ -44,8 +45,7 @@ string CCudaFilteredBackProjectionAlgorithm::type = "FBP_CUDA"; CCudaFilteredBackProjectionAlgorithm::CCudaFilteredBackProjectionAlgorithm() { m_bIsInitialized = false; - CReconstructionAlgorithm2D::_clear(); - m_pFBP = 0; + CCudaReconstructionAlgorithm2D::_clear(); m_pfFilter = NULL; m_fFilterParameter = -1.0f; m_fFilterD = 1.0f; @@ -53,35 +53,8 @@ CCudaFilteredBackProjectionAlgorithm::CCudaFilteredBackProjectionAlgorithm() CCudaFilteredBackProjectionAlgorithm::~CCudaFilteredBackProjectionAlgorithm() { - if(m_pfFilter != NULL) - { - delete [] m_pfFilter; - m_pfFilter = NULL; - } - - if(m_pFBP != NULL) - { - delete m_pFBP; - m_pFBP = NULL; - } -} - -void CCudaFilteredBackProjectionAlgorithm::initializeFromProjector() -{ - m_iPixelSuperSampling = 1; - m_iGPUIndex = -1; - - // Projector - CCudaProjector2D* pCudaProjector = dynamic_cast<CCudaProjector2D*>(m_pProjector); - if (!pCudaProjector) { - if (m_pProjector) { - ASTRA_WARN("non-CUDA Projector2D passed to FBP_CUDA"); - } - } else { - m_iPixelSuperSampling = pCudaProjector->getVoxelSuperSampling(); - m_iGPUIndex = pCudaProjector->getGPUIndex(); - } - + delete[] m_pfFilter; + m_pfFilter = NULL; } bool CCudaFilteredBackProjectionAlgorithm::initialize(const Config& _cfg) @@ -92,54 +65,28 @@ bool CCudaFilteredBackProjectionAlgorithm::initialize(const Config& _cfg) // if already initialized, clear first if (m_bIsInitialized) { +#warning FIXME Necessary? clear(); } - // Projector - XMLNode node = _cfg.self.getSingleNode("ProjectorId"); - CCudaProjector2D* pCudaProjector = 0; - if (node) { - int id = node.getContentInt(); - CProjector2D *projector = CProjector2DManager::getSingleton().get(id); - pCudaProjector = dynamic_cast<CCudaProjector2D*>(projector); - if (!pCudaProjector) { - ASTRA_WARN("non-CUDA Projector2D passed"); - } - } - CC.markNodeParsed("ProjectorId"); - - - // sinogram data - node = _cfg.self.getSingleNode("ProjectionDataId"); - ASTRA_CONFIG_CHECK(node, "CudaFBP", "No ProjectionDataId tag specified."); - int id = node.getContentInt(); - m_pSinogram = dynamic_cast<CFloat32ProjectionData2D*>(CData2DManager::getSingleton().get(id)); - CC.markNodeParsed("ProjectionDataId"); + m_bIsInitialized = CCudaReconstructionAlgorithm2D::initialize(_cfg); + if (!m_bIsInitialized) + return false; - // reconstruction data - node = _cfg.self.getSingleNode("ReconstructionDataId"); - ASTRA_CONFIG_CHECK(node, "CudaFBP", "No ReconstructionDataId tag specified."); - id = node.getContentInt(); - m_pReconstruction = dynamic_cast<CFloat32VolumeData2D*>(CData2DManager::getSingleton().get(id)); - CC.markNodeParsed("ReconstructionDataId"); // filter type - node = _cfg.self.getSingleNode("FilterType"); + XMLNode node = _cfg.self.getSingleNode("FilterType"); if (node) - { m_eFilter = _convertStringToFilter(node.getContent().c_str()); - } else - { m_eFilter = FILTER_RAMLAK; - } CC.markNodeParsed("FilterType"); // filter node = _cfg.self.getSingleNode("FilterSinogramId"); if (node) { - id = node.getContentInt(); + int id = node.getContentInt(); const CFloat32ProjectionData2D * pFilterData = dynamic_cast<CFloat32ProjectionData2D*>(CData2DManager::getSingleton().get(id)); m_iFilterWidth = pFilterData->getGeometry()->getDetectorCount(); int iFilterProjectionCount = pFilterData->getGeometry()->getProjectionAngleCount(); @@ -188,20 +135,9 @@ bool CCudaFilteredBackProjectionAlgorithm::initialize(const Config& _cfg) initializeFromProjector(); - // Deprecated options - m_iPixelSuperSampling = (int)_cfg.self.getOptionNumerical("PixelSuperSampling", m_iPixelSuperSampling); - CC.markOptionParsed("PixelSuperSampling"); - // GPU number - m_iGPUIndex = (int)_cfg.self.getOptionNumerical("GPUindex", -1); - m_iGPUIndex = (int)_cfg.self.getOptionNumerical("GPUIndex", m_iGPUIndex); - CC.markOptionParsed("GPUIndex"); - if (!_cfg.self.hasOption("GPUIndex")) - CC.markOptionParsed("GPUindex"); - - - m_pFBP = new AstraFBP; - m_bAstraFBPInit = false; + m_pAlgo = new astraCUDA::FBP(); + m_bAlgoInit = false; return check(); } @@ -226,9 +162,8 @@ bool CCudaFilteredBackProjectionAlgorithm::initialize(CFloat32ProjectionData2D * // success m_bIsInitialized = true; - m_pFBP = new AstraFBP; - - m_bAstraFBPInit = false; + m_pAlgo = new astraCUDA::FBP(); + m_bAlgoInit = false; if(_pfFilter != NULL) { @@ -256,107 +191,26 @@ bool CCudaFilteredBackProjectionAlgorithm::initialize(CFloat32ProjectionData2D * return check(); } -void CCudaFilteredBackProjectionAlgorithm::run(int _iNrIterations /* = 0 */) -{ - // check initialized - ASTRA_ASSERT(m_bIsInitialized); - - if (!m_bAstraFBPInit) { - - const CVolumeGeometry2D& volgeom = *m_pReconstruction->getGeometry(); - const CParallelProjectionGeometry2D* parprojgeom = dynamic_cast<CParallelProjectionGeometry2D*>(m_pSinogram->getGeometry()); - const CFanFlatProjectionGeometry2D* fanprojgeom = dynamic_cast<CFanFlatProjectionGeometry2D*>(m_pSinogram->getGeometry()); - - bool ok = true; - - // TODO: non-square pixels? - ok &= m_pFBP->setReconstructionGeometry(volgeom.getGridColCount(), - volgeom.getGridRowCount(), - volgeom.getPixelLengthX()); - int iDetectorCount; - if (parprojgeom) { - - float *offsets, *angles, detSize, outputScale; - - ok = convertAstraGeometry(&volgeom, parprojgeom, offsets, angles, detSize, outputScale); - - - ok &= m_pFBP->setProjectionGeometry(parprojgeom->getProjectionAngleCount(), - parprojgeom->getDetectorCount(), - angles, - parprojgeom->getDetectorWidth()); - iDetectorCount = parprojgeom->getDetectorCount(); - // TODO: Are detSize and outputScale handled correctly? - - if (offsets) - ok &= m_pFBP->setTOffsets(offsets); - ASTRA_ASSERT(ok); - - delete[] offsets; - delete[] angles; - - } else if (fanprojgeom) { - - astraCUDA::SFanProjection* projs; - float outputScale; - - // FIXME: Implement this, and clean up the interface to AstraFBP. - if (abs(volgeom.getWindowMinX() + volgeom.getWindowMaxX()) > 0.00001 * volgeom.getPixelLengthX()) { - // Off-center volume geometry isn't supported yet - ASTRA_ASSERT(false); - } - if (abs(volgeom.getWindowMinY() + volgeom.getWindowMaxY()) > 0.00001 * volgeom.getPixelLengthY()) { - // Off-center volume geometry isn't supported yet - ASTRA_ASSERT(false); - } - - ok = convertAstraGeometry(&volgeom, fanprojgeom, projs, outputScale); - - // CHECKME: outputScale? - - ok &= m_pFBP->setFanGeometry(fanprojgeom->getProjectionAngleCount(), - fanprojgeom->getDetectorCount(), - projs, - fanprojgeom->getProjectionAngles(), - fanprojgeom->getOriginSourceDistance(), - fanprojgeom->getOriginDetectorDistance(), - - fanprojgeom->getDetectorWidth(), - m_bShortScan); - - iDetectorCount = fanprojgeom->getDetectorCount(); - - delete[] projs; - } else { - assert(false); - } - - ok &= m_pFBP->setPixelSuperSampling(m_iPixelSuperSampling); - - ASTRA_ASSERT(ok); +void CCudaFilteredBackProjectionAlgorithm::initCUDAAlgorithm() +{ + CCudaReconstructionAlgorithm2D::initCUDAAlgorithm(); - ok &= m_pFBP->init(m_iGPUIndex); - ASTRA_ASSERT(ok); + astraCUDA::FBP* pFBP = dynamic_cast<astraCUDA::FBP*>(m_pAlgo); - ok &= m_pFBP->setSinogram(m_pSinogram->getDataConst(), iDetectorCount); + bool ok = pFBP->setFilter(m_eFilter, m_pfFilter, m_iFilterWidth, m_fFilterD, m_fFilterParameter); + if (!ok) { + ASTRA_ERROR("CCudaFilteredBackProjectionAlgorithm: Failed to set filter"); ASTRA_ASSERT(ok); - - ok &= m_pFBP->setFilter(m_eFilter, m_pfFilter, m_iFilterWidth, m_fFilterD, m_fFilterParameter); - ASTRA_ASSERT(ok); - - m_bAstraFBPInit = true; } - bool ok = m_pFBP->run(); - ASTRA_ASSERT(ok); - - const CVolumeGeometry2D& volgeom = *m_pReconstruction->getGeometry(); - ok &= m_pFBP->getReconstruction(m_pReconstruction->getData(), volgeom.getGridColCount()); - - ASTRA_ASSERT(ok); + ok &= pFBP->setShortScan(m_bShortScan); + if (!ok) { + ASTRA_ERROR("CCudaFilteredBackProjectionAlgorithm: Failed to set short-scan mode"); + } } + bool CCudaFilteredBackProjectionAlgorithm::check() { // check pointers @@ -395,16 +249,6 @@ static int calcNextPowerOfTwo(int _iValue) return iOutput; } -int CCudaFilteredBackProjectionAlgorithm::calcIdealRealFilterWidth(int _iDetectorCount) -{ - return calcNextPowerOfTwo(_iDetectorCount); -} - -int CCudaFilteredBackProjectionAlgorithm::calcIdealFourierFilterWidth(int _iDetectorCount) -{ - return (calcNextPowerOfTwo(_iDetectorCount) / 2 + 1); -} - static bool stringCompareLowerCase(const char * _stringA, const char * _stringB) { int iCmpReturn = 0; @@ -518,12 +362,3 @@ E_FBPFILTER CCudaFilteredBackProjectionAlgorithm::_convertStringToFilter(const c return output; } -void CCudaFilteredBackProjectionAlgorithm::testGenFilter(E_FBPFILTER _eFilter, float _fD, int _iProjectionCount, cufftComplex * _pFilter, int _iFFTRealDetectorCount, int _iFFTFourierDetectorCount) -{ - genFilter(_eFilter, _fD, _iProjectionCount, _pFilter, _iFFTRealDetectorCount, _iFFTFourierDetectorCount); -} - -int CCudaFilteredBackProjectionAlgorithm::getGPUCount() -{ - return 0; -} diff --git a/src/CudaForwardProjectionAlgorithm.cpp b/src/CudaForwardProjectionAlgorithm.cpp index 80f2e02..bdd7dd0 100644 --- a/src/CudaForwardProjectionAlgorithm.cpp +++ b/src/CudaForwardProjectionAlgorithm.cpp @@ -221,56 +221,48 @@ void CCudaForwardProjectionAlgorithm::run(int) // check initialized assert(m_bIsInitialized); - CVolumeGeometry2D* pVolGeom = m_pVolume->getGeometry(); - const CParallelProjectionGeometry2D* parProjGeom = dynamic_cast<CParallelProjectionGeometry2D*>(m_pSinogram->getGeometry()); - const CFanFlatProjectionGeometry2D* fanProjGeom = dynamic_cast<CFanFlatProjectionGeometry2D*>(m_pSinogram->getGeometry()); - const CFanFlatVecProjectionGeometry2D* fanVecProjGeom = dynamic_cast<CFanFlatVecProjectionGeometry2D*>(m_pSinogram->getGeometry()); + bool ok; - bool ok = false; - if (parProjGeom) { + const CVolumeGeometry2D* pVolGeom = m_pVolume->getGeometry(); + const CProjectionGeometry2D* pProjGeom = m_pSinogram->getGeometry(); + astraCUDA::SDimensions dims; - float *offsets, *angles, detSize, outputScale; - ok = convertAstraGeometry(pVolGeom, parProjGeom, offsets, angles, detSize, outputScale); + ok = convertAstraGeometry_dims(pVolGeom, pProjGeom, dims); - ASTRA_ASSERT(ok); // FIXME + if (!ok) + return; - // FIXME: Output scaling + astraCUDA::SParProjection* pParProjs = 0; + astraCUDA::SFanProjection* pFanProjs = 0; + float fOutputScale = 1.0f; - ok = astraCudaFP(m_pVolume->getDataConst(), m_pSinogram->getData(), - pVolGeom->getGridColCount(), pVolGeom->getGridRowCount(), - parProjGeom->getProjectionAngleCount(), - parProjGeom->getDetectorCount(), - angles, offsets, detSize, - m_iDetectorSuperSampling, 1.0f * outputScale, m_iGPUIndex); - - delete[] offsets; - delete[] angles; + ok = convertAstraGeometry(pVolGeom, pProjGeom, pParProjs, pFanProjs, fOutputScale); + if (!ok) + return; - } else if (fanProjGeom || fanVecProjGeom) { + if (pParProjs) { + assert(!pFanProjs); - astraCUDA::SFanProjection* projs; - float outputScale; + ok = astraCudaFP(m_pVolume->getDataConst(), m_pSinogram->getData(), + pVolGeom->getGridColCount(), pVolGeom->getGridRowCount(), + pProjGeom->getProjectionAngleCount(), + pProjGeom->getDetectorCount(), + pParProjs, + m_iDetectorSuperSampling, 1.0f * fOutputScale, m_iGPUIndex); - if (fanProjGeom) { - ok = convertAstraGeometry(pVolGeom, fanProjGeom, projs, outputScale); - } else { - ok = convertAstraGeometry(pVolGeom, fanVecProjGeom, projs, outputScale); - } + delete[] pParProjs; - ASTRA_ASSERT(ok); + } else { + assert(pFanProjs); ok = astraCudaFanFP(m_pVolume->getDataConst(), m_pSinogram->getData(), pVolGeom->getGridColCount(), pVolGeom->getGridRowCount(), - m_pSinogram->getGeometry()->getProjectionAngleCount(), - m_pSinogram->getGeometry()->getDetectorCount(), - projs, - m_iDetectorSuperSampling, outputScale, m_iGPUIndex); - - delete[] projs; - - } else { + pProjGeom->getProjectionAngleCount(), + pProjGeom->getDetectorCount(), + pFanProjs, + m_iDetectorSuperSampling, fOutputScale, m_iGPUIndex); - ASTRA_ASSERT(false); + delete[] pFanProjs; } diff --git a/src/CudaReconstructionAlgorithm2D.cpp b/src/CudaReconstructionAlgorithm2D.cpp index 2798434..326ef1e 100644 --- a/src/CudaReconstructionAlgorithm2D.cpp +++ b/src/CudaReconstructionAlgorithm2D.cpp @@ -250,69 +250,14 @@ bool CCudaReconstructionAlgorithm2D::setupGeometry() ok = m_pAlgo->setGPUIndex(m_iGPUIndex); if (!ok) return false; - astraCUDA::SDimensions dims; - const CVolumeGeometry2D& volgeom = *m_pReconstruction->getGeometry(); + const CProjectionGeometry2D& projgeom = *m_pSinogram->getGeometry(); - // TODO: non-square pixels? - dims.iVolWidth = volgeom.getGridColCount(); - dims.iVolHeight = volgeom.getGridRowCount(); - float fPixelSize = volgeom.getPixelLengthX(); - - dims.iRaysPerDet = m_iDetectorSuperSampling; - dims.iRaysPerPixelDim = m_iPixelSuperSampling; - - - const CParallelProjectionGeometry2D* parProjGeom = dynamic_cast<CParallelProjectionGeometry2D*>(m_pSinogram->getGeometry()); - const CFanFlatProjectionGeometry2D* fanProjGeom = dynamic_cast<CFanFlatProjectionGeometry2D*>(m_pSinogram->getGeometry()); - const CFanFlatVecProjectionGeometry2D* fanVecProjGeom = dynamic_cast<CFanFlatVecProjectionGeometry2D*>(m_pSinogram->getGeometry()); - - if (parProjGeom) { - - float *offsets, *angles, detSize, outputScale; - - ok = convertAstraGeometry(&volgeom, parProjGeom, offsets, angles, detSize, outputScale); - - dims.iProjAngles = parProjGeom->getProjectionAngleCount(); - dims.iProjDets = parProjGeom->getDetectorCount(); - dims.fDetScale = parProjGeom->getDetectorWidth() / fPixelSize; - - ok = m_pAlgo->setGeometry(dims, parProjGeom->getProjectionAngles()); - ok &= m_pAlgo->setTOffsets(offsets); - - // CHECKME: outputScale? detSize? - - delete[] offsets; - delete[] angles; - - } else if (fanProjGeom || fanVecProjGeom) { - - astraCUDA::SFanProjection* projs; - float outputScale; - - if (fanProjGeom) { - ok = convertAstraGeometry(&volgeom, fanProjGeom, projs, outputScale); - } else { - ok = convertAstraGeometry(&volgeom, fanVecProjGeom, projs, outputScale); - } - - dims.iProjAngles = m_pSinogram->getGeometry()->getProjectionAngleCount(); - dims.iProjDets = m_pSinogram->getGeometry()->getDetectorCount(); - dims.fDetScale = m_pSinogram->getGeometry()->getDetectorWidth() / fPixelSize; - - ok = m_pAlgo->setFanGeometry(dims, projs); - - // CHECKME: outputScale? - - delete[] projs; - - } else { - - ASTRA_ASSERT(false); - - } + ok = m_pAlgo->setGeometry(&volgeom, &projgeom); if (!ok) return false; + ok = m_pAlgo->setSuperSampling(m_iDetectorSuperSampling, m_iPixelSuperSampling); + if (!ok) return false; if (m_bUseReconstructionMask) ok &= m_pAlgo->enableVolumeMask(); diff --git a/src/FanFlatProjectionGeometry2D.cpp b/src/FanFlatProjectionGeometry2D.cpp index 3aab582..4eec9c4 100644 --- a/src/FanFlatProjectionGeometry2D.cpp +++ b/src/FanFlatProjectionGeometry2D.cpp @@ -28,6 +28,8 @@ $Id$ #include "astra/FanFlatProjectionGeometry2D.h" +#include "astra/GeometryUtil2D.h" + #include <cstring> #include <sstream> @@ -218,16 +220,12 @@ Config* CFanFlatProjectionGeometry2D::getConfiguration() const //---------------------------------------------------------------------------------------- CFanFlatVecProjectionGeometry2D* CFanFlatProjectionGeometry2D::toVectorGeometry() { - SFanProjection* vectors = new SFanProjection[m_iProjectionAngleCount]; - for (int i = 0; i < m_iProjectionAngleCount; ++i) - { - vectors[i].fSrcX = sinf(m_pfProjectionAngles[i]) * m_fOriginSourceDistance; - vectors[i].fSrcY = -cosf(m_pfProjectionAngles[i]) * m_fOriginSourceDistance; - vectors[i].fDetUX = cosf(m_pfProjectionAngles[i]) * m_fDetectorWidth; - vectors[i].fDetUY = sinf(m_pfProjectionAngles[i]) * m_fDetectorWidth; - vectors[i].fDetSX = -sinf(m_pfProjectionAngles[i]) * m_fOriginDetectorDistance - 0.5f * m_iDetectorCount * vectors[i].fDetUX; - vectors[i].fDetSY = cosf(m_pfProjectionAngles[i]) * m_fOriginDetectorDistance - 0.5f * m_iDetectorCount * vectors[i].fDetUY; - } + SFanProjection* vectors = genFanProjections(m_iProjectionAngleCount, + m_iDetectorCount, + m_fOriginSourceDistance, + m_fOriginDetectorDistance, + m_fDetectorWidth, + m_pfProjectionAngles); CFanFlatVecProjectionGeometry2D* vecGeom = new CFanFlatVecProjectionGeometry2D(); vecGeom->initialize(m_iProjectionAngleCount, m_iDetectorCount, vectors); diff --git a/src/GeometryUtil2D.cpp b/src/GeometryUtil2D.cpp new file mode 100644 index 0000000..1bca2bc --- /dev/null +++ b/src/GeometryUtil2D.cpp @@ -0,0 +1,161 @@ +/* +----------------------------------------------------------------------- +Copyright: 2010-2015, iMinds-Vision Lab, University of Antwerp + 2014-2015, CWI, Amsterdam + +Contact: astra@uantwerpen.be +Website: http://sf.net/projects/astra-toolbox + +This file is part of the ASTRA Toolbox. + + +The ASTRA Toolbox is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +The ASTRA Toolbox is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with the ASTRA Toolbox. If not, see <http://www.gnu.org/licenses/>. + +----------------------------------------------------------------------- +$Id$ +*/ + +#include "astra/GeometryUtil2D.h" + +#include <cmath> + +namespace astra { + +SParProjection* genParProjections(unsigned int iProjAngles, + unsigned int iProjDets, + double fDetSize, + const float *pfAngles, + const float *pfExtraOffsets) +{ + SParProjection base; + base.fRayX = 0.0f; + base.fRayY = 1.0f; + + base.fDetSX = iProjDets * fDetSize * -0.5f; + base.fDetSY = 0.0f; + + base.fDetUX = fDetSize; + base.fDetUY = 0.0f; + + SParProjection* p = new SParProjection[iProjAngles]; + +#define ROTATE0(name,i,alpha) do { p[i].f##name##X = base.f##name##X * cos(alpha) - base.f##name##Y * sin(alpha); p[i].f##name##Y = base.f##name##X * sin(alpha) + base.f##name##Y * cos(alpha); } while(0) + + for (unsigned int i = 0; i < iProjAngles; ++i) { + if (pfExtraOffsets) { + // TODO + } + + ROTATE0(Ray, i, pfAngles[i]); + ROTATE0(DetS, i, pfAngles[i]); + ROTATE0(DetU, i, pfAngles[i]); + + if (pfExtraOffsets) { + float d = pfExtraOffsets[i]; + p[i].fDetSX -= d * p[i].fDetUX; + p[i].fDetSY -= d * p[i].fDetUY; + } + } + +#undef ROTATE0 + + return p; +} + + +SFanProjection* genFanProjections(unsigned int iProjAngles, + unsigned int iProjDets, + double fOriginSource, double fOriginDetector, + double fDetSize, + const float *pfAngles) +// const float *pfExtraOffsets) +{ + SFanProjection *pProjs = new SFanProjection[iProjAngles]; + + float fSrcX0 = 0.0f; + float fSrcY0 = -fOriginSource; + float fDetUX0 = fDetSize; + float fDetUY0 = 0.0f; + float fDetSX0 = iProjDets * fDetUX0 / -2.0f; + float fDetSY0 = fOriginDetector; + +#define ROTATE0(name,i,alpha) do { pProjs[i].f##name##X = f##name##X0 * cos(alpha) - f##name##Y0 * sin(alpha); pProjs[i].f##name##Y = f##name##X0 * sin(alpha) + f##name##Y0 * cos(alpha); } while(0) + for (unsigned int i = 0; i < iProjAngles; ++i) { + ROTATE0(Src, i, pfAngles[i]); + ROTATE0(DetS, i, pfAngles[i]); + ROTATE0(DetU, i, pfAngles[i]); + } + +#undef ROTATE0 + + return pProjs; +} + +// Convert a SParProjection back into its set of "standard" circular parallel +// beam parameters. This is always possible. +bool getParParameters(const SParProjection &proj /* , ... */) +{ + // angle + // det size + // offset + + // (see convertAndUploadAngles in par_fp.cu) + + return true; +} + +// Convert a SFanProjection back into its set of "standard" circular fan beam +// parameters. This will return false if it can not be represented in this way. +bool getFanParameters(const SFanProjection &proj, unsigned int iProjDets, float &fAngle, float &fOriginSource, float &fOriginDetector, float &fDetSize, float &fOffset) +{ + // angle + // det size + // offset + // origin-source + // origin-detector + + // Need to check if line source-origin is orthogonal to vector ux,uy + // (including the case source==origin) + + // (equivalent: source and origin project to same point on detector) + + double dp = proj.fSrcX * proj.fDetUX + proj.fSrcY * proj.fDetUY; + + double rel = (proj.fSrcX*proj.fSrcX + proj.fSrcY*proj.fSrcY) * (proj.fDetUX*proj.fDetUX + proj.fDetUY*proj.fDetUY); + rel = sqrt(rel); + + if (std::abs(dp) > rel * 0.0001) + return false; + + fOriginSource = sqrt(proj.fSrcX*proj.fSrcX + proj.fSrcY*proj.fSrcY); + + fDetSize = sqrt(proj.fDetUX*proj.fDetUX + proj.fDetUY*proj.fDetUY); + + // project origin on detector line ( == project source on detector line) + + double t = (- proj.fDetSX) * proj.fDetUX + (- proj.fDetSY) * proj.fDetUY; + + fOffset = (float)t - 0.5*iProjDets; + + // TODO: CHECKME + fOriginDetector = sqrt((proj.fDetSX + t * proj.fDetUX)*(proj.fDetSX + t * proj.fDetUX) + (proj.fDetSY + t * proj.fDetUY)*(proj.fDetSY + t * proj.fDetUY)); + + //float fAngle = atan2(proj.fDetSX + t * proj.fDetUX - proj.fSrcX, proj.fDetSY + t * proj.fDetUY); // TODO: Fix order + sign + fAngle = atan2(proj.fDetUY, proj.fDetUX); // TODO: Check order + sign + + return true; +} + + +} diff --git a/src/ParallelProjectionGeometry2D.cpp b/src/ParallelProjectionGeometry2D.cpp index 105e24b..e1f93aa 100644 --- a/src/ParallelProjectionGeometry2D.cpp +++ b/src/ParallelProjectionGeometry2D.cpp @@ -28,6 +28,8 @@ $Id$ #include "astra/ParallelProjectionGeometry2D.h" +#include "astra/GeometryUtil2D.h" + #include <cstring> using namespace std; @@ -48,15 +50,13 @@ CParallelProjectionGeometry2D::CParallelProjectionGeometry2D() : CParallelProjectionGeometry2D::CParallelProjectionGeometry2D(int _iProjectionAngleCount, int _iDetectorCount, float32 _fDetectorWidth, - const float32* _pfProjectionAngles, - const float32* _pfExtraDetectorOffsets) + const float32* _pfProjectionAngles) { _clear(); initialize(_iProjectionAngleCount, _iDetectorCount, _fDetectorWidth, - _pfProjectionAngles, - _pfExtraDetectorOffsets); + _pfProjectionAngles); } //---------------------------------------------------------------------------------------- @@ -115,14 +115,12 @@ bool CParallelProjectionGeometry2D::initialize(const Config& _cfg) bool CParallelProjectionGeometry2D::initialize(int _iProjectionAngleCount, int _iDetectorCount, float32 _fDetectorWidth, - const float32* _pfProjectionAngles, - const float32* _pfExtraDetectorOffsets) + const float32* _pfProjectionAngles) { _initialize(_iProjectionAngleCount, _iDetectorCount, _fDetectorWidth, - _pfProjectionAngles, - _pfExtraDetectorOffsets); + _pfProjectionAngles); // success m_bInitialized = _check(); @@ -178,11 +176,6 @@ Config* CParallelProjectionGeometry2D::getConfiguration() const cfg->self.addChildNode("DetectorCount", getDetectorCount()); cfg->self.addChildNode("DetectorWidth", getDetectorWidth()); cfg->self.addChildNode("ProjectionAngles", m_pfProjectionAngles, m_iProjectionAngleCount); - if(m_pfExtraDetectorOffset!=NULL){ - XMLNode opt = cfg->self.addChildNode("Option"); - opt.addAttribute("key","ExtraDetectorOffset"); - opt.setContent(m_pfExtraDetectorOffset, m_iProjectionAngleCount); - } return cfg; } @@ -203,17 +196,11 @@ CVector3D CParallelProjectionGeometry2D::getProjectionDirection(int _iProjection //---------------------------------------------------------------------------------------- CParallelVecProjectionGeometry2D* CParallelProjectionGeometry2D::toVectorGeometry() { - SParProjection* vectors = new SParProjection[m_iProjectionAngleCount]; - for (int i = 0; i < m_iProjectionAngleCount; ++i) - { - vectors[i].fRayX = sinf(m_pfProjectionAngles[i]); - vectors[i].fRayY = -cosf(m_pfProjectionAngles[i]); - vectors[i].fDetUX = cosf(m_pfProjectionAngles[i]) * m_fDetectorWidth; - vectors[i].fDetUY = sinf(m_pfProjectionAngles[i]) * m_fDetectorWidth; - vectors[i].fDetSX = -0.5f * m_iDetectorCount * vectors[i].fDetUX; - vectors[i].fDetSY = -0.5f * m_iDetectorCount * vectors[i].fDetUY; - } - + SParProjection* vectors = genParProjections(m_iProjectionAngleCount, + m_iDetectorCount, + m_fDetectorWidth, + m_pfProjectionAngles, 0); + // TODO: ExtraOffsets? CParallelVecProjectionGeometry2D* vecGeom = new CParallelVecProjectionGeometry2D(); vecGeom->initialize(m_iProjectionAngleCount, m_iDetectorCount, vectors); delete[] vectors; diff --git a/src/ProjectionGeometry2D.cpp b/src/ProjectionGeometry2D.cpp index 03fbd22..a306b48 100644 --- a/src/ProjectionGeometry2D.cpp +++ b/src/ProjectionGeometry2D.cpp @@ -45,11 +45,10 @@ CProjectionGeometry2D::CProjectionGeometry2D() : configCheckData(0) CProjectionGeometry2D::CProjectionGeometry2D(int _iAngleCount, int _iDetectorCount, float32 _fDetectorWidth, - const float32* _pfProjectionAngles, - const float32* _pfExtraDetectorOffsets) : configCheckData(0) + const float32* _pfProjectionAngles) : configCheckData(0) { _clear(); - _initialize(_iAngleCount, _iDetectorCount, _fDetectorWidth, _pfProjectionAngles,_pfExtraDetectorOffsets); + _initialize(_iAngleCount, _iDetectorCount, _fDetectorWidth, _pfProjectionAngles); } //---------------------------------------------------------------------------------------- @@ -156,8 +155,7 @@ bool CProjectionGeometry2D::initialize(const Config& _cfg) bool CProjectionGeometry2D::_initialize(int _iProjectionAngleCount, int _iDetectorCount, float32 _fDetectorWidth, - const float32* _pfProjectionAngles, - const float32* _pfExtraDetectorOffsets) + const float32* _pfProjectionAngles) { if (m_bInitialized) { clear(); |