diff options
author | Willem Jan Palenstijn <WillemJan.Palenstijn@uantwerpen.be> | 2013-07-01 22:34:11 +0000 |
---|---|---|
committer | wpalenst <WillemJan.Palenstijn@uantwerpen.be> | 2013-07-01 22:34:11 +0000 |
commit | b2fc6c70434674d74551c3a6c01ffb3233499312 (patch) | |
tree | b17f080ebc504ab85ebb7c3d89f917fd87ce9e00 /cuda/2d/astra.cu | |
download | astra-b2fc6c70434674d74551c3a6c01ffb3233499312.tar.gz astra-b2fc6c70434674d74551c3a6c01ffb3233499312.tar.bz2 astra-b2fc6c70434674d74551c3a6c01ffb3233499312.tar.xz astra-b2fc6c70434674d74551c3a6c01ffb3233499312.zip |
Update version to 1.3
Diffstat (limited to 'cuda/2d/astra.cu')
-rw-r--r-- | cuda/2d/astra.cu | 824 |
1 files changed, 824 insertions, 0 deletions
diff --git a/cuda/2d/astra.cu b/cuda/2d/astra.cu new file mode 100644 index 0000000..71ed025 --- /dev/null +++ b/cuda/2d/astra.cu @@ -0,0 +1,824 @@ +/* +----------------------------------------------------------------------- +Copyright 2012 iMinds-Vision Lab, University of Antwerp + +Contact: astra@ua.ac.be +Website: http://astra.ua.ac.be + + +This file is part of the +All Scale Tomographic Reconstruction Antwerp Toolbox ("ASTRA Toolbox"). + +The ASTRA Toolbox is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +The ASTRA Toolbox is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with the ASTRA Toolbox. If not, see <http://www.gnu.org/licenses/>. + +----------------------------------------------------------------------- +$Id$ +*/ + +#include <cstdio> +#include <cassert> + +#include "util.h" +#include "par_fp.h" +#include "fan_fp.h" +#include "par_bp.h" +#include "arith.h" +#include "astra.h" + +#include "fft.h" + +#include <fstream> +#include <cuda.h> + +#include "../../include/astra/Logger.h" + +using namespace astraCUDA; +using namespace std; + + +namespace astra { + +enum CUDAProjectionType { + PROJ_PARALLEL, + PROJ_FAN +}; + + +class AstraFBP_internal { +public: + SDimensions dims; + float* angles; + float* TOffsets; + + float fPixelSize; + + bool initialized; + bool setStartReconstruction; + + float* D_sinoData; + unsigned int sinoPitch; + + float* D_volumeData; + unsigned int volumePitch; + + cufftComplex * m_pDevFilter; +}; + +AstraFBP::AstraFBP() +{ + pData = new AstraFBP_internal(); + + pData->angles = 0; + pData->D_sinoData = 0; + pData->D_volumeData = 0; + + pData->dims.iVolWidth = 0; + pData->dims.iProjAngles = 0; + pData->dims.fDetScale = 1.0f; + pData->dims.iRaysPerDet = 1; + pData->dims.iRaysPerPixelDim = 1; + + pData->initialized = false; + pData->setStartReconstruction = false; + + pData->m_pDevFilter = NULL; +} + +AstraFBP::~AstraFBP() +{ + delete[] pData->angles; + pData->angles = 0; + + delete[] pData->TOffsets; + pData->TOffsets = 0; + + cudaFree(pData->D_sinoData); + pData->D_sinoData = 0; + + cudaFree(pData->D_volumeData); + pData->D_volumeData = 0; + + if(pData->m_pDevFilter != NULL) + { + freeComplexOnDevice(pData->m_pDevFilter); + pData->m_pDevFilter = NULL; + } + + delete pData; + pData = 0; +} + +bool AstraFBP::setReconstructionGeometry(unsigned int iVolWidth, + unsigned int iVolHeight, + float fPixelSize) +{ + if (pData->initialized) + return false; + + pData->dims.iVolWidth = iVolWidth; + pData->dims.iVolHeight = iVolHeight; + + pData->fPixelSize = fPixelSize; + + return (iVolWidth > 0 && iVolHeight > 0 && fPixelSize > 0.0f); +} + +bool AstraFBP::setProjectionGeometry(unsigned int iProjAngles, + unsigned int iProjDets, + const float* pfAngles, + float fDetSize) +{ + if (pData->initialized) + return false; + + pData->dims.iProjAngles = iProjAngles; + pData->dims.iProjDets = iProjDets; + pData->dims.fDetScale = fDetSize / pData->fPixelSize; + + if (iProjAngles == 0 || iProjDets == 0 || pfAngles == 0) + return false; + + pData->angles = new float[iProjAngles]; + memcpy(pData->angles, pfAngles, iProjAngles * sizeof(pfAngles[0])); + + return true; +} + +bool AstraFBP::setPixelSuperSampling(unsigned int iPixelSuperSampling) +{ + if (pData->initialized) + return false; + + if (iPixelSuperSampling == 0) + return false; + + pData->dims.iRaysPerPixelDim = iPixelSuperSampling; + + return true; +} + + +bool AstraFBP::setTOffsets(const float* pfTOffsets) +{ + if (pData->initialized) + return false; + + if (pfTOffsets == 0) + return false; + + pData->TOffsets = new float[pData->dims.iProjAngles]; + memcpy(pData->TOffsets, pfTOffsets, pData->dims.iProjAngles * sizeof(pfTOffsets[0])); + + return true; +} + +bool AstraFBP::init(int iGPUIndex) +{ + if (pData->initialized) + { + return false; + } + + if (pData->dims.iProjAngles == 0 || pData->dims.iVolWidth == 0) + { + return false; + } + + cudaSetDevice(iGPUIndex); + cudaError_t err = cudaGetLastError(); + + // Ignore errors caused by calling cudaSetDevice multiple times + if (err != cudaSuccess && err != cudaErrorSetOnActiveProcess) + { + return false; + } + + bool ok = allocateVolume(pData->D_volumeData, pData->dims.iVolWidth+2, pData->dims.iVolHeight+2, pData->volumePitch); + if (!ok) + { + return false; + } + + ok = allocateVolume(pData->D_sinoData, pData->dims.iProjDets+2, pData->dims.iProjAngles, pData->sinoPitch); + if (!ok) + { + cudaFree(pData->D_volumeData); + pData->D_volumeData = 0; + return false; + } + + pData->initialized = true; + + return true; +} + +bool AstraFBP::setSinogram(const float* pfSinogram, + unsigned int iSinogramPitch) +{ + if (!pData->initialized) + return false; + if (!pfSinogram) + return false; + + bool ok = copySinogramToDevice(pfSinogram, iSinogramPitch, + pData->dims.iProjDets, + pData->dims.iProjAngles, + pData->D_sinoData, pData->sinoPitch); + if (!ok) + return false; + + // rescale sinogram to adjust for pixel size + processVol<opMul,SINO>(pData->D_sinoData, + 1.0f/(pData->fPixelSize*pData->fPixelSize), + pData->sinoPitch, + pData->dims.iProjDets, pData->dims.iProjAngles); + + pData->setStartReconstruction = false; + + return true; +} + +static int calcNextPowerOfTwo(int _iValue) +{ + int iOutput = 1; + + while(iOutput < _iValue) + { + iOutput *= 2; + } + + return iOutput; +} + +bool AstraFBP::run() +{ + if (!pData->initialized) + { + return false; + } + + zeroVolume(pData->D_volumeData, pData->volumePitch, pData->dims.iVolWidth+2, pData->dims.iVolHeight+2); + + bool ok = false; + + if (pData->m_pDevFilter) { + + int iFFTRealDetCount = calcNextPowerOfTwo(2 * pData->dims.iProjDets); + int iFFTFourDetCount = calcFFTFourSize(iFFTRealDetCount); + + cufftComplex * pDevComplexSinogram = NULL; + + allocateComplexOnDevice(pData->dims.iProjAngles, iFFTFourDetCount, &pDevComplexSinogram); + + runCudaFFT(pData->dims.iProjAngles, pData->D_sinoData, pData->sinoPitch, 1, pData->dims.iProjDets, iFFTRealDetCount, iFFTFourDetCount, pDevComplexSinogram); + + applyFilter(pData->dims.iProjAngles, iFFTFourDetCount, pDevComplexSinogram, pData->m_pDevFilter); + + runCudaIFFT(pData->dims.iProjAngles, pDevComplexSinogram, pData->D_sinoData, pData->sinoPitch, 1, pData->dims.iProjDets, iFFTRealDetCount, iFFTFourDetCount); + + freeComplexOnDevice(pDevComplexSinogram); + + } + + ok = BP(pData->D_volumeData, pData->volumePitch, pData->D_sinoData, pData->sinoPitch, pData->dims, pData->angles, pData->TOffsets); + if(!ok) + { + return false; + } + + processVol<opMul,VOL>(pData->D_volumeData, + (M_PI / 2.0f) / (float)pData->dims.iProjAngles, + pData->volumePitch, + pData->dims.iVolWidth, pData->dims.iVolHeight); + + return true; +} + +bool AstraFBP::getReconstruction(float* pfReconstruction, unsigned int iReconstructionPitch) const +{ + if (!pData->initialized) + return false; + + bool ok = copyVolumeFromDevice(pfReconstruction, iReconstructionPitch, + pData->dims.iVolWidth, + pData->dims.iVolHeight, + pData->D_volumeData, pData->volumePitch); + if (!ok) + return false; + + return true; +} + +int AstraFBP::calcFourierFilterSize(int _iDetectorCount) +{ + int iFFTRealDetCount = calcNextPowerOfTwo(2 * _iDetectorCount); + int iFreqBinCount = calcFFTFourSize(iFFTRealDetCount); + + // CHECKME: Matlab makes this at least 64. Do we also need to? + return iFreqBinCount; +} + +bool AstraFBP::setFilter(E_FBPFILTER _eFilter, const float * _pfHostFilter /* = NULL */, int _iFilterWidth /* = 0 */, float _fD /* = 1.0f */, float _fFilterParameter /* = -1.0f */) +{ + if(pData->m_pDevFilter != 0) + { + freeComplexOnDevice(pData->m_pDevFilter); + pData->m_pDevFilter = 0; + } + + if (_eFilter == FILTER_NONE) + return true; // leave pData->m_pDevFilter set to 0 + + + int iFFTRealDetCount = calcNextPowerOfTwo(2 * pData->dims.iProjDets); + int iFreqBinCount = calcFFTFourSize(iFFTRealDetCount); + + cufftComplex * pHostFilter = new cufftComplex[pData->dims.iProjAngles * iFreqBinCount]; + memset(pHostFilter, 0, sizeof(cufftComplex) * pData->dims.iProjAngles * iFreqBinCount); + + allocateComplexOnDevice(pData->dims.iProjAngles, iFreqBinCount, &(pData->m_pDevFilter)); + + switch(_eFilter) + { + case FILTER_NONE: + // handled above + break; + case FILTER_RAMLAK: + case FILTER_SHEPPLOGAN: + case FILTER_COSINE: + case FILTER_HAMMING: + case FILTER_HANN: + case FILTER_TUKEY: + case FILTER_LANCZOS: + case FILTER_TRIANGULAR: + case FILTER_GAUSSIAN: + case FILTER_BARTLETTHANN: + case FILTER_BLACKMAN: + case FILTER_NUTTALL: + case FILTER_BLACKMANHARRIS: + case FILTER_BLACKMANNUTTALL: + case FILTER_FLATTOP: + { + genFilter(_eFilter, _fD, pData->dims.iProjAngles, pHostFilter, iFFTRealDetCount, iFreqBinCount, _fFilterParameter); + uploadComplexArrayToDevice(pData->dims.iProjAngles, iFreqBinCount, pHostFilter, pData->m_pDevFilter); + + break; + } + case FILTER_PROJECTION: + { + // make sure the offered filter has the correct size + assert(_iFilterWidth == iFreqBinCount); + + for(int iFreqBinIndex = 0; iFreqBinIndex < iFreqBinCount; iFreqBinIndex++) + { + float fValue = _pfHostFilter[iFreqBinIndex]; + + for(int iProjectionIndex = 0; iProjectionIndex < (int)pData->dims.iProjAngles; iProjectionIndex++) + { + pHostFilter[iFreqBinIndex + iProjectionIndex * iFreqBinCount].x = fValue; + pHostFilter[iFreqBinIndex + iProjectionIndex * iFreqBinCount].y = 0.0f; + } + } + uploadComplexArrayToDevice(pData->dims.iProjAngles, iFreqBinCount, pHostFilter, pData->m_pDevFilter); + break; + } + case FILTER_SINOGRAM: + { + // make sure the offered filter has the correct size + assert(_iFilterWidth == iFreqBinCount); + + for(int iFreqBinIndex = 0; iFreqBinIndex < iFreqBinCount; iFreqBinIndex++) + { + for(int iProjectionIndex = 0; iProjectionIndex < (int)pData->dims.iProjAngles; iProjectionIndex++) + { + float fValue = _pfHostFilter[iFreqBinIndex + iProjectionIndex * _iFilterWidth]; + + pHostFilter[iFreqBinIndex + iProjectionIndex * iFreqBinCount].x = fValue; + pHostFilter[iFreqBinIndex + iProjectionIndex * iFreqBinCount].y = 0.0f; + } + } + uploadComplexArrayToDevice(pData->dims.iProjAngles, iFreqBinCount, pHostFilter, pData->m_pDevFilter); + break; + } + case FILTER_RPROJECTION: + { + int iProjectionCount = pData->dims.iProjAngles; + int iRealFilterElementCount = iProjectionCount * iFFTRealDetCount; + float * pfHostRealFilter = new float[iRealFilterElementCount]; + memset(pfHostRealFilter, 0, sizeof(float) * iRealFilterElementCount); + + int iUsedFilterWidth = min(_iFilterWidth, iFFTRealDetCount); + int iStartFilterIndex = (_iFilterWidth - iUsedFilterWidth) / 2; + int iMaxFilterIndex = iStartFilterIndex + iUsedFilterWidth; + + int iFilterShiftSize = _iFilterWidth / 2; + + for(int iDetectorIndex = iStartFilterIndex; iDetectorIndex < iMaxFilterIndex; iDetectorIndex++) + { + int iFFTInFilterIndex = (iDetectorIndex + iFFTRealDetCount - iFilterShiftSize) % iFFTRealDetCount; + float fValue = _pfHostFilter[iDetectorIndex]; + + for(int iProjectionIndex = 0; iProjectionIndex < (int)pData->dims.iProjAngles; iProjectionIndex++) + { + pfHostRealFilter[iFFTInFilterIndex + iProjectionIndex * iFFTRealDetCount] = fValue; + } + } + + float* pfDevRealFilter = NULL; + cudaMalloc((void **)&pfDevRealFilter, sizeof(float) * iRealFilterElementCount); // TODO: check for errors + cudaMemcpy(pfDevRealFilter, pfHostRealFilter, sizeof(float) * iRealFilterElementCount, cudaMemcpyHostToDevice); + delete[] pfHostRealFilter; + + runCudaFFT(iProjectionCount, pfDevRealFilter, iFFTRealDetCount, 0, iFFTRealDetCount, iFFTRealDetCount, iFreqBinCount, pData->m_pDevFilter); + + cudaFree(pfDevRealFilter); + + break; + } + case FILTER_RSINOGRAM: + { + int iProjectionCount = pData->dims.iProjAngles; + int iRealFilterElementCount = iProjectionCount * iFFTRealDetCount; + float* pfHostRealFilter = new float[iRealFilterElementCount]; + memset(pfHostRealFilter, 0, sizeof(float) * iRealFilterElementCount); + + int iUsedFilterWidth = min(_iFilterWidth, iFFTRealDetCount); + int iStartFilterIndex = (_iFilterWidth - iUsedFilterWidth) / 2; + int iMaxFilterIndex = iStartFilterIndex + iUsedFilterWidth; + + int iFilterShiftSize = _iFilterWidth / 2; + + for(int iDetectorIndex = iStartFilterIndex; iDetectorIndex < iMaxFilterIndex; iDetectorIndex++) + { + int iFFTInFilterIndex = (iDetectorIndex + iFFTRealDetCount - iFilterShiftSize) % iFFTRealDetCount; + + for(int iProjectionIndex = 0; iProjectionIndex < (int)pData->dims.iProjAngles; iProjectionIndex++) + { + float fValue = _pfHostFilter[iDetectorIndex + iProjectionIndex * _iFilterWidth]; + pfHostRealFilter[iFFTInFilterIndex + iProjectionIndex * iFFTRealDetCount] = fValue; + } + } + + float* pfDevRealFilter = NULL; + cudaMalloc((void **)&pfDevRealFilter, sizeof(float) * iRealFilterElementCount); // TODO: check for errors + cudaMemcpy(pfDevRealFilter, pfHostRealFilter, sizeof(float) * iRealFilterElementCount, cudaMemcpyHostToDevice); + delete[] pfHostRealFilter; + + runCudaFFT(iProjectionCount, pfDevRealFilter, iFFTRealDetCount, 0, iFFTRealDetCount, iFFTRealDetCount, iFreqBinCount, pData->m_pDevFilter); + + cudaFree(pfDevRealFilter); + + break; + } + default: + { + fprintf(stderr, "AstraFBP::setFilter: Weird filter type requested"); + delete [] pHostFilter; + return false; + } + } + + delete [] pHostFilter; + + return true; +} + +BPalgo::BPalgo() +{ + +} + +BPalgo::~BPalgo() +{ + +} + +bool BPalgo::init() +{ + return true; +} + +bool BPalgo::iterate(unsigned int) +{ + // TODO: This zeroVolume makes an earlier memcpy of D_volumeData redundant + zeroVolume(D_volumeData, volumePitch, dims.iVolWidth+2, dims.iVolHeight+2); + callBP(D_volumeData, volumePitch, D_sinoData, sinoPitch); + return true; +} + +float BPalgo::computeDiffNorm() +{ + float *D_projData; + unsigned int projPitch; + + allocateVolume(D_projData, dims.iProjDets+2, dims.iProjAngles, projPitch); + + cudaMemcpy2D(D_projData, sizeof(float)*projPitch, D_sinoData, sizeof(float)*sinoPitch, sizeof(float)*(dims.iProjDets+2), dims.iProjAngles, cudaMemcpyDeviceToDevice); + callFP(D_volumeData, volumePitch, D_projData, projPitch, -1.0f); + + float s = dotProduct2D(D_projData, projPitch, dims.iProjDets, dims.iProjAngles, 1, 0); + + cudaFree(D_projData); + + return sqrt(s); +} + + +bool astraCudaFP(const float* pfVolume, float* pfSinogram, + unsigned int iVolWidth, unsigned int iVolHeight, + unsigned int iProjAngles, unsigned int iProjDets, + const float *pfAngles, const float *pfOffsets, + float fDetSize, unsigned int iDetSuperSampling, + int iGPUIndex) +{ + SDimensions dims; + + if (iProjAngles == 0 || iProjDets == 0 || pfAngles == 0) + return false; + + dims.iProjAngles = iProjAngles; + dims.iProjDets = iProjDets; + dims.fDetScale = fDetSize; + + if (iDetSuperSampling == 0) + return false; + + dims.iRaysPerDet = iDetSuperSampling; + + if (iVolWidth <= 0 || iVolHeight <= 0) + return false; + + dims.iVolWidth = iVolWidth; + dims.iVolHeight = iVolHeight; + + cudaSetDevice(iGPUIndex); + cudaError_t err = cudaGetLastError(); + + // Ignore errors caused by calling cudaSetDevice multiple times + if (err != cudaSuccess && err != cudaErrorSetOnActiveProcess) + return false; + + + bool ok; + + float* D_volumeData; + unsigned int volumePitch; + + ok = allocateVolume(D_volumeData, dims.iVolWidth+2, dims.iVolHeight+2, volumePitch); + if (!ok) + return false; + + float* D_sinoData; + unsigned int sinoPitch; + + ok = allocateVolume(D_sinoData, dims.iProjDets+2, dims.iProjAngles, sinoPitch); + if (!ok) { + cudaFree(D_volumeData); + return false; + } + + ok = copyVolumeToDevice(pfVolume, dims.iVolWidth, + dims.iVolWidth, dims.iVolHeight, + D_volumeData, volumePitch); + if (!ok) { + cudaFree(D_volumeData); + cudaFree(D_sinoData); + return false; + } + + zeroVolume(D_sinoData, sinoPitch, dims.iProjDets+2, dims.iProjAngles); + ok = FP(D_volumeData, volumePitch, D_sinoData, sinoPitch, dims, pfAngles, pfOffsets, 1.0f); + if (!ok) { + cudaFree(D_volumeData); + cudaFree(D_sinoData); + return false; + } + + ok = copySinogramFromDevice(pfSinogram, dims.iProjDets, + dims.iProjDets, + dims.iProjAngles, + D_sinoData, sinoPitch); + if (!ok) { + cudaFree(D_volumeData); + cudaFree(D_sinoData); + return false; + } + + cudaFree(D_volumeData); + cudaFree(D_sinoData); + return true; +} + +bool astraCudaFanFP(const float* pfVolume, float* pfSinogram, + unsigned int iVolWidth, unsigned int iVolHeight, + unsigned int iProjAngles, unsigned int iProjDets, + const float *pfAngles, float fOriginSourceDistance, + float fOriginDetectorDistance, float fPixelSize, + float fDetSize, + unsigned int iDetSuperSampling, + int iGPUIndex) +{ + SDimensions dims; + + if (iProjAngles == 0 || iProjDets == 0 || pfAngles == 0) + return false; + + dims.iProjAngles = iProjAngles; + dims.iProjDets = iProjDets; + + if (iDetSuperSampling == 0) + return false; + + dims.iRaysPerDet = iDetSuperSampling; + + if (iVolWidth <= 0 || iVolHeight <= 0) + return false; + + dims.iVolWidth = iVolWidth; + dims.iVolHeight = iVolHeight; + + cudaSetDevice(iGPUIndex); + cudaError_t err = cudaGetLastError(); + + // Ignore errors caused by calling cudaSetDevice multiple times + if (err != cudaSuccess && err != cudaErrorSetOnActiveProcess) + return false; + + + bool ok; + + float* D_volumeData; + unsigned int volumePitch; + + ok = allocateVolume(D_volumeData, dims.iVolWidth+2, dims.iVolHeight+2, volumePitch); + if (!ok) + return false; + + float* D_sinoData; + unsigned int sinoPitch; + + ok = allocateVolume(D_sinoData, dims.iProjDets+2, dims.iProjAngles, sinoPitch); + if (!ok) { + cudaFree(D_volumeData); + return false; + } + + ok = copyVolumeToDevice(pfVolume, dims.iVolWidth, + dims.iVolWidth, dims.iVolHeight, + D_volumeData, volumePitch); + if (!ok) { + cudaFree(D_volumeData); + cudaFree(D_sinoData); + return false; + } + + zeroVolume(D_sinoData, sinoPitch, dims.iProjDets+2, dims.iProjAngles); + + // TODO: Turn this geometry conversion into a util function + SFanProjection* projs = new SFanProjection[dims.iProjAngles]; + + float fSrcX0 = 0.0f; + float fSrcY0 = -fOriginSourceDistance / fPixelSize; + float fDetUX0 = fDetSize / fPixelSize; + float fDetUY0 = 0.0f; + float fDetSX0 = dims.iProjDets * fDetUX0 / -2.0f; + float fDetSY0 = fOriginDetectorDistance / fPixelSize; + +#define ROTATE0(name,i,alpha) do { projs[i].f##name##X = f##name##X0 * cos(alpha) - f##name##Y0 * sin(alpha); projs[i].f##name##Y = f##name##X0 * sin(alpha) + f##name##Y0 * cos(alpha); } while(0) + for (int i = 0; i < dims.iProjAngles; ++i) { + ROTATE0(Src, i, pfAngles[i]); + ROTATE0(DetS, i, pfAngles[i]); + ROTATE0(DetU, i, pfAngles[i]); + } + +#undef ROTATE0 + + ok = FanFP(D_volumeData, volumePitch, D_sinoData, sinoPitch, dims, projs, 1.0f); + delete[] projs; + + if (!ok) { + cudaFree(D_volumeData); + cudaFree(D_sinoData); + return false; + } + + ok = copySinogramFromDevice(pfSinogram, dims.iProjDets, + dims.iProjDets, + dims.iProjAngles, + D_sinoData, sinoPitch); + if (!ok) { + cudaFree(D_volumeData); + cudaFree(D_sinoData); + return false; + } + + cudaFree(D_volumeData); + cudaFree(D_sinoData); + + return true; + +} + + +bool astraCudaFanFP(const float* pfVolume, float* pfSinogram, + unsigned int iVolWidth, unsigned int iVolHeight, + unsigned int iProjAngles, unsigned int iProjDets, + const SFanProjection *pAngles, + unsigned int iDetSuperSampling, + int iGPUIndex) +{ + SDimensions dims; + + if (iProjAngles == 0 || iProjDets == 0 || pAngles == 0) + return false; + + dims.iProjAngles = iProjAngles; + dims.iProjDets = iProjDets; + dims.fDetScale = 1.0f; // TODO? + + if (iDetSuperSampling == 0) + return false; + + dims.iRaysPerDet = iDetSuperSampling; + + if (iVolWidth <= 0 || iVolHeight <= 0) + return false; + + dims.iVolWidth = iVolWidth; + dims.iVolHeight = iVolHeight; + + cudaSetDevice(iGPUIndex); + cudaError_t err = cudaGetLastError(); + + // Ignore errors caused by calling cudaSetDevice multiple times + if (err != cudaSuccess && err != cudaErrorSetOnActiveProcess) + return false; + + + bool ok; + + float* D_volumeData; + unsigned int volumePitch; + + ok = allocateVolume(D_volumeData, dims.iVolWidth+2, dims.iVolHeight+2, volumePitch); + if (!ok) + return false; + + float* D_sinoData; + unsigned int sinoPitch; + + ok = allocateVolume(D_sinoData, dims.iProjDets+2, dims.iProjAngles, sinoPitch); + if (!ok) { + cudaFree(D_volumeData); + return false; + } + + ok = copyVolumeToDevice(pfVolume, dims.iVolWidth, + dims.iVolWidth, dims.iVolHeight, + D_volumeData, volumePitch); + if (!ok) { + cudaFree(D_volumeData); + cudaFree(D_sinoData); + return false; + } + + zeroVolume(D_sinoData, sinoPitch, dims.iProjDets+2, dims.iProjAngles); + + ok = FanFP(D_volumeData, volumePitch, D_sinoData, sinoPitch, dims, pAngles, 1.0f); + + if (!ok) { + cudaFree(D_volumeData); + cudaFree(D_sinoData); + return false; + } + + ok = copySinogramFromDevice(pfSinogram, dims.iProjDets, + dims.iProjDets, + dims.iProjAngles, + D_sinoData, sinoPitch); + if (!ok) { + cudaFree(D_volumeData); + cudaFree(D_sinoData); + return false; + } + + cudaFree(D_volumeData); + cudaFree(D_sinoData); + + return true; + +} + + +} |