summaryrefslogtreecommitdiffstats
path: root/cuda
diff options
context:
space:
mode:
Diffstat (limited to 'cuda')
-rw-r--r--cuda/2d/astra.cu7
-rw-r--r--cuda/2d/darthelper.cu13
-rw-r--r--cuda/2d/fft.cu45
-rw-r--r--cuda/2d/par_fp.cu2
-rw-r--r--cuda/2d/util.cu8
-rw-r--r--cuda/3d/cone_fp.cu2
-rw-r--r--cuda/3d/par3d_fp.cu2
-rw-r--r--cuda/3d/util3d.cu12
8 files changed, 47 insertions, 44 deletions
diff --git a/cuda/2d/astra.cu b/cuda/2d/astra.cu
index 5e2a07a..4c69628 100644
--- a/cuda/2d/astra.cu
+++ b/cuda/2d/astra.cu
@@ -42,12 +42,13 @@ $Id$
#include <fstream>
#include <cuda.h>
-#include "../../include/astra/Logger.h"
#include "../../include/astra/VolumeGeometry2D.h"
#include "../../include/astra/ParallelProjectionGeometry2D.h"
#include "../../include/astra/FanFlatProjectionGeometry2D.h"
#include "../../include/astra/FanFlatVecProjectionGeometry2D.h"
+#include "../../include/astra/Logging.h"
+
// For fan beam FBP weighting
#include "../3d/fdk.h"
@@ -536,7 +537,7 @@ bool AstraFBP::setFilter(E_FBPFILTER _eFilter, const float * _pfHostFilter /* =
int iMaxFilterIndex = iStartFilterIndex + iUsedFilterWidth;
int iFilterShiftSize = _iFilterWidth / 2;
-
+
for(int iDetectorIndex = iStartFilterIndex; iDetectorIndex < iMaxFilterIndex; iDetectorIndex++)
{
int iFFTInFilterIndex = (iDetectorIndex + iFFTRealDetCount - iFilterShiftSize) % iFFTRealDetCount;
@@ -561,7 +562,7 @@ bool AstraFBP::setFilter(E_FBPFILTER _eFilter, const float * _pfHostFilter /* =
}
default:
{
- fprintf(stderr, "AstraFBP::setFilter: Unknown filter type requested");
+ ASTRA_ERROR("AstraFBP::setFilter: Unknown filter type requested");
delete [] pHostFilter;
return false;
}
diff --git a/cuda/2d/darthelper.cu b/cuda/2d/darthelper.cu
index 28ca557..1d10d49 100644
--- a/cuda/2d/darthelper.cu
+++ b/cuda/2d/darthelper.cu
@@ -57,7 +57,7 @@ void roiSelect(float* out, float radius, unsigned int width, unsigned int height
// We abuse dims here...
SDimensions dims;
dims.iVolWidth = width;
- dims.iVolHeight = width;
+ dims.iVolHeight = height;
allocateVolumeData(D_data, pitch, dims);
copyVolumeToDevice(out, width, dims, D_data, pitch);
@@ -245,7 +245,7 @@ void dartMask(float* mask, const float* segmentation, unsigned int conn, unsigne
// We abuse dims here...
SDimensions dims;
dims.iVolWidth = width;
- dims.iVolHeight = width;
+ dims.iVolHeight = height;
allocateVolumeData(D_segmentationData, pitch, dims);
copyVolumeToDevice(segmentation, width, dims, D_segmentationData, pitch);
@@ -278,7 +278,7 @@ __global__ void devDartSmoothingRadius(float* out, const float* in, float b, uns
unsigned int x = threadIdx.x + 16*blockIdx.x;
unsigned int y = threadIdx.y + 16*blockIdx.y;
- // Sacrifice the border pixels to simplify the implementation.
+ // Sacrifice the border pixels to simplify the implementation.
if (x > radius-1 && x < width - radius && y > radius-1 && y < height - radius)
{
float* d = (float*)in;
@@ -286,9 +286,10 @@ __global__ void devDartSmoothingRadius(float* out, const float* in, float b, uns
unsigned int o2 = y*pitch+x;
int r = radius;
+ float count = 4*r*(r+1);
float res = -d[o2];
- for (int row = -r; row < r; row++)
+ for (int row = -r; row <= r; row++)
{
unsigned int o1 = (y+row)*pitch+x;
for (int col = -r; col <= r; col++)
@@ -297,7 +298,7 @@ __global__ void devDartSmoothingRadius(float* out, const float* in, float b, uns
}
}
- res *= b / 4*r*(r+1);
+ res *= b / count;
res += (1.0f-b) * d[o2];
m[o2] = res;
@@ -333,7 +334,7 @@ void dartSmoothing(float* out, const float* in, float b, unsigned int radius, un
// We abuse dims here...
SDimensions dims;
dims.iVolWidth = width;
- dims.iVolHeight = width;
+ dims.iVolHeight = height;
allocateVolumeData(D_inData, pitch, dims);
copyVolumeToDevice(in, width, dims, D_inData, pitch);
diff --git a/cuda/2d/fft.cu b/cuda/2d/fft.cu
index d105e29..2bfd493 100644
--- a/cuda/2d/fft.cu
+++ b/cuda/2d/fft.cu
@@ -34,7 +34,7 @@ $Id$
#include <cuda.h>
#include <fstream>
-#include "../../include/astra/Logger.h"
+#include "../../include/astra/Logging.h"
using namespace astra;
@@ -43,25 +43,22 @@ using namespace astra;
#define CHECK_ERROR(errorMessage) do { \
cudaError_t err = cudaThreadSynchronize(); \
if( cudaSuccess != err) { \
- fprintf(stderr, "Cuda error: %s in file '%s' in line %i : %s.\n", \
- errorMessage, __FILE__, __LINE__, cudaGetErrorString( err) );\
- CLogger::writeTerminalCUDAError(__FILE__, __LINE__, cudaGetErrorString( err)); \
+ ASTRA_ERROR("Cuda error %s : %s", \
+ errorMessage,cudaGetErrorString( err)); \
exit(EXIT_FAILURE); \
} } while (0)
#define SAFE_CALL( call) do { \
cudaError err = call; \
if( cudaSuccess != err) { \
- fprintf(stderr, "Cuda error in file '%s' in line %i : %s.\n", \
- __FILE__, __LINE__, cudaGetErrorString( err) ); \
- CLogger::writeTerminalCUDAError(__FILE__, __LINE__, cudaGetErrorString( err)); \
+ ASTRA_ERROR("Cuda error: %s ", \
+ cudaGetErrorString( err)); \
exit(EXIT_FAILURE); \
} \
err = cudaThreadSynchronize(); \
if( cudaSuccess != err) { \
- fprintf(stderr, "Cuda error in file '%s' in line %i : %s.\n", \
- __FILE__, __LINE__, cudaGetErrorString( err) ); \
- CLogger::writeTerminalCUDAError(__FILE__, __LINE__, cudaGetErrorString( err)); \
+ ASTRA_ERROR("Cuda error: %s : ", \
+ cudaGetErrorString( err)); \
exit(EXIT_FAILURE); \
} } while (0)
@@ -140,7 +137,7 @@ static bool invokeCudaFFT(int _iProjectionCount, int _iDetectorCount,
result = cufftPlan1d(&plan, _iDetectorCount, CUFFT_R2C, _iProjectionCount);
if(result != CUFFT_SUCCESS)
{
- std::cerr << "Failed to plan 1d r2c fft" << std::endl;
+ ASTRA_ERROR("Failed to plan 1d r2c fft");
return false;
}
@@ -149,7 +146,7 @@ static bool invokeCudaFFT(int _iProjectionCount, int _iDetectorCount,
if(result != CUFFT_SUCCESS)
{
- std::cerr << "Failed to exec 1d r2c fft" << std::endl;
+ ASTRA_ERROR("Failed to exec 1d r2c fft");
return false;
}
@@ -166,18 +163,18 @@ static bool invokeCudaIFFT(int _iProjectionCount, int _iDetectorCount,
result = cufftPlan1d(&plan, _iDetectorCount, CUFFT_C2R, _iProjectionCount);
if(result != CUFFT_SUCCESS)
{
- std::cerr << "Failed to plan 1d c2r fft" << std::endl;
+ ASTRA_ERROR("Failed to plan 1d c2r fft");
return false;
}
// todo: why do we have to get rid of the const qualifier?
result = cufftExecC2R(plan, (cufftComplex *)_pDevSourceComplex,
- (cufftReal *)_pfDevTarget);
+ (cufftReal *)_pfDevTarget);
cufftDestroy(plan);
if(result != CUFFT_SUCCESS)
{
- std::cerr << "Failed to exec 1d c2r fft" << std::endl;
+ ASTRA_ERROR("Failed to exec 1d c2r fft");
return false;
}
@@ -257,7 +254,7 @@ bool runCudaIFFT(int _iProjectionCount, const cufftComplex* _pDevSourceComplex,
}
rescaleInverseFourier(_iProjectionCount, _iFFTRealDetectorCount,
- pfDevRealFFTTarget);
+ pfDevRealFFTTarget);
SAFE_CALL(cudaMemset(_pfRealTarget, 0, sizeof(float) * _iProjectionCount * _iTargetPitch));
@@ -460,7 +457,7 @@ void genFilter(E_FBPFILTER _eFilter, float _fD, int _iProjectionCount,
const float fA1 = 0.48f;
const float fA2 = 0.38f;
float fNMinusOne = (float)(_iFFTFourierDetectorCount) - 1.0f;
-
+
for(int iDetectorIndex = 1; iDetectorIndex < _iFFTFourierDetectorCount; iDetectorIndex++)
{
float fSmallN = (float)iDetectorIndex;
@@ -633,7 +630,7 @@ void genFilter(E_FBPFILTER _eFilter, float _fD, int _iProjectionCount,
}
default:
{
- std::cerr << "Cannot serve requested filter" << std::endl;
+ ASTRA_ERROR("Cannot serve requested filter");
}
}
@@ -746,7 +743,7 @@ void testCudaFFT()
{
for(int iDetectorIndex = 0; iDetectorIndex < iDetectorCount; iDetectorIndex++)
{
-// int
+// int
// pfHostProj[iIndex] = (float)rand() / (float)RAND_MAX;
}
@@ -767,13 +764,13 @@ void testCudaFFT()
result = cufftPlan1d(&plan, iDetectorCount, CUFFT_R2C, iProjectionCount);
if(result != CUFFT_SUCCESS)
{
- cerr << "Failed to plan 1d r2c fft" << endl;
+ ASTRA_ERROR("Failed to plan 1d r2c fft");
}
result = cufftExecR2C(plan, pfDevProj, pDevFourProj);
if(result != CUFFT_SUCCESS)
{
- cerr << "Failed to exec 1d r2c fft" << endl;
+ ASTRA_ERROR("Failed to exec 1d r2c fft");
}
cufftDestroy(plan);
@@ -787,7 +784,7 @@ void testCudaFFT()
float * pfHostFourProjImaginary = new float[iTotalElementCount];
convertComplexToRealImg(pHostFourProj, iTotalElementCount, pfHostFourProjReal, pfHostFourProjImaginary);
-
+
writeToMatlabFile("proj_four_real.mat", pfHostFourProjReal, iProjectionCount, iDetectorCount);
writeToMatlabFile("proj_four_imaginary.mat", pfHostFourProjImaginary, iProjectionCount, iDetectorCount);
@@ -797,13 +794,13 @@ void testCudaFFT()
result = cufftPlan1d(&plan, iDetectorCount, CUFFT_C2R, iProjectionCount);
if(result != CUFFT_SUCCESS)
{
- cerr << "Failed to plan 1d c2r fft" << endl;
+ ASTRA_ERROR("Failed to plan 1d c2r fft");
}
result = cufftExecC2R(plan, pDevFourProj, pfDevInFourProj);
if(result != CUFFT_SUCCESS)
{
- cerr << "Failed to exec 1d c2r fft" << endl;
+ ASTRA_ERROR("Failed to exec 1d c2r fft");
}
cufftDestroy(plan);
diff --git a/cuda/2d/par_fp.cu b/cuda/2d/par_fp.cu
index d0ca7ff..bb8b909 100644
--- a/cuda/2d/par_fp.cu
+++ b/cuda/2d/par_fp.cu
@@ -487,7 +487,7 @@ bool FP_simple_internal(float* D_volumeData, unsigned int volumePitch,
unsigned int blockEnd = 0;
bool blockVertical = false;
for (unsigned int a = 0; a <= dims.iProjAngles; ++a) {
- bool vertical;
+ bool vertical = false;
// TODO: Having <= instead of < below causes a 5% speedup.
// Maybe we should detect corner cases and put them in the optimal
// group of angles.
diff --git a/cuda/2d/util.cu b/cuda/2d/util.cu
index 81e368f..a4f8f3e 100644
--- a/cuda/2d/util.cu
+++ b/cuda/2d/util.cu
@@ -30,6 +30,8 @@ $Id$
#include <cassert>
#include "util.h"
+#include "../../include/astra/Logging.h"
+
namespace astraCUDA {
bool copyVolumeToDevice(const float* in_data, unsigned int in_pitch,
@@ -91,7 +93,7 @@ bool allocateVolume(float*& ptr, unsigned int width, unsigned int height, unsign
cudaError_t ret = cudaMallocPitch((void**)&ptr, &p, sizeof(float)*width, height);
if (ret != cudaSuccess) {
reportCudaError(ret);
- fprintf(stderr, "Failed to allocate %dx%d GPU buffer\n", width, height);
+ ASTRA_ERROR("Failed to allocate %dx%d GPU buffer", width, height);
return false;
}
@@ -259,7 +261,7 @@ bool cudaTextForceKernelsCompletion()
cudaError_t returnedCudaError = cudaThreadSynchronize();
if(returnedCudaError != cudaSuccess) {
- fprintf(stderr, "Failed to force completion of cuda kernels: %d: %s.\n", returnedCudaError, cudaGetErrorString(returnedCudaError));
+ ASTRA_ERROR("Failed to force completion of cuda kernels: %d: %s.", returnedCudaError, cudaGetErrorString(returnedCudaError));
return false;
}
@@ -269,7 +271,7 @@ bool cudaTextForceKernelsCompletion()
void reportCudaError(cudaError_t err)
{
if(err != cudaSuccess)
- fprintf(stderr, "CUDA error %d: %s.\n", err, cudaGetErrorString(err));
+ ASTRA_ERROR("CUDA error %d: %s.", err, cudaGetErrorString(err));
}
diff --git a/cuda/3d/cone_fp.cu b/cuda/3d/cone_fp.cu
index bda71ba..b36d2bc 100644
--- a/cuda/3d/cone_fp.cu
+++ b/cuda/3d/cone_fp.cu
@@ -340,7 +340,7 @@ bool ConeFP_Array_internal(cudaPitchedPtr D_projData,
// tic(t);
for (unsigned int a = 0; a <= angleCount; ++a) {
- int dir;
+ int dir = -1;
if (a != angleCount) {
float dX = fabsf(angles[a].fSrcX - (angles[a].fDetSX + dims.iProjU*angles[a].fDetUX*0.5f + dims.iProjV*angles[a].fDetVX*0.5f));
float dY = fabsf(angles[a].fSrcY - (angles[a].fDetSY + dims.iProjU*angles[a].fDetUY*0.5f + dims.iProjV*angles[a].fDetVY*0.5f));
diff --git a/cuda/3d/par3d_fp.cu b/cuda/3d/par3d_fp.cu
index 8d44540..b14c494 100644
--- a/cuda/3d/par3d_fp.cu
+++ b/cuda/3d/par3d_fp.cu
@@ -440,7 +440,7 @@ bool Par3DFP_Array_internal(cudaPitchedPtr D_projData,
// tic(t);
for (unsigned int a = 0; a <= angleCount; ++a) {
- int dir;
+ int dir = -1;
if (a != dims.iProjAngles) {
float dX = fabsf(angles[a].fRayX);
float dY = fabsf(angles[a].fRayY);
diff --git a/cuda/3d/util3d.cu b/cuda/3d/util3d.cu
index d85a928..537ed69 100644
--- a/cuda/3d/util3d.cu
+++ b/cuda/3d/util3d.cu
@@ -31,6 +31,8 @@ $Id$
#include "util3d.h"
#include "../2d/util.h"
+#include "../../include/astra/Logging.h"
+
namespace astraCUDA3d {
@@ -46,7 +48,7 @@ cudaPitchedPtr allocateVolumeData(const SDimensions3D& dims)
cudaError err = cudaMalloc3D(&volData, extentV);
if (err != cudaSuccess) {
astraCUDA::reportCudaError(err);
- fprintf(stderr, "Failed to allocate %dx%dx%d GPU buffer\n", dims.iVolX, dims.iVolY, dims.iVolZ);
+ ASTRA_ERROR("Failed to allocate %dx%dx%d GPU buffer", dims.iVolX, dims.iVolY, dims.iVolZ);
volData.ptr = 0;
// TODO: return 0 somehow?
}
@@ -65,7 +67,7 @@ cudaPitchedPtr allocateProjectionData(const SDimensions3D& dims)
cudaError err = cudaMalloc3D(&projData, extentP);
if (err != cudaSuccess) {
astraCUDA::reportCudaError(err);
- fprintf(stderr, "Failed to allocate %dx%dx%d GPU buffer\n", dims.iProjU, dims.iProjAngles, dims.iProjV);
+ ASTRA_ERROR("Failed to allocate %dx%dx%d GPU buffer", dims.iProjU, dims.iProjAngles, dims.iProjV);
projData.ptr = 0;
// TODO: return 0 somehow?
}
@@ -303,7 +305,7 @@ cudaArray* allocateVolumeArray(const SDimensions3D& dims)
cudaError err = cudaMalloc3DArray(&cuArray, &channelDesc, extentA);
if (err != cudaSuccess) {
astraCUDA::reportCudaError(err);
- fprintf(stderr, "Failed to allocate %dx%dx%d GPU array\n", dims.iVolX, dims.iVolY, dims.iVolZ);
+ ASTRA_ERROR("Failed to allocate %dx%dx%d GPU array", dims.iVolX, dims.iVolY, dims.iVolZ);
return 0;
}
@@ -321,7 +323,7 @@ cudaArray* allocateProjectionArray(const SDimensions3D& dims)
if (err != cudaSuccess) {
astraCUDA::reportCudaError(err);
- fprintf(stderr, "Failed to allocate %dx%dx%d GPU array\n", dims.iProjU, dims.iProjAngles, dims.iProjV);
+ ASTRA_ERROR("Failed to allocate %dx%dx%d GPU array", dims.iProjU, dims.iProjAngles, dims.iProjV);
return 0;
}
@@ -397,7 +399,7 @@ bool cudaTextForceKernelsCompletion()
cudaError_t returnedCudaError = cudaThreadSynchronize();
if(returnedCudaError != cudaSuccess) {
- fprintf(stderr, "Failed to force completion of cuda kernels: %d: %s.\n", returnedCudaError, cudaGetErrorString(returnedCudaError));
+ ASTRA_ERROR("Failed to force completion of cuda kernels: %d: %s.", returnedCudaError, cudaGetErrorString(returnedCudaError));
return false;
}