summaryrefslogtreecommitdiffstats
path: root/cuda/2d
diff options
context:
space:
mode:
authorWillem Jan Palenstijn <Willem.Jan.Palenstijn@cwi.nl>2016-07-28 17:05:24 +0200
committerWillem Jan Palenstijn <Willem.Jan.Palenstijn@cwi.nl>2016-07-28 17:05:24 +0200
commitb2611a03577c285ddf48edab0d05dafa09ab362c (patch)
treec1d2f1b5166ba23f55e68e8faf0832f7c540f787 /cuda/2d
parent1ff4a270a7df1edb54dd91fe653d6a936b959b3a (diff)
parent53249b3ad63f0d08b9862a75602acf263d230d77 (diff)
downloadastra-b2611a03577c285ddf48edab0d05dafa09ab362c.tar.gz
astra-b2611a03577c285ddf48edab0d05dafa09ab362c.tar.bz2
astra-b2611a03577c285ddf48edab0d05dafa09ab362c.tar.xz
astra-b2611a03577c285ddf48edab0d05dafa09ab362c.zip
Merge branch 'master' into parvec
Diffstat (limited to 'cuda/2d')
-rw-r--r--cuda/2d/algo.cu7
-rw-r--r--cuda/2d/algo.h3
-rw-r--r--cuda/2d/astra.cu12
-rw-r--r--cuda/2d/cgls.cu4
-rw-r--r--cuda/2d/em.cu4
-rw-r--r--cuda/2d/fan_bp.cu47
-rw-r--r--cuda/2d/fan_bp.h9
-rw-r--r--cuda/2d/fft.cu46
-rw-r--r--cuda/2d/par_bp.cu31
-rw-r--r--cuda/2d/par_bp.h4
-rw-r--r--cuda/2d/sart.cu13
-rw-r--r--cuda/2d/sart.h6
-rw-r--r--cuda/2d/sirt.cu14
-rw-r--r--cuda/2d/sirt.h4
14 files changed, 134 insertions, 70 deletions
diff --git a/cuda/2d/algo.cu b/cuda/2d/algo.cu
index 144fabd..dc74e51 100644
--- a/cuda/2d/algo.cu
+++ b/cuda/2d/algo.cu
@@ -336,16 +336,17 @@ bool ReconAlgo::callFP(float* D_volumeData, unsigned int volumePitch,
}
bool ReconAlgo::callBP(float* D_volumeData, unsigned int volumePitch,
- float* D_projData, unsigned int projPitch)
+ float* D_projData, unsigned int projPitch,
+ float outputScale)
{
if (angles) {
assert(!fanProjs);
return BP(D_volumeData, volumePitch, D_projData, projPitch,
- dims, angles, TOffsets);
+ dims, angles, TOffsets, outputScale);
} else {
assert(fanProjs);
return FanBP(D_volumeData, volumePitch, D_projData, projPitch,
- dims, fanProjs);
+ dims, fanProjs, outputScale);
}
}
diff --git a/cuda/2d/algo.h b/cuda/2d/algo.h
index a75905e..99959c8 100644
--- a/cuda/2d/algo.h
+++ b/cuda/2d/algo.h
@@ -118,7 +118,8 @@ protected:
float* D_projData, unsigned int projPitch,
float outputScale);
bool callBP(float* D_volumeData, unsigned int volumePitch,
- float* D_projData, unsigned int projPitch);
+ float* D_projData, unsigned int projPitch,
+ float outputScale);
SDimensions dims;
diff --git a/cuda/2d/astra.cu b/cuda/2d/astra.cu
index 379c690..c1e6566 100644
--- a/cuda/2d/astra.cu
+++ b/cuda/2d/astra.cu
@@ -368,21 +368,19 @@ bool AstraFBP::run()
}
+ float fOutputScale = (M_PI / 2.0f) / (float)pData->dims.iProjAngles;
+
if (pData->bFanBeam) {
- ok = FanBP_FBPWeighted(pData->D_volumeData, pData->volumePitch, pData->D_sinoData, pData->sinoPitch, pData->dims, pData->fanProjections);
+ ok = FanBP_FBPWeighted(pData->D_volumeData, pData->volumePitch, pData->D_sinoData, pData->sinoPitch, pData->dims, pData->fanProjections, fOutputScale);
} else {
- ok = BP(pData->D_volumeData, pData->volumePitch, pData->D_sinoData, pData->sinoPitch, pData->dims, pData->angles, pData->TOffsets);
+ ok = BP(pData->D_volumeData, pData->volumePitch, pData->D_sinoData, pData->sinoPitch, pData->dims, pData->angles, pData->TOffsets, fOutputScale);
}
if(!ok)
{
return false;
}
- processVol<opMul>(pData->D_volumeData,
- (M_PI / 2.0f) / (float)pData->dims.iProjAngles,
- pData->volumePitch, pData->dims);
-
return true;
}
@@ -594,7 +592,7 @@ bool BPalgo::iterate(unsigned int)
{
// TODO: This zeroVolume makes an earlier memcpy of D_volumeData redundant
zeroVolumeData(D_volumeData, volumePitch, dims);
- callBP(D_volumeData, volumePitch, D_sinoData, sinoPitch);
+ callBP(D_volumeData, volumePitch, D_sinoData, sinoPitch, 1.0f);
return true;
}
diff --git a/cuda/2d/cgls.cu b/cuda/2d/cgls.cu
index 9ead563..f402914 100644
--- a/cuda/2d/cgls.cu
+++ b/cuda/2d/cgls.cu
@@ -135,7 +135,7 @@ bool CGLS::iterate(unsigned int iterations)
// p = A'*r
zeroVolumeData(D_p, pPitch, dims);
- callBP(D_p, pPitch, D_r, rPitch);
+ callBP(D_p, pPitch, D_r, rPitch, 1.0f);
if (useVolumeMask)
processVol<opMul>(D_p, D_maskData, pPitch, dims);
@@ -166,7 +166,7 @@ bool CGLS::iterate(unsigned int iterations)
// z = A'*r
zeroVolumeData(D_z, zPitch, dims);
- callBP(D_z, zPitch, D_r, rPitch);
+ callBP(D_z, zPitch, D_r, rPitch, 1.0f);
if (useVolumeMask)
processVol<opMul>(D_z, D_maskData, zPitch, dims);
diff --git a/cuda/2d/em.cu b/cuda/2d/em.cu
index 00127c0..8593b08 100644
--- a/cuda/2d/em.cu
+++ b/cuda/2d/em.cu
@@ -102,7 +102,7 @@ bool EM::precomputeWeights()
#endif
{
processSino<opSet>(D_projData, 1.0f, projPitch, dims);
- callBP(D_pixelWeight, pixelPitch, D_projData, projPitch);
+ callBP(D_pixelWeight, pixelPitch, D_projData, projPitch, 1.0f);
}
processVol<opInvert>(D_pixelWeight, pixelPitch, dims);
@@ -137,7 +137,7 @@ bool EM::iterate(unsigned int iterations)
// Do BP of projData into tmpData
zeroVolumeData(D_tmpData, tmpPitch, dims);
- callBP(D_tmpData, tmpPitch, D_projData, projPitch);
+ callBP(D_tmpData, tmpPitch, D_projData, projPitch, 1.0f);
// Multiply volumeData with tmpData divided by pixel weights
processVol<opMul2>(D_volumeData, D_tmpData, D_pixelWeight, pixelPitch, dims);
diff --git a/cuda/2d/fan_bp.cu b/cuda/2d/fan_bp.cu
index 74e8b12..b4321ba 100644
--- a/cuda/2d/fan_bp.cu
+++ b/cuda/2d/fan_bp.cu
@@ -77,7 +77,7 @@ static bool bindProjDataTexture(float* data, unsigned int pitch, unsigned int wi
return true;
}
-__global__ void devFanBP(float* D_volData, unsigned int volPitch, unsigned int startAngle, const SDimensions dims)
+__global__ void devFanBP(float* D_volData, unsigned int volPitch, unsigned int startAngle, const SDimensions dims, float fOutputScale)
{
const int relX = threadIdx.x;
const int relY = threadIdx.y;
@@ -121,11 +121,11 @@ __global__ void devFanBP(float* D_volData, unsigned int volPitch, unsigned int s
fA += 1.0f;
}
- volData[Y*volPitch+X] += fVal;
+ volData[Y*volPitch+X] += fVal * fOutputScale;
}
// supersampling version
-__global__ void devFanBP_SS(float* D_volData, unsigned int volPitch, unsigned int startAngle, const SDimensions dims)
+__global__ void devFanBP_SS(float* D_volData, unsigned int volPitch, unsigned int startAngle, const SDimensions dims, float fOutputScale)
{
const int relX = threadIdx.x;
const int relY = threadIdx.y;
@@ -146,6 +146,8 @@ __global__ void devFanBP_SS(float* D_volData, unsigned int volPitch, unsigned in
float* volData = (float*)D_volData;
+ fOutputScale /= (dims.iRaysPerPixelDim * dims.iRaysPerPixelDim);
+
float fVal = 0.0f;
float fA = startAngle + 0.5f;
@@ -180,14 +182,14 @@ __global__ void devFanBP_SS(float* D_volData, unsigned int volPitch, unsigned in
fA += 1.0f;
}
- volData[Y*volPitch+X] += fVal / (dims.iRaysPerPixelDim * dims.iRaysPerPixelDim);
+ volData[Y*volPitch+X] += fVal * fOutputScale;
}
// BP specifically for SART.
// It includes (free) weighting with voxel weight.
// It assumes the proj texture is set up _without_ padding, unlike regular BP.
-__global__ void devFanBP_SART(float* D_volData, unsigned int volPitch, const SDimensions dims)
+__global__ void devFanBP_SART(float* D_volData, unsigned int volPitch, const SDimensions dims, float fOutputScale)
{
const int relX = threadIdx.x;
const int relY = threadIdx.y;
@@ -222,12 +224,12 @@ __global__ void devFanBP_SART(float* D_volData, unsigned int volPitch, const SDi
const float fT = fNum / fDen;
const float fVal = tex2D(gT_FanProjTexture, fT, 0.5f);
- volData[Y*volPitch+X] += fVal;
+ volData[Y*volPitch+X] += fVal * fOutputScale;
}
// Weighted BP for use in fan beam FBP
// Each pixel/ray is weighted by 1/L^2 where L is the distance to the source.
-__global__ void devFanBP_FBPWeighted(float* D_volData, unsigned int volPitch, unsigned int startAngle, const SDimensions dims)
+__global__ void devFanBP_FBPWeighted(float* D_volData, unsigned int volPitch, unsigned int startAngle, const SDimensions dims, float fOutputScale)
{
const int relX = threadIdx.x;
const int relY = threadIdx.y;
@@ -273,13 +275,14 @@ __global__ void devFanBP_FBPWeighted(float* D_volData, unsigned int volPitch, un
fA += 1.0f;
}
- volData[Y*volPitch+X] += fVal;
+ volData[Y*volPitch+X] += fVal * fOutputScale;
}
bool FanBP_internal(float* D_volumeData, unsigned int volumePitch,
float* D_projData, unsigned int projPitch,
- const SDimensions& dims, const SFanProjection* angles)
+ const SDimensions& dims, const SFanProjection* angles,
+ float fOutputScale)
{
assert(dims.iProjAngles <= g_MaxAngles);
@@ -310,9 +313,9 @@ bool FanBP_internal(float* D_volumeData, unsigned int volumePitch,
for (unsigned int i = 0; i < dims.iProjAngles; i += g_anglesPerBlock) {
if (dims.iRaysPerPixelDim > 1)
- devFanBP_SS<<<dimGrid, dimBlock, 0, stream>>>(D_volumeData, volumePitch, i, dims);
+ devFanBP_SS<<<dimGrid, dimBlock, 0, stream>>>(D_volumeData, volumePitch, i, dims, fOutputScale);
else
- devFanBP<<<dimGrid, dimBlock, 0, stream>>>(D_volumeData, volumePitch, i, dims);
+ devFanBP<<<dimGrid, dimBlock, 0, stream>>>(D_volumeData, volumePitch, i, dims, fOutputScale);
}
cudaThreadSynchronize();
@@ -325,7 +328,8 @@ bool FanBP_internal(float* D_volumeData, unsigned int volumePitch,
bool FanBP_FBPWeighted_internal(float* D_volumeData, unsigned int volumePitch,
float* D_projData, unsigned int projPitch,
- const SDimensions& dims, const SFanProjection* angles)
+ const SDimensions& dims, const SFanProjection* angles,
+ float fOutputScale)
{
assert(dims.iProjAngles <= g_MaxAngles);
@@ -355,7 +359,7 @@ bool FanBP_FBPWeighted_internal(float* D_volumeData, unsigned int volumePitch,
cudaStreamCreate(&stream);
for (unsigned int i = 0; i < dims.iProjAngles; i += g_anglesPerBlock) {
- devFanBP_FBPWeighted<<<dimGrid, dimBlock, 0, stream>>>(D_volumeData, volumePitch, i, dims);
+ devFanBP_FBPWeighted<<<dimGrid, dimBlock, 0, stream>>>(D_volumeData, volumePitch, i, dims, fOutputScale);
}
cudaThreadSynchronize();
@@ -370,7 +374,8 @@ bool FanBP_FBPWeighted_internal(float* D_volumeData, unsigned int volumePitch,
bool FanBP_SART(float* D_volumeData, unsigned int volumePitch,
float* D_projData, unsigned int projPitch,
unsigned int angle,
- const SDimensions& dims, const SFanProjection* angles)
+ const SDimensions& dims, const SFanProjection* angles,
+ float fOutputScale)
{
// only one angle
bindProjDataTexture(D_projData, projPitch, dims.iProjDets, 1, cudaAddressModeClamp);
@@ -391,7 +396,7 @@ bool FanBP_SART(float* D_volumeData, unsigned int volumePitch,
dim3 dimGrid((dims.iVolWidth+g_blockSlices-1)/g_blockSlices,
(dims.iVolHeight+g_blockSliceSize-1)/g_blockSliceSize);
- devFanBP_SART<<<dimGrid, dimBlock>>>(D_volumeData, volumePitch, dims);
+ devFanBP_SART<<<dimGrid, dimBlock>>>(D_volumeData, volumePitch, dims, fOutputScale);
cudaThreadSynchronize();
cudaTextForceKernelsCompletion();
@@ -401,7 +406,8 @@ bool FanBP_SART(float* D_volumeData, unsigned int volumePitch,
bool FanBP(float* D_volumeData, unsigned int volumePitch,
float* D_projData, unsigned int projPitch,
- const SDimensions& dims, const SFanProjection* angles)
+ const SDimensions& dims, const SFanProjection* angles,
+ float fOutputScale)
{
for (unsigned int iAngle = 0; iAngle < dims.iProjAngles; iAngle += g_MaxAngles) {
SDimensions subdims = dims;
@@ -413,7 +419,7 @@ bool FanBP(float* D_volumeData, unsigned int volumePitch,
bool ret;
ret = FanBP_internal(D_volumeData, volumePitch,
D_projData + iAngle * projPitch, projPitch,
- subdims, angles + iAngle);
+ subdims, angles + iAngle, fOutputScale);
if (!ret)
return false;
}
@@ -422,7 +428,8 @@ bool FanBP(float* D_volumeData, unsigned int volumePitch,
bool FanBP_FBPWeighted(float* D_volumeData, unsigned int volumePitch,
float* D_projData, unsigned int projPitch,
- const SDimensions& dims, const SFanProjection* angles)
+ const SDimensions& dims, const SFanProjection* angles,
+ float fOutputScale)
{
for (unsigned int iAngle = 0; iAngle < dims.iProjAngles; iAngle += g_MaxAngles) {
SDimensions subdims = dims;
@@ -434,7 +441,7 @@ bool FanBP_FBPWeighted(float* D_volumeData, unsigned int volumePitch,
bool ret;
ret = FanBP_FBPWeighted_internal(D_volumeData, volumePitch,
D_projData + iAngle * projPitch, projPitch,
- subdims, angles + iAngle);
+ subdims, angles + iAngle, fOutputScale);
if (!ret)
return false;
@@ -498,7 +505,7 @@ int main()
copyVolumeToDevice(img, dims.iVolWidth, dims.iVolWidth, dims.iVolHeight, D_volumeData, volumePitch);
copySinogramToDevice(sino, dims.iProjDets, dims.iProjDets, dims.iProjAngles, D_projData, projPitch);
- FanBP(D_volumeData, volumePitch, D_projData, projPitch, dims, projs);
+ FanBP(D_volumeData, volumePitch, D_projData, projPitch, dims, projs, 1.0f);
copyVolumeFromDevice(img, dims.iVolWidth, dims.iVolWidth, dims.iVolHeight, D_volumeData, volumePitch);
diff --git a/cuda/2d/fan_bp.h b/cuda/2d/fan_bp.h
index e4e69b0..3ebe1e8 100644
--- a/cuda/2d/fan_bp.h
+++ b/cuda/2d/fan_bp.h
@@ -33,16 +33,19 @@ namespace astraCUDA {
_AstraExport bool FanBP(float* D_volumeData, unsigned int volumePitch,
float* D_projData, unsigned int projPitch,
- const SDimensions& dims, const SFanProjection* angles);
+ const SDimensions& dims, const SFanProjection* angles,
+ float fOutputScale);
_AstraExport bool FanBP_SART(float* D_volumeData, unsigned int volumePitch,
float* D_projData, unsigned int projPitch,
unsigned int angle,
- const SDimensions& dims, const SFanProjection* angles);
+ const SDimensions& dims, const SFanProjection* angles,
+ float fOutputScale);
_AstraExport bool FanBP_FBPWeighted(float* D_volumeData, unsigned int volumePitch,
float* D_projData, unsigned int projPitch,
- const SDimensions& dims, const SFanProjection* angles);
+ const SDimensions& dims, const SFanProjection* angles,
+ float fOutputScale);
}
diff --git a/cuda/2d/fft.cu b/cuda/2d/fft.cu
index 2bfd493..2d259a9 100644
--- a/cuda/2d/fft.cu
+++ b/cuda/2d/fft.cu
@@ -35,6 +35,7 @@ $Id$
#include <fstream>
#include "../../include/astra/Logging.h"
+#include "astra/Fourier.h"
using namespace astra;
@@ -303,16 +304,48 @@ void genFilter(E_FBPFILTER _eFilter, float _fD, int _iProjectionCount,
float * pfFilt = new float[_iFFTFourierDetectorCount];
float * pfW = new float[_iFFTFourierDetectorCount];
+ // We cache one Fourier transform for repeated FBP's of the same size
+ static float *pfData = 0;
+ static int iFilterCacheSize = 0;
+
+ if (!pfData || iFilterCacheSize != _iFFTRealDetectorCount) {
+ // Compute filter in spatial domain
+
+ delete[] pfData;
+ pfData = new float[2*_iFFTRealDetectorCount];
+ int *ip = new int[int(2+sqrt(_iFFTRealDetectorCount)+1)];
+ ip[0] = 0;
+ float32 *w = new float32[_iFFTRealDetectorCount/2];
+
+ for (int i = 0; i < _iFFTRealDetectorCount; ++i) {
+ pfData[2*i+1] = 0.0f;
+
+ if (i & 1) {
+ int j = i;
+ if (2*j > _iFFTRealDetectorCount)
+ j = _iFFTRealDetectorCount - j;
+ float f = M_PI * j;
+ pfData[2*i] = -1 / (f*f);
+ } else {
+ pfData[2*i] = 0.0f;
+ }
+ }
+
+ pfData[0] = 0.25f;
+
+ cdft(2*_iFFTRealDetectorCount, -1, pfData, ip, w);
+ delete[] ip;
+ delete[] w;
+
+ iFilterCacheSize = _iFFTRealDetectorCount;
+ }
+
for(int iDetectorIndex = 0; iDetectorIndex < _iFFTFourierDetectorCount; iDetectorIndex++)
{
float fRelIndex = (float)iDetectorIndex / (float)_iFFTRealDetectorCount;
- // filt = 2*( 0:(order/2) )./order;
- pfFilt[iDetectorIndex] = 2.0f * fRelIndex;
- //pfFilt[iDetectorIndex] = 1.0f;
-
- // w = 2*pi*(0:size(filt,2)-1)/order
- pfW[iDetectorIndex] = 3.1415f * 2.0f * fRelIndex;
+ pfFilt[iDetectorIndex] = 2.0f * pfData[2*iDetectorIndex];
+ pfW[iDetectorIndex] = M_PI * 2.0f * fRelIndex;
}
switch(_eFilter)
@@ -866,5 +899,4 @@ void downloadDebugFilterReal(float * _pfHostSinogram, int _iProjectionCount,
free(pfHostFilter);
}
-
#endif
diff --git a/cuda/2d/par_bp.cu b/cuda/2d/par_bp.cu
index 635200f..d9f7325 100644
--- a/cuda/2d/par_bp.cu
+++ b/cuda/2d/par_bp.cu
@@ -73,7 +73,7 @@ static bool bindProjDataTexture(float* data, unsigned int pitch, unsigned int wi
return true;
}
-__global__ void devBP(float* D_volData, unsigned int volPitch, unsigned int startAngle, bool offsets, const SDimensions dims)
+__global__ void devBP(float* D_volData, unsigned int volPitch, unsigned int startAngle, bool offsets, const SDimensions dims, float fOutputScale)
{
const int relX = threadIdx.x;
const int relY = threadIdx.y;
@@ -123,11 +123,11 @@ __global__ void devBP(float* D_volData, unsigned int volPitch, unsigned int star
}
- volData[Y*volPitch+X] += fVal;
+ volData[Y*volPitch+X] += fVal * fOutputScale;
}
// supersampling version
-__global__ void devBP_SS(float* D_volData, unsigned int volPitch, unsigned int startAngle, bool offsets, const SDimensions dims)
+__global__ void devBP_SS(float* D_volData, unsigned int volPitch, unsigned int startAngle, bool offsets, const SDimensions dims, float fOutputScale)
{
const int relX = threadIdx.x;
const int relY = threadIdx.y;
@@ -152,6 +152,8 @@ __global__ void devBP_SS(float* D_volData, unsigned int volPitch, unsigned int s
float fA = startAngle + 0.5f;
const float fT_base = 0.5f*dims.iProjDets - 0.5f + 0.5f;
+ fOutputScale /= (dims.iRaysPerPixelDim * dims.iRaysPerPixelDim);
+
if (offsets) {
for (int angle = startAngle; angle < endAngle; ++angle)
@@ -196,10 +198,10 @@ __global__ void devBP_SS(float* D_volData, unsigned int volPitch, unsigned int s
}
- volData[Y*volPitch+X] += fVal / (dims.iRaysPerPixelDim * dims.iRaysPerPixelDim);
+ volData[Y*volPitch+X] += fVal * fOutputScale;
}
-__global__ void devBP_SART(float* D_volData, unsigned int volPitch, float offset, float angle_sin, float angle_cos, const SDimensions dims)
+__global__ void devBP_SART(float* D_volData, unsigned int volPitch, float offset, float angle_sin, float angle_cos, const SDimensions dims, float fOutputScale)
{
const int relX = threadIdx.x;
const int relY = threadIdx.y;
@@ -218,13 +220,13 @@ __global__ void devBP_SART(float* D_volData, unsigned int volPitch, float offset
const float fT = fT_base + fX * angle_cos - fY * angle_sin + offset;
const float fVal = tex2D(gT_projTexture, fT, 0.5f);
- D_volData[Y*volPitch+X] += fVal;
+ D_volData[Y*volPitch+X] += fVal * fOutputScale;
}
bool BP_internal(float* D_volumeData, unsigned int volumePitch,
float* D_projData, unsigned int projPitch,
- const SDimensions& dims, const float* angles, const float* TOffsets)
+ const SDimensions& dims, const float* angles, const float* TOffsets, float fOutputScale)
{
// TODO: process angles block by block
assert(dims.iProjAngles <= g_MaxAngles);
@@ -261,9 +263,9 @@ bool BP_internal(float* D_volumeData, unsigned int volumePitch,
for (unsigned int i = 0; i < dims.iProjAngles; i += g_anglesPerBlock) {
if (dims.iRaysPerPixelDim > 1)
- devBP_SS<<<dimGrid, dimBlock, 0, stream>>>(D_volumeData, volumePitch, i, (TOffsets != 0), dims);
+ devBP_SS<<<dimGrid, dimBlock, 0, stream>>>(D_volumeData, volumePitch, i, (TOffsets != 0), dims, fOutputScale);
else
- devBP<<<dimGrid, dimBlock, 0, stream>>>(D_volumeData, volumePitch, i, (TOffsets != 0), dims);
+ devBP<<<dimGrid, dimBlock, 0, stream>>>(D_volumeData, volumePitch, i, (TOffsets != 0), dims, fOutputScale);
}
cudaThreadSynchronize();
@@ -276,7 +278,7 @@ bool BP_internal(float* D_volumeData, unsigned int volumePitch,
bool BP(float* D_volumeData, unsigned int volumePitch,
float* D_projData, unsigned int projPitch,
- const SDimensions& dims, const float* angles, const float* TOffsets)
+ const SDimensions& dims, const float* angles, const float* TOffsets, float fOutputScale)
{
for (unsigned int iAngle = 0; iAngle < dims.iProjAngles; iAngle += g_MaxAngles) {
SDimensions subdims = dims;
@@ -289,7 +291,8 @@ bool BP(float* D_volumeData, unsigned int volumePitch,
ret = BP_internal(D_volumeData, volumePitch,
D_projData + iAngle * projPitch, projPitch,
subdims, angles + iAngle,
- TOffsets ? TOffsets + iAngle : 0);
+ TOffsets ? TOffsets + iAngle : 0,
+ fOutputScale);
if (!ret)
return false;
}
@@ -300,7 +303,7 @@ bool BP(float* D_volumeData, unsigned int volumePitch,
bool BP_SART(float* D_volumeData, unsigned int volumePitch,
float* D_projData, unsigned int projPitch,
unsigned int angle, const SDimensions& dims,
- const float* angles, const float* TOffsets)
+ const float* angles, const float* TOffsets, float fOutputScale)
{
// Only one angle.
// We need to Clamp to the border pixels instead of to zero, because
@@ -318,7 +321,7 @@ bool BP_SART(float* D_volumeData, unsigned int volumePitch,
dim3 dimGrid((dims.iVolWidth+g_blockSlices-1)/g_blockSlices,
(dims.iVolHeight+g_blockSliceSize-1)/g_blockSliceSize);
- devBP_SART<<<dimGrid, dimBlock>>>(D_volumeData, volumePitch, offset, angle_sin, angle_cos, dims);
+ devBP_SART<<<dimGrid, dimBlock>>>(D_volumeData, volumePitch, offset, angle_sin, angle_cos, dims, fOutputScale);
cudaThreadSynchronize();
cudaTextForceKernelsCompletion();
@@ -369,7 +372,7 @@ int main()
for (unsigned int i = 0; i < dims.iProjAngles; ++i)
angle[i] = i*(M_PI/dims.iProjAngles);
- BP(D_volumeData, volumePitch, D_projData, projPitch, dims, angle, 0);
+ BP(D_volumeData, volumePitch, D_projData, projPitch, dims, angle, 0, 1.0f);
delete[] angle;
diff --git a/cuda/2d/par_bp.h b/cuda/2d/par_bp.h
index eaeafd8..64bcd34 100644
--- a/cuda/2d/par_bp.h
+++ b/cuda/2d/par_bp.h
@@ -36,12 +36,12 @@ namespace astraCUDA {
_AstraExport bool BP(float* D_volumeData, unsigned int volumePitch,
float* D_projData, unsigned int projPitch,
const SDimensions& dims, const float* angles,
- const float* TOffsets);
+ const float* TOffsets, float fOutputScale);
_AstraExport bool BP_SART(float* D_volumeData, unsigned int volumePitch,
float* D_projData, unsigned int projPitch,
unsigned int angle, const SDimensions& dims,
- const float* angles, const float* TOffsets);
+ const float* angles, const float* TOffsets, float fOutputScale);
}
diff --git a/cuda/2d/sart.cu b/cuda/2d/sart.cu
index 29670c3..c8608a3 100644
--- a/cuda/2d/sart.cu
+++ b/cuda/2d/sart.cu
@@ -71,6 +71,8 @@ SART::SART() : ReconAlgo()
projectionCount = 0;
iteration = 0;
customOrder = false;
+
+ fRelaxation = 1.0f;
}
@@ -98,6 +100,7 @@ void SART::reset()
projectionCount = 0;
iteration = 0;
customOrder = false;
+ fRelaxation = 1.0f;
ReconAlgo::reset();
}
@@ -200,10 +203,10 @@ bool SART::iterate(unsigned int iterations)
// BP, mask, and add back
// TODO: Try putting the masking directly in the BP
zeroVolumeData(D_tmpData, tmpPitch, dims);
- callBP_SART(D_tmpData, tmpPitch, D_projData, projPitch, angle);
+ callBP_SART(D_tmpData, tmpPitch, D_projData, projPitch, angle, fRelaxation);
processVol<opAddMul>(D_volumeData, D_maskData, D_tmpData, volumePitch, dims);
} else {
- callBP_SART(D_volumeData, volumePitch, D_projData, projPitch, angle);
+ callBP_SART(D_volumeData, volumePitch, D_projData, projPitch, angle, fRelaxation);
}
if (useMinConstraint)
@@ -264,16 +267,16 @@ bool SART::callFP_SART(float* D_volumeData, unsigned int volumePitch,
bool SART::callBP_SART(float* D_volumeData, unsigned int volumePitch,
float* D_projData, unsigned int projPitch,
- unsigned int angle)
+ unsigned int angle, float outputScale)
{
if (angles) {
assert(!fanProjs);
return BP_SART(D_volumeData, volumePitch, D_projData, projPitch,
- angle, dims, angles, TOffsets);
+ angle, dims, angles, TOffsets, outputScale);
} else {
assert(fanProjs);
return FanBP_SART(D_volumeData, volumePitch, D_projData, projPitch,
- angle, dims, fanProjs);
+ angle, dims, fanProjs, outputScale);
}
}
diff --git a/cuda/2d/sart.h b/cuda/2d/sart.h
index 6574a6f..eff9ecf 100644
--- a/cuda/2d/sart.h
+++ b/cuda/2d/sart.h
@@ -50,6 +50,8 @@ public:
virtual float computeDiffNorm();
+ void setRelaxation(float r) { fRelaxation = r; }
+
protected:
void reset();
bool precomputeWeights();
@@ -59,7 +61,7 @@ protected:
unsigned int angle, float outputScale);
bool callBP_SART(float* D_volumeData, unsigned int volumePitch,
float* D_projData, unsigned int projPitch,
- unsigned int angle);
+ unsigned int angle, float outputScale);
// projection angle variables
@@ -78,6 +80,8 @@ protected:
// Geometry-specific precomputed data
float* D_lineWeight;
unsigned int linePitch;
+
+ float fRelaxation;
};
}
diff --git a/cuda/2d/sirt.cu b/cuda/2d/sirt.cu
index a6194a5..4baaccb 100644
--- a/cuda/2d/sirt.cu
+++ b/cuda/2d/sirt.cu
@@ -50,6 +50,8 @@ SIRT::SIRT() : ReconAlgo()
D_minMaskData = 0;
D_maxMaskData = 0;
+ fRelaxation = 1.0f;
+
freeMinMaxMasks = false;
}
@@ -83,6 +85,8 @@ void SIRT::reset()
useVolumeMask = false;
useSinogramMask = false;
+ fRelaxation = 1.0f;
+
ReconAlgo::reset();
}
@@ -127,10 +131,10 @@ bool SIRT::precomputeWeights()
zeroVolumeData(D_pixelWeight, pixelPitch, dims);
if (useSinogramMask) {
- callBP(D_pixelWeight, pixelPitch, D_smaskData, smaskPitch);
+ callBP(D_pixelWeight, pixelPitch, D_smaskData, smaskPitch, 1.0f);
} else {
processSino<opSet>(D_projData, 1.0f, projPitch, dims);
- callBP(D_pixelWeight, pixelPitch, D_projData, projPitch);
+ callBP(D_pixelWeight, pixelPitch, D_projData, projPitch, 1.0f);
}
processVol<opInvert>(D_pixelWeight, pixelPitch, dims);
@@ -139,6 +143,9 @@ bool SIRT::precomputeWeights()
processVol<opMul>(D_pixelWeight, D_maskData, pixelPitch, dims);
}
+ // Also fold the relaxation factor into pixel weights
+ processVol<opMul>(D_pixelWeight, fRelaxation, pixelPitch, dims);
+
return true;
}
@@ -251,8 +258,9 @@ bool SIRT::iterate(unsigned int iterations)
zeroVolumeData(D_tmpData, tmpPitch, dims);
- callBP(D_tmpData, tmpPitch, D_projData, projPitch);
+ callBP(D_tmpData, tmpPitch, D_projData, projPitch, 1.0f);
+ // pixel weights also contain the volume mask and relaxation factor
processVol<opAddMul>(D_volumeData, D_pixelWeight, D_tmpData, volumePitch, dims);
if (useMinConstraint)
diff --git a/cuda/2d/sirt.h b/cuda/2d/sirt.h
index 21094a1..bc913f4 100644
--- a/cuda/2d/sirt.h
+++ b/cuda/2d/sirt.h
@@ -53,6 +53,8 @@ public:
bool uploadMinMaxMasks(const float* minMaskData, const float* maxMaskData,
unsigned int pitch);
+ void setRelaxation(float r) { fRelaxation = r; }
+
virtual bool iterate(unsigned int iterations);
virtual float computeDiffNorm();
@@ -81,6 +83,8 @@ protected:
unsigned int minMaskPitch;
float* D_maxMaskData;
unsigned int maxMaskPitch;
+
+ float fRelaxation;
};
bool doSIRT(float* D_volumeData, unsigned int volumePitch,