From b1ffc11d930c19bd73af9837a08bc8dde9fe8e72 Mon Sep 17 00:00:00 2001
From: Willem Jan Palenstijn <Willem.Jan.Palenstijn@cwi.nl>
Date: Fri, 29 Jul 2016 12:03:38 +0200
Subject: Add CUDA parvec support

---
 astra_vc09.vcproj                                  |  52 ++
 astra_vc11.vcxproj                                 |  10 +-
 astra_vc11.vcxproj.filters                         |  24 +-
 build/linux/Makefile.in                            |   2 +
 build/msvc/gen.py                                  |   5 +
 cuda/2d/algo.cu                                    |  62 +-
 cuda/2d/algo.h                                     |  22 +-
 cuda/2d/astra.cu                                   | 726 ++++-----------------
 cuda/2d/astra.h                                    | 156 +----
 cuda/2d/cgls.cu                                    |  36 -
 cuda/2d/dims.h                                     |   5 +-
 cuda/2d/em.cu                                      |  28 -
 cuda/2d/fbp.cu                                     | 347 ++++++++++
 cuda/2d/fbp.h                                      |  98 +++
 cuda/2d/fbp_filters.h                              |   4 +
 cuda/2d/fft.cu                                     |  12 +-
 cuda/2d/fft.h                                      |   8 +-
 cuda/2d/par_bp.cu                                  | 166 ++---
 cuda/2d/par_bp.h                                   |   6 +-
 cuda/2d/par_fp.cu                                  | 360 ++--------
 cuda/2d/par_fp.h                                   |   4 +-
 cuda/2d/sart.cu                                    |   8 +-
 cuda/2d/sirt.cu                                    |  46 +-
 cuda/3d/fdk.cu                                     |  22 +-
 .../astra/CudaFilteredBackProjectionAlgorithm.h    |  30 +-
 include/astra/GeometryUtil2D.h                     |  73 ++-
 include/astra/ParallelProjectionGeometry2D.h       |   6 +-
 include/astra/ProjectionGeometry2D.h               |   6 +-
 samples/matlab/s014_FBP.m                          |   2 +-
 src/CudaFilteredBackProjectionAlgorithm.cpp        | 217 +-----
 src/CudaForwardProjectionAlgorithm.cpp             |  66 +-
 src/CudaReconstructionAlgorithm2D.cpp              |  63 +-
 src/FanFlatProjectionGeometry2D.cpp                |  18 +-
 src/GeometryUtil2D.cpp                             | 161 +++++
 src/ParallelProjectionGeometry2D.cpp               |  35 +-
 src/ProjectionGeometry2D.cpp                       |   8 +-
 36 files changed, 1179 insertions(+), 1715 deletions(-)
 create mode 100644 cuda/2d/fbp.cu
 create mode 100644 cuda/2d/fbp.h
 create mode 100644 src/GeometryUtil2D.cpp

diff --git a/astra_vc09.vcproj b/astra_vc09.vcproj
index f2cf62a..428bf8d 100644
--- a/astra_vc09.vcproj
+++ b/astra_vc09.vcproj
@@ -1076,6 +1076,10 @@
 					RelativePath=".\include\astra\ParallelProjectionGeometry3D.h"
 					>
 				</File>
+				<File
+					RelativePath=".\include\astra\ParallelVecProjectionGeometry2D.h"
+					>
+				</File>
 				<File
 					RelativePath=".\include\astra\ParallelVecProjectionGeometry3D.h"
 					>
@@ -1120,6 +1124,10 @@
 					RelativePath=".\src\FanFlatVecProjectionGeometry2D.cpp"
 					>
 				</File>
+				<File
+					RelativePath=".\src\GeometryUtil2D.cpp"
+					>
+				</File>
 				<File
 					RelativePath=".\src\GeometryUtil3D.cpp"
 					>
@@ -1132,6 +1140,10 @@
 					RelativePath=".\src\ParallelProjectionGeometry3D.cpp"
 					>
 				</File>
+				<File
+					RelativePath=".\src\ParallelVecProjectionGeometry2D.cpp"
+					>
+				</File>
 				<File
 					RelativePath=".\src\ParallelVecProjectionGeometry3D.cpp"
 					>
@@ -2184,6 +2196,10 @@
 					RelativePath=".\cuda\2d\fbp_filters.h"
 					>
 				</File>
+				<File
+					RelativePath=".\cuda\2d\fbp.h"
+					>
+				</File>
 				<File
 					RelativePath=".\cuda\2d\fft.h"
 					>
@@ -2556,6 +2572,42 @@
 						/>
 					</FileConfiguration>
 				</File>
+				<File
+					RelativePath=".\cuda\2d\fbp.cu"
+					>
+					<FileConfiguration
+						Name="Debug|Win32"
+						ExcludedFromBuild="true"
+						>
+						<Tool
+							Name="Cudart Build Rule"
+						/>
+					</FileConfiguration>
+					<FileConfiguration
+						Name="Debug|x64"
+						ExcludedFromBuild="true"
+						>
+						<Tool
+							Name="Cudart Build Rule"
+						/>
+					</FileConfiguration>
+					<FileConfiguration
+						Name="Release|Win32"
+						ExcludedFromBuild="true"
+						>
+						<Tool
+							Name="Cudart Build Rule"
+						/>
+					</FileConfiguration>
+					<FileConfiguration
+						Name="Release|x64"
+						ExcludedFromBuild="true"
+						>
+						<Tool
+							Name="Cudart Build Rule"
+						/>
+					</FileConfiguration>
+				</File>
 				<File
 					RelativePath=".\cuda\2d\fft.cu"
 					>
diff --git a/astra_vc11.vcxproj b/astra_vc11.vcxproj
index cd73076..d48796b 100644
--- a/astra_vc11.vcxproj
+++ b/astra_vc11.vcxproj
@@ -529,6 +529,7 @@
     <ClCompile Include="src\Float32VolumeData3DMemory.cpp" />
     <ClCompile Include="src\ForwardProjectionAlgorithm.cpp" />
     <ClCompile Include="src\Fourier.cpp" />
+    <ClCompile Include="src\GeometryUtil2D.cpp" />
     <ClCompile Include="src\GeometryUtil3D.cpp" />
     <ClCompile Include="src\Globals.cpp" />
     <ClCompile Include="src\Logging.cpp" />
@@ -569,6 +570,7 @@
     <ClInclude Include="cuda\2d\em.h" />
     <ClInclude Include="cuda\2d\fan_bp.h" />
     <ClInclude Include="cuda\2d\fan_fp.h" />
+    <ClInclude Include="cuda\2d\fbp.h" />
     <ClInclude Include="cuda\2d\fbp_filters.h" />
     <ClInclude Include="cuda\2d\fft.h" />
     <ClInclude Include="cuda\2d\par_bp.h" />
@@ -652,8 +654,8 @@
     <ClInclude Include="include\astra\ParallelBeamStripKernelProjector2D.h" />
     <ClInclude Include="include\astra\ParallelProjectionGeometry2D.h" />
     <ClInclude Include="include\astra\ParallelProjectionGeometry3D.h" />
-    <ClInclude Include="include\astra\ParallelVecProjectionGeometry3D.h" />
     <ClInclude Include="include\astra\ParallelVecProjectionGeometry2D.h" />
+    <ClInclude Include="include\astra\ParallelVecProjectionGeometry3D.h" />
     <ClInclude Include="include\astra\PlatformDepSystemCode.h" />
     <ClInclude Include="include\astra\PluginAlgorithm.h" />
     <ClInclude Include="include\astra\ProjectionGeometry2D.h" />
@@ -727,6 +729,12 @@
       <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</ExcludedFromBuild>
       <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
     </CudaCompile>
+    <CudaCompile Include="cuda\2d\fbp.cu">
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">true</ExcludedFromBuild>
+      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
+    </CudaCompile>
     <CudaCompile Include="cuda\2d\fft.cu">
       <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</ExcludedFromBuild>
       <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
diff --git a/astra_vc11.vcxproj.filters b/astra_vc11.vcxproj.filters
index 4143cd9..d6748f1 100644
--- a/astra_vc11.vcxproj.filters
+++ b/astra_vc11.vcxproj.filters
@@ -25,6 +25,9 @@
     <CudaCompile Include="cuda\2d\fan_fp.cu">
       <Filter>CUDA\cuda source</Filter>
     </CudaCompile>
+    <CudaCompile Include="cuda\2d\fbp.cu">
+      <Filter>CUDA\cuda source</Filter>
+    </CudaCompile>
     <CudaCompile Include="cuda\2d\fft.cu">
       <Filter>CUDA\cuda source</Filter>
     </CudaCompile>
@@ -198,6 +201,9 @@
     <ClCompile Include="src\FanFlatVecProjectionGeometry2D.cpp">
       <Filter>Geometries\source</Filter>
     </ClCompile>
+    <ClCompile Include="src\GeometryUtil2D.cpp">
+      <Filter>Geometries\source</Filter>
+    </ClCompile>
     <ClCompile Include="src\GeometryUtil3D.cpp">
       <Filter>Geometries\source</Filter>
     </ClCompile>
@@ -207,6 +213,9 @@
     <ClCompile Include="src\ParallelProjectionGeometry3D.cpp">
       <Filter>Geometries\source</Filter>
     </ClCompile>
+    <ClCompile Include="src\ParallelVecProjectionGeometry2D.cpp">
+      <Filter>Geometries\source</Filter>
+    </ClCompile>
     <ClCompile Include="src\ParallelVecProjectionGeometry3D.cpp">
       <Filter>Geometries\source</Filter>
     </ClCompile>
@@ -321,10 +330,6 @@
     <ClCompile Include="src\CudaSirtAlgorithm3D.cpp">
       <Filter>CUDA\astra source</Filter>
     </ClCompile>
-    <ClCompile Include="src\GeometryUtil3D.cpp" />
-    <ClCompile Include="src\ParallelVecProjectionGeometry2D.cpp">
-      <Filter>Geometries\source</Filter>
-    </ClCompile>
   </ItemGroup>
   <ItemGroup>
     <ClInclude Include="include\astra\Algorithm.h">
@@ -474,6 +479,9 @@
     <ClInclude Include="include\astra\ParallelProjectionGeometry3D.h">
       <Filter>Geometries\headers</Filter>
     </ClInclude>
+    <ClInclude Include="include\astra\ParallelVecProjectionGeometry2D.h">
+      <Filter>Geometries\headers</Filter>
+    </ClInclude>
     <ClInclude Include="include\astra\ParallelVecProjectionGeometry3D.h">
       <Filter>Geometries\headers</Filter>
     </ClInclude>
@@ -615,6 +623,9 @@
     <ClInclude Include="cuda\2d\fbp_filters.h">
       <Filter>CUDA\cuda headers</Filter>
     </ClInclude>
+    <ClInclude Include="cuda\2d\fbp.h">
+      <Filter>CUDA\cuda headers</Filter>
+    </ClInclude>
     <ClInclude Include="cuda\2d\fft.h">
       <Filter>CUDA\cuda headers</Filter>
     </ClInclude>
@@ -675,11 +686,6 @@
     <ClInclude Include="cuda\3d\util3d.h">
       <Filter>CUDA\cuda headers</Filter>
     </ClInclude>
-    <ClInclude Include="include\astra\GeometryUtil2D.h" />
-    <ClInclude Include="include\astra\GeometryUtil3D.h" />
-    <ClInclude Include="include\astra\ParallelVecProjectionGeometry2D.h">
-      <Filter>Geometries\headers</Filter>
-    </ClInclude>
   </ItemGroup>
   <ItemGroup>
     <None Include="include\astra\DataProjectorPolicies.inl">
diff --git a/build/linux/Makefile.in b/build/linux/Makefile.in
index c72e20f..2af705f 100644
--- a/build/linux/Makefile.in
+++ b/build/linux/Makefile.in
@@ -139,6 +139,7 @@ BASE_OBJECTS=\
 	src/Float32VolumeData3DMemory.lo \
 	src/ForwardProjectionAlgorithm.lo \
 	src/Fourier.lo \
+	src/GeometryUtil2D.lo \
 	src/GeometryUtil3D.lo \
 	src/Globals.lo \
 	src/Logging.lo \
@@ -197,6 +198,7 @@ CUDA_OBJECTS=\
 	cuda/2d/par_bp.lo \
 	cuda/2d/fan_fp.lo \
 	cuda/2d/fan_bp.lo \
+	cuda/2d/fbp.lo \
 	cuda/2d/sirt.lo \
 	cuda/2d/sart.lo \
 	cuda/2d/cgls.lo \
diff --git a/build/msvc/gen.py b/build/msvc/gen.py
index cc69a62..57f4539 100644
--- a/build/msvc/gen.py
+++ b/build/msvc/gen.py
@@ -156,6 +156,7 @@ P_astra["filters"]["CUDA\\cuda source"] = [
 "cuda\\2d\\em.cu",
 "cuda\\2d\\fan_bp.cu",
 "cuda\\2d\\fan_fp.cu",
+"cuda\\2d\\fbp.cu",
 "cuda\\2d\\fft.cu",
 "cuda\\2d\\par_bp.cu",
 "cuda\\2d\\par_fp.cu",
@@ -225,9 +226,11 @@ P_astra["filters"]["Geometries\\source"] = [
 "src\\ConeVecProjectionGeometry3D.cpp",
 "src\\FanFlatProjectionGeometry2D.cpp",
 "src\\FanFlatVecProjectionGeometry2D.cpp",
+"src\\GeometryUtil2D.cpp",
 "src\\GeometryUtil3D.cpp",
 "src\\ParallelProjectionGeometry2D.cpp",
 "src\\ParallelProjectionGeometry3D.cpp",
+"src\\ParallelVecProjectionGeometry2D.cpp",
 "src\\ParallelVecProjectionGeometry3D.cpp",
 "src\\ProjectionGeometry2D.cpp",
 "src\\ProjectionGeometry3D.cpp",
@@ -285,6 +288,7 @@ P_astra["filters"]["CUDA\\cuda headers"] = [
 "cuda\\2d\\fan_bp.h",
 "cuda\\2d\\fan_fp.h",
 "cuda\\2d\\fbp_filters.h",
+"cuda\\2d\\fbp.h",
 "cuda\\2d\\fft.h",
 "cuda\\2d\\par_bp.h",
 "cuda\\2d\\par_fp.h",
@@ -366,6 +370,7 @@ P_astra["filters"]["Geometries\\headers"] = [
 "include\\astra\\GeometryUtil3D.h",
 "include\\astra\\ParallelProjectionGeometry2D.h",
 "include\\astra\\ParallelProjectionGeometry3D.h",
+"include\\astra\\ParallelVecProjectionGeometry2D.h",
 "include\\astra\\ParallelVecProjectionGeometry3D.h",
 "include\\astra\\ProjectionGeometry2D.h",
 "include\\astra\\ProjectionGeometry3D.h",
diff --git a/cuda/2d/algo.cu b/cuda/2d/algo.cu
index dc74e51..bde4f42 100644
--- a/cuda/2d/algo.cu
+++ b/cuda/2d/algo.cu
@@ -35,13 +35,13 @@ $Id$
 #include "fan_bp.h"
 #include "util.h"
 #include "arith.h"
+#include "astra.h"
 
 namespace astraCUDA {
 
 ReconAlgo::ReconAlgo()
 {
-	angles = 0;
-	TOffsets = 0;
+	parProjs = 0;
 	fanProjs = 0;
 	shouldAbort = false;
 
@@ -66,8 +66,7 @@ ReconAlgo::~ReconAlgo()
 
 void ReconAlgo::reset()
 {
-	delete[] angles;
-	delete[] TOffsets;
+	delete[] parProjs;
 	delete[] fanProjs;
 
 	if (freeGPUMemory) {
@@ -77,8 +76,7 @@ void ReconAlgo::reset()
 		cudaFree(D_volumeData);
 	}
 
-	angles = 0;
-	TOffsets = 0;
+	parProjs = 0;
 	fanProjs = 0;
 	shouldAbort = false;
 
@@ -124,46 +122,40 @@ bool ReconAlgo::enableSinogramMask()
 }
 
 
-bool ReconAlgo::setGeometry(const SDimensions& _dims, const float* _angles)
+bool ReconAlgo::setGeometry(const astra::CVolumeGeometry2D* pVolGeom,
+                            const astra::CProjectionGeometry2D* pProjGeom)
 {
-	dims = _dims;
+	bool ok;
 
-	angles = new float[dims.iProjAngles];
+	ok = convertAstraGeometry_dims(pVolGeom, pProjGeom, dims);
 
-	memcpy(angles, _angles, sizeof(angles[0]) * dims.iProjAngles);
+	if (!ok)
+		return false;
 
+	delete[] parProjs;
+	parProjs = 0;
 	delete[] fanProjs;
 	fanProjs = 0;
 
-	return true;
-}
-
-bool ReconAlgo::setFanGeometry(const SDimensions& _dims,
-                               const SFanProjection* _projs)
-{
-	dims = _dims;
-	fanProjs = new SFanProjection[dims.iProjAngles];
-
-	memcpy(fanProjs, _projs, sizeof(fanProjs[0]) * dims.iProjAngles);
-
-	delete[] angles;
-	angles = 0;
+	fOutputScale = 1.0f;
+	ok = convertAstraGeometry(pVolGeom, pProjGeom, parProjs, fanProjs, fOutputScale);
+	if (!ok)
+		return false;
 
 	return true;
 }
 
-
-bool ReconAlgo::setTOffsets(const float* _TOffsets)
+bool ReconAlgo::setSuperSampling(int raysPerDet, int raysPerPixelDim)
 {
-	// TODO: determine if they're all zero?
-	TOffsets = new float[dims.iProjAngles];
-	memcpy(TOffsets, _TOffsets, sizeof(angles[0]) * dims.iProjAngles);
+	if (raysPerDet <= 0 || raysPerPixelDim <= 0)
+		return false;
+
+	dims.iRaysPerDet = raysPerDet;
+	dims.iRaysPerPixelDim = raysPerPixelDim;
 
 	return true;
 }
 
-
-
 bool ReconAlgo::setVolumeMask(float* _D_maskData, unsigned int _maskPitch)
 {
 	assert(useVolumeMask);
@@ -324,14 +316,14 @@ bool ReconAlgo::callFP(float* D_volumeData, unsigned int volumePitch,
                        float* D_projData, unsigned int projPitch,
                        float outputScale)
 {
-	if (angles) {
+	if (parProjs) {
 		assert(!fanProjs);
 		return FP(D_volumeData, volumePitch, D_projData, projPitch,
-		          dims, angles, TOffsets, outputScale);
+		          dims, parProjs, fOutputScale * outputScale);
 	} else {
 		assert(fanProjs);
 		return FanFP(D_volumeData, volumePitch, D_projData, projPitch,
-		             dims, fanProjs, outputScale);
+		             dims, fanProjs, fOutputScale * outputScale);
 	}
 }
 
@@ -339,10 +331,10 @@ bool ReconAlgo::callBP(float* D_volumeData, unsigned int volumePitch,
                        float* D_projData, unsigned int projPitch,
                        float outputScale)
 {
-	if (angles) {
+	if (parProjs) {
 		assert(!fanProjs);
 		return BP(D_volumeData, volumePitch, D_projData, projPitch,
-		          dims, angles, TOffsets, outputScale);
+		          dims, parProjs, outputScale);
 	} else {
 		assert(fanProjs);
 		return FanBP(D_volumeData, volumePitch, D_projData, projPitch,
diff --git a/cuda/2d/algo.h b/cuda/2d/algo.h
index 99959c8..0b6a066 100644
--- a/cuda/2d/algo.h
+++ b/cuda/2d/algo.h
@@ -31,6 +31,17 @@ $Id$
 
 #include "util.h"
 
+namespace astra {
+
+class CParallelProjectionGeometry2D;
+class CParallelVecProjectionGeometry2D;
+class CFanFlatProjectionGeometry2D;
+class CFanFlatVecProjectionGeometry2D;
+class CVolumeGeometry2D;
+class CProjectionGeometry2D;
+
+}
+
 namespace astraCUDA {
 
 class _AstraExport ReconAlgo {
@@ -40,11 +51,10 @@ public:
 
 	bool setGPUIndex(int iGPUIndex);
 
-	bool setGeometry(const SDimensions& dims, const float* angles);
-	bool setFanGeometry(const SDimensions& dims, const SFanProjection* projs);
+	bool setGeometry(const astra::CVolumeGeometry2D* pVolGeom,
+	                 const astra::CProjectionGeometry2D* pProjGeom);
 
-	// setTOffsets should (optionally) be called after setGeometry
-	bool setTOffsets(const float* TOffsets);
+	bool setSuperSampling(int raysPerDet, int raysPerPixelDim);
 
 	void signalAbort() { shouldAbort = true; }
 
@@ -123,9 +133,9 @@ protected:
 
 
 	SDimensions dims;
-	float* angles;
-	float* TOffsets;
+	SParProjection* parProjs;
 	SFanProjection* fanProjs;
+	float fOutputScale;
 
 	volatile bool shouldAbort;
 
diff --git a/cuda/2d/astra.cu b/cuda/2d/astra.cu
index c1e6566..b39e0a9 100644
--- a/cuda/2d/astra.cu
+++ b/cuda/2d/astra.cu
@@ -42,8 +42,10 @@ $Id$
 #include <fstream>
 #include <cuda.h>
 
+#include "../../include/astra/GeometryUtil2D.h"
 #include "../../include/astra/VolumeGeometry2D.h"
 #include "../../include/astra/ParallelProjectionGeometry2D.h"
+#include "../../include/astra/ParallelVecProjectionGeometry2D.h"
 #include "../../include/astra/FanFlatProjectionGeometry2D.h"
 #include "../../include/astra/FanFlatVecProjectionGeometry2D.h"
 
@@ -64,515 +66,6 @@ enum CUDAProjectionType {
 };
 
 
-class AstraFBP_internal {
-public:
-	SDimensions dims;
-	float* angles;
-	float* TOffsets;
-	astraCUDA::SFanProjection* fanProjections;
-
-	float fOriginSourceDistance;
-	float fOriginDetectorDistance;
-
-	float fPixelSize;
-
-	bool bFanBeam;
-	bool bShortScan;
-
-	bool initialized;
-	bool setStartReconstruction;
-
-	float* D_sinoData;
-	unsigned int sinoPitch;
-
-	float* D_volumeData;
-	unsigned int volumePitch;
-
-	cufftComplex * m_pDevFilter;
-};
-
-AstraFBP::AstraFBP()
-{
-	pData = new AstraFBP_internal();
-
-	pData->angles = 0;
-	pData->fanProjections = 0;
-	pData->TOffsets = 0;
-	pData->D_sinoData = 0;
-	pData->D_volumeData = 0;
-
-	pData->dims.iVolWidth = 0;
-	pData->dims.iProjAngles = 0;
-	pData->dims.fDetScale = 1.0f;
-	pData->dims.iRaysPerDet = 1;
-	pData->dims.iRaysPerPixelDim = 1;
-
-	pData->initialized = false;
-	pData->setStartReconstruction = false;
-
-	pData->m_pDevFilter = NULL;
-}
-
-AstraFBP::~AstraFBP()
-{
-	delete[] pData->angles;
-	pData->angles = 0;
-
-	delete[] pData->TOffsets;
-	pData->TOffsets = 0;
-
-	delete[] pData->fanProjections;
-	pData->fanProjections = 0;
-
-	cudaFree(pData->D_sinoData);
-	pData->D_sinoData = 0;
-
-	cudaFree(pData->D_volumeData);
-	pData->D_volumeData = 0;
-
-	if(pData->m_pDevFilter != NULL)
-	{
-		freeComplexOnDevice(pData->m_pDevFilter);
-		pData->m_pDevFilter = NULL;
-	}
-
-	delete pData;
-	pData = 0;
-}
-
-bool AstraFBP::setReconstructionGeometry(unsigned int iVolWidth,
-                                          unsigned int iVolHeight,
-                                          float fPixelSize)
-{
-	if (pData->initialized)
-		return false;
-
-	pData->dims.iVolWidth = iVolWidth;
-	pData->dims.iVolHeight = iVolHeight;
-
-	pData->fPixelSize = fPixelSize;
-
-	return (iVolWidth > 0 && iVolHeight > 0 && fPixelSize > 0.0f);
-}
-
-bool AstraFBP::setProjectionGeometry(unsigned int iProjAngles,
-                                      unsigned int iProjDets,
-                                      const float* pfAngles,
-                                      float fDetSize)
-{
-	if (pData->initialized)
-		return false;
-
-	pData->dims.iProjAngles = iProjAngles;
-	pData->dims.iProjDets = iProjDets;
-	pData->dims.fDetScale = fDetSize / pData->fPixelSize;
-
-	if (iProjAngles == 0 || iProjDets == 0 || pfAngles == 0)
-		return false;
-
-	pData->angles = new float[iProjAngles];
-	memcpy(pData->angles, pfAngles, iProjAngles * sizeof(pfAngles[0]));
-
-	pData->bFanBeam = false;
-
-	return true;
-}
-
-bool AstraFBP::setFanGeometry(unsigned int iProjAngles,
-                              unsigned int iProjDets,
-                              const astraCUDA::SFanProjection *fanProjs,
-                              const float* pfAngles,
-                              float fOriginSourceDistance,
-                              float fOriginDetectorDistance,
-                              float fDetSize,
-                              bool bShortScan)
-{
-	// Slightly abusing setProjectionGeometry for this...
-	if (!setProjectionGeometry(iProjAngles, iProjDets, pfAngles, fDetSize))
-		return false;
-
-	pData->fOriginSourceDistance = fOriginSourceDistance;
-	pData->fOriginDetectorDistance = fOriginDetectorDistance;
-
-	pData->fanProjections = new astraCUDA::SFanProjection[iProjAngles];
-	memcpy(pData->fanProjections, fanProjs, iProjAngles * sizeof(fanProjs[0]));
-
-	pData->bFanBeam = true;
-	pData->bShortScan = bShortScan;
-
-	return true;
-}
-
-
-bool AstraFBP::setPixelSuperSampling(unsigned int iPixelSuperSampling)
-{
-	if (pData->initialized)
-		return false;
-
-	if (iPixelSuperSampling == 0)
-		return false;
-
-	pData->dims.iRaysPerPixelDim = iPixelSuperSampling;
-
-	return true;
-}
-
-
-bool AstraFBP::setTOffsets(const float* pfTOffsets)
-{
-	if (pData->initialized)
-		return false;
-
-	if (pfTOffsets == 0)
-		return false;
-
-	pData->TOffsets = new float[pData->dims.iProjAngles];
-	memcpy(pData->TOffsets, pfTOffsets, pData->dims.iProjAngles * sizeof(pfTOffsets[0]));
-
-	return true;
-}
-
-bool AstraFBP::init(int iGPUIndex)
-{
-	if (pData->initialized)
-	{
-		return false;
-	}
-
-	if (pData->dims.iProjAngles == 0 || pData->dims.iVolWidth == 0)
-	{
-		return false;
-	}
-
-	if (iGPUIndex != -1) {
-		cudaSetDevice(iGPUIndex);
-		cudaError_t err = cudaGetLastError();
-
-		// Ignore errors caused by calling cudaSetDevice multiple times
-		if (err != cudaSuccess && err != cudaErrorSetOnActiveProcess)
-		{
-			return false;
-		}
-	}
-
-	bool ok = allocateVolumeData(pData->D_volumeData, pData->volumePitch, pData->dims);
-	if (!ok)
-	{
-		return false;
-	}
-
-	ok = allocateProjectionData(pData->D_sinoData, pData->sinoPitch, pData->dims);
-	if (!ok)
-	{
-		cudaFree(pData->D_volumeData);
-		pData->D_volumeData = 0;
-		return false;
-	}
-
-	pData->initialized = true;
-
-	return true;
-}
-
-bool AstraFBP::setSinogram(const float* pfSinogram,
-                            unsigned int iSinogramPitch)
-{
-	if (!pData->initialized)
-		return false;
-	if (!pfSinogram)
-		return false;
-
-	bool ok = copySinogramToDevice(pfSinogram, iSinogramPitch,
-	                               pData->dims,
-	                               pData->D_sinoData, pData->sinoPitch);
-	if (!ok)
-		return false;
-
-	// rescale sinogram to adjust for pixel size
-	processSino<opMul>(pData->D_sinoData,
-	                       1.0f/(pData->fPixelSize*pData->fPixelSize),
-	                       pData->sinoPitch, pData->dims);
-
-	pData->setStartReconstruction = false;
-
-	return true;
-}
-
-static int calcNextPowerOfTwo(int _iValue)
-{
-	int iOutput = 1;
-
-	while(iOutput < _iValue)
-	{
-		iOutput *= 2;
-	}
-
-	return iOutput;
-}
-
-bool AstraFBP::run()
-{
-	if (!pData->initialized)
-	{
-		return false;
-	}
-
-	zeroVolumeData(pData->D_volumeData, pData->volumePitch, pData->dims);
-
-	bool ok = false;
-
-	if (pData->bFanBeam) {
-		// Call FDK_PreWeight to handle fan beam geometry. We treat
-		// this as a cone beam setup of a single slice:
-
-		// TODO: TOffsets affects this preweighting...
-
-		// We create a fake cudaPitchedPtr
-		cudaPitchedPtr tmp;
-		tmp.ptr = pData->D_sinoData;
-		tmp.pitch = pData->sinoPitch * sizeof(float);
-		tmp.xsize = pData->dims.iProjDets;
-		tmp.ysize = pData->dims.iProjAngles;
-		// and a fake Dimensions3D
-		astraCUDA3d::SDimensions3D dims3d;
-		dims3d.iVolX = pData->dims.iVolWidth;
-		dims3d.iVolY = pData->dims.iVolHeight;
-		dims3d.iVolZ = 1;
-		dims3d.iProjAngles = pData->dims.iProjAngles;
-		dims3d.iProjU = pData->dims.iProjDets;
-		dims3d.iProjV = 1;
-		dims3d.iRaysPerDetDim = dims3d.iRaysPerVoxelDim = 1;
-
-		astraCUDA3d::FDK_PreWeight(tmp, pData->fOriginSourceDistance,
-		              pData->fOriginDetectorDistance, 0.0f, 0.0f,
-		              pData->dims.fDetScale, 1.0f, // TODO: Are these correct?
-		              pData->bShortScan, dims3d, pData->angles);
-	}
-
-	if (pData->m_pDevFilter) {
-
-		int iFFTRealDetCount = calcNextPowerOfTwo(2 * pData->dims.iProjDets);
-		int iFFTFourDetCount = calcFFTFourSize(iFFTRealDetCount);
-
-		cufftComplex * pDevComplexSinogram = NULL;
-
-		allocateComplexOnDevice(pData->dims.iProjAngles, iFFTFourDetCount, &pDevComplexSinogram);
-
-		runCudaFFT(pData->dims.iProjAngles, pData->D_sinoData, pData->sinoPitch, pData->dims.iProjDets, iFFTRealDetCount, iFFTFourDetCount, pDevComplexSinogram);
-
-		applyFilter(pData->dims.iProjAngles, iFFTFourDetCount, pDevComplexSinogram, pData->m_pDevFilter);
-
-		runCudaIFFT(pData->dims.iProjAngles, pDevComplexSinogram, pData->D_sinoData, pData->sinoPitch, pData->dims.iProjDets, iFFTRealDetCount, iFFTFourDetCount);
-
-		freeComplexOnDevice(pDevComplexSinogram);
-
-	}
-
-	float fOutputScale = (M_PI / 2.0f) / (float)pData->dims.iProjAngles;
-
-	if (pData->bFanBeam) {
-		ok = FanBP_FBPWeighted(pData->D_volumeData, pData->volumePitch, pData->D_sinoData, pData->sinoPitch, pData->dims, pData->fanProjections, fOutputScale);
-
-	} else {
-		ok = BP(pData->D_volumeData, pData->volumePitch, pData->D_sinoData, pData->sinoPitch, pData->dims, pData->angles, pData->TOffsets, fOutputScale);
-	}
-	if(!ok)
-	{
-		return false;
-	}
-
-	return true;
-}
-
-bool AstraFBP::getReconstruction(float* pfReconstruction, unsigned int iReconstructionPitch) const
-{
-	if (!pData->initialized)
-		return false;
-
-	bool ok = copyVolumeFromDevice(pfReconstruction, iReconstructionPitch,
-	                               pData->dims,
-	                               pData->D_volumeData, pData->volumePitch);
-	if (!ok)
-		return false;
-
-	return true;
-}
-
-int AstraFBP::calcFourierFilterSize(int _iDetectorCount)
-{
-	int iFFTRealDetCount = calcNextPowerOfTwo(2 * _iDetectorCount);
-	int iFreqBinCount = calcFFTFourSize(iFFTRealDetCount);
-
-	// CHECKME: Matlab makes this at least 64. Do we also need to?
-	return iFreqBinCount;
-}
-
-bool AstraFBP::setFilter(E_FBPFILTER _eFilter, const float * _pfHostFilter /* = NULL */, int _iFilterWidth /* = 0 */, float _fD /* = 1.0f */, float _fFilterParameter /* = -1.0f */)
-{
-	if(pData->m_pDevFilter != 0)
-	{
-		freeComplexOnDevice(pData->m_pDevFilter);
-		pData->m_pDevFilter = 0;
-	}
-
-	if (_eFilter == FILTER_NONE)
-		return true; // leave pData->m_pDevFilter set to 0
-
-
-	int iFFTRealDetCount = calcNextPowerOfTwo(2 * pData->dims.iProjDets);
-	int iFreqBinCount = calcFFTFourSize(iFFTRealDetCount);
-
-	cufftComplex * pHostFilter = new cufftComplex[pData->dims.iProjAngles * iFreqBinCount];
-	memset(pHostFilter, 0, sizeof(cufftComplex) * pData->dims.iProjAngles * iFreqBinCount);
-
-	allocateComplexOnDevice(pData->dims.iProjAngles, iFreqBinCount, &(pData->m_pDevFilter));
-
-	switch(_eFilter)
-	{
-		case FILTER_NONE:
-			// handled above
-			break;
-		case FILTER_RAMLAK:
-		case FILTER_SHEPPLOGAN:
-		case FILTER_COSINE:
-		case FILTER_HAMMING:
-		case FILTER_HANN:
-		case FILTER_TUKEY:
-		case FILTER_LANCZOS:
-		case FILTER_TRIANGULAR:
-		case FILTER_GAUSSIAN:
-		case FILTER_BARTLETTHANN:
-		case FILTER_BLACKMAN:
-		case FILTER_NUTTALL:
-		case FILTER_BLACKMANHARRIS:
-		case FILTER_BLACKMANNUTTALL:
-		case FILTER_FLATTOP:
-		case FILTER_PARZEN:
-		{
-			genFilter(_eFilter, _fD, pData->dims.iProjAngles, pHostFilter, iFFTRealDetCount, iFreqBinCount, _fFilterParameter);
-			uploadComplexArrayToDevice(pData->dims.iProjAngles, iFreqBinCount, pHostFilter, pData->m_pDevFilter);
-
-			break;
-		}
-		case FILTER_PROJECTION:
-		{
-			// make sure the offered filter has the correct size
-			assert(_iFilterWidth == iFreqBinCount);
-
-			for(int iFreqBinIndex = 0; iFreqBinIndex < iFreqBinCount; iFreqBinIndex++)
-			{
-				float fValue = _pfHostFilter[iFreqBinIndex];
-
-				for(int iProjectionIndex = 0; iProjectionIndex < (int)pData->dims.iProjAngles; iProjectionIndex++)
-				{
-					pHostFilter[iFreqBinIndex + iProjectionIndex * iFreqBinCount].x = fValue;
-					pHostFilter[iFreqBinIndex + iProjectionIndex * iFreqBinCount].y = 0.0f;
-				}
-			}
-			uploadComplexArrayToDevice(pData->dims.iProjAngles, iFreqBinCount, pHostFilter, pData->m_pDevFilter);
-			break;
-		}
-		case FILTER_SINOGRAM:
-		{
-			// make sure the offered filter has the correct size
-			assert(_iFilterWidth == iFreqBinCount);
-
-			for(int iFreqBinIndex = 0; iFreqBinIndex < iFreqBinCount; iFreqBinIndex++)
-			{
-				for(int iProjectionIndex = 0; iProjectionIndex < (int)pData->dims.iProjAngles; iProjectionIndex++)
-				{
-					float fValue = _pfHostFilter[iFreqBinIndex + iProjectionIndex * _iFilterWidth];
-
-					pHostFilter[iFreqBinIndex + iProjectionIndex * iFreqBinCount].x = fValue;
-					pHostFilter[iFreqBinIndex + iProjectionIndex * iFreqBinCount].y = 0.0f;
-				}
-			}
-			uploadComplexArrayToDevice(pData->dims.iProjAngles, iFreqBinCount, pHostFilter, pData->m_pDevFilter);
-			break;
-		}
-		case FILTER_RPROJECTION:
-		{
-			int iProjectionCount = pData->dims.iProjAngles;
-			int iRealFilterElementCount = iProjectionCount * iFFTRealDetCount;
-			float * pfHostRealFilter = new float[iRealFilterElementCount];
-			memset(pfHostRealFilter, 0, sizeof(float) * iRealFilterElementCount);
-
-			int iUsedFilterWidth = min(_iFilterWidth, iFFTRealDetCount);
-			int iStartFilterIndex = (_iFilterWidth - iUsedFilterWidth) / 2;
-			int iMaxFilterIndex = iStartFilterIndex + iUsedFilterWidth;
-
-			int iFilterShiftSize = _iFilterWidth / 2;
-
-			for(int iDetectorIndex = iStartFilterIndex; iDetectorIndex < iMaxFilterIndex; iDetectorIndex++)
-			{
-				int iFFTInFilterIndex = (iDetectorIndex + iFFTRealDetCount - iFilterShiftSize) % iFFTRealDetCount;
-				float fValue = _pfHostFilter[iDetectorIndex];
-
-				for(int iProjectionIndex = 0; iProjectionIndex < (int)pData->dims.iProjAngles; iProjectionIndex++)
-				{
-					pfHostRealFilter[iFFTInFilterIndex + iProjectionIndex * iFFTRealDetCount] = fValue;
-				}
-			}
-
-			float* pfDevRealFilter = NULL;
-			cudaMalloc((void **)&pfDevRealFilter, sizeof(float) * iRealFilterElementCount); // TODO: check for errors
-			cudaMemcpy(pfDevRealFilter, pfHostRealFilter, sizeof(float) * iRealFilterElementCount, cudaMemcpyHostToDevice);
-			delete[] pfHostRealFilter;
-
-			runCudaFFT(iProjectionCount, pfDevRealFilter, iFFTRealDetCount, iFFTRealDetCount, iFFTRealDetCount, iFreqBinCount, pData->m_pDevFilter);
-
-			cudaFree(pfDevRealFilter);
-
-			break;
-		}
-		case FILTER_RSINOGRAM:
-		{
-			int iProjectionCount = pData->dims.iProjAngles;
-			int iRealFilterElementCount = iProjectionCount * iFFTRealDetCount;
-			float* pfHostRealFilter = new float[iRealFilterElementCount];
-			memset(pfHostRealFilter, 0, sizeof(float) * iRealFilterElementCount);
-
-			int iUsedFilterWidth = min(_iFilterWidth, iFFTRealDetCount);
-			int iStartFilterIndex = (_iFilterWidth - iUsedFilterWidth) / 2;
-			int iMaxFilterIndex = iStartFilterIndex + iUsedFilterWidth;
-
-			int iFilterShiftSize = _iFilterWidth / 2;
-
-			for(int iDetectorIndex = iStartFilterIndex; iDetectorIndex < iMaxFilterIndex; iDetectorIndex++)
-			{
-				int iFFTInFilterIndex = (iDetectorIndex + iFFTRealDetCount - iFilterShiftSize) % iFFTRealDetCount;
-
-				for(int iProjectionIndex = 0; iProjectionIndex < (int)pData->dims.iProjAngles; iProjectionIndex++)
-				{
-					float fValue = _pfHostFilter[iDetectorIndex + iProjectionIndex * _iFilterWidth];
-					pfHostRealFilter[iFFTInFilterIndex + iProjectionIndex * iFFTRealDetCount] = fValue;
-				}
-			}
-
-			float* pfDevRealFilter = NULL;
-			cudaMalloc((void **)&pfDevRealFilter, sizeof(float) * iRealFilterElementCount); // TODO: check for errors
-			cudaMemcpy(pfDevRealFilter, pfHostRealFilter, sizeof(float) * iRealFilterElementCount, cudaMemcpyHostToDevice);
-			delete[] pfHostRealFilter;
-
-			runCudaFFT(iProjectionCount, pfDevRealFilter, iFFTRealDetCount, iFFTRealDetCount, iFFTRealDetCount, iFreqBinCount, pData->m_pDevFilter);
-
-			cudaFree(pfDevRealFilter);
-
-			break;
-		}
-		default:
-		{
-			ASTRA_ERROR("AstraFBP::setFilter: Unknown filter type requested");
-			delete [] pHostFilter;
-			return false;
-		}
-	}
-
-	delete [] pHostFilter;
-
-	return true;
-}
-
 BPalgo::BPalgo()
 {
 
@@ -617,18 +110,17 @@ float BPalgo::computeDiffNorm()
 bool astraCudaFP(const float* pfVolume, float* pfSinogram,
                  unsigned int iVolWidth, unsigned int iVolHeight,
                  unsigned int iProjAngles, unsigned int iProjDets,
-                 const float *pfAngles, const float *pfOffsets,
-                 float fDetSize, unsigned int iDetSuperSampling,
+                 const SParProjection *pAngles,
+                 unsigned int iDetSuperSampling,
                  float fOutputScale, int iGPUIndex)
 {
 	SDimensions dims;
 
-	if (iProjAngles == 0 || iProjDets == 0 || pfAngles == 0)
+	if (iProjAngles == 0 || iProjDets == 0 || pAngles == 0)
 		return false;
 
 	dims.iProjAngles = iProjAngles;
 	dims.iProjDets = iProjDets;
-	dims.fDetScale = fDetSize;
 
 	if (iDetSuperSampling == 0)
 		return false;
@@ -678,7 +170,7 @@ bool astraCudaFP(const float* pfVolume, float* pfSinogram,
 	}
 
 	zeroProjectionData(D_sinoData, sinoPitch, dims);
-	ok = FP(D_volumeData, volumePitch, D_sinoData, sinoPitch, dims, pfAngles, pfOffsets, fOutputScale);
+	ok = FP(D_volumeData, volumePitch, D_sinoData, sinoPitch, dims, pAngles, fOutputScale);
 	if (!ok) {
 		cudaFree(D_volumeData);
 		cudaFree(D_sinoData);
@@ -713,7 +205,6 @@ bool astraCudaFanFP(const float* pfVolume, float* pfSinogram,
 
 	dims.iProjAngles = iProjAngles;
 	dims.iProjDets = iProjDets;
-	dims.fDetScale = 1.0f; // TODO?
 
 	if (iDetSuperSampling == 0)
 		return false;
@@ -789,118 +280,99 @@ bool astraCudaFanFP(const float* pfVolume, float* pfSinogram,
 }
 
 
-bool convertAstraGeometry(const CVolumeGeometry2D* pVolGeom,
-                          const CParallelProjectionGeometry2D* pProjGeom,
-                          float*& detectorOffsets, float*& projectionAngles,
-                          float& detSize, float& outputScale)
+// adjust pProjs to normalize volume geometry
+template<typename ProjectionT>
+static bool convertAstraGeometry_internal(const CVolumeGeometry2D* pVolGeom,
+                          unsigned int iProjectionAngleCount,
+                          ProjectionT*& pProjs,
+                          float& fOutputScale)
 {
-	assert(pVolGeom);
-	assert(pProjGeom);
-	assert(pProjGeom->getProjectionAngles());
-
+	// TODO: Make EPS relative
 	const float EPS = 0.00001f;
 
-	int nth = pProjGeom->getProjectionAngleCount();
-
 	// Check if pixels are square
 	if (abs(pVolGeom->getPixelLengthX() - pVolGeom->getPixelLengthY()) > EPS)
 		return false;
 
+	float dx = -(pVolGeom->getWindowMinX() + pVolGeom->getWindowMaxX()) / 2;
+	float dy = -(pVolGeom->getWindowMinY() + pVolGeom->getWindowMaxY()) / 2;
 
-	// Scale volume pixels to 1x1
-	detSize = pProjGeom->getDetectorWidth() / pVolGeom->getPixelLengthX();
+	float factor = 1.0f / pVolGeom->getPixelLengthX();
 
-	// Copy angles
-	float *angles = new float[nth];
-	for (int i = 0; i < nth; ++i)
-		angles[i] = pProjGeom->getProjectionAngles()[i];
-	projectionAngles = angles;
-
-	// Check if we need to translate
-	bool offCenter = false;
-	if (abs(pVolGeom->getWindowMinX() + pVolGeom->getWindowMaxX()) > EPS ||
-	    abs(pVolGeom->getWindowMinY() + pVolGeom->getWindowMaxY()) > EPS)
-	{
-		offCenter = true;
+	for (int i = 0; i < iProjectionAngleCount; ++i) {
+		// CHECKME: Order of scaling and translation
+		pProjs[i].translate(dx, dy);
+		pProjs[i].scale(factor);
 	}
+	// CHECKME: Check factor
+	fOutputScale *= pVolGeom->getPixelLengthX() * pVolGeom->getPixelLengthY();
 
-	// If there are existing detector offsets, or if we need to translate,
-	// we need to return offsets
-	if (offCenter)
-	{
-		float* offset = new float[nth];
+	return true;
+}
 
-		for (int i = 0; i < nth; ++i)
-			offset[i] = 0.0f;
 
-		if (offCenter) {
-			float dx = (pVolGeom->getWindowMinX() + pVolGeom->getWindowMaxX()) / 2;
-			float dy = (pVolGeom->getWindowMinY() + pVolGeom->getWindowMaxY()) / 2;
 
-			// CHECKME: Is d in pixels or in units?
+bool convertAstraGeometry(const CVolumeGeometry2D* pVolGeom,
+                          const CParallelProjectionGeometry2D* pProjGeom,
+                          SParProjection*& pProjs,
+                          float& fOutputScale)
+{
+	assert(pVolGeom);
+	assert(pProjGeom);
+	assert(pProjGeom->getProjectionAngles());
 
-			for (int i = 0; i < nth; ++i) {
-				float d = dx * cos(angles[i]) + dy * sin(angles[i]);
-				offset[i] += d;
-			}
-		}
+	int nth = pProjGeom->getProjectionAngleCount();
 
-		// CHECKME: Order of scaling and translation
+	pProjs = genParProjections(nth,
+	                           pProjGeom->getDetectorCount(),
+	                           pProjGeom->getDetectorWidth(),
+	                           pProjGeom->getProjectionAngles(), 0);
 
-		// Scale volume pixels to 1x1
-		for (int i = 0; i < nth; ++i) {
-			//offset[i] /= pVolGeom->getPixelLengthX();
-			//offset[i] *= detSize;
-		}
+	bool ok;
+	fOutputScale = 1.0f;
 
+	ok = convertAstraGeometry_internal(pVolGeom, nth, pProjs, fOutputScale);
 
-		detectorOffsets = offset;
-	} else {
-		detectorOffsets = 0;
+	if (!ok) {
+		delete[] pProjs;
+		pProjs = 0;
 	}
 
-	outputScale = pVolGeom->getPixelLengthX();
-	outputScale *= outputScale;
-
-	return true;
+	return ok;
 }
 
-static void convertAstraGeometry_internal(const CVolumeGeometry2D* pVolGeom,
-                          unsigned int iProjectionAngleCount,
-                          astraCUDA::SFanProjection*& pProjs,
-                          float& outputScale)
+bool convertAstraGeometry(const CVolumeGeometry2D* pVolGeom,
+                          const CParallelVecProjectionGeometry2D* pProjGeom,
+                          SParProjection*& pProjs,
+                          float& fOutputScale)
 {
-	// Translate
-	float dx = (pVolGeom->getWindowMinX() + pVolGeom->getWindowMaxX()) / 2;
-	float dy = (pVolGeom->getWindowMinY() + pVolGeom->getWindowMaxY()) / 2;
+	assert(pVolGeom);
+	assert(pProjGeom);
+	assert(pProjGeom->getProjectionVectors());
 
-	for (int i = 0; i < iProjectionAngleCount; ++i) {
-		pProjs[i].fSrcX -= dx;
-		pProjs[i].fSrcY -= dy;
-		pProjs[i].fDetSX -= dx;
-		pProjs[i].fDetSY -= dy;
+	int nth = pProjGeom->getProjectionAngleCount();
+
+	pProjs = new SParProjection[nth];
+
+	for (int i = 0; i < nth; ++i) {
+		pProjs[i] = pProjGeom->getProjectionVectors()[i];
 	}
 
-	// CHECKME: Order of scaling and translation
+	bool ok;
+	fOutputScale = 1.0f;
 
-	// Scale
-	float factor = 1.0f / pVolGeom->getPixelLengthX();
-	for (int i = 0; i < iProjectionAngleCount; ++i) {
-		pProjs[i].fSrcX *= factor;
-		pProjs[i].fSrcY *= factor;
-		pProjs[i].fDetSX *= factor;
-		pProjs[i].fDetSY *= factor;
-		pProjs[i].fDetUX *= factor;
-		pProjs[i].fDetUY *= factor;
+	ok = convertAstraGeometry_internal(pVolGeom, nth, pProjs, fOutputScale);
 
+	if (!ok) {
+		delete[] pProjs;
+		pProjs = 0;
 	}
 
-	// CHECKME: Check factor
-	outputScale = pVolGeom->getPixelLengthX();
-//	outputScale *= outputScale;
+	return ok;
 }
 
 
+
 bool convertAstraGeometry(const CVolumeGeometry2D* pVolGeom,
                           const CFanFlatProjectionGeometry2D* pProjGeom,
                           astraCUDA::SFanProjection*& pProjs,
@@ -910,6 +382,7 @@ bool convertAstraGeometry(const CVolumeGeometry2D* pVolGeom,
 	assert(pProjGeom);
 	assert(pProjGeom->getProjectionAngles());
 
+	// TODO: Make EPS relative
 	const float EPS = 0.00001f;
 
 	int nth = pProjGeom->getProjectionAngleCount();
@@ -928,23 +401,9 @@ bool convertAstraGeometry(const CVolumeGeometry2D* pVolGeom,
 	float fDetSize = pProjGeom->getDetectorWidth();
 	const float *pfAngles = pProjGeom->getProjectionAngles();
 
-	pProjs = new SFanProjection[nth];
-
-	float fSrcX0 = 0.0f;
-	float fSrcY0 = -fOriginSourceDistance;
-	float fDetUX0 = fDetSize;
-	float fDetUY0 = 0.0f;
-	float fDetSX0 = pProjGeom->getDetectorCount() * fDetUX0 / -2.0f;
-	float fDetSY0 = fOriginDetectorDistance;
-
-#define ROTATE0(name,i,alpha) do { pProjs[i].f##name##X = f##name##X0 * cos(alpha) - f##name##Y0 * sin(alpha); pProjs[i].f##name##Y = f##name##X0 * sin(alpha) + f##name##Y0 * cos(alpha); } while(0)
-	for (int i = 0; i < nth; ++i) {
-		ROTATE0(Src, i, pfAngles[i]);
-		ROTATE0(DetS, i, pfAngles[i]);
-		ROTATE0(DetU, i, pfAngles[i]);
-	}
-
-#undef ROTATE0
+	pProjs = genFanProjections(nth, pProjGeom->getDetectorCount(),
+                               fOriginSourceDistance, fOriginDetectorDistance,
+	                           fDetSize, pfAngles);
 
 	convertAstraGeometry_internal(pVolGeom, nth, pProjs, outputScale);
 
@@ -961,6 +420,7 @@ bool convertAstraGeometry(const CVolumeGeometry2D* pVolGeom,
 	assert(pProjGeom);
 	assert(pProjGeom->getProjectionVectors());
 
+	// TODO: Make EPS relative
 	const float EPS = 0.00001f;
 
 	int nx = pVolGeom->getGridColCount();
@@ -983,6 +443,52 @@ bool convertAstraGeometry(const CVolumeGeometry2D* pVolGeom,
 }
 
 
+bool convertAstraGeometry(const CVolumeGeometry2D* pVolGeom,
+                          const CProjectionGeometry2D* pProjGeom,
+                          astraCUDA::SParProjection*& pParProjs,
+                          astraCUDA::SFanProjection*& pFanProjs,
+                          float& outputScale)
+{
+	const CParallelProjectionGeometry2D* parProjGeom = dynamic_cast<const CParallelProjectionGeometry2D*>(pProjGeom);
+	const CParallelVecProjectionGeometry2D* parVecProjGeom = dynamic_cast<const CParallelVecProjectionGeometry2D*>(pProjGeom);
+	const CFanFlatProjectionGeometry2D* fanProjGeom = dynamic_cast<const CFanFlatProjectionGeometry2D*>(pProjGeom);
+	const CFanFlatVecProjectionGeometry2D* fanVecProjGeom = dynamic_cast<const CFanFlatVecProjectionGeometry2D*>(pProjGeom);
+
+	bool ok = false;
+
+	if (parProjGeom) {
+		ok = convertAstraGeometry(pVolGeom, parProjGeom, pParProjs, outputScale);
+	} else if (parVecProjGeom) {
+		ok = convertAstraGeometry(pVolGeom, parVecProjGeom, pParProjs, outputScale);
+	} else if (fanProjGeom) {
+		ok = convertAstraGeometry(pVolGeom, fanProjGeom, pFanProjs, outputScale);
+	} else if (fanVecProjGeom) {
+		ok = convertAstraGeometry(pVolGeom, fanVecProjGeom, pFanProjs, outputScale);
+	} else {
+		ok = false;
+	}
+
+	return ok;
+}
+
+bool convertAstraGeometry_dims(const CVolumeGeometry2D* pVolGeom,
+                               const CProjectionGeometry2D* pProjGeom,
+                               SDimensions& dims)
+{
+	dims.iVolWidth = pVolGeom->getGridColCount();
+	dims.iVolHeight = pVolGeom->getGridRowCount();
+
+	dims.iProjAngles = pProjGeom->getProjectionAngleCount();
+	dims.iProjDets = pProjGeom->getDetectorCount();
+
+	dims.iRaysPerDet = 1;
+	dims.iRaysPerPixelDim = 1;
+
+	return true;
+}
+
+
+
 
 
 }
diff --git a/cuda/2d/astra.h b/cuda/2d/astra.h
index 617883b..8e91feb 100644
--- a/cuda/2d/astra.h
+++ b/cuda/2d/astra.h
@@ -29,7 +29,6 @@ $Id$
 #ifndef _CUDA_ASTRA_H
 #define _CUDA_ASTRA_H
 
-#include "fft.h"
 #include "fbp_filters.h"
 #include "dims.h"
 #include "algo.h"
@@ -43,140 +42,12 @@ enum Cuda2DProjectionKernel {
 };
 
 class CParallelProjectionGeometry2D;
+class CParallelVecProjectionGeometry2D;
 class CFanFlatProjectionGeometry2D;
 class CFanFlatVecProjectionGeometry2D;
 class CVolumeGeometry2D;
+class CProjectionGeometry2D;
 
-class AstraFBP_internal;
-
-class _AstraExport AstraFBP {
-public:
-	// Constructor
-	AstraFBP();
-
-	// Destructor
-	~AstraFBP();
-
-	// Set the size of the reconstruction rectangle.
-	// Volume pixels are currently assumed to be 1x1 squares.
-	bool setReconstructionGeometry(unsigned int iVolWidth,
-	                               unsigned int iVolHeight,
-	                               float fPixelSize = 1.0f);
-
-	// Set the projection angles and number of detector pixels per angle.
-	// pfAngles must be a float array of length iProjAngles.
-	// fDetSize indicates the size of a detector pixel compared to a
-	// volume pixel edge.
-	//
-	// pfAngles will only be read from during this call.
-	bool setProjectionGeometry(unsigned int iProjAngles,
-	                           unsigned int iProjDets,
-	                           const float *pfAngles,
-	                           float fDetSize = 1.0f);
-	// Set the projection angles and number of detector pixels per angle.
-	// pfAngles must be a float array of length iProjAngles.
-	// fDetSize indicates the size of a detector pixel compared to a
-	// volume pixel edge.
-	//
-	// pfAngles, fanProjs will only be read from during this call.
-	bool setFanGeometry(unsigned int iProjAngles,
-	                    unsigned int iProjDets,
-	                    const astraCUDA::SFanProjection *fanProjs,
-	                    const float *pfAngles,
-	                    float fOriginSourceDistance,
-	                    float fOriginDetectorDistance,
-	                    float fDetSize = 1.0f,
-	                    bool bShortScan = false);
-
-	// Set linear supersampling factor for the BP.
-	// (The number of rays is the square of this)
-	//
-	// This may optionally be called before init().
-	bool setPixelSuperSampling(unsigned int iPixelSuperSampling);
-
-	// Set per-detector shifts.
-	//
-	// pfTOffsets will only be read from during this call.
-	bool setTOffsets(const float *pfTOffsets);
-
-	// Returns the required size of a filter in the fourier domain
-	// when multiplying it with the fft of the projection data.
-	// Its value is equal to the smallest power of two larger than
-	// or equal to twice the number of detectors in the spatial domain.
-	//
-	// _iDetectorCount is the number of detectors in the spatial domain.
-	static int calcFourierFilterSize(int _iDetectorCount);
-
-	// Sets the filter type. Some filter types require the user to supply an
-	// array containing the filter.
-	// The number of elements in a filter in the fourier domain should be equal
-	// to the value returned by calcFourierFilterSize().
-	// The following types require a filter:
-	//
-	// - FILTER_PROJECTION:
-	// The filter size should be equal to the output of
-	// calcFourierFilterSize(). The filtered sinogram is
-	// multiplied with the supplied filter.
-	//
-	// - FILTER_SINOGRAM:
-	// Same as FILTER_PROJECTION, but now the filter should contain a row for
-	// every projection direction.
-	//
-	// - FILTER_RPROJECTION:
-	// The filter should now contain one kernel (= ifft of filter), with the
-	// peak in the center. The filter width
-	// can be any value. If odd, the peak is assumed to be in the center, if
-	// even, it is assumed to be at floor(filter-width/2).
-	//
-	// - FILTER_RSINOGRAM
-	// Same as FILTER_RPROJECTION, but now the supplied filter should contain a
-	// row for every projection direction.
-	//
-	// A large number of other filters (FILTER_RAMLAK, FILTER_SHEPPLOGAN,
-	// FILTER_COSINE, FILTER_HAMMING, and FILTER_HANN)
-	// have a D variable, which gives the cutoff point in the frequency domain.
-	// Setting this value to 1.0 will include the whole filter
-	bool setFilter(E_FBPFILTER _eFilter,
-                   const float * _pfHostFilter = NULL,
-                   int _iFilterWidth = 0, float _fD = 1.0f, float _fFilterParameter = -1.0f);
-
-	// Initialize CUDA, allocate GPU buffers and
-	// precompute geometry-specific data.
-	//
-	// CUDA is set up to use GPU number iGPUIndex.
-	//
-	// This must be called after calling setReconstructionGeometry() and
-	// setProjectionGeometry().
-	bool init(int iGPUIndex = 0);
-
-	// Setup input sinogram for a slice.
-	// pfSinogram must be a float array of size iProjAngles*iSinogramPitch.
-	// NB: iSinogramPitch is measured in floats, not in bytes.
-	//
-	// This must be called after init(), and before iterate(). It may be
-	// called again after iterate()/getReconstruction() to start a new slice.
-	//
-	// pfSinogram will only be read from during this call.
-	bool setSinogram(const float* pfSinogram, unsigned int iSinogramPitch);
-
-	// Runs an FBP reconstruction.
-	// This must be called after setSinogram().
-	//
-	// run can be called before setFilter, but will then use the default Ram-Lak filter
-	bool run();
-
-	// Get the reconstructed slice.
-	// pfReconstruction must be a float array of size
-	// iVolHeight*iReconstructionPitch.
-	// NB: iReconstructionPitch is measured in floats, not in bytes.
-	//
-	// This may be called after run().
-	bool getReconstruction(float* pfReconstruction,
-	                       unsigned int iReconstructionPitch) const;
-
-private:
-	AstraFBP_internal* pData;
-};
 
 class _AstraExport BPalgo : public astraCUDA::ReconAlgo {
 public:
@@ -199,8 +70,8 @@ public:
 _AstraExport bool astraCudaFP(const float* pfVolume, float* pfSinogram,
                  unsigned int iVolWidth, unsigned int iVolHeight,
                  unsigned int iProjAngles, unsigned int iProjDets,
-                 const float *pfAngles, const float *pfOffsets,
-                 float fDetSize = 1.0f, unsigned int iDetSuperSampling = 1,
+                 const SParProjection *pAngles,
+                 unsigned int iDetSuperSampling = 1,
                  float fOutputScale = 1.0f, int iGPUIndex = 0);
 
 _AstraExport bool astraCudaFanFP(const float* pfVolume, float* pfSinogram,
@@ -213,8 +84,14 @@ _AstraExport bool astraCudaFanFP(const float* pfVolume, float* pfSinogram,
 
 _AstraExport bool convertAstraGeometry(const CVolumeGeometry2D* pVolGeom,
                     const CParallelProjectionGeometry2D* pProjGeom,
-                    float*& pfDetectorOffsets, float*& pfProjectionAngles,
-                    float& fDetSize, float& fOutputScale);
+                    astraCUDA::SParProjection*& pProjs,
+                    float& fOutputScale);
+
+_AstraExport bool convertAstraGeometry(const CVolumeGeometry2D* pVolGeom,
+                    const CParallelVecProjectionGeometry2D* pProjGeom,
+                    astraCUDA::SParProjection*& pProjs,
+                    float& fOutputScale);
+
 
 _AstraExport bool convertAstraGeometry(const CVolumeGeometry2D* pVolGeom,
                     const CFanFlatProjectionGeometry2D* pProjGeom,
@@ -226,6 +103,15 @@ _AstraExport bool convertAstraGeometry(const CVolumeGeometry2D* pVolGeom,
                     astraCUDA::SFanProjection*& pProjs,
                     float& outputScale);
 
+_AstraExport bool convertAstraGeometry_dims(const CVolumeGeometry2D* pVolGeom,
+                          const CProjectionGeometry2D* pProjGeom,
+                          astraCUDA::SDimensions& dims);
+
+_AstraExport bool convertAstraGeometry(const CVolumeGeometry2D* pVolGeom,
+                          const CProjectionGeometry2D* pProjGeom,
+                          astraCUDA::SParProjection*& pParProjs,
+                          astraCUDA::SFanProjection*& pFanProjs,
+                          float& outputScale);
 
 }
 #endif
diff --git a/cuda/2d/cgls.cu b/cuda/2d/cgls.cu
index f402914..3f06581 100644
--- a/cuda/2d/cgls.cu
+++ b/cuda/2d/cgls.cu
@@ -207,42 +207,6 @@ float CGLS::computeDiffNorm()
 	return sqrt(s);
 }
 
-bool doCGLS(float* D_volumeData, unsigned int volumePitch,
-            float* D_sinoData, unsigned int sinoPitch,
-            const SDimensions& dims, /*const SAugmentedData& augs,*/
-            const float* angles, const float* TOffsets, unsigned int iterations)
-{
-	CGLS cgls;
-	bool ok = true;
-
-	ok &= cgls.setGeometry(dims, angles);
-#if 0
-	if (D_maskData)
-		ok &= cgls.enableVolumeMask();
-#endif
-	if (TOffsets)
-		ok &= cgls.setTOffsets(TOffsets);
-
-	if (!ok)
-		return false;
-
-	ok = cgls.init();
-	if (!ok)
-		return false;
-
-#if 0
-	if (D_maskData)
-		ok &= cgls.setVolumeMask(D_maskData, maskPitch);
-#endif
-
-	ok &= cgls.setBuffers(D_volumeData, volumePitch, D_sinoData, sinoPitch);
-	if (!ok)
-		return false;
-
-	ok = cgls.iterate(iterations);
-
-	return ok;
-}
 
 }
 
diff --git a/cuda/2d/dims.h b/cuda/2d/dims.h
index e870da5..f74d0e4 100644
--- a/cuda/2d/dims.h
+++ b/cuda/2d/dims.h
@@ -34,9 +34,11 @@ $Id$
 
 namespace astraCUDA {
 
+using astra::SParProjection;
 using astra::SFanProjection;
 
 
+
 struct SDimensions {
 	// Width, height of reconstruction volume
 	unsigned int iVolWidth;
@@ -48,9 +50,6 @@ struct SDimensions {
 	// Number of detector pixels
 	unsigned int iProjDets;
 
-	// size of detector compared to volume pixels
-	float fDetScale;
-
 	// in FP, number of rays to trace per detector pixel.
 	// This should usually be set to 1.
 	// If fDetScale > 1, this should be set to an integer of roughly
diff --git a/cuda/2d/em.cu b/cuda/2d/em.cu
index 8593b08..47ec548 100644
--- a/cuda/2d/em.cu
+++ b/cuda/2d/em.cu
@@ -170,34 +170,6 @@ float EM::computeDiffNorm()
 }
 
 
-bool doEM(float* D_volumeData, unsigned int volumePitch,
-          float* D_sinoData, unsigned int sinoPitch,
-          const SDimensions& dims, const float* angles,
-          const float* TOffsets, unsigned int iterations)
-{
-	EM em;
-	bool ok = true;
-
-	ok &= em.setGeometry(dims, angles);
-	if (TOffsets)
-		ok &= em.setTOffsets(TOffsets);
-
-	if (!ok)
-		return false;
-
-	ok = em.init();
-	if (!ok)
-		return false;
-
-	ok &= em.setBuffers(D_volumeData, volumePitch, D_sinoData, sinoPitch);
-	if (!ok)
-		return false;
-
-	ok = em.iterate(iterations);
-
-	return ok;
-}
-
 }
 
 #ifdef STANDALONE
diff --git a/cuda/2d/fbp.cu b/cuda/2d/fbp.cu
new file mode 100644
index 0000000..2feac7d
--- /dev/null
+++ b/cuda/2d/fbp.cu
@@ -0,0 +1,347 @@
+/*
+-----------------------------------------------------------------------
+Copyright: 2010-2015, iMinds-Vision Lab, University of Antwerp
+           2014-2015, CWI, Amsterdam
+
+Contact: astra@uantwerpen.be
+Website: http://sf.net/projects/astra-toolbox
+
+This file is part of the ASTRA Toolbox.
+
+
+The ASTRA Toolbox is free software: you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+The ASTRA Toolbox is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with the ASTRA Toolbox. If not, see <http://www.gnu.org/licenses/>.
+
+-----------------------------------------------------------------------
+$Id$
+*/
+
+#include "fbp.h"
+#include "fft.h"
+#include "par_bp.h"
+#include "fan_bp.h"
+
+// For fan-beam preweighting
+#include "../3d/fdk.h"
+
+#include "astra/Logging.h"
+
+#include <cuda.h>
+
+namespace astraCUDA {
+
+
+
+static int calcNextPowerOfTwo(int n)
+{
+	int x = 1;
+	while (x < n)
+		x *= 2;
+
+	return x;
+}
+
+// static
+int FBP::calcFourierFilterSize(int _iDetectorCount)
+{
+	int iFFTRealDetCount = calcNextPowerOfTwo(2 * _iDetectorCount);
+	int iFreqBinCount = calcFFTFourierSize(iFFTRealDetCount);
+
+	// CHECKME: Matlab makes this at least 64. Do we also need to?
+	return iFreqBinCount;
+}
+
+
+
+
+FBP::FBP() : ReconAlgo()
+{
+	D_filter = 0;
+
+}
+
+FBP::~FBP()
+{
+	reset();
+}
+
+void FBP::reset()
+{
+	if (D_filter) {
+		freeComplexOnDevice((cufftComplex *)D_filter);
+		D_filter = 0;
+	}
+}
+
+bool FBP::init()
+{
+	return true;
+}
+
+bool FBP::setFilter(astra::E_FBPFILTER _eFilter, const float * _pfHostFilter /* = NULL */, int _iFilterWidth /* = 0 */, float _fD /* = 1.0f */, float _fFilterParameter /* = -1.0f */)
+{
+	if (D_filter)
+	{
+		freeComplexOnDevice((cufftComplex*)D_filter);
+		D_filter = 0;
+	}
+
+	if (_eFilter == astra::FILTER_NONE)
+		return true; // leave D_filter set to 0
+
+
+	int iFFTRealDetCount = calcNextPowerOfTwo(2 * dims.iProjDets);
+	int iFreqBinCount = calcFFTFourierSize(iFFTRealDetCount);
+
+	cufftComplex * pHostFilter = new cufftComplex[dims.iProjAngles * iFreqBinCount];
+	memset(pHostFilter, 0, sizeof(cufftComplex) * dims.iProjAngles * iFreqBinCount);
+
+	allocateComplexOnDevice(dims.iProjAngles, iFreqBinCount, (cufftComplex**)&D_filter);
+
+	switch(_eFilter)
+	{
+		case astra::FILTER_NONE:
+			// handled above
+			break;
+		case astra::FILTER_RAMLAK:
+		case astra::FILTER_SHEPPLOGAN:
+		case astra::FILTER_COSINE:
+		case astra::FILTER_HAMMING:
+		case astra::FILTER_HANN:
+		case astra::FILTER_TUKEY:
+		case astra::FILTER_LANCZOS:
+		case astra::FILTER_TRIANGULAR:
+		case astra::FILTER_GAUSSIAN:
+		case astra::FILTER_BARTLETTHANN:
+		case astra::FILTER_BLACKMAN:
+		case astra::FILTER_NUTTALL:
+		case astra::FILTER_BLACKMANHARRIS:
+		case astra::FILTER_BLACKMANNUTTALL:
+		case astra::FILTER_FLATTOP:
+		case astra::FILTER_PARZEN:
+		{
+			genFilter(_eFilter, _fD, dims.iProjAngles, pHostFilter, iFFTRealDetCount, iFreqBinCount, _fFilterParameter);
+			uploadComplexArrayToDevice(dims.iProjAngles, iFreqBinCount, pHostFilter, (cufftComplex*)D_filter);
+
+			break;
+		}
+		case astra::FILTER_PROJECTION:
+		{
+			// make sure the offered filter has the correct size
+			assert(_iFilterWidth == iFreqBinCount);
+
+			for(int iFreqBinIndex = 0; iFreqBinIndex < iFreqBinCount; iFreqBinIndex++)
+			{
+				float fValue = _pfHostFilter[iFreqBinIndex];
+
+				for(int iProjectionIndex = 0; iProjectionIndex < (int)dims.iProjAngles; iProjectionIndex++)
+				{
+					pHostFilter[iFreqBinIndex + iProjectionIndex * iFreqBinCount].x = fValue;
+					pHostFilter[iFreqBinIndex + iProjectionIndex * iFreqBinCount].y = 0.0f;
+				}
+			}
+			uploadComplexArrayToDevice(dims.iProjAngles, iFreqBinCount, pHostFilter, (cufftComplex*)D_filter);
+			break;
+		}
+		case astra::FILTER_SINOGRAM:
+		{
+			// make sure the offered filter has the correct size
+			assert(_iFilterWidth == iFreqBinCount);
+
+			for(int iFreqBinIndex = 0; iFreqBinIndex < iFreqBinCount; iFreqBinIndex++)
+			{
+				for(int iProjectionIndex = 0; iProjectionIndex < (int)dims.iProjAngles; iProjectionIndex++)
+				{
+					float fValue = _pfHostFilter[iFreqBinIndex + iProjectionIndex * _iFilterWidth];
+
+					pHostFilter[iFreqBinIndex + iProjectionIndex * iFreqBinCount].x = fValue;
+					pHostFilter[iFreqBinIndex + iProjectionIndex * iFreqBinCount].y = 0.0f;
+				}
+			}
+			uploadComplexArrayToDevice(dims.iProjAngles, iFreqBinCount, pHostFilter, (cufftComplex*)D_filter);
+			break;
+		}
+		case astra::FILTER_RPROJECTION:
+		{
+			int iProjectionCount = dims.iProjAngles;
+			int iRealFilterElementCount = iProjectionCount * iFFTRealDetCount;
+			float * pfHostRealFilter = new float[iRealFilterElementCount];
+			memset(pfHostRealFilter, 0, sizeof(float) * iRealFilterElementCount);
+
+			int iUsedFilterWidth = min(_iFilterWidth, iFFTRealDetCount);
+			int iStartFilterIndex = (_iFilterWidth - iUsedFilterWidth) / 2;
+			int iMaxFilterIndex = iStartFilterIndex + iUsedFilterWidth;
+
+			int iFilterShiftSize = _iFilterWidth / 2;
+
+			for(int iDetectorIndex = iStartFilterIndex; iDetectorIndex < iMaxFilterIndex; iDetectorIndex++)
+			{
+				int iFFTInFilterIndex = (iDetectorIndex + iFFTRealDetCount - iFilterShiftSize) % iFFTRealDetCount;
+				float fValue = _pfHostFilter[iDetectorIndex];
+
+				for(int iProjectionIndex = 0; iProjectionIndex < (int)dims.iProjAngles; iProjectionIndex++)
+				{
+					pfHostRealFilter[iFFTInFilterIndex + iProjectionIndex * iFFTRealDetCount] = fValue;
+				}
+			}
+
+			float* pfDevRealFilter = NULL;
+			cudaMalloc((void **)&pfDevRealFilter, sizeof(float) * iRealFilterElementCount); // TODO: check for errors
+			cudaMemcpy(pfDevRealFilter, pfHostRealFilter, sizeof(float) * iRealFilterElementCount, cudaMemcpyHostToDevice);
+			delete[] pfHostRealFilter;
+
+			runCudaFFT(iProjectionCount, pfDevRealFilter, iFFTRealDetCount, iFFTRealDetCount, iFFTRealDetCount, iFreqBinCount, (cufftComplex*)D_filter);
+
+			cudaFree(pfDevRealFilter);
+
+			break;
+		}
+		case astra::FILTER_RSINOGRAM:
+		{
+			int iProjectionCount = dims.iProjAngles;
+			int iRealFilterElementCount = iProjectionCount * iFFTRealDetCount;
+			float* pfHostRealFilter = new float[iRealFilterElementCount];
+			memset(pfHostRealFilter, 0, sizeof(float) * iRealFilterElementCount);
+
+			int iUsedFilterWidth = min(_iFilterWidth, iFFTRealDetCount);
+			int iStartFilterIndex = (_iFilterWidth - iUsedFilterWidth) / 2;
+			int iMaxFilterIndex = iStartFilterIndex + iUsedFilterWidth;
+
+			int iFilterShiftSize = _iFilterWidth / 2;
+
+			for(int iDetectorIndex = iStartFilterIndex; iDetectorIndex < iMaxFilterIndex; iDetectorIndex++)
+			{
+				int iFFTInFilterIndex = (iDetectorIndex + iFFTRealDetCount - iFilterShiftSize) % iFFTRealDetCount;
+
+				for(int iProjectionIndex = 0; iProjectionIndex < (int)dims.iProjAngles; iProjectionIndex++)
+				{
+					float fValue = _pfHostFilter[iDetectorIndex + iProjectionIndex * _iFilterWidth];
+					pfHostRealFilter[iFFTInFilterIndex + iProjectionIndex * iFFTRealDetCount] = fValue;
+				}
+			}
+
+			float* pfDevRealFilter = NULL;
+			cudaMalloc((void **)&pfDevRealFilter, sizeof(float) * iRealFilterElementCount); // TODO: check for errors
+			cudaMemcpy(pfDevRealFilter, pfHostRealFilter, sizeof(float) * iRealFilterElementCount, cudaMemcpyHostToDevice);
+			delete[] pfHostRealFilter;
+
+			runCudaFFT(iProjectionCount, pfDevRealFilter, iFFTRealDetCount, iFFTRealDetCount, iFFTRealDetCount, iFreqBinCount, (cufftComplex*)D_filter);
+
+			cudaFree(pfDevRealFilter);
+
+			break;
+		}
+		default:
+		{
+			ASTRA_ERROR("FBP::setFilter: Unknown filter type requested");
+			delete [] pHostFilter;
+			return false;
+		}
+	}
+
+	delete [] pHostFilter;
+
+	return true;
+}
+
+bool FBP::iterate(unsigned int iterations)
+{
+	zeroVolumeData(D_volumeData, volumePitch, dims);
+
+	bool ok = false;
+
+	if (fanProjs) {
+		// Call FDK_PreWeight to handle fan beam geometry. We treat
+		// this as a cone beam setup of a single slice:
+
+		// TODO: TOffsets affects this preweighting...
+
+		// TODO: We take the fan parameters from the last projection here
+		// without checking if they're the same in all projections
+
+		float *pfAngles = new float[dims.iProjAngles];
+
+		float fOriginSource, fOriginDetector, fDetSize, fOffset;
+		for (unsigned int i = 0; i < dims.iProjAngles; ++i) {
+			bool ok = astra::getFanParameters(fanProjs[i], dims.iProjDets,
+			                                  pfAngles[i],
+			                                  fOriginSource, fOriginDetector,
+			                                  fDetSize, fOffset);
+			if (!ok) {
+				ASTRA_ERROR("FBP_CUDA: Failed to extract circular fan beam parameters from fan beam geometry");
+				return false;
+			}
+		}
+
+		// We create a fake cudaPitchedPtr
+		cudaPitchedPtr tmp;
+		tmp.ptr = D_sinoData;
+		tmp.pitch = sinoPitch * sizeof(float);
+		tmp.xsize = dims.iProjDets;
+		tmp.ysize = dims.iProjAngles;
+		// and a fake Dimensions3D
+		astraCUDA3d::SDimensions3D dims3d;
+		dims3d.iVolX = dims.iVolWidth;
+		dims3d.iVolY = dims.iVolHeight;
+		dims3d.iVolZ = 1;
+		dims3d.iProjAngles = dims.iProjAngles;
+		dims3d.iProjU = dims.iProjDets;
+		dims3d.iProjV = 1;
+		dims3d.iRaysPerDetDim = dims3d.iRaysPerVoxelDim = 1;
+
+		astraCUDA3d::FDK_PreWeight(tmp, fOriginSource,
+		              fOriginDetector, 0.0f, 0.0f,
+		              fDetSize, 1.0f,
+		              m_bShortScan, dims3d, pfAngles);
+	} else {
+		// TODO: How should different detector pixel size in different
+		// projections be handled?
+	}
+
+	if (D_filter) {
+
+		int iFFTRealDetCount = calcNextPowerOfTwo(2 * dims.iProjDets);
+		int iFFTFourDetCount = calcFFTFourierSize(iFFTRealDetCount);
+
+		cufftComplex * pDevComplexSinogram = NULL;
+
+		allocateComplexOnDevice(dims.iProjAngles, iFFTFourDetCount, &pDevComplexSinogram);
+
+		runCudaFFT(dims.iProjAngles, D_sinoData, sinoPitch, dims.iProjDets, iFFTRealDetCount, iFFTFourDetCount, pDevComplexSinogram);
+
+		applyFilter(dims.iProjAngles, iFFTFourDetCount, pDevComplexSinogram, (cufftComplex*)D_filter);
+
+		runCudaIFFT(dims.iProjAngles, pDevComplexSinogram, D_sinoData, sinoPitch, dims.iProjDets, iFFTRealDetCount, iFFTFourDetCount);
+
+		freeComplexOnDevice(pDevComplexSinogram);
+
+	}
+
+	float fOutputScale = (M_PI / 2.0f) / (float)dims.iProjAngles;
+
+	if (fanProjs) {
+		ok = FanBP_FBPWeighted(D_volumeData, volumePitch, D_sinoData, sinoPitch, dims, fanProjs, fOutputScale);
+
+	} else {
+		ok = BP(D_volumeData, volumePitch, D_sinoData, sinoPitch, dims, parProjs, fOutputScale);
+	}
+	if(!ok)
+	{
+		return false;
+	}
+
+	return true;
+}
+
+
+}
diff --git a/cuda/2d/fbp.h b/cuda/2d/fbp.h
new file mode 100644
index 0000000..8b4d64d
--- /dev/null
+++ b/cuda/2d/fbp.h
@@ -0,0 +1,98 @@
+/*
+-----------------------------------------------------------------------
+Copyright: 2010-2015, iMinds-Vision Lab, University of Antwerp
+           2014-2015, CWI, Amsterdam
+
+Contact: astra@uantwerpen.be
+Website: http://sf.net/projects/astra-toolbox
+
+This file is part of the ASTRA Toolbox.
+
+
+The ASTRA Toolbox is free software: you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+The ASTRA Toolbox is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with the ASTRA Toolbox. If not, see <http://www.gnu.org/licenses/>.
+
+-----------------------------------------------------------------------
+$Id$
+*/
+
+#include "algo.h"
+#include "fbp_filters.h"
+
+namespace astraCUDA {
+
+class _AstraExport FBP : public ReconAlgo {
+public:
+	FBP();
+	~FBP();
+
+	virtual bool useSinogramMask() { return false; }
+	virtual bool useVolumeMask() { return false; }
+
+	// Returns the required size of a filter in the fourier domain
+	// when multiplying it with the fft of the projection data.
+	// Its value is equal to the smallest power of two larger than
+	// or equal to twice the number of detectors in the spatial domain.
+	//
+	// _iDetectorCount is the number of detectors in the spatial domain.
+	static int calcFourierFilterSize(int _iDetectorCount);
+
+	// Sets the filter type. Some filter types require the user to supply an
+	// array containing the filter.
+	// The number of elements in a filter in the fourier domain should be equal
+	// to the value returned by calcFourierFilterSize().
+	// The following types require a filter:
+	//
+	// - FILTER_PROJECTION:
+	// The filter size should be equal to the output of
+	// calcFourierFilterSize(). The filtered sinogram is
+	// multiplied with the supplied filter.
+	//
+	// - FILTER_SINOGRAM:
+	// Same as FILTER_PROJECTION, but now the filter should contain a row for
+	// every projection direction.
+	//
+	// - FILTER_RPROJECTION:
+	// The filter should now contain one kernel (= ifft of filter), with the
+	// peak in the center. The filter width
+	// can be any value. If odd, the peak is assumed to be in the center, if
+	// even, it is assumed to be at floor(filter-width/2).
+	//
+	// - FILTER_RSINOGRAM
+	// Same as FILTER_RPROJECTION, but now the supplied filter should contain a
+	// row for every projection direction.
+	//
+	// A large number of other filters (FILTER_RAMLAK, FILTER_SHEPPLOGAN,
+	// FILTER_COSINE, FILTER_HAMMING, and FILTER_HANN)
+	// have a D variable, which gives the cutoff point in the frequency domain.
+	// Setting this value to 1.0 will include the whole filter
+	bool setFilter(astra::E_FBPFILTER _eFilter,
+                   const float * _pfHostFilter = NULL,
+                   int _iFilterWidth = 0, float _fD = 1.0f, float _fFilterParameter = -1.0f);
+
+	bool setShortScan(bool ss) { m_bShortScan = ss; return true; }
+
+	virtual bool init();
+
+	virtual bool iterate(unsigned int iterations);
+
+	virtual float computeDiffNorm() { return 0.0f; } // TODO
+
+protected:
+	void reset();
+
+	void* D_filter; // cufftComplex*
+	bool m_bShortScan;
+};
+
+}
diff --git a/cuda/2d/fbp_filters.h b/cuda/2d/fbp_filters.h
index b206a5d..71c7572 100644
--- a/cuda/2d/fbp_filters.h
+++ b/cuda/2d/fbp_filters.h
@@ -29,6 +29,8 @@ $Id$
 #ifndef FBP_FILTERS_H
 #define FBP_FILTERS_H
 
+namespace astra {
+
 enum E_FBPFILTER
 {
 	FILTER_NONE,			//< no filter (regular BP)
@@ -55,4 +57,6 @@ enum E_FBPFILTER
 	FILTER_RSINOGRAM,		//< sinogram filter in real space
 };
 
+}
+
 #endif /* FBP_FILTERS_H */
diff --git a/cuda/2d/fft.cu b/cuda/2d/fft.cu
index 2d259a9..8a7cc10 100644
--- a/cuda/2d/fft.cu
+++ b/cuda/2d/fft.cu
@@ -64,6 +64,8 @@ using namespace astra;
   } } while (0)
 
 
+namespace astraCUDA {
+
 __global__ static void applyFilter_kernel(int _iProjectionCount,
                                           int _iFreqBinCount,
                                           cufftComplex * _pSinogram,
@@ -276,11 +278,11 @@ bool runCudaIFFT(int _iProjectionCount, const cufftComplex* _pDevSourceComplex,
 // Because the input is real, the Fourier transform is symmetric.
 // CUFFT only outputs the first half (ignoring the redundant second half),
 // and expects the same as input for the IFFT.
-int calcFFTFourSize(int _iFFTRealSize)
+int calcFFTFourierSize(int _iFFTRealSize)
 {
-	int iFFTFourSize = _iFFTRealSize / 2 + 1;
+	int iFFTFourierSize = _iFFTRealSize / 2 + 1;
 
-	return iFFTFourSize;
+	return iFFTFourierSize;
 }
 
 void genIdenFilter(int _iProjectionCount, cufftComplex * _pFilter,
@@ -695,6 +697,10 @@ void genFilter(E_FBPFILTER _eFilter, float _fD, int _iProjectionCount,
 	delete[] pfW;
 }
 
+
+}
+
+
 #ifdef STANDALONE
 
 __global__ static void doubleFourierOutput_kernel(int _iProjectionCount,
diff --git a/cuda/2d/fft.h b/cuda/2d/fft.h
index e985fc6..b29f17a 100644
--- a/cuda/2d/fft.h
+++ b/cuda/2d/fft.h
@@ -34,6 +34,8 @@ $Id$
 
 #include "fbp_filters.h"
 
+namespace astraCUDA {
+
 bool allocateComplexOnDevice(int _iProjectionCount,
                              int _iDetectorCount,
                              cufftComplex ** _ppDevComplex);
@@ -57,13 +59,15 @@ bool runCudaIFFT(int _iProjectionCount, const cufftComplex* _pDevSourceComplex,
 void applyFilter(int _iProjectionCount, int _iFreqBinCount,
                  cufftComplex * _pSinogram, cufftComplex * _pFilter);
 
-int calcFFTFourSize(int _iFFTRealSize);
+int calcFFTFourierSize(int _iFFTRealSize);
 
-void genFilter(E_FBPFILTER _eFilter, float _fD, int _iProjectionCount,
+void genFilter(astra::E_FBPFILTER _eFilter, float _fD, int _iProjectionCount,
                cufftComplex * _pFilter, int _iFFTRealDetectorCount,
                int _iFFTFourierDetectorCount, float _fParameter = -1.0f);
 
 void genIdenFilter(int _iProjectionCount, cufftComplex * _pFilter,
                    int _iFFTRealDetectorCount, int _iFFTFourierDetectorCount);
 
+}
+
 #endif /* FFT_H */
diff --git a/cuda/2d/par_bp.cu b/cuda/2d/par_bp.cu
index d9f7325..cf0a684 100644
--- a/cuda/2d/par_bp.cu
+++ b/cuda/2d/par_bp.cu
@@ -53,8 +53,8 @@ const unsigned int g_blockSlices = 16;
 
 const unsigned int g_MaxAngles = 2560;
 
-__constant__ float gC_angle_sin[g_MaxAngles];
-__constant__ float gC_angle_cos[g_MaxAngles];
+__constant__ float gC_angle_scaled_sin[g_MaxAngles];
+__constant__ float gC_angle_scaled_cos[g_MaxAngles];
 __constant__ float gC_angle_offset[g_MaxAngles];
 
 static bool bindProjDataTexture(float* data, unsigned int pitch, unsigned int width, unsigned int height, cudaTextureAddressMode mode = cudaAddressModeBorder)
@@ -73,7 +73,7 @@ static bool bindProjDataTexture(float* data, unsigned int pitch, unsigned int wi
 	return true;
 }
 
-__global__ void devBP(float* D_volData, unsigned int volPitch, unsigned int startAngle, bool offsets, const SDimensions dims, float fOutputScale)
+__global__ void devBP(float* D_volData, unsigned int volPitch, unsigned int startAngle, const SDimensions dims, float fOutputScale)
 {
 	const int relX = threadIdx.x;
 	const int relY = threadIdx.y;
@@ -87,47 +87,30 @@ __global__ void devBP(float* D_volData, unsigned int volPitch, unsigned int star
 	if (X >= dims.iVolWidth || Y >= dims.iVolHeight)
 		return;
 
-	const float fX = ( X - 0.5f*dims.iVolWidth + 0.5f ) / dims.fDetScale;
-	const float fY = ( Y - 0.5f*dims.iVolHeight + 0.5f ) / dims.fDetScale;
+	const float fX = ( X - 0.5f*dims.iVolWidth + 0.5f );
+	const float fY = ( Y - 0.5f*dims.iVolHeight + 0.5f );
 
 	float* volData = (float*)D_volData;
 
 	float fVal = 0.0f;
 	float fA = startAngle + 0.5f;
-	const float fT_base = 0.5f*dims.iProjDets - 0.5f + 0.5f;
 
-	if (offsets) {
-
-		for (int angle = startAngle; angle < endAngle; ++angle)
-		{
-			const float cos_theta = gC_angle_cos[angle];
-			const float sin_theta = gC_angle_sin[angle];
-			const float TOffset = gC_angle_offset[angle];
-
-			const float fT = fT_base + fX * cos_theta - fY * sin_theta + TOffset;
-			fVal += tex2D(gT_projTexture, fT, fA);
-			fA += 1.0f;
-		}
-
-	} else {
-
-		for (int angle = startAngle; angle < endAngle; ++angle)
-		{
-			const float cos_theta = gC_angle_cos[angle];
-			const float sin_theta = gC_angle_sin[angle];
-
-			const float fT = fT_base + fX * cos_theta - fY * sin_theta;
-			fVal += tex2D(gT_projTexture, fT, fA);
-			fA += 1.0f;
-		}
+	for (int angle = startAngle; angle < endAngle; ++angle)
+	{
+		const float scaled_cos_theta = gC_angle_scaled_cos[angle];
+		const float scaled_sin_theta = gC_angle_scaled_sin[angle];
+		const float TOffset = gC_angle_offset[angle];
 
+		const float fT = fX * scaled_cos_theta - fY * scaled_sin_theta + TOffset;
+		fVal += tex2D(gT_projTexture, fT, fA);
+		fA += 1.0f;
 	}
 
 	volData[Y*volPitch+X] += fVal * fOutputScale;
 }
 
 // supersampling version
-__global__ void devBP_SS(float* D_volData, unsigned int volPitch, unsigned int startAngle, bool offsets, const SDimensions dims, float fOutputScale)
+__global__ void devBP_SS(float* D_volData, unsigned int volPitch, unsigned int startAngle, const SDimensions dims, float fOutputScale)
 {
 	const int relX = threadIdx.x;
 	const int relY = threadIdx.y;
@@ -141,61 +124,35 @@ __global__ void devBP_SS(float* D_volData, unsigned int volPitch, unsigned int s
 	if (X >= dims.iVolWidth || Y >= dims.iVolHeight)
 		return;
 
-	const float fX = ( X - 0.5f*dims.iVolWidth + 0.5f - 0.5f + 0.5f/dims.iRaysPerPixelDim) / dims.fDetScale;
-	const float fY = ( Y - 0.5f*dims.iVolHeight + 0.5f - 0.5f + 0.5f/dims.iRaysPerPixelDim) / dims.fDetScale;
+	const float fX = ( X - 0.5f*dims.iVolWidth + 0.5f - 0.5f + 0.5f/dims.iRaysPerPixelDim);
+	const float fY = ( Y - 0.5f*dims.iVolHeight + 0.5f - 0.5f + 0.5f/dims.iRaysPerPixelDim);
 
-	const float fSubStep = 1.0f/(dims.iRaysPerPixelDim * dims.fDetScale);
+	const float fSubStep = 1.0f/(dims.iRaysPerPixelDim); // * dims.fDetScale);
 
 	float* volData = (float*)D_volData;
 
 	float fVal = 0.0f;
 	float fA = startAngle + 0.5f;
-	const float fT_base = 0.5f*dims.iProjDets - 0.5f + 0.5f;
 
 	fOutputScale /= (dims.iRaysPerPixelDim * dims.iRaysPerPixelDim);
 
-	if (offsets) {
-
-		for (int angle = startAngle; angle < endAngle; ++angle)
-		{
-			const float cos_theta = gC_angle_cos[angle];
-			const float sin_theta = gC_angle_sin[angle];
-			const float TOffset = gC_angle_offset[angle];
-
-			float fT = fT_base + fX * cos_theta - fY * sin_theta + TOffset;
-
-			for (int iSubX = 0; iSubX < dims.iRaysPerPixelDim; ++iSubX) {
-				float fTy = fT;
-				fT += fSubStep * cos_theta;
-				for (int iSubY = 0; iSubY < dims.iRaysPerPixelDim; ++iSubY) {
-					fVal += tex2D(gT_projTexture, fTy, fA);
-					fTy -= fSubStep * sin_theta;
-				}
-			}
-			fA += 1.0f;
-		}
-
-	} else {
+	for (int angle = startAngle; angle < endAngle; ++angle)
+	{
+		const float cos_theta = gC_angle_scaled_cos[angle];
+		const float sin_theta = gC_angle_scaled_sin[angle];
+		const float TOffset = gC_angle_offset[angle];
 
-		for (int angle = startAngle; angle < endAngle; ++angle)
-		{
-			const float cos_theta = gC_angle_cos[angle];
-			const float sin_theta = gC_angle_sin[angle];
+		float fT = fX * cos_theta - fY * sin_theta + TOffset;
 
-			float fT = fT_base + fX * cos_theta - fY * sin_theta;
-
-			for (int iSubX = 0; iSubX < dims.iRaysPerPixelDim; ++iSubX) {
-				float fTy = fT;
-				fT += fSubStep * cos_theta;
-				for (int iSubY = 0; iSubY < dims.iRaysPerPixelDim; ++iSubY) {
-					fVal += tex2D(gT_projTexture, fTy, fA);
-					fTy -= fSubStep * sin_theta;
-				}
+		for (int iSubX = 0; iSubX < dims.iRaysPerPixelDim; ++iSubX) {
+			float fTy = fT;
+			fT += fSubStep * cos_theta;
+			for (int iSubY = 0; iSubY < dims.iRaysPerPixelDim; ++iSubY) {
+				fVal += tex2D(gT_projTexture, fTy, fA);
+				fTy -= fSubStep * sin_theta;
 			}
-			fA += 1.0f;
-
 		}
-
+		fA += 1.0f;
 	}
 
 	volData[Y*volPitch+X] += fVal * fOutputScale;
@@ -212,12 +169,10 @@ __global__ void devBP_SART(float* D_volData, unsigned int volPitch, float offset
 	if (X >= dims.iVolWidth || Y >= dims.iVolHeight)
 		return;
 
-	const float fX = ( X - 0.5f*dims.iVolWidth + 0.5f ) / dims.fDetScale;
-	const float fY = ( Y - 0.5f*dims.iVolHeight + 0.5f ) / dims.fDetScale;
-
-	const float fT_base = 0.5f*dims.iProjDets - 0.5f + 0.5f;
+	const float fX = ( X - 0.5f*dims.iVolWidth + 0.5f );
+	const float fY = ( Y - 0.5f*dims.iVolHeight + 0.5f );
 
-	const float fT = fT_base + fX * angle_cos - fY * angle_sin + offset;
+	const float fT = fX * angle_cos - fY * angle_sin + offset;
 	const float fVal = tex2D(gT_projTexture, fT, 0.5f);
 
 	D_volData[Y*volPitch+X] += fVal * fOutputScale;
@@ -226,32 +181,35 @@ __global__ void devBP_SART(float* D_volData, unsigned int volPitch, float offset
 
 bool BP_internal(float* D_volumeData, unsigned int volumePitch,
         float* D_projData, unsigned int projPitch,
-        const SDimensions& dims, const float* angles, const float* TOffsets, float fOutputScale)
+        const SDimensions& dims, const SParProjection* angles,
+        float fOutputScale)
 {
-	// TODO: process angles block by block
 	assert(dims.iProjAngles <= g_MaxAngles);
 
-	float* angle_sin = new float[dims.iProjAngles];
-	float* angle_cos = new float[dims.iProjAngles];
+	float* angle_scaled_sin = new float[dims.iProjAngles];
+	float* angle_scaled_cos = new float[dims.iProjAngles];
+	float* angle_offset = new float[dims.iProjAngles];
 
 	bindProjDataTexture(D_projData, projPitch, dims.iProjDets, dims.iProjAngles);
 
 	for (unsigned int i = 0; i < dims.iProjAngles; ++i) {
-		angle_sin[i] = sinf(angles[i]);
-		angle_cos[i] = cosf(angles[i]);
+		double d = angles[i].fDetUX * angles[i].fRayY - angles[i].fDetUY * angles[i].fRayX;
+		angle_scaled_cos[i] = angles[i].fRayY / d;
+		angle_scaled_sin[i] = -angles[i].fRayX / d; // TODO: Check signs
+		angle_offset[i] = (angles[i].fDetSY * angles[i].fRayX - angles[i].fDetSX * angles[i].fRayY) / d;
 	}
-	cudaError_t e1 = cudaMemcpyToSymbol(gC_angle_sin, angle_sin, dims.iProjAngles*sizeof(float), 0, cudaMemcpyHostToDevice);
-	cudaError_t e2 = cudaMemcpyToSymbol(gC_angle_cos, angle_cos, dims.iProjAngles*sizeof(float), 0, cudaMemcpyHostToDevice);
+
+	cudaError_t e1 = cudaMemcpyToSymbol(gC_angle_scaled_sin, angle_scaled_sin, dims.iProjAngles*sizeof(float), 0, cudaMemcpyHostToDevice);
+	cudaError_t e2 = cudaMemcpyToSymbol(gC_angle_scaled_cos, angle_scaled_cos, dims.iProjAngles*sizeof(float), 0, cudaMemcpyHostToDevice);
+	cudaError_t e3 = cudaMemcpyToSymbol(gC_angle_offset, angle_offset, dims.iProjAngles*sizeof(float), 0, cudaMemcpyHostToDevice);
 	assert(e1 == cudaSuccess);
 	assert(e2 == cudaSuccess);
+	assert(e3 == cudaSuccess);
 
-	if (TOffsets) {
-		cudaError_t e3 = cudaMemcpyToSymbol(gC_angle_offset, TOffsets, dims.iProjAngles*sizeof(float), 0, cudaMemcpyHostToDevice);
-		assert(e3 == cudaSuccess);
-	}
 
-	delete[] angle_sin;
-	delete[] angle_cos;
+	delete[] angle_scaled_sin;
+	delete[] angle_scaled_cos;
+	delete[] angle_offset;
 
 	dim3 dimBlock(g_blockSlices, g_blockSliceSize);
 	dim3 dimGrid((dims.iVolWidth+g_blockSlices-1)/g_blockSlices,
@@ -263,9 +221,9 @@ bool BP_internal(float* D_volumeData, unsigned int volumePitch,
 	for (unsigned int i = 0; i < dims.iProjAngles; i += g_anglesPerBlock) {
 
 		if (dims.iRaysPerPixelDim > 1)
-			devBP_SS<<<dimGrid, dimBlock, 0, stream>>>(D_volumeData, volumePitch, i, (TOffsets != 0), dims, fOutputScale);
+			devBP_SS<<<dimGrid, dimBlock, 0, stream>>>(D_volumeData, volumePitch, i, dims, fOutputScale);
 		else
-			devBP<<<dimGrid, dimBlock, 0, stream>>>(D_volumeData, volumePitch, i, (TOffsets != 0), dims, fOutputScale);
+			devBP<<<dimGrid, dimBlock, 0, stream>>>(D_volumeData, volumePitch, i, dims, fOutputScale);
 	}
 	cudaThreadSynchronize();
 
@@ -278,7 +236,7 @@ bool BP_internal(float* D_volumeData, unsigned int volumePitch,
 
 bool BP(float* D_volumeData, unsigned int volumePitch,
         float* D_projData, unsigned int projPitch,
-        const SDimensions& dims, const float* angles, const float* TOffsets, float fOutputScale)
+        const SDimensions& dims, const SParProjection* angles, float fOutputScale)
 {
 	for (unsigned int iAngle = 0; iAngle < dims.iProjAngles; iAngle += g_MaxAngles) {
 		SDimensions subdims = dims;
@@ -290,9 +248,7 @@ bool BP(float* D_volumeData, unsigned int volumePitch,
 		bool ret;
 		ret = BP_internal(D_volumeData, volumePitch,
 		                  D_projData + iAngle * projPitch, projPitch,
-		                  subdims, angles + iAngle,
-		                  TOffsets ? TOffsets + iAngle : 0,
-		                  fOutputScale);
+		                  subdims, angles + iAngle, fOutputScale);
 		if (!ret)
 			return false;
 	}
@@ -303,25 +259,23 @@ bool BP(float* D_volumeData, unsigned int volumePitch,
 bool BP_SART(float* D_volumeData, unsigned int volumePitch,
              float* D_projData, unsigned int projPitch,
              unsigned int angle, const SDimensions& dims,
-             const float* angles, const float* TOffsets, float fOutputScale)
+             const SParProjection* angles, float fOutputScale)
 {
 	// Only one angle.
 	// We need to Clamp to the border pixels instead of to zero, because
 	// SART weights with ray length.
 	bindProjDataTexture(D_projData, projPitch, dims.iProjDets, 1, cudaAddressModeClamp);
 
-	float angle_sin = sinf(angles[angle]);
-	float angle_cos = cosf(angles[angle]);
-
-	float offset = 0.0f;
-	if (TOffsets)
-		offset = TOffsets[angle];
+	double d = angles[angle].fDetUX * angles[angle].fRayY - angles[angle].fDetUY * angles[angle].fRayX;
+	float angle_scaled_cos = angles[angle].fRayY / d;
+	float angle_scaled_sin = -angles[angle].fRayX / d; // TODO: Check signs
+	float angle_offset = (angles[angle].fDetSY * angles[angle].fRayX - angles[angle].fDetSX * angles[angle].fRayY) / d;
 
 	dim3 dimBlock(g_blockSlices, g_blockSliceSize);
 	dim3 dimGrid((dims.iVolWidth+g_blockSlices-1)/g_blockSlices,
 	             (dims.iVolHeight+g_blockSliceSize-1)/g_blockSliceSize);
 
-	devBP_SART<<<dimGrid, dimBlock>>>(D_volumeData, volumePitch, offset, angle_sin, angle_cos, dims, fOutputScale);
+	devBP_SART<<<dimGrid, dimBlock>>>(D_volumeData, volumePitch, angle_offset, angle_scaled_sin, angle_scaled_cos, dims, fOutputScale);
 	cudaThreadSynchronize();
 
 	cudaTextForceKernelsCompletion();
diff --git a/cuda/2d/par_bp.h b/cuda/2d/par_bp.h
index 64bcd34..102cb2b 100644
--- a/cuda/2d/par_bp.h
+++ b/cuda/2d/par_bp.h
@@ -35,13 +35,13 @@ namespace astraCUDA {
 
 _AstraExport bool BP(float* D_volumeData, unsigned int volumePitch,
         float* D_projData, unsigned int projPitch,
-        const SDimensions& dims, const float* angles,
-        const float* TOffsets, float fOutputScale);
+        const SDimensions& dims, const SParProjection* angles,
+        float fOutputScale);
 
 _AstraExport bool BP_SART(float* D_volumeData, unsigned int volumePitch,
              float* D_projData, unsigned int projPitch,
              unsigned int angle, const SDimensions& dims,
-             const float* angles, const float* TOffsets, float fOutputScale);
+             const SParProjection* angles, float fOutputScale);
 
 }
 
diff --git a/cuda/2d/par_fp.cu b/cuda/2d/par_fp.cu
index bb8b909..3c010b3 100644
--- a/cuda/2d/par_fp.cu
+++ b/cuda/2d/par_fp.cu
@@ -30,6 +30,7 @@ $Id$
 #include <cassert>
 #include <iostream>
 #include <list>
+#include <cmath>
 
 #include "util.h"
 #include "arith.h"
@@ -51,6 +52,7 @@ namespace astraCUDA {
 static const unsigned g_MaxAngles = 2560;
 __constant__ float gC_angle[g_MaxAngles];
 __constant__ float gC_angle_offset[g_MaxAngles];
+__constant__ float gC_angle_detsize[g_MaxAngles];
 
 
 // optimization parameters
@@ -63,10 +65,6 @@ static const unsigned int g_blockSlices = 64;
 #define iPREC_FACTOR 16
 
 
-// if necessary, a buffer of zeroes of size g_MaxAngles
-static float* g_pfZeroes = 0;
-
-
 static bool bindVolumeDataTexture(float* data, cudaArray*& dataArray, unsigned int pitch, unsigned int width, unsigned int height)
 {
 	cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<float>();
@@ -89,9 +87,10 @@ static bool bindVolumeDataTexture(float* data, cudaArray*& dataArray, unsigned i
 	return true;
 }
 
+
 // projection for angles that are roughly horizontal
-// theta between 45 and 135 degrees
-__global__ void FPhorizontal(float* D_projData, unsigned int projPitch, unsigned int startSlice, unsigned int startAngle, unsigned int endAngle, int regionOffset, const SDimensions dims, float outputScale)
+// (detector roughly vertical)
+__global__ void FPhorizontal_simple(float* D_projData, unsigned int projPitch, unsigned int startSlice, unsigned int startAngle, unsigned int endAngle, const SDimensions dims, float outputScale)
 {
 	const int relDet = threadIdx.x;
 	const int relAngle = threadIdx.y;
@@ -106,33 +105,7 @@ __global__ void FPhorizontal(float* D_projData, unsigned int projPitch, unsigned
 	const float sin_theta = __sinf(theta);
 
 	// compute start detector for this block/angle:
-	// (The same for all threadIdx.x)
-	// -------------------------------------
-
-	const int midSlice = startSlice + g_blockSlices / 2;
-
-	// ASSUMPTION: fDetScale >= 1.0f
-	// problem: detector regions get skipped because slice blocks aren't large
-	// enough
-	const unsigned int g_blockSliceSize = g_detBlockSize;
-
-	// project (midSlice,midRegion) on this thread's detector
-
-	const float fBase = 0.5f*dims.iProjDets - 0.5f +
-		(
-		    (midSlice - 0.5f*dims.iVolWidth + 0.5f) * cos_theta
-		  - (g_blockSliceSize/2 - 0.5f*dims.iVolHeight + 0.5f) * sin_theta
-		  + gC_angle_offset[angle]
-		) / dims.fDetScale;
-	int iBase = (int)floorf(fBase * fPREC_FACTOR);
-	int iInc = (int)floorf(g_blockSliceSize * sin_theta / dims.fDetScale * -fPREC_FACTOR);
-
-	// ASSUMPTION: 16 > regionOffset / fDetScale
-	const int detRegion = (iBase + (blockIdx.y - regionOffset)*iInc + 16*iPREC_FACTOR*g_detBlockSize) / (iPREC_FACTOR * g_detBlockSize) - 16;
-	const int detPrevRegion = (iBase + (blockIdx.y - regionOffset - 1)*iInc + 16*iPREC_FACTOR*g_detBlockSize) / (iPREC_FACTOR * g_detBlockSize) - 16;
-
-	if (blockIdx.y > 0 && detRegion == detPrevRegion)
-		return;
+	const int detRegion = blockIdx.y;
 
 	const int detector = detRegion * g_detBlockSize + relDet;
 
@@ -142,7 +115,7 @@ __global__ void FPhorizontal(float* D_projData, unsigned int projPitch, unsigned
 	if (detector < 0 || detector >= dims.iProjDets)
 		return;
 
-	const float fDetStep = -dims.fDetScale / sin_theta;
+	const float fDetStep = -gC_angle_detsize[angle] / sin_theta;
 	float fSliceStep = cos_theta / sin_theta;
 	float fDistCorr;
 	if (sin_theta > 0.0f)
@@ -192,9 +165,10 @@ __global__ void FPhorizontal(float* D_projData, unsigned int projPitch, unsigned
 	D_projData[angle*projPitch+detector] += fVal * fDistCorr;
 }
 
+
 // projection for angles that are roughly vertical
-// theta between 0 and 45, or 135 and 180 degrees
-__global__ void FPvertical(float* D_projData, unsigned int projPitch, unsigned int startSlice, unsigned int startAngle, unsigned int endAngle, int regionOffset, const SDimensions dims, float outputScale)
+// (detector roughly horizontal)
+__global__ void FPvertical_simple(float* D_projData, unsigned int projPitch, unsigned int startSlice, unsigned int startAngle, unsigned int endAngle, const SDimensions dims, float outputScale)
 {
 	const int relDet = threadIdx.x;
 	const int relAngle = threadIdx.y;
@@ -209,33 +183,7 @@ __global__ void FPvertical(float* D_projData, unsigned int projPitch, unsigned i
 	const float sin_theta = __sinf(theta);
 
 	// compute start detector for this block/angle:
-	// (The same for all threadIdx.x)
-	// -------------------------------------
-
-	const int midSlice = startSlice + g_blockSlices / 2;
-
-	// project (midSlice,midRegion) on this thread's detector
-
-	// ASSUMPTION: fDetScale >= 1.0f
-	// problem: detector regions get skipped because slice blocks aren't large
-	// enough
-	const unsigned int g_blockSliceSize = g_detBlockSize;
-
-	const float fBase = 0.5f*dims.iProjDets - 0.5f +
-		(
-		    (g_blockSliceSize/2 - 0.5f*dims.iVolWidth + 0.5f) * cos_theta
-		  - (midSlice - 0.5f*dims.iVolHeight + 0.5f) * sin_theta
-		  + gC_angle_offset[angle]
-		) / dims.fDetScale;
-	int iBase = (int)floorf(fBase * fPREC_FACTOR);
-	int iInc = (int)floorf(g_blockSliceSize * cos_theta / dims.fDetScale * fPREC_FACTOR);
-
-	// ASSUMPTION: 16 > regionOffset / fDetScale
-	const int detRegion = (iBase + (blockIdx.y - regionOffset)*iInc + 16*iPREC_FACTOR*g_detBlockSize) / (iPREC_FACTOR * g_detBlockSize) - 16;
-	const int detPrevRegion = (iBase + (blockIdx.y - regionOffset-1)*iInc + 16*iPREC_FACTOR*g_detBlockSize) / (iPREC_FACTOR * g_detBlockSize) - 16;
-
-	if (blockIdx.y > 0 && detRegion == detPrevRegion)
-		return;
+	const int detRegion = blockIdx.y;
 
 	const int detector = detRegion * g_detBlockSize + relDet;
 
@@ -245,7 +193,7 @@ __global__ void FPvertical(float* D_projData, unsigned int projPitch, unsigned i
 	if (detector < 0 || detector >= dims.iProjDets)
 		return;
 
-	const float fDetStep = dims.fDetScale / cos_theta;
+	const float fDetStep = gC_angle_detsize[angle] / cos_theta;
 	float fSliceStep = sin_theta / cos_theta;
 	float fDistCorr;
 	if (cos_theta < 0.0f)
@@ -293,183 +241,67 @@ __global__ void FPvertical(float* D_projData, unsigned int projPitch, unsigned i
 	D_projData[angle*projPitch+detector] += fVal * fDistCorr;
 }
 
-// projection for angles that are roughly horizontal
-// (detector roughly vertical)
-__global__ void FPhorizontal_simple(float* D_projData, unsigned int projPitch, unsigned int startSlice, unsigned int startAngle, unsigned int endAngle, const SDimensions dims, float outputScale)
-{
-	const int relDet = threadIdx.x;
-	const int relAngle = threadIdx.y;
-
-	int angle = startAngle + blockIdx.x * g_anglesPerBlock + relAngle;
-
-	if (angle >= endAngle)
-		return;
-
-	const float theta = gC_angle[angle];
-	const float cos_theta = __cosf(theta);
-	const float sin_theta = __sinf(theta);
-
-	// compute start detector for this block/angle:
-	const int detRegion = blockIdx.y;
-
-	const int detector = detRegion * g_detBlockSize + relDet;
-
-	// Now project the part of the ray to angle,detector through
-	// slices startSlice to startSlice+g_blockSlices-1
-
-	if (detector < 0 || detector >= dims.iProjDets)
-		return;
-
-	const float fDetStep = -dims.fDetScale / sin_theta;
-	float fSliceStep = cos_theta / sin_theta;
-	float fDistCorr;
-	if (sin_theta > 0.0f)
-		fDistCorr = -fDetStep;
-	else
-		fDistCorr = fDetStep;
-	fDistCorr *= outputScale;
-
-	float fVal = 0.0f;
-	// project detector on slice
-	float fP = (detector - 0.5f*dims.iProjDets + 0.5f - gC_angle_offset[angle]) * fDetStep + (startSlice - 0.5f*dims.iVolWidth + 0.5f) * fSliceStep + 0.5f*dims.iVolHeight - 0.5f + 0.5f;
-	float fS = startSlice + 0.5f;
-	int endSlice = startSlice + g_blockSlices;
-	if (endSlice > dims.iVolWidth)
-		endSlice = dims.iVolWidth;
 
-	if (dims.iRaysPerDet > 1) {
 
-		fP += (-0.5f*dims.iRaysPerDet + 0.5f)/dims.iRaysPerDet * fDetStep;
-		const float fSubDetStep = fDetStep / dims.iRaysPerDet;
-		fDistCorr /= dims.iRaysPerDet;
 
-		fSliceStep -= dims.iRaysPerDet * fSubDetStep;
+// Coordinates of center of detector pixel number t:
+// x = (t - 0.5*nDets + 0.5 - fOffset) * fSize * cos(fAngle)
+// y = - (t - 0.5*nDets + 0.5 - fOffset) * fSize * sin(fAngle)
 
-		for (int slice = startSlice; slice < endSlice; ++slice)
-		{
-			for (int iSubT = 0; iSubT < dims.iRaysPerDet; ++iSubT) {
-				fVal += tex2D(gT_volumeTexture, fS, fP);
-				fP += fSubDetStep;
-			}
-			fP += fSliceStep;
-			fS += 1.0f;
-		}
-
-	} else {
 
-		for (int slice = startSlice; slice < endSlice; ++slice)
-		{
-			fVal += tex2D(gT_volumeTexture, fS, fP);
-			fP += fSliceStep;
-			fS += 1.0f;
-		}
-
-
-	}
-
-	D_projData[angle*projPitch+detector] += fVal * fDistCorr;
-}
-
-
-// projection for angles that are roughly vertical
-// (detector roughly horizontal)
-__global__ void FPvertical_simple(float* D_projData, unsigned int projPitch, unsigned int startSlice, unsigned int startAngle, unsigned int endAngle, const SDimensions dims, float outputScale)
+static void convertAndUploadAngles(const SParProjection *projs, unsigned int nth, unsigned int ndets)
 {
-	const int relDet = threadIdx.x;
-	const int relAngle = threadIdx.y;
-
-	int angle = startAngle + blockIdx.x * g_anglesPerBlock + relAngle;
-
-	if (angle >= endAngle)
-		return;
-
-	const float theta = gC_angle[angle];
-	const float cos_theta = __cosf(theta);
-	const float sin_theta = __sinf(theta);
-
-	// compute start detector for this block/angle:
-	const int detRegion = blockIdx.y;
-
-	const int detector = detRegion * g_detBlockSize + relDet;
+	float *angles = new float[nth];
+	float *offset = new float[nth];
+	float *detsizes = new float[nth];
 
-	// Now project the part of the ray to angle,detector through
-	// slices startSlice to startSlice+g_blockSlices-1
+	for (int i = 0; i < nth; ++i) {
 
-	if (detector < 0 || detector >= dims.iProjDets)
-		return;
+#warning TODO: Replace by getParParamaters
 
-	const float fDetStep = dims.fDetScale / cos_theta;
-	float fSliceStep = sin_theta / cos_theta;
-	float fDistCorr;
-	if (cos_theta < 0.0f)
-		fDistCorr = -fDetStep;
-	else
-		fDistCorr = fDetStep;
-	fDistCorr *= outputScale;
+		// Take part of DetU orthogonal to Ray
+		double ux = projs[i].fDetUX;
+		double uy = projs[i].fDetUY;
 
-	float fVal = 0.0f;
-	float fP = (detector - 0.5f*dims.iProjDets + 0.5f - gC_angle_offset[angle]) * fDetStep + (startSlice - 0.5f*dims.iVolHeight + 0.5f) * fSliceStep + 0.5f*dims.iVolWidth - 0.5f + 0.5f;
-	float fS = startSlice+0.5f;
-	int endSlice = startSlice + g_blockSlices;
-	if (endSlice > dims.iVolHeight)
-		endSlice = dims.iVolHeight;
+		double t = (ux * projs[i].fRayX + uy * projs[i].fRayY) / (projs[i].fRayX * projs[i].fRayX + projs[i].fRayY * projs[i].fRayY);
 
-	if (dims.iRaysPerDet > 1) {
+		ux -= t * projs[i].fRayX;
+		uy -= t * projs[i].fRayY;
 
-		fP += (-0.5f*dims.iRaysPerDet + 0.5f)/dims.iRaysPerDet * fDetStep;
-		const float fSubDetStep = fDetStep / dims.iRaysPerDet;
-		fDistCorr /= dims.iRaysPerDet;
+		double angle = atan2(uy, ux);
 
-		fSliceStep -= dims.iRaysPerDet * fSubDetStep;
+		angles[i] = angle;
 
-		for (int slice = startSlice; slice < endSlice; ++slice)
-		{
-			for (int iSubT = 0; iSubT < dims.iRaysPerDet; ++iSubT) {
-				fVal += tex2D(gT_volumeTexture, fP, fS);
-				fP += fSubDetStep;
-			}
-			fP += fSliceStep;
-			fS += 1.0f;
-		}
+		double norm2 = uy * uy + ux * ux;
 
-	} else {
-
-		for (int slice = startSlice; slice < endSlice; ++slice)
-		{
-			fVal += tex2D(gT_volumeTexture, fP, fS);
-			fP += fSliceStep;
-			fS += 1.0f;
-		}
+		detsizes[i] = sqrt(norm2);
 
+		// CHECKME: SIGNS?
+		offset[i] = -0.5*ndets - (projs[i].fDetSY*uy + projs[i].fDetSX*ux) / norm2;
 	}
 
-	D_projData[angle*projPitch+detector] += fVal * fDistCorr;
+	cudaMemcpyToSymbol(gC_angle, angles, nth*sizeof(float), 0, cudaMemcpyHostToDevice); 
+	cudaMemcpyToSymbol(gC_angle_offset, offset, nth*sizeof(float), 0, cudaMemcpyHostToDevice); 
+	cudaMemcpyToSymbol(gC_angle_detsize, detsizes, nth*sizeof(float), 0, cudaMemcpyHostToDevice); 
 }
 
 
 
 bool FP_simple_internal(float* D_volumeData, unsigned int volumePitch,
                float* D_projData, unsigned int projPitch,
-               const SDimensions& dims, const float* angles,
-               const float* TOffsets, float outputScale)
+               const SDimensions& dims, const SParProjection* angles,
+               float outputScale)
 {
-	// TODO: load angles into constant memory in smaller blocks
 	assert(dims.iProjAngles <= g_MaxAngles);
 
+	assert(angles);
+
 	cudaArray* D_dataArray;
 	bindVolumeDataTexture(D_volumeData, D_dataArray, volumePitch, dims.iVolWidth, dims.iVolHeight);
 
-	cudaMemcpyToSymbol(gC_angle, angles, dims.iProjAngles*sizeof(float), 0, cudaMemcpyHostToDevice);
 
-	if (TOffsets) {
-		cudaMemcpyToSymbol(gC_angle_offset, TOffsets, dims.iProjAngles*sizeof(float), 0, cudaMemcpyHostToDevice);
-	} else {
-		if (!g_pfZeroes) {
-			g_pfZeroes = new float[g_MaxAngles];
-			memset(g_pfZeroes, 0, g_MaxAngles * sizeof(float));
-		}
-		cudaMemcpyToSymbol(gC_angle_offset, g_pfZeroes, dims.iProjAngles*sizeof(float), 0, cudaMemcpyHostToDevice);
-	}
+	convertAndUploadAngles(angles, dims.iProjAngles, dims.iProjDets);
+
 
 	dim3 dimBlock(g_detBlockSize, g_anglesPerBlock); // detector block size, angles
 
@@ -492,7 +324,7 @@ bool FP_simple_internal(float* D_volumeData, unsigned int volumePitch,
 		// Maybe we should detect corner cases and put them in the optimal
 		// group of angles.
 		if (a != dims.iProjAngles)
-			vertical = (fabsf(sinf(angles[a])) <= fabsf(cosf(angles[a])));
+			vertical = (fabsf(angles[a].fRayX) <= fabsf(angles[a].fRayY));
 		if (a == dims.iProjAngles || vertical != blockVertical) {
 			// block done
 
@@ -536,8 +368,8 @@ bool FP_simple_internal(float* D_volumeData, unsigned int volumePitch,
 
 bool FP_simple(float* D_volumeData, unsigned int volumePitch,
                float* D_projData, unsigned int projPitch,
-               const SDimensions& dims, const float* angles,
-               const float* TOffsets, float outputScale)
+               const SDimensions& dims, const SParProjection* angles,
+               float outputScale)
 {
 	for (unsigned int iAngle = 0; iAngle < dims.iProjAngles; iAngle += g_MaxAngles) {
 		SDimensions subdims = dims;
@@ -550,7 +382,7 @@ bool FP_simple(float* D_volumeData, unsigned int volumePitch,
 		ret = FP_simple_internal(D_volumeData, volumePitch,
 		                         D_projData + iAngle * projPitch, projPitch,
 		                         subdims, angles + iAngle,
-		                         TOffsets ? TOffsets + iAngle : 0, outputScale);
+		                         outputScale);
 		if (!ret)
 			return false;
 	}
@@ -559,106 +391,12 @@ bool FP_simple(float* D_volumeData, unsigned int volumePitch,
 
 bool FP(float* D_volumeData, unsigned int volumePitch,
         float* D_projData, unsigned int projPitch,
-        const SDimensions& dims, const float* angles,
-        const float* TOffsets, float outputScale)
+        const SDimensions& dims, const SParProjection* angles,
+        float outputScale)
 {
 	return FP_simple(D_volumeData, volumePitch, D_projData, projPitch,
-	                 dims, angles, TOffsets, outputScale);
-
-	// TODO: Fix bug in this non-simple FP with large detscale and TOffsets
-#if 0
-
-	// TODO: load angles into constant memory in smaller blocks
-	assert(dims.iProjAngles <= g_MaxAngles);
-
-	// TODO: compute region size dynamically to resolve these two assumptions
-	// ASSUMPTION: 16 > regionOffset / fDetScale
-	const unsigned int g_blockSliceSize = g_detBlockSize;
-	assert(16 > (g_blockSlices / g_blockSliceSize) / dims.fDetScale);
-	// ASSUMPTION: fDetScale >= 1.0f
-	assert(dims.fDetScale > 0.9999f);
-
-	cudaArray* D_dataArray;
-	bindVolumeDataTexture(D_volumeData, D_dataArray, volumePitch, dims.iVolWidth, dims.iVolHeight);
-
-	cudaMemcpyToSymbol(gC_angle, angles, dims.iProjAngles*sizeof(float), 0, cudaMemcpyHostToDevice);
-
-	if (TOffsets) {
-		cudaMemcpyToSymbol(gC_angle_offset, TOffsets, dims.iProjAngles*sizeof(float), 0, cudaMemcpyHostToDevice);
-	} else {
-		if (!g_pfZeroes) {
-			g_pfZeroes = new float[g_MaxAngles];
-			memset(g_pfZeroes, 0, g_MaxAngles * sizeof(float));
-		}
-		cudaMemcpyToSymbol(gC_angle_offset, g_pfZeroes, dims.iProjAngles*sizeof(float), 0, cudaMemcpyHostToDevice);
-	}
-
-	int regionOffset = g_blockSlices / g_blockSliceSize;
-
-	dim3 dimBlock(g_detBlockSize, g_anglesPerBlock); // region size, angles
-
-	std::list<cudaStream_t> streams;
-
+	                 dims, angles, outputScale);
 
-	// Run over all angles, grouping them into groups of the same
-	// orientation (roughly horizontal vs. roughly vertical).
-	// Start a stream of grids for each such group.
-
-	// TODO: Check if it's worth it to store this info instead
-	// of recomputing it every FP.
-
-	unsigned int blockStart = 0;
-	unsigned int blockEnd = 0;
-	bool blockVertical = false;
-	for (unsigned int a = 0; a <= dims.iProjAngles; ++a) {
-		bool vertical;
-		// TODO: Having <= instead of < below causes a 5% speedup.
-		// Maybe we should detect corner cases and put them in the optimal
-		// group of angles.
-		if (a != dims.iProjAngles)
-			vertical = (fabsf(sinf(angles[a])) <= fabsf(cosf(angles[a])));
-		if (a == dims.iProjAngles || vertical != blockVertical) {
-			// block done
-
-			blockEnd = a;
-			if (blockStart != blockEnd) {
-				unsigned int length = dims.iVolHeight;
-				if (blockVertical)
-					length = dims.iVolWidth;
-				dim3 dimGrid((blockEnd-blockStart+g_anglesPerBlock-1)/g_anglesPerBlock,
-				             (length+g_blockSliceSize-1)/g_blockSliceSize+2*regionOffset); // angle blocks, regions
-				// TODO: check if we can't immediately
-				//       destroy the stream after use
-				cudaStream_t stream;
-				cudaStreamCreate(&stream);
-				streams.push_back(stream);
-				//printf("angle block: %d to %d, %d\n", blockStart, blockEnd, blockVertical);
-				if (!blockVertical)
-					for (unsigned int i = 0; i < dims.iVolWidth; i += g_blockSlices)
-						FPhorizontal<<<dimGrid, dimBlock, 0, stream>>>(D_projData, projPitch, i, blockStart, blockEnd, regionOffset, dims, outputScale);
-				else
-					for (unsigned int i = 0; i < dims.iVolHeight; i += g_blockSlices)
-						FPvertical<<<dimGrid, dimBlock, 0, stream>>>(D_projData, projPitch, i, blockStart, blockEnd, regionOffset, dims, outputScale);
-			}
-			blockVertical = vertical;
-			blockStart = a;
-		}
-	}
-
-	for (std::list<cudaStream_t>::iterator iter = streams.begin(); iter != streams.end(); ++iter)
-		cudaStreamDestroy(*iter);
-
-	streams.clear();
-
-	cudaThreadSynchronize();
-
-	cudaTextForceKernelsCompletion();
-
-	cudaFreeArray(D_dataArray);
-		
-
-	return true;
-#endif
 }
 
 
diff --git a/cuda/2d/par_fp.h b/cuda/2d/par_fp.h
index 010d064..5fd593c 100644
--- a/cuda/2d/par_fp.h
+++ b/cuda/2d/par_fp.h
@@ -33,8 +33,8 @@ namespace astraCUDA {
 
 _AstraExport bool FP(float* D_volumeData, unsigned int volumePitch,
         float* D_projData, unsigned int projPitch,
-        const SDimensions& dims, const float* angles,
-        const float* TOffsets, float fOutputScale);
+        const SDimensions& dims, const SParProjection* angles,
+        float fOutputScale);
 
 }
 
diff --git a/cuda/2d/sart.cu b/cuda/2d/sart.cu
index c8608a3..a1085cd 100644
--- a/cuda/2d/sart.cu
+++ b/cuda/2d/sart.cu
@@ -254,10 +254,10 @@ bool SART::callFP_SART(float* D_volumeData, unsigned int volumePitch,
 {
 	SDimensions d = dims;
 	d.iProjAngles = 1;
-	if (angles) {
+	if (parProjs) {
 		assert(!fanProjs);
 		return FP(D_volumeData, volumePitch, D_projData, projPitch,
-		          d, &angles[angle], TOffsets, outputScale);
+		          d, &parProjs[angle], outputScale);
 	} else {
 		assert(fanProjs);
 		return FanFP(D_volumeData, volumePitch, D_projData, projPitch,
@@ -269,10 +269,10 @@ bool SART::callBP_SART(float* D_volumeData, unsigned int volumePitch,
                        float* D_projData, unsigned int projPitch,
                        unsigned int angle, float outputScale)
 {
-	if (angles) {
+	if (parProjs) {
 		assert(!fanProjs);
 		return BP_SART(D_volumeData, volumePitch, D_projData, projPitch,
-		               angle, dims, angles, TOffsets, outputScale);
+		               angle, dims, parProjs, outputScale);
 	} else {
 		assert(fanProjs);
 		return FanBP_SART(D_volumeData, volumePitch, D_projData, projPitch,
diff --git a/cuda/2d/sirt.cu b/cuda/2d/sirt.cu
index 4baaccb..9dc4f64 100644
--- a/cuda/2d/sirt.cu
+++ b/cuda/2d/sirt.cu
@@ -152,7 +152,7 @@ bool SIRT::precomputeWeights()
 bool SIRT::doSlabCorrections()
 {
 	// This function compensates for effectively infinitely large slab-like
-	// objects of finite thickness 1.
+	// objects of finite thickness 1 in a parallel beam geometry.
 
 	// Each ray through the object has an intersection of length d/cos(alpha).
 	// The length of the ray actually intersecting the reconstruction volume is
@@ -170,6 +170,10 @@ bool SIRT::doSlabCorrections()
 	if (useVolumeMask || useSinogramMask)
 		return false;
 
+	// Parallel-beam only
+	if (!parProjs)
+		return false;
+
 	// multiply by line weights
 	processSino<opDiv>(D_sinoData, D_lineWeight, projPitch, dims);
 
@@ -181,7 +185,10 @@ bool SIRT::doSlabCorrections()
 	float bound = cosf(1.3963f);
 	float* t = (float*)D_sinoData;
 	for (int i = 0; i < dims.iProjAngles; ++i) {
-		float f = fabs(cosf(angles[i]));
+		// TODO: Checkme
+		// TODO: Replace by getParParameters
+		double angle = atan2(parProjs[i].fRayX, -parProjs[i].fRayY);
+		float f = fabs(cosf(angle));
 
 		if (f < bound)
 			f = bound;
@@ -189,7 +196,6 @@ bool SIRT::doSlabCorrections()
 		processSino<opMul>(t, f, sinoPitch, subdims);
 		t += sinoPitch;
 	}
-
 	return true;
 }
 
@@ -299,40 +305,6 @@ float SIRT::computeDiffNorm()
 }
 
 
-bool doSIRT(float* D_volumeData, unsigned int volumePitch,
-            float* D_sinoData, unsigned int sinoPitch,
-            float* D_maskData, unsigned int maskPitch,
-            const SDimensions& dims, const float* angles,
-            const float* TOffsets, unsigned int iterations)
-{
-	SIRT sirt;
-	bool ok = true;
-
-	ok &= sirt.setGeometry(dims, angles);
-	if (D_maskData)
-		ok &= sirt.enableVolumeMask();
-	if (TOffsets)
-		ok &= sirt.setTOffsets(TOffsets);
-
-	if (!ok)
-		return false;
-
-	ok = sirt.init();
-	if (!ok)
-		return false;
-
-	if (D_maskData)
-		ok &= sirt.setVolumeMask(D_maskData, maskPitch);
-
-	ok &= sirt.setBuffers(D_volumeData, volumePitch, D_sinoData, sinoPitch);
-	if (!ok)
-		return false;
-
-	ok = sirt.iterate(iterations);
-
-	return ok;
-}
-
 }
 
 #ifdef STANDALONE
diff --git a/cuda/3d/fdk.cu b/cuda/3d/fdk.cu
index 0e13be1..e7418a2 100644
--- a/cuda/3d/fdk.cu
+++ b/cuda/3d/fdk.cu
@@ -353,7 +353,7 @@ bool FDK_Filter(cudaPitchedPtr D_projData,
 	// The filtering is a regular ramp filter per detector line.
 
 	int iPaddedDetCount = calcNextPowerOfTwo(2 * dims.iProjU);
-	int iHalfFFTSize = calcFFTFourSize(iPaddedDetCount);
+	int iHalfFFTSize = astraCUDA::calcFFTFourierSize(iPaddedDetCount);
 	int projPitch = D_projData.pitch/sizeof(float);
 	
 
@@ -361,22 +361,22 @@ bool FDK_Filter(cudaPitchedPtr D_projData,
 	float* D_sinoData = (float*)D_projData.ptr;
 
 	cufftComplex * D_sinoFFT = NULL;
-	allocateComplexOnDevice(dims.iProjAngles, iHalfFFTSize, &D_sinoFFT);
+	astraCUDA::allocateComplexOnDevice(dims.iProjAngles, iHalfFFTSize, &D_sinoFFT);
 
 	bool ok = true;
 
 	for (int v = 0; v < dims.iProjV; ++v) {
 
-		ok = runCudaFFT(dims.iProjAngles, D_sinoData, projPitch,
+		ok = astraCUDA::runCudaFFT(dims.iProjAngles, D_sinoData, projPitch,
 		                dims.iProjU, iPaddedDetCount, iHalfFFTSize,
 		                D_sinoFFT);
 
 		if (!ok) break;
 
-		applyFilter(dims.iProjAngles, iHalfFFTSize, D_sinoFFT, D_filter);
+		astraCUDA::applyFilter(dims.iProjAngles, iHalfFFTSize, D_sinoFFT, D_filter);
 
 
-		ok = runCudaIFFT(dims.iProjAngles, D_sinoFFT, D_sinoData, projPitch,
+		ok = astraCUDA::runCudaIFFT(dims.iProjAngles, D_sinoFFT, D_sinoData, projPitch,
 		                 dims.iProjU, iPaddedDetCount, iHalfFFTSize);
 
 		if (!ok) break;
@@ -384,7 +384,7 @@ bool FDK_Filter(cudaPitchedPtr D_projData,
 		D_sinoData += (dims.iProjAngles * projPitch);
 	}
 
-	freeComplexOnDevice(D_sinoFFT);
+	astraCUDA::freeComplexOnDevice(D_sinoFFT);
 
 	return ok;
 }
@@ -401,7 +401,7 @@ bool FDK(cudaPitchedPtr D_volumeData,
 	// TODO: Check errors
 	cufftComplex * D_filter;
 	int iPaddedDetCount = calcNextPowerOfTwo(2 * dims.iProjU);
-	int iHalfFFTSize = calcFFTFourSize(iPaddedDetCount);
+	int iHalfFFTSize = astraCUDA::calcFFTFourierSize(iPaddedDetCount);
 
 	ok = FDK_PreWeight(D_projData, fSrcOrigin, fDetOrigin,
 	                fSrcZ, fDetZ, fDetUSize, fDetVSize,
@@ -412,11 +412,11 @@ bool FDK(cudaPitchedPtr D_volumeData,
 	cufftComplex *pHostFilter = new cufftComplex[dims.iProjAngles * iHalfFFTSize];
 	memset(pHostFilter, 0, sizeof(cufftComplex) * dims.iProjAngles * iHalfFFTSize);
 
-	genFilter(FILTER_RAMLAK, 1.0f, dims.iProjAngles, pHostFilter, iPaddedDetCount, iHalfFFTSize);
+	astraCUDA::genFilter(astra::FILTER_RAMLAK, 1.0f, dims.iProjAngles, pHostFilter, iPaddedDetCount, iHalfFFTSize);
 
 
-	allocateComplexOnDevice(dims.iProjAngles, iHalfFFTSize, &D_filter);
-	uploadComplexArrayToDevice(dims.iProjAngles, iHalfFFTSize, pHostFilter, D_filter);
+	astraCUDA::allocateComplexOnDevice(dims.iProjAngles, iHalfFFTSize, &D_filter);
+	astraCUDA::uploadComplexArrayToDevice(dims.iProjAngles, iHalfFFTSize, pHostFilter, D_filter);
 
 	delete [] pHostFilter;
 
@@ -430,7 +430,7 @@ bool FDK(cudaPitchedPtr D_volumeData,
 	                bShortScan, dims, angles);
 
 	// Clean up filter
-	freeComplexOnDevice(D_filter);
+	astraCUDA::freeComplexOnDevice(D_filter);
 
 
 	if (!ok)
diff --git a/include/astra/CudaFilteredBackProjectionAlgorithm.h b/include/astra/CudaFilteredBackProjectionAlgorithm.h
index cf1f19f..180e001 100644
--- a/include/astra/CudaFilteredBackProjectionAlgorithm.h
+++ b/include/astra/CudaFilteredBackProjectionAlgorithm.h
@@ -26,28 +26,24 @@ along with the ASTRA Toolbox. If not, see <http://www.gnu.org/licenses/>.
 $Id$
 */
 
-#ifndef CUDAFILTEREDBACKPROJECTIONALGORITHM2_H
-#define CUDAFILTEREDBACKPROJECTIONALGORITHM2_H
+#ifndef CUDAFILTEREDBACKPROJECTIONALGORITHM_H
+#define CUDAFILTEREDBACKPROJECTIONALGORITHM_H
 
 #include <astra/Float32ProjectionData2D.h>
 #include <astra/Float32VolumeData2D.h>
-#include <astra/ReconstructionAlgorithm2D.h>
+#include <astra/CudaReconstructionAlgorithm2D.h>
 
 #include "../../cuda/2d/astra.h"
 
 namespace astra
 {
 
-class _AstraExport CCudaFilteredBackProjectionAlgorithm : public CReconstructionAlgorithm2D
+class _AstraExport CCudaFilteredBackProjectionAlgorithm : public CCudaReconstructionAlgorithm2D
 {
 public:
 	static std::string type;
 
 private:
-	CFloat32ProjectionData2D * m_pSinogram;
-	CFloat32VolumeData2D * m_pReconstruction;
-	int m_iGPUIndex;
-	int m_iPixelSuperSampling;
 	E_FBPFILTER m_eFilter;
 	float * m_pfFilter;
 	int m_iFilterWidth;	// number of elements per projection direction in filter
@@ -64,15 +60,6 @@ public:
 	virtual bool initialize(const Config& _cfg);
 	bool initialize(CFloat32ProjectionData2D * _pSinogram, CFloat32VolumeData2D * _pReconstruction, E_FBPFILTER _eFilter, const float * _pfFilter = NULL, int _iFilterWidth = 0, int _iGPUIndex = -1, float _fFilterParameter = -1.0f);
 
-	virtual void run(int _iNrIterations = 0);
-
-	static int calcIdealRealFilterWidth(int _iDetectorCount);
-	static int calcIdealFourierFilterWidth(int _iDetectorCount);
-	
-	//debug
-	static void testGenFilter(E_FBPFILTER _eFilter, float _fD, int _iProjectionCount, cufftComplex * _pFilter, int _iFFTRealDetectorCount, int _iFFTFourierDetectorCount);
-	static int getGPUCount();
-
 	/** Get a description of the class.
 	 *
 	 * @return description string
@@ -82,12 +69,7 @@ public:
 protected:
 	bool check();
 
-	AstraFBP* m_pFBP;
-
-	bool m_bAstraFBPInit;
-
-	void initializeFromProjector();
-	virtual bool requiresProjector() const { return false; }
+	virtual void initCUDAAlgorithm();
 };
 
 // inline functions
@@ -95,4 +77,4 @@ inline std::string CCudaFilteredBackProjectionAlgorithm::description() const { r
 
 }
 
-#endif /* CUDAFILTEREDBACKPROJECTIONALGORITHM2_H */
+#endif /* CUDAFILTEREDBACKPROJECTIONALGORITHM_H */
diff --git a/include/astra/GeometryUtil2D.h b/include/astra/GeometryUtil2D.h
index 680cecd..2f03062 100644
--- a/include/astra/GeometryUtil2D.h
+++ b/include/astra/GeometryUtil2D.h
@@ -32,27 +32,72 @@ $Id$
 namespace astra {
 
 struct SParProjection {
-    // the ray direction
-    float fRayX, fRayY;
+	// the ray direction
+	float fRayX, fRayY;
+
+	// the start of the (linear) detector
+	float fDetSX, fDetSY;
+
+	// the length of a single detector pixel
+	float fDetUX, fDetUY;
+
+
+	void translate(double dx, double dy) {
+		fDetSX += dx;
+		fDetSY += dy;
+	}
+	void scale(double factor) {
+		fRayX *= factor;
+		fRayY *= factor;
+		fDetSX *= factor;
+		fDetSY *= factor;
+		fDetUX *= factor;
+		fDetUY *= factor;
+	}
+};
 
-    // the start of the (linear) detector
-    float fDetSX, fDetSY;
 
-    // the length of a single detector pixel
-    float fDetUX, fDetUY;
+struct SFanProjection {
+	// the source
+	float fSrcX, fSrcY;
+
+	// the start of the (linear) detector
+	float fDetSX, fDetSY;
+
+	// the length of a single detector pixel
+	float fDetUX, fDetUY;
+
+	void translate(double dx, double dy) {
+		fSrcX += dx;
+		fSrcY += dy;
+		fDetSX += dx;
+		fDetSY += dy;
+	}
+	void scale(double factor) {
+		fSrcX *= factor;
+		fSrcY *= factor;
+		fDetSX *= factor;
+		fDetSY *= factor;
+		fDetUX *= factor;
+		fDetUY *= factor;
+	}
 };
 
 
-struct SFanProjection {
-        // the source
-        float fSrcX, fSrcY;
 
-        // the start of the (linear) detector
-        float fDetSX, fDetSY;
+SParProjection* genParProjections(unsigned int iProjAngles,
+                                  unsigned int iProjDets,
+                                  double fDetSize,
+                                  const float *pfAngles,
+                                  const float *pfExtraOffsets);
 
-        // the length of a single detector pixel
-        float fDetUX, fDetUY;
-};
+SFanProjection* genFanProjections(unsigned int iProjAngles,
+                                  unsigned int iProjDets,
+                                  double fOriginSource, double fOriginDetector,
+                                  double fDetSize,
+                                  const float *pfAngles);
+
+bool getFanParameters(const SFanProjection &proj, unsigned int iProjDets, float &fAngle, float &fOriginSource, float &fOriginDetector, float &fDetSize, float &fOffset);
 
 }
 
diff --git a/include/astra/ParallelProjectionGeometry2D.h b/include/astra/ParallelProjectionGeometry2D.h
index 36b4b6f..ca67ba7 100644
--- a/include/astra/ParallelProjectionGeometry2D.h
+++ b/include/astra/ParallelProjectionGeometry2D.h
@@ -84,8 +84,7 @@ public:
 	CParallelProjectionGeometry2D(int _iProjectionAngleCount, 
 								  int _iDetectorCount, 
 								  float32 _fDetectorWidth, 
-								  const float32* _pfProjectionAngles,
-								  const float32* _pfExtraDetectorOffsets = 0);
+								  const float32* _pfProjectionAngles);
 
 	/** Copy constructor. 
 	 */
@@ -117,8 +116,7 @@ public:
 	bool initialize(int _iProjectionAngleCount, 
 					int _iDetectorCount, 
 					float32 _fDetectorWidth, 
-					const float32* _pfProjectionAngles,
-					const float32* _pfExtraDetectorOffsets = 0);
+					const float32* _pfProjectionAngles);
 
 	/** Create a hard copy. 
 	*/
diff --git a/include/astra/ProjectionGeometry2D.h b/include/astra/ProjectionGeometry2D.h
index d26e6a7..b656d97 100644
--- a/include/astra/ProjectionGeometry2D.h
+++ b/include/astra/ProjectionGeometry2D.h
@@ -90,8 +90,7 @@ protected:
 	CProjectionGeometry2D(int _iProjectionAngleCount, 
 						  int _iDetectorCount, 
 						  float32 _fDetectorWidth, 
-						  const float32* _pfProjectionAngles,
-						  const float32* _pfExtraDetectorOffsets = 0);
+						  const float32* _pfProjectionAngles);
 
 	/** Copy constructor. 
 	 */
@@ -117,8 +116,7 @@ protected:
 	bool _initialize(int _iProjectionAngleCount,
 					 int _iDetectorCount,
 					 float32 _fDetectorWidth,
-					 const float32* _pfProjectionAngles,
-					 const float32* _pfExtraDetectorOffsets = 0);
+					 const float32* _pfProjectionAngles);
 
 public:
 
diff --git a/samples/matlab/s014_FBP.m b/samples/matlab/s014_FBP.m
index b73149c..faf91fb 100644
--- a/samples/matlab/s014_FBP.m
+++ b/samples/matlab/s014_FBP.m
@@ -9,7 +9,7 @@
 % -----------------------------------------------------------------------
 
 vol_geom = astra_create_vol_geom(256, 256);
-proj_geom = astra_create_proj_geom('parallel', 1.0, 384, linspace2(0,pi,180));
+proj_geom = astra_create_proj_geom('fanflat', 1.0, 384, linspace2(0,2*pi,1800), 500, 0);
 
 % As before, create a sinogram from a phantom
 P = phantom(256);
diff --git a/src/CudaFilteredBackProjectionAlgorithm.cpp b/src/CudaFilteredBackProjectionAlgorithm.cpp
index aa97eec..9d23253 100644
--- a/src/CudaFilteredBackProjectionAlgorithm.cpp
+++ b/src/CudaFilteredBackProjectionAlgorithm.cpp
@@ -33,6 +33,7 @@ $Id$
 #include "astra/AstraObjectManager.h"
 #include "astra/CudaProjector2D.h"
 #include "../cuda/2d/astra.h"
+#include "../cuda/2d/fbp.h"
 
 #include "astra/Logging.h"
 
@@ -44,8 +45,7 @@ string CCudaFilteredBackProjectionAlgorithm::type = "FBP_CUDA";
 CCudaFilteredBackProjectionAlgorithm::CCudaFilteredBackProjectionAlgorithm()
 {
 	m_bIsInitialized = false;
-	CReconstructionAlgorithm2D::_clear();
-	m_pFBP = 0;
+	CCudaReconstructionAlgorithm2D::_clear();
 	m_pfFilter = NULL;
 	m_fFilterParameter = -1.0f;
 	m_fFilterD = 1.0f;
@@ -53,35 +53,8 @@ CCudaFilteredBackProjectionAlgorithm::CCudaFilteredBackProjectionAlgorithm()
 
 CCudaFilteredBackProjectionAlgorithm::~CCudaFilteredBackProjectionAlgorithm()
 {
-	if(m_pfFilter != NULL)
-	{
-		delete [] m_pfFilter;
-		m_pfFilter = NULL;
-	}
-
-	if(m_pFBP != NULL)
-	{
-		delete m_pFBP;
-		m_pFBP = NULL;
-	}
-}
-
-void CCudaFilteredBackProjectionAlgorithm::initializeFromProjector()
-{
-	m_iPixelSuperSampling = 1;
-	m_iGPUIndex = -1;
-
-	// Projector
-	CCudaProjector2D* pCudaProjector = dynamic_cast<CCudaProjector2D*>(m_pProjector);
-	if (!pCudaProjector) {
-		if (m_pProjector) {
-			ASTRA_WARN("non-CUDA Projector2D passed to FBP_CUDA");
-		}
-	} else {
-		m_iPixelSuperSampling = pCudaProjector->getVoxelSuperSampling();
-		m_iGPUIndex = pCudaProjector->getGPUIndex();
-	}
-
+	delete[] m_pfFilter;
+	m_pfFilter = NULL;
 }
 
 bool CCudaFilteredBackProjectionAlgorithm::initialize(const Config& _cfg)
@@ -92,54 +65,28 @@ bool CCudaFilteredBackProjectionAlgorithm::initialize(const Config& _cfg)
 	// if already initialized, clear first
 	if (m_bIsInitialized)
 	{
+#warning FIXME Necessary?
 		clear();
 	}
 
-	// Projector
-	XMLNode node = _cfg.self.getSingleNode("ProjectorId");
-	CCudaProjector2D* pCudaProjector = 0;
-	if (node) {
-		int id = node.getContentInt();
-		CProjector2D *projector = CProjector2DManager::getSingleton().get(id);
-		pCudaProjector = dynamic_cast<CCudaProjector2D*>(projector);
-		if (!pCudaProjector) {
-			ASTRA_WARN("non-CUDA Projector2D passed");
-		}
-	}
-	CC.markNodeParsed("ProjectorId");
-
-
-	// sinogram data
-	node = _cfg.self.getSingleNode("ProjectionDataId");
-	ASTRA_CONFIG_CHECK(node, "CudaFBP", "No ProjectionDataId tag specified.");
-	int id = node.getContentInt();
-	m_pSinogram = dynamic_cast<CFloat32ProjectionData2D*>(CData2DManager::getSingleton().get(id));
-	CC.markNodeParsed("ProjectionDataId");
+	m_bIsInitialized = CCudaReconstructionAlgorithm2D::initialize(_cfg);
+	if (!m_bIsInitialized)
+		return false;
 
-	// reconstruction data
-	node = _cfg.self.getSingleNode("ReconstructionDataId");
-	ASTRA_CONFIG_CHECK(node, "CudaFBP", "No ReconstructionDataId tag specified.");
-	id = node.getContentInt();
-	m_pReconstruction = dynamic_cast<CFloat32VolumeData2D*>(CData2DManager::getSingleton().get(id));
-	CC.markNodeParsed("ReconstructionDataId");
 
 	// filter type
-	node = _cfg.self.getSingleNode("FilterType");
+	XMLNode node = _cfg.self.getSingleNode("FilterType");
 	if (node)
-	{
 		m_eFilter = _convertStringToFilter(node.getContent().c_str());
-	}
 	else
-	{
 		m_eFilter = FILTER_RAMLAK;
-	}
 	CC.markNodeParsed("FilterType");
 
 	// filter
 	node = _cfg.self.getSingleNode("FilterSinogramId");
 	if (node)
 	{
-		id = node.getContentInt();
+		int id = node.getContentInt();
 		const CFloat32ProjectionData2D * pFilterData = dynamic_cast<CFloat32ProjectionData2D*>(CData2DManager::getSingleton().get(id));
 		m_iFilterWidth = pFilterData->getGeometry()->getDetectorCount();
 		int iFilterProjectionCount = pFilterData->getGeometry()->getProjectionAngleCount();
@@ -188,20 +135,9 @@ bool CCudaFilteredBackProjectionAlgorithm::initialize(const Config& _cfg)
 
 	initializeFromProjector();
 
-	// Deprecated options
-	m_iPixelSuperSampling = (int)_cfg.self.getOptionNumerical("PixelSuperSampling", m_iPixelSuperSampling);
-	CC.markOptionParsed("PixelSuperSampling");
 
-	// GPU number
-	m_iGPUIndex = (int)_cfg.self.getOptionNumerical("GPUindex", -1);
-	m_iGPUIndex = (int)_cfg.self.getOptionNumerical("GPUIndex", m_iGPUIndex);
-	CC.markOptionParsed("GPUIndex");
-	if (!_cfg.self.hasOption("GPUIndex"))
-		CC.markOptionParsed("GPUindex");
-
-
-	m_pFBP = new AstraFBP;
-	m_bAstraFBPInit = false;
+	m_pAlgo = new astraCUDA::FBP();
+	m_bAlgoInit = false;
 
 	return check();
 }
@@ -226,9 +162,8 @@ bool CCudaFilteredBackProjectionAlgorithm::initialize(CFloat32ProjectionData2D *
 	// success
 	m_bIsInitialized = true;
 
-	m_pFBP = new AstraFBP;
-
-	m_bAstraFBPInit = false;
+	m_pAlgo = new astraCUDA::FBP();
+	m_bAlgoInit = false;
 
 	if(_pfFilter != NULL)
 	{
@@ -256,107 +191,26 @@ bool CCudaFilteredBackProjectionAlgorithm::initialize(CFloat32ProjectionData2D *
 	return check();
 }
 
-void CCudaFilteredBackProjectionAlgorithm::run(int _iNrIterations /* = 0 */)
-{
-	// check initialized
-	ASTRA_ASSERT(m_bIsInitialized);
-
-	if (!m_bAstraFBPInit) {
-
-		const CVolumeGeometry2D& volgeom = *m_pReconstruction->getGeometry();
-		const CParallelProjectionGeometry2D* parprojgeom = dynamic_cast<CParallelProjectionGeometry2D*>(m_pSinogram->getGeometry());
-		const CFanFlatProjectionGeometry2D* fanprojgeom = dynamic_cast<CFanFlatProjectionGeometry2D*>(m_pSinogram->getGeometry());
-
-		bool ok = true;
-
-		// TODO: non-square pixels?
-		ok &= m_pFBP->setReconstructionGeometry(volgeom.getGridColCount(),
-		                                         volgeom.getGridRowCount(),
-		                                         volgeom.getPixelLengthX());
-		int iDetectorCount;
-		if (parprojgeom) {
-
-			float *offsets, *angles, detSize, outputScale;
-
-			ok = convertAstraGeometry(&volgeom, parprojgeom, offsets, angles, detSize, outputScale);
-
-
-			ok &= m_pFBP->setProjectionGeometry(parprojgeom->getProjectionAngleCount(),
-			                                     parprojgeom->getDetectorCount(),
-			                                     angles,
-			                                     parprojgeom->getDetectorWidth());
-			iDetectorCount = parprojgeom->getDetectorCount();
 
-			// TODO: Are detSize and outputScale handled correctly?
-
-			if (offsets)
-				ok &= m_pFBP->setTOffsets(offsets);
-			ASTRA_ASSERT(ok);
-
-			delete[] offsets;
-			delete[] angles;
-
-		} else if (fanprojgeom) {
-
-			astraCUDA::SFanProjection* projs;
-			float outputScale;
-
-			// FIXME: Implement this, and clean up the interface to AstraFBP.
-			if (abs(volgeom.getWindowMinX() + volgeom.getWindowMaxX()) > 0.00001 * volgeom.getPixelLengthX()) {
-				// Off-center volume geometry isn't supported yet
-				ASTRA_ASSERT(false);
-			}
-			if (abs(volgeom.getWindowMinY() + volgeom.getWindowMaxY()) > 0.00001 * volgeom.getPixelLengthY()) {
-				// Off-center volume geometry isn't supported yet
-				ASTRA_ASSERT(false);
-			}
-
-			ok = convertAstraGeometry(&volgeom, fanprojgeom, projs, outputScale);
-
-			// CHECKME: outputScale?
-
-			ok &= m_pFBP->setFanGeometry(fanprojgeom->getProjectionAngleCount(),
-			                             fanprojgeom->getDetectorCount(),
-			                             projs,
-			                             fanprojgeom->getProjectionAngles(),
-			                             fanprojgeom->getOriginSourceDistance(),
-			                             fanprojgeom->getOriginDetectorDistance(),
-
-		                                     fanprojgeom->getDetectorWidth(),
-			                             m_bShortScan);
-
-			iDetectorCount = fanprojgeom->getDetectorCount();
-
-			delete[] projs;
-		} else {
-			assert(false);
-		}
-
-		ok &= m_pFBP->setPixelSuperSampling(m_iPixelSuperSampling);
-
-		ASTRA_ASSERT(ok);
+void CCudaFilteredBackProjectionAlgorithm::initCUDAAlgorithm()
+{
+	CCudaReconstructionAlgorithm2D::initCUDAAlgorithm();
 
-		ok &= m_pFBP->init(m_iGPUIndex);
-		ASTRA_ASSERT(ok);
+	astraCUDA::FBP* pFBP = dynamic_cast<astraCUDA::FBP*>(m_pAlgo);
 
-		ok &= m_pFBP->setSinogram(m_pSinogram->getDataConst(), iDetectorCount);
+	bool ok = pFBP->setFilter(m_eFilter, m_pfFilter, m_iFilterWidth, m_fFilterD, m_fFilterParameter);
+	if (!ok) {
+		ASTRA_ERROR("CCudaFilteredBackProjectionAlgorithm: Failed to set filter");
 		ASTRA_ASSERT(ok);
-
-		ok &= m_pFBP->setFilter(m_eFilter, m_pfFilter, m_iFilterWidth, m_fFilterD, m_fFilterParameter);
-		ASTRA_ASSERT(ok);
-
-		m_bAstraFBPInit = true;
 	}
 
-	bool ok = m_pFBP->run();
-	ASTRA_ASSERT(ok);
-
-	const CVolumeGeometry2D& volgeom = *m_pReconstruction->getGeometry();
-	ok &= m_pFBP->getReconstruction(m_pReconstruction->getData(), volgeom.getGridColCount());
-
-	ASTRA_ASSERT(ok);
+	ok &= pFBP->setShortScan(m_bShortScan);
+	if (!ok) {
+		ASTRA_ERROR("CCudaFilteredBackProjectionAlgorithm: Failed to set short-scan mode");
+	}
 }
 
+
 bool CCudaFilteredBackProjectionAlgorithm::check()
 {
 	// check pointers
@@ -395,16 +249,6 @@ static int calcNextPowerOfTwo(int _iValue)
 	return iOutput;
 }
 
-int CCudaFilteredBackProjectionAlgorithm::calcIdealRealFilterWidth(int _iDetectorCount)
-{
-	return calcNextPowerOfTwo(_iDetectorCount);
-}
-
-int CCudaFilteredBackProjectionAlgorithm::calcIdealFourierFilterWidth(int _iDetectorCount)
-{
-	return (calcNextPowerOfTwo(_iDetectorCount) / 2 + 1);
-}
-
 static bool stringCompareLowerCase(const char * _stringA, const char * _stringB)
 {
 	int iCmpReturn = 0;
@@ -518,12 +362,3 @@ E_FBPFILTER CCudaFilteredBackProjectionAlgorithm::_convertStringToFilter(const c
 	return output;
 }
 
-void CCudaFilteredBackProjectionAlgorithm::testGenFilter(E_FBPFILTER _eFilter, float _fD, int _iProjectionCount, cufftComplex * _pFilter, int _iFFTRealDetectorCount, int _iFFTFourierDetectorCount)
-{
-	genFilter(_eFilter, _fD, _iProjectionCount, _pFilter, _iFFTRealDetectorCount, _iFFTFourierDetectorCount);
-}
-
-int CCudaFilteredBackProjectionAlgorithm::getGPUCount()
-{
-	return 0;
-}
diff --git a/src/CudaForwardProjectionAlgorithm.cpp b/src/CudaForwardProjectionAlgorithm.cpp
index 80f2e02..bdd7dd0 100644
--- a/src/CudaForwardProjectionAlgorithm.cpp
+++ b/src/CudaForwardProjectionAlgorithm.cpp
@@ -221,56 +221,48 @@ void CCudaForwardProjectionAlgorithm::run(int)
 	// check initialized
 	assert(m_bIsInitialized);
 
-	CVolumeGeometry2D* pVolGeom = m_pVolume->getGeometry();
-	const CParallelProjectionGeometry2D* parProjGeom = dynamic_cast<CParallelProjectionGeometry2D*>(m_pSinogram->getGeometry());
-	const CFanFlatProjectionGeometry2D* fanProjGeom = dynamic_cast<CFanFlatProjectionGeometry2D*>(m_pSinogram->getGeometry());
-	const CFanFlatVecProjectionGeometry2D* fanVecProjGeom = dynamic_cast<CFanFlatVecProjectionGeometry2D*>(m_pSinogram->getGeometry());
+	bool ok;
 
-	bool ok = false;
-	if (parProjGeom) {
+	const CVolumeGeometry2D* pVolGeom = m_pVolume->getGeometry();
+	const CProjectionGeometry2D* pProjGeom = m_pSinogram->getGeometry();
+	astraCUDA::SDimensions dims;
 
-		float *offsets, *angles, detSize, outputScale;
-		ok = convertAstraGeometry(pVolGeom, parProjGeom, offsets, angles, detSize, outputScale);
+	ok = convertAstraGeometry_dims(pVolGeom, pProjGeom, dims);
 
-		ASTRA_ASSERT(ok); // FIXME
+	if (!ok)
+		return;
 
-		// FIXME: Output scaling
+	astraCUDA::SParProjection* pParProjs = 0;
+	astraCUDA::SFanProjection* pFanProjs = 0;
+	float fOutputScale = 1.0f;
 
-		ok = astraCudaFP(m_pVolume->getDataConst(), m_pSinogram->getData(),
-		                 pVolGeom->getGridColCount(), pVolGeom->getGridRowCount(),
-		                 parProjGeom->getProjectionAngleCount(),
-		                 parProjGeom->getDetectorCount(),
-		                 angles, offsets, detSize,
-		                 m_iDetectorSuperSampling, 1.0f * outputScale, m_iGPUIndex);
-
-		delete[] offsets;
-		delete[] angles;
+	ok = convertAstraGeometry(pVolGeom, pProjGeom, pParProjs, pFanProjs, fOutputScale);
+	if (!ok)
+		return;
 
-	} else if (fanProjGeom || fanVecProjGeom) {
+	if (pParProjs) {
+		assert(!pFanProjs);
 
-		astraCUDA::SFanProjection* projs;
-		float outputScale;
+		ok = astraCudaFP(m_pVolume->getDataConst(), m_pSinogram->getData(),
+		                 pVolGeom->getGridColCount(), pVolGeom->getGridRowCount(),
+		                 pProjGeom->getProjectionAngleCount(),
+		                 pProjGeom->getDetectorCount(),
+		                 pParProjs,
+		                 m_iDetectorSuperSampling, 1.0f * fOutputScale, m_iGPUIndex);
 
-		if (fanProjGeom) {
-			ok = convertAstraGeometry(pVolGeom, fanProjGeom, projs, outputScale);
-		} else {
-			ok = convertAstraGeometry(pVolGeom, fanVecProjGeom, projs, outputScale);
-		}
+		delete[] pParProjs;
 
-		ASTRA_ASSERT(ok);
+	} else {
+		assert(pFanProjs);
 
 		ok = astraCudaFanFP(m_pVolume->getDataConst(), m_pSinogram->getData(),
 		                    pVolGeom->getGridColCount(), pVolGeom->getGridRowCount(),
-		                    m_pSinogram->getGeometry()->getProjectionAngleCount(),
-		                    m_pSinogram->getGeometry()->getDetectorCount(),
-		                    projs,
-		                    m_iDetectorSuperSampling, outputScale, m_iGPUIndex);
-
-		delete[] projs;
-
-	} else {
+		                    pProjGeom->getProjectionAngleCount(),
+		                    pProjGeom->getDetectorCount(),
+		                    pFanProjs,
+		                    m_iDetectorSuperSampling, fOutputScale, m_iGPUIndex);
 
-		ASTRA_ASSERT(false);
+		delete[] pFanProjs;
 
 	}
 
diff --git a/src/CudaReconstructionAlgorithm2D.cpp b/src/CudaReconstructionAlgorithm2D.cpp
index 2798434..326ef1e 100644
--- a/src/CudaReconstructionAlgorithm2D.cpp
+++ b/src/CudaReconstructionAlgorithm2D.cpp
@@ -250,69 +250,14 @@ bool CCudaReconstructionAlgorithm2D::setupGeometry()
 	ok = m_pAlgo->setGPUIndex(m_iGPUIndex);
 	if (!ok) return false;
 
-	astraCUDA::SDimensions dims;
-
 	const CVolumeGeometry2D& volgeom = *m_pReconstruction->getGeometry();
+	const CProjectionGeometry2D& projgeom = *m_pSinogram->getGeometry();
 
-	// TODO: non-square pixels?
-	dims.iVolWidth = volgeom.getGridColCount();
-	dims.iVolHeight = volgeom.getGridRowCount();
-	float fPixelSize = volgeom.getPixelLengthX();
-
-	dims.iRaysPerDet = m_iDetectorSuperSampling;
-	dims.iRaysPerPixelDim = m_iPixelSuperSampling;
-
-
-	const CParallelProjectionGeometry2D* parProjGeom = dynamic_cast<CParallelProjectionGeometry2D*>(m_pSinogram->getGeometry());
-	const CFanFlatProjectionGeometry2D* fanProjGeom = dynamic_cast<CFanFlatProjectionGeometry2D*>(m_pSinogram->getGeometry());
-	const CFanFlatVecProjectionGeometry2D* fanVecProjGeom = dynamic_cast<CFanFlatVecProjectionGeometry2D*>(m_pSinogram->getGeometry());
-
-	if (parProjGeom) {
-
-		float *offsets, *angles, detSize, outputScale;
-
-		ok = convertAstraGeometry(&volgeom, parProjGeom, offsets, angles, detSize, outputScale);
-
-		dims.iProjAngles = parProjGeom->getProjectionAngleCount();
-		dims.iProjDets = parProjGeom->getDetectorCount();
-		dims.fDetScale = parProjGeom->getDetectorWidth() / fPixelSize;
-
-		ok = m_pAlgo->setGeometry(dims, parProjGeom->getProjectionAngles());
-		ok &= m_pAlgo->setTOffsets(offsets);
-
-		// CHECKME: outputScale? detSize?
-
-		delete[] offsets;
-		delete[] angles;
-
-	} else if (fanProjGeom || fanVecProjGeom) {
-
-		astraCUDA::SFanProjection* projs;
-		float outputScale;
-
-		if (fanProjGeom) {
-			ok = convertAstraGeometry(&volgeom, fanProjGeom, projs, outputScale);
-		} else {
-			ok = convertAstraGeometry(&volgeom, fanVecProjGeom, projs, outputScale);
-		}
-
-		dims.iProjAngles = m_pSinogram->getGeometry()->getProjectionAngleCount();
-		dims.iProjDets = m_pSinogram->getGeometry()->getDetectorCount();
-		dims.fDetScale = m_pSinogram->getGeometry()->getDetectorWidth() / fPixelSize;
-
-		ok = m_pAlgo->setFanGeometry(dims, projs);
-
-		// CHECKME: outputScale?
-
-		delete[] projs;
-
-	} else {
-
-		ASTRA_ASSERT(false);
-
-	}
+	ok = m_pAlgo->setGeometry(&volgeom, &projgeom);
 	if (!ok) return false;
 
+	ok = m_pAlgo->setSuperSampling(m_iDetectorSuperSampling, m_iPixelSuperSampling);
+	if (!ok) return false;
 
 	if (m_bUseReconstructionMask)
 		ok &= m_pAlgo->enableVolumeMask();
diff --git a/src/FanFlatProjectionGeometry2D.cpp b/src/FanFlatProjectionGeometry2D.cpp
index 3aab582..4eec9c4 100644
--- a/src/FanFlatProjectionGeometry2D.cpp
+++ b/src/FanFlatProjectionGeometry2D.cpp
@@ -28,6 +28,8 @@ $Id$
 
 #include "astra/FanFlatProjectionGeometry2D.h"
 
+#include "astra/GeometryUtil2D.h"
+
 #include <cstring>
 #include <sstream>
 
@@ -218,16 +220,12 @@ Config* CFanFlatProjectionGeometry2D::getConfiguration() const
 //----------------------------------------------------------------------------------------
 CFanFlatVecProjectionGeometry2D* CFanFlatProjectionGeometry2D::toVectorGeometry()
 {
-	SFanProjection* vectors = new SFanProjection[m_iProjectionAngleCount];
-	for (int i = 0; i < m_iProjectionAngleCount; ++i)
-	{
-		vectors[i].fSrcX =  sinf(m_pfProjectionAngles[i]) * m_fOriginSourceDistance;
-		vectors[i].fSrcY = -cosf(m_pfProjectionAngles[i]) * m_fOriginSourceDistance;
-		vectors[i].fDetUX = cosf(m_pfProjectionAngles[i]) * m_fDetectorWidth;
-		vectors[i].fDetUY = sinf(m_pfProjectionAngles[i]) * m_fDetectorWidth;
-		vectors[i].fDetSX = -sinf(m_pfProjectionAngles[i]) * m_fOriginDetectorDistance - 0.5f * m_iDetectorCount * vectors[i].fDetUX;
-		vectors[i].fDetSY =  cosf(m_pfProjectionAngles[i]) * m_fOriginDetectorDistance - 0.5f * m_iDetectorCount * vectors[i].fDetUY;
-	}
+	SFanProjection* vectors = genFanProjections(m_iProjectionAngleCount,
+	                                            m_iDetectorCount,
+	                                            m_fOriginSourceDistance,
+	                                            m_fOriginDetectorDistance,
+	                                            m_fDetectorWidth,
+	                                            m_pfProjectionAngles);
 
 	CFanFlatVecProjectionGeometry2D* vecGeom = new CFanFlatVecProjectionGeometry2D();
 	vecGeom->initialize(m_iProjectionAngleCount, m_iDetectorCount, vectors);
diff --git a/src/GeometryUtil2D.cpp b/src/GeometryUtil2D.cpp
new file mode 100644
index 0000000..1bca2bc
--- /dev/null
+++ b/src/GeometryUtil2D.cpp
@@ -0,0 +1,161 @@
+/*
+-----------------------------------------------------------------------
+Copyright: 2010-2015, iMinds-Vision Lab, University of Antwerp
+           2014-2015, CWI, Amsterdam
+
+Contact: astra@uantwerpen.be
+Website: http://sf.net/projects/astra-toolbox
+
+This file is part of the ASTRA Toolbox.
+
+
+The ASTRA Toolbox is free software: you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+The ASTRA Toolbox is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with the ASTRA Toolbox. If not, see <http://www.gnu.org/licenses/>.
+
+-----------------------------------------------------------------------
+$Id$
+*/
+
+#include "astra/GeometryUtil2D.h"
+
+#include <cmath>
+
+namespace astra {
+
+SParProjection* genParProjections(unsigned int iProjAngles,
+                                  unsigned int iProjDets,
+                                  double fDetSize,
+                                  const float *pfAngles,
+                                  const float *pfExtraOffsets)
+{
+	SParProjection base;
+	base.fRayX = 0.0f;
+	base.fRayY = 1.0f;
+
+	base.fDetSX = iProjDets * fDetSize * -0.5f;
+	base.fDetSY = 0.0f;
+
+	base.fDetUX = fDetSize;
+	base.fDetUY = 0.0f;
+
+	SParProjection* p = new SParProjection[iProjAngles];
+
+#define ROTATE0(name,i,alpha) do { p[i].f##name##X = base.f##name##X * cos(alpha) - base.f##name##Y * sin(alpha); p[i].f##name##Y = base.f##name##X * sin(alpha) + base.f##name##Y * cos(alpha); } while(0)
+
+	for (unsigned int i = 0; i < iProjAngles; ++i) {
+		if (pfExtraOffsets) {
+			// TODO
+		}
+
+		ROTATE0(Ray, i, pfAngles[i]);
+		ROTATE0(DetS, i, pfAngles[i]);
+		ROTATE0(DetU, i, pfAngles[i]);
+
+		if (pfExtraOffsets) {
+			float d = pfExtraOffsets[i];
+			p[i].fDetSX -= d * p[i].fDetUX;
+			p[i].fDetSY -= d * p[i].fDetUY;
+		}
+	}
+
+#undef ROTATE0
+
+	return p;
+}
+
+
+SFanProjection* genFanProjections(unsigned int iProjAngles,
+                                  unsigned int iProjDets,
+                                  double fOriginSource, double fOriginDetector,
+                                  double fDetSize,
+                                  const float *pfAngles)
+//                                  const float *pfExtraOffsets)
+{
+	SFanProjection *pProjs = new SFanProjection[iProjAngles];
+
+	float fSrcX0 = 0.0f;
+	float fSrcY0 = -fOriginSource;
+	float fDetUX0 = fDetSize;
+	float fDetUY0 = 0.0f;
+	float fDetSX0 = iProjDets * fDetUX0 / -2.0f;
+	float fDetSY0 = fOriginDetector;
+
+#define ROTATE0(name,i,alpha) do { pProjs[i].f##name##X = f##name##X0 * cos(alpha) - f##name##Y0 * sin(alpha); pProjs[i].f##name##Y = f##name##X0 * sin(alpha) + f##name##Y0 * cos(alpha); } while(0)
+	for (unsigned int i = 0; i < iProjAngles; ++i) {
+		ROTATE0(Src, i, pfAngles[i]);
+		ROTATE0(DetS, i, pfAngles[i]);
+		ROTATE0(DetU, i, pfAngles[i]);
+	}
+
+#undef ROTATE0
+
+	return pProjs;
+}
+
+// Convert a SParProjection back into its set of "standard" circular parallel
+// beam parameters. This is always possible.
+bool getParParameters(const SParProjection &proj /* , ... */)
+{
+	// angle
+	// det size
+	// offset
+
+	// (see convertAndUploadAngles in par_fp.cu)
+
+	return true;
+}
+
+// Convert a SFanProjection back into its set of "standard" circular fan beam
+// parameters. This will return false if it can not be represented in this way.
+bool getFanParameters(const SFanProjection &proj, unsigned int iProjDets, float &fAngle, float &fOriginSource, float &fOriginDetector, float &fDetSize, float &fOffset)
+{
+	// angle
+	// det size
+	// offset
+	// origin-source
+	// origin-detector
+
+	// Need to check if line source-origin is orthogonal to vector ux,uy
+	// (including the case source==origin)
+
+	// (equivalent: source and origin project to same point on detector)
+
+	double dp = proj.fSrcX * proj.fDetUX + proj.fSrcY * proj.fDetUY;
+
+	double rel = (proj.fSrcX*proj.fSrcX + proj.fSrcY*proj.fSrcY) * (proj.fDetUX*proj.fDetUX + proj.fDetUY*proj.fDetUY);
+	rel = sqrt(rel);
+
+	if (std::abs(dp) > rel * 0.0001)
+		return false;
+
+	fOriginSource = sqrt(proj.fSrcX*proj.fSrcX + proj.fSrcY*proj.fSrcY);
+
+	fDetSize = sqrt(proj.fDetUX*proj.fDetUX + proj.fDetUY*proj.fDetUY);
+
+	// project origin on detector line ( == project source on detector line)
+
+	double t = (- proj.fDetSX) * proj.fDetUX + (- proj.fDetSY) * proj.fDetUY;
+
+	fOffset = (float)t - 0.5*iProjDets;
+
+	// TODO: CHECKME
+	fOriginDetector = sqrt((proj.fDetSX + t * proj.fDetUX)*(proj.fDetSX + t * proj.fDetUX) + (proj.fDetSY + t * proj.fDetUY)*(proj.fDetSY + t * proj.fDetUY));
+
+	//float fAngle = atan2(proj.fDetSX + t * proj.fDetUX - proj.fSrcX, proj.fDetSY + t * proj.fDetUY); // TODO: Fix order + sign
+	fAngle = atan2(proj.fDetUY, proj.fDetUX); // TODO: Check order + sign
+
+	return true;
+}
+
+
+}
diff --git a/src/ParallelProjectionGeometry2D.cpp b/src/ParallelProjectionGeometry2D.cpp
index 105e24b..e1f93aa 100644
--- a/src/ParallelProjectionGeometry2D.cpp
+++ b/src/ParallelProjectionGeometry2D.cpp
@@ -28,6 +28,8 @@ $Id$
 
 #include "astra/ParallelProjectionGeometry2D.h"
 
+#include "astra/GeometryUtil2D.h"
+
 #include <cstring>
 
 using namespace std;
@@ -48,15 +50,13 @@ CParallelProjectionGeometry2D::CParallelProjectionGeometry2D() :
 CParallelProjectionGeometry2D::CParallelProjectionGeometry2D(int _iProjectionAngleCount, 
 															 int _iDetectorCount, 
 															 float32 _fDetectorWidth, 
-															 const float32* _pfProjectionAngles,
-															 const float32* _pfExtraDetectorOffsets)
+															 const float32* _pfProjectionAngles)
 {
 	_clear();
 	initialize(_iProjectionAngleCount,
 				_iDetectorCount, 
 				_fDetectorWidth, 
-				_pfProjectionAngles,
-				_pfExtraDetectorOffsets);
+				_pfProjectionAngles);
 }
 
 //----------------------------------------------------------------------------------------
@@ -115,14 +115,12 @@ bool CParallelProjectionGeometry2D::initialize(const Config& _cfg)
 bool CParallelProjectionGeometry2D::initialize(int _iProjectionAngleCount, 
 											   int _iDetectorCount, 
 											   float32 _fDetectorWidth, 
-											   const float32* _pfProjectionAngles,
-											   const float32* _pfExtraDetectorOffsets)
+											   const float32* _pfProjectionAngles)
 {
 	_initialize(_iProjectionAngleCount, 
 			    _iDetectorCount, 
 			    _fDetectorWidth, 
-			    _pfProjectionAngles,
-				_pfExtraDetectorOffsets);
+			    _pfProjectionAngles);
 
 	// success
 	m_bInitialized = _check();
@@ -178,11 +176,6 @@ Config* CParallelProjectionGeometry2D::getConfiguration() const
 	cfg->self.addChildNode("DetectorCount", getDetectorCount());
 	cfg->self.addChildNode("DetectorWidth", getDetectorWidth());
 	cfg->self.addChildNode("ProjectionAngles", m_pfProjectionAngles, m_iProjectionAngleCount);
-	if(m_pfExtraDetectorOffset!=NULL){
-		XMLNode opt = cfg->self.addChildNode("Option");
-		opt.addAttribute("key","ExtraDetectorOffset");
-		opt.setContent(m_pfExtraDetectorOffset, m_iProjectionAngleCount);
-	}
 	return cfg;
 }
 
@@ -203,17 +196,11 @@ CVector3D CParallelProjectionGeometry2D::getProjectionDirection(int _iProjection
 //----------------------------------------------------------------------------------------
 CParallelVecProjectionGeometry2D* CParallelProjectionGeometry2D::toVectorGeometry()
 {
-	SParProjection* vectors = new SParProjection[m_iProjectionAngleCount];
-	for (int i = 0; i < m_iProjectionAngleCount; ++i)
-	{
-		vectors[i].fRayX = sinf(m_pfProjectionAngles[i]);
-		vectors[i].fRayY = -cosf(m_pfProjectionAngles[i]);
-		vectors[i].fDetUX = cosf(m_pfProjectionAngles[i]) * m_fDetectorWidth;
-		vectors[i].fDetUY = sinf(m_pfProjectionAngles[i]) * m_fDetectorWidth;		
-		vectors[i].fDetSX = -0.5f * m_iDetectorCount * vectors[i].fDetUX;
-		vectors[i].fDetSY = -0.5f * m_iDetectorCount * vectors[i].fDetUY;
-	}
-
+	SParProjection* vectors = genParProjections(m_iProjectionAngleCount,
+	                                            m_iDetectorCount,
+	                                            m_fDetectorWidth,
+	                                            m_pfProjectionAngles, 0);
+	// TODO: ExtraOffsets?
 	CParallelVecProjectionGeometry2D* vecGeom = new CParallelVecProjectionGeometry2D();
 	vecGeom->initialize(m_iProjectionAngleCount, m_iDetectorCount, vectors);
 	delete[] vectors;
diff --git a/src/ProjectionGeometry2D.cpp b/src/ProjectionGeometry2D.cpp
index 03fbd22..a306b48 100644
--- a/src/ProjectionGeometry2D.cpp
+++ b/src/ProjectionGeometry2D.cpp
@@ -45,11 +45,10 @@ CProjectionGeometry2D::CProjectionGeometry2D() : configCheckData(0)
 CProjectionGeometry2D::CProjectionGeometry2D(int _iAngleCount, 
 											 int _iDetectorCount, 
 											 float32 _fDetectorWidth, 
-											 const float32* _pfProjectionAngles,
-											 const float32* _pfExtraDetectorOffsets) : configCheckData(0)
+											 const float32* _pfProjectionAngles) : configCheckData(0)
 {
 	_clear();
-	_initialize(_iAngleCount, _iDetectorCount, _fDetectorWidth, _pfProjectionAngles,_pfExtraDetectorOffsets);
+	_initialize(_iAngleCount, _iDetectorCount, _fDetectorWidth, _pfProjectionAngles);
 }
 
 //----------------------------------------------------------------------------------------
@@ -156,8 +155,7 @@ bool CProjectionGeometry2D::initialize(const Config& _cfg)
 bool CProjectionGeometry2D::_initialize(int _iProjectionAngleCount, 
 									    int _iDetectorCount, 
 									    float32 _fDetectorWidth, 
-									    const float32* _pfProjectionAngles,
-										const float32* _pfExtraDetectorOffsets)
+									    const float32* _pfProjectionAngles)
 {
 	if (m_bInitialized) {
 		clear();
-- 
cgit v1.2.3