summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--build/msvc/gen.py7
-rw-r--r--cuda/3d/mem3d.cu19
-rw-r--r--cuda/3d/mem3d.h3
-rw-r--r--include/astra/CompositeGeometryManager.h35
-rw-r--r--matlab/mex/astra_mex_c.cpp45
-rw-r--r--python/astra/__init__.py8
-rw-r--r--python/astra/astra.py4
-rw-r--r--python/astra/astra_c.pyx21
-rw-r--r--samples/matlab/s020_3d_multiGPU.m38
-rw-r--r--samples/python/s019_experimental_multires.py (renamed from samples/python/s018_experimental_multires.py)0
-rw-r--r--samples/python/s020_3d_multiGPU.py57
-rw-r--r--src/CompositeGeometryManager.cpp787
-rw-r--r--src/ConeProjectionGeometry3D.cpp3
13 files changed, 834 insertions, 193 deletions
diff --git a/build/msvc/gen.py b/build/msvc/gen.py
index 999710f..cc69a62 100644
--- a/build/msvc/gen.py
+++ b/build/msvc/gen.py
@@ -1,6 +1,8 @@
from __future__ import print_function
import sys
import os
+import codecs
+import six
vcppguid = "8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942" # C++ project
siguid = "2150E333-8FDC-42A3-9474-1A3956D46DE8" # project group
@@ -428,7 +430,10 @@ P_astra["files"].sort()
projects = [ P_astra, F_astra_mex, P0, P1, P2, P3, P4, P5, P6, P7, P8 ]
-bom = "\xef\xbb\xbf"
+if six.PY2:
+ bom = "\xef\xbb\xbf"
+else:
+ bom = codecs.BOM_UTF8.decode("utf-8")
class Configuration:
def __init__(self, debug, cuda, x64):
diff --git a/cuda/3d/mem3d.cu b/cuda/3d/mem3d.cu
index 6d81dc0..0320117 100644
--- a/cuda/3d/mem3d.cu
+++ b/cuda/3d/mem3d.cu
@@ -62,6 +62,25 @@ size_t availableGPUMemory()
return free;
}
+int maxBlockDimension()
+{
+ int dev;
+ cudaError_t err = cudaGetDevice(&dev);
+ if (err != cudaSuccess) {
+ ASTRA_WARN("Error querying device");
+ return 0;
+ }
+
+ cudaDeviceProp props;
+ err = cudaGetDeviceProperties(&props, dev);
+ if (err != cudaSuccess) {
+ ASTRA_WARN("Error querying device %d properties", dev);
+ return 0;
+ }
+
+ return std::min(props.maxTexture3D[0], std::min(props.maxTexture3D[1], props.maxTexture3D[2]));
+}
+
MemHandle3D allocateGPUMemory(unsigned int x, unsigned int y, unsigned int z, Mem3DZeroMode zero)
{
SMemHandle3D_internal hnd;
diff --git a/cuda/3d/mem3d.h b/cuda/3d/mem3d.h
index 82bad19..6fff80b 100644
--- a/cuda/3d/mem3d.h
+++ b/cuda/3d/mem3d.h
@@ -78,6 +78,7 @@ enum Mem3DZeroMode {
};
size_t availableGPUMemory();
+int maxBlockDimension();
MemHandle3D allocateGPUMemory(unsigned int x, unsigned int y, unsigned int z, Mem3DZeroMode zero);
@@ -87,6 +88,8 @@ bool copyFromGPUMemory(float *dst, MemHandle3D src, const SSubDimensions3D &pos)
bool freeGPUMemory(MemHandle3D handle);
+bool setGPUIndex(int index);
+
bool FP(const astra::CProjectionGeometry3D* pProjGeom, MemHandle3D projData, const astra::CVolumeGeometry3D* pVolGeom, MemHandle3D volData, int iDetectorSuperSampling, astra::Cuda3DProjectionKernel projKernel);
diff --git a/include/astra/CompositeGeometryManager.h b/include/astra/CompositeGeometryManager.h
index 6610151..18dd72f 100644
--- a/include/astra/CompositeGeometryManager.h
+++ b/include/astra/CompositeGeometryManager.h
@@ -50,9 +50,16 @@ class CProjectionGeometry3D;
class CProjector3D;
+struct SGPUParams {
+ std::vector<int> GPUIndices;
+ size_t memory;
+};
+
class _AstraExport CCompositeGeometryManager {
public:
+ CCompositeGeometryManager();
+
class CPart;
typedef std::list<boost::shared_ptr<CPart> > TPartList;
class CPart {
@@ -72,7 +79,9 @@ public:
bool uploadToGPU();
bool downloadFromGPU(/*mode?*/);
- virtual TPartList split(size_t maxSize, int div) = 0;
+ virtual void splitX(TPartList& out, size_t maxSize, size_t maxDim, int div) = 0;
+ virtual void splitY(TPartList& out, size_t maxSize, size_t maxDim, int div) = 0;
+ virtual void splitZ(TPartList& out, size_t maxSize, size_t maxDim, int div) = 0;
virtual CPart* reduce(const CPart *other) = 0;
virtual void getDims(size_t &x, size_t &y, size_t &z) = 0;
size_t getSize();
@@ -86,7 +95,9 @@ public:
CVolumeGeometry3D* pGeom;
- virtual TPartList split(size_t maxSize, int div);
+ virtual void splitX(TPartList& out, size_t maxSize, size_t maxDim, int div);
+ virtual void splitY(TPartList& out, size_t maxSize, size_t maxDim, int div);
+ virtual void splitZ(TPartList& out, size_t maxSize, size_t maxDim, int div);
virtual CPart* reduce(const CPart *other);
virtual void getDims(size_t &x, size_t &y, size_t &z);
@@ -100,7 +111,9 @@ public:
CProjectionGeometry3D* pGeom;
- virtual TPartList split(size_t maxSize, int div);
+ virtual void splitX(TPartList& out, size_t maxSize, size_t maxDim, int div);
+ virtual void splitY(TPartList& out, size_t maxSize, size_t maxDim, int div);
+ virtual void splitZ(TPartList& out, size_t maxSize, size_t maxDim, int div);
virtual CPart* reduce(const CPart *other);
virtual void getDims(size_t &x, size_t &y, size_t &z);
@@ -130,6 +143,13 @@ public:
bool doJobs(TJobList &jobs);
+ SJob createJobFP(CProjector3D *pProjector,
+ CFloat32VolumeData3DMemory *pVolData,
+ CFloat32ProjectionData3DMemory *pProjData);
+ SJob createJobBP(CProjector3D *pProjector,
+ CFloat32VolumeData3DMemory *pVolData,
+ CFloat32ProjectionData3DMemory *pProjData);
+
// Convenience functions for creating and running a single FP or BP job
bool doFP(CProjector3D *pProjector, CFloat32VolumeData3DMemory *pVolData,
CFloat32ProjectionData3DMemory *pProjData);
@@ -139,10 +159,19 @@ public:
bool doFP(CProjector3D *pProjector, const std::vector<CFloat32VolumeData3DMemory *>& volData, const std::vector<CFloat32ProjectionData3DMemory *>& projData);
bool doBP(CProjector3D *pProjector, const std::vector<CFloat32VolumeData3DMemory *>& volData, const std::vector<CFloat32ProjectionData3DMemory *>& projData);
+ void setGPUIndices(const std::vector<int>& GPUIndices);
+
+ static void setGlobalGPUParams(const SGPUParams& params);
+
protected:
bool splitJobs(TJobSet &jobs, size_t maxSize, int div, TJobSet &split);
+ std::vector<int> m_GPUIndices;
+ size_t m_iMaxSize;
+
+
+ static SGPUParams* s_params;
};
}
diff --git a/matlab/mex/astra_mex_c.cpp b/matlab/mex/astra_mex_c.cpp
index d34334c..fdf4f33 100644
--- a/matlab/mex/astra_mex_c.cpp
+++ b/matlab/mex/astra_mex_c.cpp
@@ -38,6 +38,7 @@ $Id$
#include "astra/Globals.h"
#ifdef ASTRA_CUDA
#include "../cuda/2d/darthelper.h"
+#include "astra/CompositeGeometryManager.h"
#endif
using namespace std;
using namespace astra;
@@ -83,12 +84,46 @@ void astra_mex_use_cuda(int nlhs, mxArray* plhs[], int nrhs, const mxArray* prhs
* Set active GPU
*/
void astra_mex_set_gpu_index(int nlhs, mxArray* plhs[], int nrhs, const mxArray* prhs[])
-{
+{
#ifdef ASTRA_CUDA
- if (nrhs >= 2) {
- bool ret = astraCUDA::setGPUIndex((int)mxGetScalar(prhs[1]));
- if (!ret)
- mexPrintf("Failed to set GPU %d\n", (int)mxGetScalar(prhs[1]));
+ bool usage = false;
+ if (nrhs != 2 && nrhs != 4) {
+ usage = true;
+ }
+
+ astra::SGPUParams params;
+ params.memory = 0;
+
+ if (!usage && nrhs >= 4) {
+ std::string s = mexToString(prhs[2]);
+ if (s != "memory") {
+ usage = true;
+ } else {
+ params.memory = (size_t)mxGetScalar(prhs[3]);
+ }
+ }
+
+ if (!usage && nrhs >= 2) {
+ int n = mxGetN(prhs[1]) * mxGetM(prhs[1]);
+ params.GPUIndices.resize(n);
+ double* pdMatlabData = mxGetPr(prhs[1]);
+ for (int i = 0; i < n; ++i)
+ params.GPUIndices[i] = (int)pdMatlabData[i];
+
+
+ astra::CCompositeGeometryManager::setGlobalGPUParams(params);
+
+
+ // Set first GPU
+ if (n >= 1) {
+ bool ret = astraCUDA::setGPUIndex((int)pdMatlabData[0]);
+ if (!ret)
+ mexPrintf("Failed to set GPU %d\n", (int)pdMatlabData[0]);
+ }
+ }
+
+ if (usage) {
+ mexPrintf("Usage: astra_mex('set_gpu_index', index/indices [, 'memory', memory])");
}
#endif
}
diff --git a/python/astra/__init__.py b/python/astra/__init__.py
index 10ed74d..515d9a2 100644
--- a/python/astra/__init__.py
+++ b/python/astra/__init__.py
@@ -39,7 +39,7 @@ from . import log
from .optomo import OpTomo
import os
-try:
- astra.set_gpu_index(int(os.environ['ASTRA_GPU_INDEX']))
-except KeyError:
- pass
+
+if 'ASTRA_GPU_INDEX' in os.environ:
+ L = [ int(x) for x in os.environ['ASTRA_GPU_INDEX'].split(',') ]
+ astra.set_gpu_index(L)
diff --git a/python/astra/astra.py b/python/astra/astra.py
index 26b1ff0..9328b6b 100644
--- a/python/astra/astra.py
+++ b/python/astra/astra.py
@@ -49,10 +49,10 @@ def version(printToScreen=False):
"""
return a.version(printToScreen)
-def set_gpu_index(idx):
+def set_gpu_index(idx, memory=0):
"""Set default GPU index to use.
:param idx: GPU index
:type idx: :class:`int`
"""
- a.set_gpu_index(idx)
+ a.set_gpu_index(idx, memory)
diff --git a/python/astra/astra_c.pyx b/python/astra/astra_c.pyx
index 5075fed..65192b5 100644
--- a/python/astra/astra_c.pyx
+++ b/python/astra/astra_c.pyx
@@ -31,6 +31,7 @@ import six
from .utils import wrap_from_bytes
from libcpp.string cimport string
+from libcpp.vector cimport vector
from libcpp cimport bool
cdef extern from "astra/Globals.h" namespace "astra":
int getVersion()
@@ -43,6 +44,12 @@ IF HAVE_CUDA==True:
ELSE:
def setGPUIndex():
pass
+cdef extern from "astra/CompositeGeometryManager.h" namespace "astra":
+ cdef cppclass SGPUParams:
+ vector[int] GPUIndices
+ size_t memory
+cdef extern from "astra/CompositeGeometryManager.h" namespace "astra::CCompositeGeometryManager":
+ void setGlobalGPUParams(SGPUParams&)
def credits():
six.print_("""The ASTRA Toolbox has been developed at the University of Antwerp and CWI, Amsterdam by
@@ -70,8 +77,16 @@ def version(printToScreen=False):
else:
return getVersion()
-def set_gpu_index(idx):
+def set_gpu_index(idx, memory=0):
+ import types
+ import collections
+ cdef SGPUParams params
if use_cuda()==True:
- ret = setGPUIndex(idx)
+ if not isinstance(idx, collections.Iterable) or isinstance(idx, types.StringTypes):
+ idx = (idx,)
+ params.memory = memory
+ params.GPUIndices = idx
+ setGlobalGPUParams(params)
+ ret = setGPUIndex(params.GPUIndices[0])
if not ret:
- six.print_("Failed to set GPU " + str(idx))
+ six.print_("Failed to set GPU " + str(params.GPUIndices[0]))
diff --git a/samples/matlab/s020_3d_multiGPU.m b/samples/matlab/s020_3d_multiGPU.m
new file mode 100644
index 0000000..bade325
--- /dev/null
+++ b/samples/matlab/s020_3d_multiGPU.m
@@ -0,0 +1,38 @@
+% -----------------------------------------------------------------------
+% This file is part of the ASTRA Toolbox
+%
+% Copyright: 2010-2015, iMinds-Vision Lab, University of Antwerp
+% 2014-2015, CWI, Amsterdam
+% License: Open Source under GPLv3
+% Contact: astra@uantwerpen.be
+% Website: http://sf.net/projects/astra-toolbox
+% -----------------------------------------------------------------------
+
+
+% Set up multi-GPU usage.
+% This only works for 3D GPU forward projection and back projection.
+astra_mex('set_gpu_index', [0 1]);
+
+% Optionally, you can also restrict the amount of GPU memory ASTRA will use.
+% The line commented below sets this to 1GB.
+%astra_mex('set_gpu_index', [0 1], 'memory', 1024*1024*1024);
+
+vol_geom = astra_create_vol_geom(1024, 1024, 1024);
+
+angles = linspace2(0, pi, 1024);
+proj_geom = astra_create_proj_geom('parallel3d', 1.0, 1.0, 1024, 1024, angles);
+
+% Create a simple hollow cube phantom
+cube = zeros(1024,1024,1024);
+cube(129:896,129:896,129:896) = 1;
+cube(257:768,257:768,257:768) = 0;
+
+% Create projection data from this
+[proj_id, proj_data] = astra_create_sino3d_cuda(cube, proj_geom, vol_geom);
+
+% Backproject projection data
+[bproj_id, bproj_data] = astra_create_backprojection3d_cuda(proj_data, proj_geom, vol_geom);
+
+astra_mex_data3d('delete', proj_id);
+astra_mex_data3d('delete', bproj_id);
+
diff --git a/samples/python/s018_experimental_multires.py b/samples/python/s019_experimental_multires.py
index cf38e53..cf38e53 100644
--- a/samples/python/s018_experimental_multires.py
+++ b/samples/python/s019_experimental_multires.py
diff --git a/samples/python/s020_3d_multiGPU.py b/samples/python/s020_3d_multiGPU.py
new file mode 100644
index 0000000..d6799c4
--- /dev/null
+++ b/samples/python/s020_3d_multiGPU.py
@@ -0,0 +1,57 @@
+#-----------------------------------------------------------------------
+#Copyright 2013 Centrum Wiskunde & Informatica, Amsterdam
+#
+#Author: Daniel M. Pelt
+#Contact: D.M.Pelt@cwi.nl
+#Website: http://dmpelt.github.io/pyastratoolbox/
+#
+#
+#This file is part of the Python interface to the
+#All Scale Tomographic Reconstruction Antwerp Toolbox ("ASTRA Toolbox").
+#
+#The Python interface to the ASTRA Toolbox is free software: you can redistribute it and/or modify
+#it under the terms of the GNU General Public License as published by
+#the Free Software Foundation, either version 3 of the License, or
+#(at your option) any later version.
+#
+#The Python interface to the ASTRA Toolbox is distributed in the hope that it will be useful,
+#but WITHOUT ANY WARRANTY; without even the implied warranty of
+#MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+#GNU General Public License for more details.
+#
+#You should have received a copy of the GNU General Public License
+#along with the Python interface to the ASTRA Toolbox. If not, see <http://www.gnu.org/licenses/>.
+#
+#-----------------------------------------------------------------------
+
+import astra
+import numpy as np
+
+# Set up multi-GPU usage.
+# This only works for 3D GPU forward projection and back projection.
+astra.astra.set_gpu_index([0,1])
+
+# Optionally, you can also restrict the amount of GPU memory ASTRA will use.
+# The line commented below sets this to 1GB.
+#astra.astra.set_gpu_index([0,1], memory=1024*1024*1024)
+
+vol_geom = astra.create_vol_geom(1024, 1024, 1024)
+
+angles = np.linspace(0, np.pi, 1024,False)
+proj_geom = astra.create_proj_geom('parallel3d', 1.0, 1.0, 1024, 1024, angles)
+
+# Create a simple hollow cube phantom
+cube = np.zeros((1024,1024,1024))
+cube[128:895,128:895,128:895] = 1
+cube[256:767,256:767,256:767] = 0
+
+# Create projection data from this
+proj_id, proj_data = astra.create_sino3d_gpu(cube, proj_geom, vol_geom)
+
+# Backproject projection data
+bproj_id, bproj_data = astra.create_backprojection3d_gpu(proj_data, proj_geom, vol_geom)
+
+# Clean up. Note that GPU memory is tied up in the algorithm object,
+# and main RAM in the data objects.
+astra.data3d.delete(proj_id)
+astra.data3d.delete(bproj_id)
diff --git a/src/CompositeGeometryManager.cpp b/src/CompositeGeometryManager.cpp
index 41f6319..c9cbaaa 100644
--- a/src/CompositeGeometryManager.cpp
+++ b/src/CompositeGeometryManager.cpp
@@ -44,11 +44,32 @@ along with the ASTRA Toolbox. If not, see <http://www.gnu.org/licenses/>.
#include "../cuda/3d/mem3d.h"
#include <cstring>
+#include <sstream>
+#include <stdint.h>
+
+#ifndef USE_PTHREADS
+#include <boost/thread/mutex.hpp>
+#include <boost/thread.hpp>
+#endif
+
namespace astra {
+SGPUParams* CCompositeGeometryManager::s_params = 0;
+
+CCompositeGeometryManager::CCompositeGeometryManager()
+{
+ m_iMaxSize = 0;
+
+ if (s_params) {
+ m_iMaxSize = s_params->memory;
+ m_GPUIndices = s_params->GPUIndices;
+ }
+}
+
+
// JOB:
-//
+//
// VolumePart
// ProjectionPart
// FP-or-BP
@@ -76,9 +97,11 @@ namespace astra {
// (First approach: 0.5/0.5)
-
bool CCompositeGeometryManager::splitJobs(TJobSet &jobs, size_t maxSize, int div, TJobSet &split)
{
+ int maxBlockDim = astraCUDA3d::maxBlockDimension();
+ ASTRA_DEBUG("Found max block dim %d", maxBlockDim);
+
split.clear();
for (TJobSet::const_iterator i = jobs.begin(); i != jobs.end(); ++i)
@@ -92,7 +115,22 @@ bool CCompositeGeometryManager::splitJobs(TJobSet &jobs, size_t maxSize, int div
// b. split input part
// c. create jobs for new (input,output) subparts
- TPartList splitOutput = pOutput->split(maxSize/3, div);
+ TPartList splitOutput;
+ pOutput->splitZ(splitOutput, maxSize/3, SIZE_MAX, div);
+#if 0
+ TPartList splitOutput2;
+ for (TPartList::iterator i_out = splitOutput.begin(); i_out != splitOutput.end(); ++i_out) {
+ boost::shared_ptr<CPart> outputPart = *i_out;
+ outputPart.get()->splitX(splitOutput2, SIZE_MAX, SIZE_MAX, 1);
+ }
+ splitOutput.clear();
+ for (TPartList::iterator i_out = splitOutput2.begin(); i_out != splitOutput2.end(); ++i_out) {
+ boost::shared_ptr<CPart> outputPart = *i_out;
+ outputPart.get()->splitY(splitOutput, SIZE_MAX, SIZE_MAX, 1);
+ }
+ splitOutput2.clear();
+#endif
+
for (TJobList::const_iterator j = L.begin(); j != L.end(); ++j)
{
@@ -120,8 +158,21 @@ bool CCompositeGeometryManager::splitJobs(TJobSet &jobs, size_t maxSize, int div
size_t remainingSize = ( maxSize - outputPart->getSize() ) / 2;
- TPartList splitInput = input->split(remainingSize, 1);
+ TPartList splitInput;
+ input->splitZ(splitInput, remainingSize, maxBlockDim, 1);
delete input;
+ TPartList splitInput2;
+ for (TPartList::iterator i_in = splitInput.begin(); i_in != splitInput.end(); ++i_in) {
+ boost::shared_ptr<CPart> inputPart = *i_in;
+ inputPart.get()->splitX(splitInput2, SIZE_MAX, maxBlockDim, 1);
+ }
+ splitInput.clear();
+ for (TPartList::iterator i_in = splitInput2.begin(); i_in != splitInput2.end(); ++i_in) {
+ boost::shared_ptr<CPart> inputPart = *i_in;
+ inputPart.get()->splitY(splitInput, SIZE_MAX, maxBlockDim, 1);
+ }
+ splitInput2.clear();
+
ASTRA_DEBUG("Input split into %d parts", splitInput.size());
for (TPartList::iterator i_in = splitInput.begin();
@@ -305,37 +356,41 @@ CCompositeGeometryManager::CPart* CCompositeGeometryManager::CVolumePart::reduce
static size_t ceildiv(size_t a, size_t b) {
- return (a + b - 1) / b;
+ return (a + b - 1) / b;
}
-static size_t computeVerticalSplit(size_t maxBlock, int div, size_t sliceCount)
+static size_t computeLinearSplit(size_t maxBlock, int div, size_t sliceCount)
{
- size_t blockSize = maxBlock;
- size_t blockCount = ceildiv(sliceCount, blockSize);
-
- // Increase number of blocks to be divisible by div
- size_t divCount = div * ceildiv(blockCount, div);
-
- // If divCount is above sqrt(number of slices), then
- // we can't guarantee divisibility by div, but let's try anyway
- if (ceildiv(sliceCount, ceildiv(sliceCount, divCount)) % div == 0) {
- blockCount = divCount;
- } else {
- // If divisibility isn't achievable, we may want to optimize
- // differently.
- // TODO: Figure out how to model and optimize this.
- }
+ size_t blockSize = maxBlock;
+ size_t blockCount;
+ if (sliceCount <= blockSize)
+ blockCount = 1;
+ else
+ blockCount = ceildiv(sliceCount, blockSize);
+
+ // Increase number of blocks to be divisible by div
+ size_t divCount = div * ceildiv(blockCount, div);
+
+ // If divCount is above sqrt(number of slices), then
+ // we can't guarantee divisibility by div, but let's try anyway
+ if (ceildiv(sliceCount, ceildiv(sliceCount, divCount)) % div == 0) {
+ blockCount = divCount;
+ } else {
+ // If divisibility isn't achievable, we may want to optimize
+ // differently.
+ // TODO: Figure out how to model and optimize this.
+ }
- // Final adjustment to make blocks more evenly sized
- // (This can't make the blocks larger)
- blockSize = ceildiv(sliceCount, blockCount);
+ // Final adjustment to make blocks more evenly sized
+ // (This can't make the blocks larger)
+ blockSize = ceildiv(sliceCount, blockCount);
- ASTRA_DEBUG("%ld %ld -> %ld * %ld", sliceCount, maxBlock, blockCount, blockSize);
+ ASTRA_DEBUG("%ld %ld -> %ld * %ld", sliceCount, maxBlock, blockCount, blockSize);
- assert(blockSize <= maxBlock);
- assert((divCount * divCount > sliceCount) || (blockCount % div) == 0);
+ assert(blockSize <= maxBlock);
+ assert((divCount * divCount > sliceCount) || (blockCount % div) == 0);
- return blockSize;
+ return blockSize;
}
template<class V, class P>
@@ -391,7 +446,17 @@ SPar3DProjection* getProjectionVectors(const CParallelVecProjectionGeometry3D* p
template<class V>
-static void translateProjectionVectors(V* pProjs, int count, double dv)
+static void translateProjectionVectorsU(V* pProjs, int count, double du)
+{
+ for (int i = 0; i < count; ++i) {
+ pProjs[i].fDetSX += du * pProjs[i].fDetUX;
+ pProjs[i].fDetSY += du * pProjs[i].fDetUY;
+ pProjs[i].fDetSZ += du * pProjs[i].fDetUZ;
+ }
+}
+
+template<class V>
+static void translateProjectionVectorsV(V* pProjs, int count, double dv)
{
for (int i = 0; i < count; ++i) {
pProjs[i].fDetSX += dv * pProjs[i].fDetVX;
@@ -401,8 +466,58 @@ static void translateProjectionVectors(V* pProjs, int count, double dv)
}
+static CProjectionGeometry3D* getSubProjectionGeometryU(const CProjectionGeometry3D* pProjGeom, int u, int size)
+{
+ // First convert to vectors, then translate, then convert into new object
+
+ const CConeProjectionGeometry3D* conegeom = dynamic_cast<const CConeProjectionGeometry3D*>(pProjGeom);
+ const CParallelProjectionGeometry3D* par3dgeom = dynamic_cast<const CParallelProjectionGeometry3D*>(pProjGeom);
+ const CParallelVecProjectionGeometry3D* parvec3dgeom = dynamic_cast<const CParallelVecProjectionGeometry3D*>(pProjGeom);
+ const CConeVecProjectionGeometry3D* conevec3dgeom = dynamic_cast<const CConeVecProjectionGeometry3D*>(pProjGeom);
+
+ if (conegeom || conevec3dgeom) {
+ SConeProjection* pConeProjs;
+ if (conegeom) {
+ pConeProjs = getProjectionVectors<SConeProjection>(conegeom);
+ } else {
+ pConeProjs = getProjectionVectors<SConeProjection>(conevec3dgeom);
+ }
+
+ translateProjectionVectorsU(pConeProjs, pProjGeom->getProjectionCount(), u);
+
+ CProjectionGeometry3D* ret = new CConeVecProjectionGeometry3D(pProjGeom->getProjectionCount(),
+ pProjGeom->getDetectorRowCount(),
+ size,
+ pConeProjs);
-static CProjectionGeometry3D* getSubProjectionGeometry(const CProjectionGeometry3D* pProjGeom, int v, int size)
+
+ delete[] pConeProjs;
+ return ret;
+ } else {
+ assert(par3dgeom || parvec3dgeom);
+ SPar3DProjection* pParProjs;
+ if (par3dgeom) {
+ pParProjs = getProjectionVectors<SPar3DProjection>(par3dgeom);
+ } else {
+ pParProjs = getProjectionVectors<SPar3DProjection>(parvec3dgeom);
+ }
+
+ translateProjectionVectorsU(pParProjs, pProjGeom->getProjectionCount(), u);
+
+ CProjectionGeometry3D* ret = new CParallelVecProjectionGeometry3D(pProjGeom->getProjectionCount(),
+ pProjGeom->getDetectorRowCount(),
+ size,
+ pParProjs);
+
+ delete[] pParProjs;
+ return ret;
+ }
+
+}
+
+
+
+static CProjectionGeometry3D* getSubProjectionGeometryV(const CProjectionGeometry3D* pProjGeom, int v, int size)
{
// First convert to vectors, then translate, then convert into new object
@@ -419,7 +534,7 @@ static CProjectionGeometry3D* getSubProjectionGeometry(const CProjectionGeometry
pConeProjs = getProjectionVectors<SConeProjection>(conevec3dgeom);
}
- translateProjectionVectors(pConeProjs, pProjGeom->getProjectionCount(), v);
+ translateProjectionVectorsV(pConeProjs, pProjGeom->getProjectionCount(), v);
CProjectionGeometry3D* ret = new CConeVecProjectionGeometry3D(pProjGeom->getProjectionCount(),
size,
@@ -438,7 +553,7 @@ static CProjectionGeometry3D* getSubProjectionGeometry(const CProjectionGeometry
pParProjs = getProjectionVectors<SPar3DProjection>(parvec3dgeom);
}
- translateProjectionVectors(pParProjs, pProjGeom->getProjectionCount(), v);
+ translateProjectionVectorsV(pParProjs, pProjGeom->getProjectionCount(), v);
CProjectionGeometry3D* ret = new CParallelVecProjectionGeometry3D(pProjGeom->getProjectionCount(),
size,
@@ -457,17 +572,110 @@ static CProjectionGeometry3D* getSubProjectionGeometry(const CProjectionGeometry
// - each no bigger than maxSize
// - number of sub-parts is divisible by div
// - maybe all approximately the same size?
-CCompositeGeometryManager::TPartList CCompositeGeometryManager::CVolumePart::split(size_t maxSize, int div)
+void CCompositeGeometryManager::CVolumePart::splitX(CCompositeGeometryManager::TPartList& out, size_t maxSize, size_t maxDim, int div)
{
- TPartList ret;
+ if (true) {
+ // Split in vertical direction only at first, until we figure out
+ // a model for splitting in other directions
+
+ size_t sliceSize = ((size_t) pGeom->getGridSliceCount()) * pGeom->getGridRowCount();
+ int sliceCount = pGeom->getGridColCount();
+ size_t m = std::min(maxSize / sliceSize, maxDim);
+ size_t blockSize = computeLinearSplit(m, div, sliceCount);
+
+ int rem = sliceCount % blockSize;
+
+ ASTRA_DEBUG("From %d to %d step %d", -(rem / 2), sliceCount, blockSize);
+
+ for (int x = -(rem / 2); x < sliceCount; x += blockSize) {
+ int newsubX = x;
+ if (newsubX < 0) newsubX = 0;
+ int endX = x + blockSize;
+ if (endX > sliceCount) endX = sliceCount;
+ int size = endX - newsubX;
+
+ CVolumePart *sub = new CVolumePart();
+ sub->subX = this->subX + newsubX;
+ sub->subY = this->subY;
+ sub->subZ = this->subZ;
+
+ ASTRA_DEBUG("VolumePart split %d %d %d -> %p", sub->subX, sub->subY, sub->subZ, (void*)sub);
+
+ double shift = pGeom->getPixelLengthX() * newsubX;
+
+ sub->pData = pData;
+ sub->pGeom = new CVolumeGeometry3D(size,
+ pGeom->getGridRowCount(),
+ pGeom->getGridSliceCount(),
+ pGeom->getWindowMinX() + shift,
+ pGeom->getWindowMinY(),
+ pGeom->getWindowMinZ(),
+ pGeom->getWindowMinX() + shift + size * pGeom->getPixelLengthX(),
+ pGeom->getWindowMaxY(),
+ pGeom->getWindowMaxZ());
+
+ out.push_back(boost::shared_ptr<CPart>(sub));
+ }
+ }
+}
+void CCompositeGeometryManager::CVolumePart::splitY(CCompositeGeometryManager::TPartList& out, size_t maxSize, size_t maxDim, int div)
+{
+ if (true) {
+ // Split in vertical direction only at first, until we figure out
+ // a model for splitting in other directions
+
+ size_t sliceSize = ((size_t) pGeom->getGridColCount()) * pGeom->getGridSliceCount();
+ int sliceCount = pGeom->getGridRowCount();
+ size_t m = std::min(maxSize / sliceSize, maxDim);
+ size_t blockSize = computeLinearSplit(m, div, sliceCount);
+
+ int rem = sliceCount % blockSize;
+
+ ASTRA_DEBUG("From %d to %d step %d", -(rem / 2), sliceCount, blockSize);
+
+ for (int y = -(rem / 2); y < sliceCount; y += blockSize) {
+ int newsubY = y;
+ if (newsubY < 0) newsubY = 0;
+ int endY = y + blockSize;
+ if (endY > sliceCount) endY = sliceCount;
+ int size = endY - newsubY;
+
+ CVolumePart *sub = new CVolumePart();
+ sub->subX = this->subX;
+ sub->subY = this->subY + newsubY;
+ sub->subZ = this->subZ;
+
+ ASTRA_DEBUG("VolumePart split %d %d %d -> %p", sub->subX, sub->subY, sub->subZ, (void*)sub);
+
+ double shift = pGeom->getPixelLengthY() * newsubY;
+
+ sub->pData = pData;
+ sub->pGeom = new CVolumeGeometry3D(pGeom->getGridColCount(),
+ size,
+ pGeom->getGridSliceCount(),
+ pGeom->getWindowMinX(),
+ pGeom->getWindowMinY() + shift,
+ pGeom->getWindowMinZ(),
+ pGeom->getWindowMaxX(),
+ pGeom->getWindowMinY() + shift + size * pGeom->getPixelLengthY(),
+ pGeom->getWindowMaxZ());
+
+ out.push_back(boost::shared_ptr<CPart>(sub));
+ }
+ }
+}
+
+void CCompositeGeometryManager::CVolumePart::splitZ(CCompositeGeometryManager::TPartList& out, size_t maxSize, size_t maxDim, int div)
+{
if (true) {
// Split in vertical direction only at first, until we figure out
// a model for splitting in other directions
size_t sliceSize = ((size_t) pGeom->getGridColCount()) * pGeom->getGridRowCount();
int sliceCount = pGeom->getGridSliceCount();
- size_t blockSize = computeVerticalSplit(maxSize / sliceSize, div, sliceCount);
+ size_t m = std::min(maxSize / sliceSize, maxDim);
+ size_t blockSize = computeLinearSplit(m, div, sliceCount);
int rem = sliceCount % blockSize;
@@ -500,11 +708,9 @@ CCompositeGeometryManager::TPartList CCompositeGeometryManager::CVolumePart::spl
pGeom->getWindowMaxY(),
pGeom->getWindowMinZ() + shift + size * pGeom->getPixelLengthZ());
- ret.push_back(boost::shared_ptr<CPart>(sub));
+ out.push_back(boost::shared_ptr<CPart>(sub));
}
}
-
- return ret;
}
CCompositeGeometryManager::CVolumePart* CCompositeGeometryManager::CVolumePart::clone() const
@@ -611,7 +817,7 @@ CCompositeGeometryManager::CPart* CCompositeGeometryManager::CProjectionPart::re
if (_vmin == _vmax) {
sub->pGeom = 0;
} else {
- sub->pGeom = getSubProjectionGeometry(pGeom, _vmin, _vmax - _vmin);
+ sub->pGeom = getSubProjectionGeometryV(pGeom, _vmin, _vmax - _vmin);
}
ASTRA_DEBUG("Reduce projection from %d - %d to %d - %d", this->subZ, this->subZ + pGeom->getDetectorRowCount(), this->subZ + _vmin, this->subZ + _vmax);
@@ -620,17 +826,58 @@ CCompositeGeometryManager::CPart* CCompositeGeometryManager::CProjectionPart::re
}
-CCompositeGeometryManager::TPartList CCompositeGeometryManager::CProjectionPart::split(size_t maxSize, int div)
+void CCompositeGeometryManager::CProjectionPart::splitX(CCompositeGeometryManager::TPartList &out, size_t maxSize, size_t maxDim, int div)
{
- TPartList ret;
+ if (true) {
+ // Split in vertical direction only at first, until we figure out
+ // a model for splitting in other directions
+ size_t sliceSize = ((size_t) pGeom->getDetectorRowCount()) * pGeom->getProjectionCount();
+ int sliceCount = pGeom->getDetectorColCount();
+ size_t m = std::min(maxSize / sliceSize, maxDim);
+ size_t blockSize = computeLinearSplit(m, div, sliceCount);
+
+ int rem = sliceCount % blockSize;
+
+ for (int x = -(rem / 2); x < sliceCount; x += blockSize) {
+ int newsubX = x;
+ if (newsubX < 0) newsubX = 0;
+ int endX = x + blockSize;
+ if (endX > sliceCount) endX = sliceCount;
+ int size = endX - newsubX;
+
+ CProjectionPart *sub = new CProjectionPart();
+ sub->subX = this->subX + newsubX;
+ sub->subY = this->subY;
+ sub->subZ = this->subZ;
+
+ ASTRA_DEBUG("ProjectionPart split %d %d %d -> %p", sub->subX, sub->subY, sub->subZ, (void*)sub);
+
+ sub->pData = pData;
+
+ sub->pGeom = getSubProjectionGeometryU(pGeom, newsubX, size);
+
+ out.push_back(boost::shared_ptr<CPart>(sub));
+ }
+ }
+}
+
+void CCompositeGeometryManager::CProjectionPart::splitY(CCompositeGeometryManager::TPartList &out, size_t maxSize, size_t maxDim, int div)
+{
+ // TODO
+ out.push_back(boost::shared_ptr<CPart>(clone()));
+}
+
+void CCompositeGeometryManager::CProjectionPart::splitZ(CCompositeGeometryManager::TPartList &out, size_t maxSize, size_t maxDim, int div)
+{
if (true) {
// Split in vertical direction only at first, until we figure out
// a model for splitting in other directions
size_t sliceSize = ((size_t) pGeom->getDetectorColCount()) * pGeom->getProjectionCount();
int sliceCount = pGeom->getDetectorRowCount();
- size_t blockSize = computeVerticalSplit(maxSize / sliceSize, div, sliceCount);
+ size_t m = std::min(maxSize / sliceSize, maxDim);
+ size_t blockSize = computeLinearSplit(m, div, sliceCount);
int rem = sliceCount % blockSize;
@@ -650,14 +897,12 @@ CCompositeGeometryManager::TPartList CCompositeGeometryManager::CProjectionPart:
sub->pData = pData;
- sub->pGeom = getSubProjectionGeometry(pGeom, newsubZ, size);
+ sub->pGeom = getSubProjectionGeometryV(pGeom, newsubZ, size);
- ret.push_back(boost::shared_ptr<CPart>(sub));
+ out.push_back(boost::shared_ptr<CPart>(sub));
}
}
- return ret;
-
}
CCompositeGeometryManager::CProjectionPart* CCompositeGeometryManager::CProjectionPart::clone() const
@@ -665,13 +910,12 @@ CCompositeGeometryManager::CProjectionPart* CCompositeGeometryManager::CProjecti
return new CProjectionPart(*this);
}
-
-bool CCompositeGeometryManager::doFP(CProjector3D *pProjector, CFloat32VolumeData3DMemory *pVolData,
- CFloat32ProjectionData3DMemory *pProjData)
+CCompositeGeometryManager::SJob CCompositeGeometryManager::createJobFP(CProjector3D *pProjector,
+ CFloat32VolumeData3DMemory *pVolData,
+ CFloat32ProjectionData3DMemory *pProjData)
{
- ASTRA_DEBUG("CCompositeGeometryManager::doFP");
+ ASTRA_DEBUG("CCompositeGeometryManager::createJobFP");
// Create single job for FP
- // Run result
CVolumePart *input = new CVolumePart();
input->pData = pVolData;
@@ -696,18 +940,15 @@ bool CCompositeGeometryManager::doFP(CProjector3D *pProjector, CFloat32VolumeDat
FP.eType = SJob::JOB_FP;
FP.eMode = SJob::MODE_SET;
- TJobList L;
- L.push_back(FP);
-
- return doJobs(L);
+ return FP;
}
-bool CCompositeGeometryManager::doBP(CProjector3D *pProjector, CFloat32VolumeData3DMemory *pVolData,
- CFloat32ProjectionData3DMemory *pProjData)
+CCompositeGeometryManager::SJob CCompositeGeometryManager::createJobBP(CProjector3D *pProjector,
+ CFloat32VolumeData3DMemory *pVolData,
+ CFloat32ProjectionData3DMemory *pProjData)
{
- ASTRA_DEBUG("CCompositeGeometryManager::doBP");
+ ASTRA_DEBUG("CCompositeGeometryManager::createJobBP");
// Create single job for BP
- // Run result
CProjectionPart *input = new CProjectionPart();
input->pData = pProjData;
@@ -730,8 +971,23 @@ bool CCompositeGeometryManager::doBP(CProjector3D *pProjector, CFloat32VolumeDat
BP.eType = SJob::JOB_BP;
BP.eMode = SJob::MODE_SET;
+ return BP;
+}
+
+bool CCompositeGeometryManager::doFP(CProjector3D *pProjector, CFloat32VolumeData3DMemory *pVolData,
+ CFloat32ProjectionData3DMemory *pProjData)
+{
+ TJobList L;
+ L.push_back(createJobFP(pProjector, pVolData, pProjData));
+
+ return doJobs(L);
+}
+
+bool CCompositeGeometryManager::doBP(CProjector3D *pProjector, CFloat32VolumeData3DMemory *pVolData,
+ CFloat32ProjectionData3DMemory *pProjData)
+{
TJobList L;
- L.push_back(BP);
+ L.push_back(createJobBP(pProjector, pVolData, pProjData));
return doJobs(L);
}
@@ -848,6 +1104,260 @@ bool CCompositeGeometryManager::doBP(CProjector3D *pProjector, const std::vector
+static bool doJob(const CCompositeGeometryManager::TJobSet::const_iterator& iter)
+{
+ CCompositeGeometryManager::CPart* output = iter->first;
+ const CCompositeGeometryManager::TJobList& L = iter->second;
+
+ assert(!L.empty());
+
+ bool zero = L.begin()->eMode == CCompositeGeometryManager::SJob::MODE_SET;
+
+ size_t outx, outy, outz;
+ output->getDims(outx, outy, outz);
+
+ if (L.begin()->eType == CCompositeGeometryManager::SJob::JOB_NOP) {
+ // just zero output?
+ if (zero) {
+ for (size_t z = 0; z < outz; ++z) {
+ for (size_t y = 0; y < outy; ++y) {
+ float* ptr = output->pData->getData();
+ ptr += (z + output->subX) * (size_t)output->pData->getHeight() * (size_t)output->pData->getWidth();
+ ptr += (y + output->subY) * (size_t)output->pData->getWidth();
+ ptr += output->subX;
+ memset(ptr, 0, sizeof(float) * outx);
+ }
+ }
+ }
+ return true;
+ }
+
+
+ astraCUDA3d::SSubDimensions3D dstdims;
+ dstdims.nx = output->pData->getWidth();
+ dstdims.pitch = dstdims.nx;
+ dstdims.ny = output->pData->getHeight();
+ dstdims.nz = output->pData->getDepth();
+ dstdims.subnx = outx;
+ dstdims.subny = outy;
+ dstdims.subnz = outz;
+ ASTRA_DEBUG("dstdims: %d,%d,%d in %d,%d,%d", dstdims.subnx, dstdims.subny, dstdims.subnz, dstdims.nx, dstdims.ny, dstdims.nz);
+ dstdims.subx = output->subX;
+ dstdims.suby = output->subY;
+ dstdims.subz = output->subZ;
+ float *dst = output->pData->getData();
+
+ astraCUDA3d::MemHandle3D outputMem = astraCUDA3d::allocateGPUMemory(outx, outy, outz, zero ? astraCUDA3d::INIT_ZERO : astraCUDA3d::INIT_NO);
+ bool ok = outputMem;
+
+ for (CCompositeGeometryManager::TJobList::const_iterator i = L.begin(); i != L.end(); ++i) {
+ const CCompositeGeometryManager::SJob &j = *i;
+
+ assert(j.pInput);
+
+ CCudaProjector3D *projector = dynamic_cast<CCudaProjector3D*>(j.pProjector);
+ Cuda3DProjectionKernel projKernel = ker3d_default;
+ int detectorSuperSampling = 1;
+ int voxelSuperSampling = 1;
+ if (projector) {
+ projKernel = projector->getProjectionKernel();
+ detectorSuperSampling = projector->getDetectorSuperSampling();
+ voxelSuperSampling = projector->getVoxelSuperSampling();
+ }
+
+ size_t inx, iny, inz;
+ j.pInput->getDims(inx, iny, inz);
+ astraCUDA3d::MemHandle3D inputMem = astraCUDA3d::allocateGPUMemory(inx, iny, inz, astraCUDA3d::INIT_NO);
+
+ astraCUDA3d::SSubDimensions3D srcdims;
+ srcdims.nx = j.pInput->pData->getWidth();
+ srcdims.pitch = srcdims.nx;
+ srcdims.ny = j.pInput->pData->getHeight();
+ srcdims.nz = j.pInput->pData->getDepth();
+ srcdims.subnx = inx;
+ srcdims.subny = iny;
+ srcdims.subnz = inz;
+ srcdims.subx = j.pInput->subX;
+ srcdims.suby = j.pInput->subY;
+ srcdims.subz = j.pInput->subZ;
+ const float *src = j.pInput->pData->getDataConst();
+
+ ok = astraCUDA3d::copyToGPUMemory(src, inputMem, srcdims);
+ if (!ok) ASTRA_ERROR("Error copying input data to GPU");
+
+ if (j.eType == CCompositeGeometryManager::SJob::JOB_FP) {
+ assert(dynamic_cast<CCompositeGeometryManager::CVolumePart*>(j.pInput.get()));
+ assert(dynamic_cast<CCompositeGeometryManager::CProjectionPart*>(j.pOutput.get()));
+
+ ASTRA_DEBUG("CCompositeGeometryManager::doJobs: doing FP");
+
+ ok = astraCUDA3d::FP(((CCompositeGeometryManager::CProjectionPart*)j.pOutput.get())->pGeom, outputMem, ((CCompositeGeometryManager::CVolumePart*)j.pInput.get())->pGeom, inputMem, detectorSuperSampling, projKernel);
+ if (!ok) ASTRA_ERROR("Error performing sub-FP");
+ ASTRA_DEBUG("CCompositeGeometryManager::doJobs: FP done");
+ } else if (j.eType == CCompositeGeometryManager::SJob::JOB_BP) {
+ assert(dynamic_cast<CCompositeGeometryManager::CVolumePart*>(j.pOutput.get()));
+ assert(dynamic_cast<CCompositeGeometryManager::CProjectionPart*>(j.pInput.get()));
+
+ ASTRA_DEBUG("CCompositeGeometryManager::doJobs: doing BP");
+
+ ok = astraCUDA3d::BP(((CCompositeGeometryManager::CProjectionPart*)j.pInput.get())->pGeom, inputMem, ((CCompositeGeometryManager::CVolumePart*)j.pOutput.get())->pGeom, outputMem, voxelSuperSampling);
+ if (!ok) ASTRA_ERROR("Error performing sub-BP");
+ ASTRA_DEBUG("CCompositeGeometryManager::doJobs: BP done");
+ } else {
+ assert(false);
+ }
+
+ ok = astraCUDA3d::freeGPUMemory(inputMem);
+ if (!ok) ASTRA_ERROR("Error freeing GPU memory");
+
+ }
+
+ ok = astraCUDA3d::copyFromGPUMemory(dst, outputMem, dstdims);
+ if (!ok) ASTRA_ERROR("Error copying output data from GPU");
+
+ ok = astraCUDA3d::freeGPUMemory(outputMem);
+ if (!ok) ASTRA_ERROR("Error freeing GPU memory");
+
+ return true;
+}
+
+
+class WorkQueue {
+public:
+ WorkQueue(CCompositeGeometryManager::TJobSet &_jobs) : m_jobs(_jobs) {
+#ifdef USE_PTHREADS
+ pthread_mutex_init(&m_mutex, 0);
+#endif
+ m_iter = m_jobs.begin();
+ }
+ bool receive(CCompositeGeometryManager::TJobSet::const_iterator &i) {
+ lock();
+
+ if (m_iter == m_jobs.end()) {
+ unlock();
+ return false;
+ }
+
+ i = m_iter++;
+
+ unlock();
+
+ return true;
+ }
+#ifdef USE_PTHREADS
+ void lock() {
+ // TODO: check mutex op return values
+ pthread_mutex_lock(&m_mutex);
+ }
+ void unlock() {
+ // TODO: check mutex op return values
+ pthread_mutex_unlock(&m_mutex);
+ }
+#else
+ void lock() {
+ m_mutex.lock();
+ }
+ void unlock() {
+ m_mutex.unlock();
+ }
+#endif
+
+private:
+ CCompositeGeometryManager::TJobSet &m_jobs;
+ CCompositeGeometryManager::TJobSet::const_iterator m_iter;
+#ifdef USE_PTHREADS
+ pthread_mutex_t m_mutex;
+#else
+ boost::mutex m_mutex;
+#endif
+};
+
+struct WorkThreadInfo {
+ WorkQueue* m_queue;
+ unsigned int m_iGPU;
+};
+
+#ifndef USE_PTHREADS
+
+void runEntries_boost(WorkThreadInfo* info)
+{
+ ASTRA_DEBUG("Launching thread on GPU %d\n", info->m_iGPU);
+ CCompositeGeometryManager::TJobSet::const_iterator i;
+ while (info->m_queue->receive(i)) {
+ ASTRA_DEBUG("Running block on GPU %d\n", info->m_iGPU);
+ astraCUDA3d::setGPUIndex(info->m_iGPU);
+ boost::this_thread::interruption_point();
+ doJob(i);
+ boost::this_thread::interruption_point();
+ }
+ ASTRA_DEBUG("Finishing thread on GPU %d\n", info->m_iGPU);
+}
+
+
+#else
+
+void* runEntries_pthreads(void* data) {
+ WorkThreadInfo* info = (WorkThreadInfo*)data;
+
+ ASTRA_DEBUG("Launching thread on GPU %d\n", info->m_iGPU);
+
+ CCompositeGeometryManager::TJobSet::const_iterator i;
+
+ while (info->m_queue->receive(i)) {
+ ASTRA_DEBUG("Running block on GPU %d\n", info->m_iGPU);
+ astraCUDA3d::setGPUIndex(info->m_iGPU);
+ pthread_testcancel();
+ doJob(i);
+ pthread_testcancel();
+ }
+ ASTRA_DEBUG("Finishing thread on GPU %d\n", info->m_iGPU);
+
+ return 0;
+}
+
+#endif
+
+
+void runWorkQueue(WorkQueue &queue, const std::vector<int> & iGPUIndices) {
+ int iThreadCount = iGPUIndices.size();
+
+ std::vector<WorkThreadInfo> infos;
+#ifdef USE_PTHREADS
+ std::vector<pthread_t> threads;
+#else
+ std::vector<boost::thread*> threads;
+#endif
+ infos.resize(iThreadCount);
+ threads.resize(iThreadCount);
+
+ for (int i = 0; i < iThreadCount; ++i) {
+ infos[i].m_queue = &queue;
+ infos[i].m_iGPU = iGPUIndices[i];
+#ifdef USE_PTHREADS
+ pthread_create(&threads[i], 0, runEntries_pthreads, (void*)&infos[i]);
+#else
+ threads[i] = new boost::thread(runEntries_boost, &infos[i]);
+#endif
+ }
+
+ // Wait for them to finish
+ for (int i = 0; i < iThreadCount; ++i) {
+#ifdef USE_PTHREADS
+ pthread_join(threads[i], 0);
+#else
+ threads[i]->join();
+ delete threads[i];
+ threads[i] = 0;
+#endif
+ }
+}
+
+
+void CCompositeGeometryManager::setGPUIndices(const std::vector<int>& GPUIndices)
+{
+ m_GPUIndices = GPUIndices;
+}
+
bool CCompositeGeometryManager::doJobs(TJobList &jobs)
{
ASTRA_DEBUG("CCompositeGeometryManager::doJobs");
@@ -859,140 +1369,53 @@ bool CCompositeGeometryManager::doJobs(TJobList &jobs)
jobset[i->pOutput.get()].push_back(*i);
}
- size_t maxSize = astraCUDA3d::availableGPUMemory();
+ size_t maxSize = m_iMaxSize;
if (maxSize == 0) {
- ASTRA_WARN("Unable to get available GPU memory. Defaulting to 1GB.");
- maxSize = 1024 * 1024 * 1024;
+ // Get memory from first GPU. Not optimal...
+ if (!m_GPUIndices.empty())
+ astraCUDA3d::setGPUIndex(m_GPUIndices[0]);
+ maxSize = astraCUDA3d::availableGPUMemory();
+ if (maxSize == 0) {
+ ASTRA_WARN("Unable to get available GPU memory. Defaulting to 1GB.");
+ maxSize = 1024 * 1024 * 1024;
+ } else {
+ ASTRA_DEBUG("Detected %lu bytes of GPU memory", maxSize);
+ }
} else {
- ASTRA_DEBUG("Detected %lu bytes of GPU memory", maxSize);
+ ASTRA_DEBUG("Set to %lu bytes of GPU memory", maxSize);
}
maxSize = (maxSize * 9) / 10;
maxSize /= sizeof(float);
int div = 1;
-
- // TODO: Multi-GPU support
+ if (!m_GPUIndices.empty())
+ div = m_GPUIndices.size();
// Split jobs to fit
TJobSet split;
splitJobs(jobset, maxSize, div, split);
jobset.clear();
- // Run jobs
-
- for (TJobSet::iterator iter = split.begin(); iter != split.end(); ++iter) {
-
- CPart* output = iter->first;
- TJobList& L = iter->second;
+ if (m_GPUIndices.size() <= 1) {
- assert(!L.empty());
+ // Run jobs
+ ASTRA_DEBUG("Running single-threaded");
- bool zero = L.begin()->eMode == SJob::MODE_SET;
+ if (!m_GPUIndices.empty())
+ astraCUDA3d::setGPUIndex(m_GPUIndices[0]);
- size_t outx, outy, outz;
- output->getDims(outx, outy, outz);
-
- if (L.begin()->eType == SJob::JOB_NOP) {
- // just zero output?
- if (zero) {
- for (size_t z = 0; z < outz; ++z) {
- for (size_t y = 0; y < outy; ++y) {
- float* ptr = output->pData->getData();
- ptr += (z + output->subX) * (size_t)output->pData->getHeight() * (size_t)output->pData->getWidth();
- ptr += (y + output->subY) * (size_t)output->pData->getWidth();
- ptr += output->subX;
- memset(ptr, 0, sizeof(float) * outx);
- }
- }
- }
- continue;
+ for (TJobSet::const_iterator iter = split.begin(); iter != split.end(); ++iter) {
+ doJob(iter);
}
+ } else {
- astraCUDA3d::SSubDimensions3D dstdims;
- dstdims.nx = output->pData->getWidth();
- dstdims.pitch = dstdims.nx;
- dstdims.ny = output->pData->getHeight();
- dstdims.nz = output->pData->getDepth();
- dstdims.subnx = outx;
- dstdims.subny = outy;
- dstdims.subnz = outz;
- ASTRA_DEBUG("dstdims: %d,%d,%d in %d,%d,%d", dstdims.subnx, dstdims.subny, dstdims.subnz, dstdims.nx, dstdims.ny, dstdims.nz);
- dstdims.subx = output->subX;
- dstdims.suby = output->subY;
- dstdims.subz = output->subZ;
- float *dst = output->pData->getData();
-
- astraCUDA3d::MemHandle3D outputMem = astraCUDA3d::allocateGPUMemory(outx, outy, outz, zero ? astraCUDA3d::INIT_ZERO : astraCUDA3d::INIT_NO);
- bool ok = outputMem;
-
- for (TJobList::iterator i = L.begin(); i != L.end(); ++i) {
- SJob &j = *i;
-
- assert(j.pInput);
-
- CCudaProjector3D *projector = dynamic_cast<CCudaProjector3D*>(j.pProjector);
- Cuda3DProjectionKernel projKernel = ker3d_default;
- int detectorSuperSampling = 1;
- int voxelSuperSampling = 1;
- if (projector) {
- projKernel = projector->getProjectionKernel();
- detectorSuperSampling = projector->getDetectorSuperSampling();
- voxelSuperSampling = projector->getVoxelSuperSampling();
- }
-
- size_t inx, iny, inz;
- j.pInput->getDims(inx, iny, inz);
- astraCUDA3d::MemHandle3D inputMem = astraCUDA3d::allocateGPUMemory(inx, iny, inz, astraCUDA3d::INIT_NO);
-
- astraCUDA3d::SSubDimensions3D srcdims;
- srcdims.nx = j.pInput->pData->getWidth();
- srcdims.pitch = srcdims.nx;
- srcdims.ny = j.pInput->pData->getHeight();
- srcdims.nz = j.pInput->pData->getDepth();
- srcdims.subnx = inx;
- srcdims.subny = iny;
- srcdims.subnz = inz;
- srcdims.subx = j.pInput->subX;
- srcdims.suby = j.pInput->subY;
- srcdims.subz = j.pInput->subZ;
- const float *src = j.pInput->pData->getDataConst();
-
- ok = astraCUDA3d::copyToGPUMemory(src, inputMem, srcdims);
- if (!ok) ASTRA_ERROR("Error copying input data to GPU");
-
- if (j.eType == SJob::JOB_FP) {
- assert(dynamic_cast<CVolumePart*>(j.pInput.get()));
- assert(dynamic_cast<CProjectionPart*>(j.pOutput.get()));
-
- ASTRA_DEBUG("CCompositeGeometryManager::doJobs: doing FP");
-
- ok = astraCUDA3d::FP(((CProjectionPart*)j.pOutput.get())->pGeom, outputMem, ((CVolumePart*)j.pInput.get())->pGeom, inputMem, detectorSuperSampling, projKernel);
- if (!ok) ASTRA_ERROR("Error performing sub-FP");
- ASTRA_DEBUG("CCompositeGeometryManager::doJobs: FP done");
- } else if (j.eType == SJob::JOB_BP) {
- assert(dynamic_cast<CVolumePart*>(j.pOutput.get()));
- assert(dynamic_cast<CProjectionPart*>(j.pInput.get()));
-
- ASTRA_DEBUG("CCompositeGeometryManager::doJobs: doing BP");
-
- ok = astraCUDA3d::BP(((CProjectionPart*)j.pInput.get())->pGeom, inputMem, ((CVolumePart*)j.pOutput.get())->pGeom, outputMem, voxelSuperSampling);
- if (!ok) ASTRA_ERROR("Error performing sub-BP");
- ASTRA_DEBUG("CCompositeGeometryManager::doJobs: BP done");
- } else {
- assert(false);
- }
+ ASTRA_DEBUG("Running multi-threaded");
- ok = astraCUDA3d::freeGPUMemory(inputMem);
- if (!ok) ASTRA_ERROR("Error freeing GPU memory");
+ WorkQueue wq(split);
- }
+ runWorkQueue(wq, m_GPUIndices);
- ok = astraCUDA3d::copyFromGPUMemory(dst, outputMem, dstdims);
- if (!ok) ASTRA_ERROR("Error copying output data from GPU");
-
- ok = astraCUDA3d::freeGPUMemory(outputMem);
- if (!ok) ASTRA_ERROR("Error freeing GPU memory");
}
return true;
@@ -1000,6 +1423,26 @@ bool CCompositeGeometryManager::doJobs(TJobList &jobs)
+
+//static
+void CCompositeGeometryManager::setGlobalGPUParams(const SGPUParams& params)
+{
+ delete s_params;
+
+ s_params = new SGPUParams;
+ *s_params = params;
+
+ ASTRA_DEBUG("CompositeGeometryManager: Setting global GPU params:");
+ std::ostringstream s;
+ s << "GPU indices:";
+ for (unsigned int i = 0; i < params.GPUIndices.size(); ++i)
+ s << " " << params.GPUIndices[i];
+ std::string ss = s.str();
+ ASTRA_DEBUG(ss.c_str());
+ ASTRA_DEBUG("Memory: %llu", params.memory);
+}
+
+
}
#endif
diff --git a/src/ConeProjectionGeometry3D.cpp b/src/ConeProjectionGeometry3D.cpp
index 99b4bf4..96b04fb 100644
--- a/src/ConeProjectionGeometry3D.cpp
+++ b/src/ConeProjectionGeometry3D.cpp
@@ -256,9 +256,6 @@ void CConeProjectionGeometry3D::projectPoint(double fX, double fY, double fZ,
// Scale fS to detector plane
fU = detectorOffsetXToColIndexFloat( (fS * (m_fOriginSourceDistance + m_fOriginDetectorDistance)) / fD );
-
- ASTRA_DEBUG("alpha: %f, D: %f, V: %f, S: %f, U: %f", alpha, fD, fV, fS, fU);
-
}
void CConeProjectionGeometry3D::backprojectPointX(int iAngleIndex, double fU, double fV,