diff options
-rw-r--r-- | build/msvc/gen.py | 7 | ||||
-rw-r--r-- | cuda/3d/mem3d.cu | 19 | ||||
-rw-r--r-- | cuda/3d/mem3d.h | 3 | ||||
-rw-r--r-- | include/astra/CompositeGeometryManager.h | 35 | ||||
-rw-r--r-- | matlab/mex/astra_mex_c.cpp | 45 | ||||
-rw-r--r-- | python/astra/__init__.py | 8 | ||||
-rw-r--r-- | python/astra/astra.py | 4 | ||||
-rw-r--r-- | python/astra/astra_c.pyx | 21 | ||||
-rw-r--r-- | samples/matlab/s020_3d_multiGPU.m | 38 | ||||
-rw-r--r-- | samples/python/s019_experimental_multires.py (renamed from samples/python/s018_experimental_multires.py) | 0 | ||||
-rw-r--r-- | samples/python/s020_3d_multiGPU.py | 57 | ||||
-rw-r--r-- | src/CompositeGeometryManager.cpp | 787 | ||||
-rw-r--r-- | src/ConeProjectionGeometry3D.cpp | 3 |
13 files changed, 834 insertions, 193 deletions
diff --git a/build/msvc/gen.py b/build/msvc/gen.py index 999710f..cc69a62 100644 --- a/build/msvc/gen.py +++ b/build/msvc/gen.py @@ -1,6 +1,8 @@ from __future__ import print_function import sys import os +import codecs +import six vcppguid = "8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942" # C++ project siguid = "2150E333-8FDC-42A3-9474-1A3956D46DE8" # project group @@ -428,7 +430,10 @@ P_astra["files"].sort() projects = [ P_astra, F_astra_mex, P0, P1, P2, P3, P4, P5, P6, P7, P8 ] -bom = "\xef\xbb\xbf" +if six.PY2: + bom = "\xef\xbb\xbf" +else: + bom = codecs.BOM_UTF8.decode("utf-8") class Configuration: def __init__(self, debug, cuda, x64): diff --git a/cuda/3d/mem3d.cu b/cuda/3d/mem3d.cu index 6d81dc0..0320117 100644 --- a/cuda/3d/mem3d.cu +++ b/cuda/3d/mem3d.cu @@ -62,6 +62,25 @@ size_t availableGPUMemory() return free; } +int maxBlockDimension() +{ + int dev; + cudaError_t err = cudaGetDevice(&dev); + if (err != cudaSuccess) { + ASTRA_WARN("Error querying device"); + return 0; + } + + cudaDeviceProp props; + err = cudaGetDeviceProperties(&props, dev); + if (err != cudaSuccess) { + ASTRA_WARN("Error querying device %d properties", dev); + return 0; + } + + return std::min(props.maxTexture3D[0], std::min(props.maxTexture3D[1], props.maxTexture3D[2])); +} + MemHandle3D allocateGPUMemory(unsigned int x, unsigned int y, unsigned int z, Mem3DZeroMode zero) { SMemHandle3D_internal hnd; diff --git a/cuda/3d/mem3d.h b/cuda/3d/mem3d.h index 82bad19..6fff80b 100644 --- a/cuda/3d/mem3d.h +++ b/cuda/3d/mem3d.h @@ -78,6 +78,7 @@ enum Mem3DZeroMode { }; size_t availableGPUMemory(); +int maxBlockDimension(); MemHandle3D allocateGPUMemory(unsigned int x, unsigned int y, unsigned int z, Mem3DZeroMode zero); @@ -87,6 +88,8 @@ bool copyFromGPUMemory(float *dst, MemHandle3D src, const SSubDimensions3D &pos) bool freeGPUMemory(MemHandle3D handle); +bool setGPUIndex(int index); + bool FP(const astra::CProjectionGeometry3D* pProjGeom, MemHandle3D projData, const astra::CVolumeGeometry3D* pVolGeom, MemHandle3D volData, int iDetectorSuperSampling, astra::Cuda3DProjectionKernel projKernel); diff --git a/include/astra/CompositeGeometryManager.h b/include/astra/CompositeGeometryManager.h index 6610151..18dd72f 100644 --- a/include/astra/CompositeGeometryManager.h +++ b/include/astra/CompositeGeometryManager.h @@ -50,9 +50,16 @@ class CProjectionGeometry3D; class CProjector3D; +struct SGPUParams { + std::vector<int> GPUIndices; + size_t memory; +}; + class _AstraExport CCompositeGeometryManager { public: + CCompositeGeometryManager(); + class CPart; typedef std::list<boost::shared_ptr<CPart> > TPartList; class CPart { @@ -72,7 +79,9 @@ public: bool uploadToGPU(); bool downloadFromGPU(/*mode?*/); - virtual TPartList split(size_t maxSize, int div) = 0; + virtual void splitX(TPartList& out, size_t maxSize, size_t maxDim, int div) = 0; + virtual void splitY(TPartList& out, size_t maxSize, size_t maxDim, int div) = 0; + virtual void splitZ(TPartList& out, size_t maxSize, size_t maxDim, int div) = 0; virtual CPart* reduce(const CPart *other) = 0; virtual void getDims(size_t &x, size_t &y, size_t &z) = 0; size_t getSize(); @@ -86,7 +95,9 @@ public: CVolumeGeometry3D* pGeom; - virtual TPartList split(size_t maxSize, int div); + virtual void splitX(TPartList& out, size_t maxSize, size_t maxDim, int div); + virtual void splitY(TPartList& out, size_t maxSize, size_t maxDim, int div); + virtual void splitZ(TPartList& out, size_t maxSize, size_t maxDim, int div); virtual CPart* reduce(const CPart *other); virtual void getDims(size_t &x, size_t &y, size_t &z); @@ -100,7 +111,9 @@ public: CProjectionGeometry3D* pGeom; - virtual TPartList split(size_t maxSize, int div); + virtual void splitX(TPartList& out, size_t maxSize, size_t maxDim, int div); + virtual void splitY(TPartList& out, size_t maxSize, size_t maxDim, int div); + virtual void splitZ(TPartList& out, size_t maxSize, size_t maxDim, int div); virtual CPart* reduce(const CPart *other); virtual void getDims(size_t &x, size_t &y, size_t &z); @@ -130,6 +143,13 @@ public: bool doJobs(TJobList &jobs); + SJob createJobFP(CProjector3D *pProjector, + CFloat32VolumeData3DMemory *pVolData, + CFloat32ProjectionData3DMemory *pProjData); + SJob createJobBP(CProjector3D *pProjector, + CFloat32VolumeData3DMemory *pVolData, + CFloat32ProjectionData3DMemory *pProjData); + // Convenience functions for creating and running a single FP or BP job bool doFP(CProjector3D *pProjector, CFloat32VolumeData3DMemory *pVolData, CFloat32ProjectionData3DMemory *pProjData); @@ -139,10 +159,19 @@ public: bool doFP(CProjector3D *pProjector, const std::vector<CFloat32VolumeData3DMemory *>& volData, const std::vector<CFloat32ProjectionData3DMemory *>& projData); bool doBP(CProjector3D *pProjector, const std::vector<CFloat32VolumeData3DMemory *>& volData, const std::vector<CFloat32ProjectionData3DMemory *>& projData); + void setGPUIndices(const std::vector<int>& GPUIndices); + + static void setGlobalGPUParams(const SGPUParams& params); + protected: bool splitJobs(TJobSet &jobs, size_t maxSize, int div, TJobSet &split); + std::vector<int> m_GPUIndices; + size_t m_iMaxSize; + + + static SGPUParams* s_params; }; } diff --git a/matlab/mex/astra_mex_c.cpp b/matlab/mex/astra_mex_c.cpp index d34334c..fdf4f33 100644 --- a/matlab/mex/astra_mex_c.cpp +++ b/matlab/mex/astra_mex_c.cpp @@ -38,6 +38,7 @@ $Id$ #include "astra/Globals.h" #ifdef ASTRA_CUDA #include "../cuda/2d/darthelper.h" +#include "astra/CompositeGeometryManager.h" #endif using namespace std; using namespace astra; @@ -83,12 +84,46 @@ void astra_mex_use_cuda(int nlhs, mxArray* plhs[], int nrhs, const mxArray* prhs * Set active GPU */ void astra_mex_set_gpu_index(int nlhs, mxArray* plhs[], int nrhs, const mxArray* prhs[]) -{ +{ #ifdef ASTRA_CUDA - if (nrhs >= 2) { - bool ret = astraCUDA::setGPUIndex((int)mxGetScalar(prhs[1])); - if (!ret) - mexPrintf("Failed to set GPU %d\n", (int)mxGetScalar(prhs[1])); + bool usage = false; + if (nrhs != 2 && nrhs != 4) { + usage = true; + } + + astra::SGPUParams params; + params.memory = 0; + + if (!usage && nrhs >= 4) { + std::string s = mexToString(prhs[2]); + if (s != "memory") { + usage = true; + } else { + params.memory = (size_t)mxGetScalar(prhs[3]); + } + } + + if (!usage && nrhs >= 2) { + int n = mxGetN(prhs[1]) * mxGetM(prhs[1]); + params.GPUIndices.resize(n); + double* pdMatlabData = mxGetPr(prhs[1]); + for (int i = 0; i < n; ++i) + params.GPUIndices[i] = (int)pdMatlabData[i]; + + + astra::CCompositeGeometryManager::setGlobalGPUParams(params); + + + // Set first GPU + if (n >= 1) { + bool ret = astraCUDA::setGPUIndex((int)pdMatlabData[0]); + if (!ret) + mexPrintf("Failed to set GPU %d\n", (int)pdMatlabData[0]); + } + } + + if (usage) { + mexPrintf("Usage: astra_mex('set_gpu_index', index/indices [, 'memory', memory])"); } #endif } diff --git a/python/astra/__init__.py b/python/astra/__init__.py index 10ed74d..515d9a2 100644 --- a/python/astra/__init__.py +++ b/python/astra/__init__.py @@ -39,7 +39,7 @@ from . import log from .optomo import OpTomo import os -try: - astra.set_gpu_index(int(os.environ['ASTRA_GPU_INDEX'])) -except KeyError: - pass + +if 'ASTRA_GPU_INDEX' in os.environ: + L = [ int(x) for x in os.environ['ASTRA_GPU_INDEX'].split(',') ] + astra.set_gpu_index(L) diff --git a/python/astra/astra.py b/python/astra/astra.py index 26b1ff0..9328b6b 100644 --- a/python/astra/astra.py +++ b/python/astra/astra.py @@ -49,10 +49,10 @@ def version(printToScreen=False): """ return a.version(printToScreen) -def set_gpu_index(idx): +def set_gpu_index(idx, memory=0): """Set default GPU index to use. :param idx: GPU index :type idx: :class:`int` """ - a.set_gpu_index(idx) + a.set_gpu_index(idx, memory) diff --git a/python/astra/astra_c.pyx b/python/astra/astra_c.pyx index 5075fed..65192b5 100644 --- a/python/astra/astra_c.pyx +++ b/python/astra/astra_c.pyx @@ -31,6 +31,7 @@ import six from .utils import wrap_from_bytes from libcpp.string cimport string +from libcpp.vector cimport vector from libcpp cimport bool cdef extern from "astra/Globals.h" namespace "astra": int getVersion() @@ -43,6 +44,12 @@ IF HAVE_CUDA==True: ELSE: def setGPUIndex(): pass +cdef extern from "astra/CompositeGeometryManager.h" namespace "astra": + cdef cppclass SGPUParams: + vector[int] GPUIndices + size_t memory +cdef extern from "astra/CompositeGeometryManager.h" namespace "astra::CCompositeGeometryManager": + void setGlobalGPUParams(SGPUParams&) def credits(): six.print_("""The ASTRA Toolbox has been developed at the University of Antwerp and CWI, Amsterdam by @@ -70,8 +77,16 @@ def version(printToScreen=False): else: return getVersion() -def set_gpu_index(idx): +def set_gpu_index(idx, memory=0): + import types + import collections + cdef SGPUParams params if use_cuda()==True: - ret = setGPUIndex(idx) + if not isinstance(idx, collections.Iterable) or isinstance(idx, types.StringTypes): + idx = (idx,) + params.memory = memory + params.GPUIndices = idx + setGlobalGPUParams(params) + ret = setGPUIndex(params.GPUIndices[0]) if not ret: - six.print_("Failed to set GPU " + str(idx)) + six.print_("Failed to set GPU " + str(params.GPUIndices[0])) diff --git a/samples/matlab/s020_3d_multiGPU.m b/samples/matlab/s020_3d_multiGPU.m new file mode 100644 index 0000000..bade325 --- /dev/null +++ b/samples/matlab/s020_3d_multiGPU.m @@ -0,0 +1,38 @@ +% ----------------------------------------------------------------------- +% This file is part of the ASTRA Toolbox +% +% Copyright: 2010-2015, iMinds-Vision Lab, University of Antwerp +% 2014-2015, CWI, Amsterdam +% License: Open Source under GPLv3 +% Contact: astra@uantwerpen.be +% Website: http://sf.net/projects/astra-toolbox +% ----------------------------------------------------------------------- + + +% Set up multi-GPU usage. +% This only works for 3D GPU forward projection and back projection. +astra_mex('set_gpu_index', [0 1]); + +% Optionally, you can also restrict the amount of GPU memory ASTRA will use. +% The line commented below sets this to 1GB. +%astra_mex('set_gpu_index', [0 1], 'memory', 1024*1024*1024); + +vol_geom = astra_create_vol_geom(1024, 1024, 1024); + +angles = linspace2(0, pi, 1024); +proj_geom = astra_create_proj_geom('parallel3d', 1.0, 1.0, 1024, 1024, angles); + +% Create a simple hollow cube phantom +cube = zeros(1024,1024,1024); +cube(129:896,129:896,129:896) = 1; +cube(257:768,257:768,257:768) = 0; + +% Create projection data from this +[proj_id, proj_data] = astra_create_sino3d_cuda(cube, proj_geom, vol_geom); + +% Backproject projection data +[bproj_id, bproj_data] = astra_create_backprojection3d_cuda(proj_data, proj_geom, vol_geom); + +astra_mex_data3d('delete', proj_id); +astra_mex_data3d('delete', bproj_id); + diff --git a/samples/python/s018_experimental_multires.py b/samples/python/s019_experimental_multires.py index cf38e53..cf38e53 100644 --- a/samples/python/s018_experimental_multires.py +++ b/samples/python/s019_experimental_multires.py diff --git a/samples/python/s020_3d_multiGPU.py b/samples/python/s020_3d_multiGPU.py new file mode 100644 index 0000000..d6799c4 --- /dev/null +++ b/samples/python/s020_3d_multiGPU.py @@ -0,0 +1,57 @@ +#----------------------------------------------------------------------- +#Copyright 2013 Centrum Wiskunde & Informatica, Amsterdam +# +#Author: Daniel M. Pelt +#Contact: D.M.Pelt@cwi.nl +#Website: http://dmpelt.github.io/pyastratoolbox/ +# +# +#This file is part of the Python interface to the +#All Scale Tomographic Reconstruction Antwerp Toolbox ("ASTRA Toolbox"). +# +#The Python interface to the ASTRA Toolbox is free software: you can redistribute it and/or modify +#it under the terms of the GNU General Public License as published by +#the Free Software Foundation, either version 3 of the License, or +#(at your option) any later version. +# +#The Python interface to the ASTRA Toolbox is distributed in the hope that it will be useful, +#but WITHOUT ANY WARRANTY; without even the implied warranty of +#MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +#GNU General Public License for more details. +# +#You should have received a copy of the GNU General Public License +#along with the Python interface to the ASTRA Toolbox. If not, see <http://www.gnu.org/licenses/>. +# +#----------------------------------------------------------------------- + +import astra +import numpy as np + +# Set up multi-GPU usage. +# This only works for 3D GPU forward projection and back projection. +astra.astra.set_gpu_index([0,1]) + +# Optionally, you can also restrict the amount of GPU memory ASTRA will use. +# The line commented below sets this to 1GB. +#astra.astra.set_gpu_index([0,1], memory=1024*1024*1024) + +vol_geom = astra.create_vol_geom(1024, 1024, 1024) + +angles = np.linspace(0, np.pi, 1024,False) +proj_geom = astra.create_proj_geom('parallel3d', 1.0, 1.0, 1024, 1024, angles) + +# Create a simple hollow cube phantom +cube = np.zeros((1024,1024,1024)) +cube[128:895,128:895,128:895] = 1 +cube[256:767,256:767,256:767] = 0 + +# Create projection data from this +proj_id, proj_data = astra.create_sino3d_gpu(cube, proj_geom, vol_geom) + +# Backproject projection data +bproj_id, bproj_data = astra.create_backprojection3d_gpu(proj_data, proj_geom, vol_geom) + +# Clean up. Note that GPU memory is tied up in the algorithm object, +# and main RAM in the data objects. +astra.data3d.delete(proj_id) +astra.data3d.delete(bproj_id) diff --git a/src/CompositeGeometryManager.cpp b/src/CompositeGeometryManager.cpp index 41f6319..c9cbaaa 100644 --- a/src/CompositeGeometryManager.cpp +++ b/src/CompositeGeometryManager.cpp @@ -44,11 +44,32 @@ along with the ASTRA Toolbox. If not, see <http://www.gnu.org/licenses/>. #include "../cuda/3d/mem3d.h" #include <cstring> +#include <sstream> +#include <stdint.h> + +#ifndef USE_PTHREADS +#include <boost/thread/mutex.hpp> +#include <boost/thread.hpp> +#endif + namespace astra { +SGPUParams* CCompositeGeometryManager::s_params = 0; + +CCompositeGeometryManager::CCompositeGeometryManager() +{ + m_iMaxSize = 0; + + if (s_params) { + m_iMaxSize = s_params->memory; + m_GPUIndices = s_params->GPUIndices; + } +} + + // JOB: -// +// // VolumePart // ProjectionPart // FP-or-BP @@ -76,9 +97,11 @@ namespace astra { // (First approach: 0.5/0.5) - bool CCompositeGeometryManager::splitJobs(TJobSet &jobs, size_t maxSize, int div, TJobSet &split) { + int maxBlockDim = astraCUDA3d::maxBlockDimension(); + ASTRA_DEBUG("Found max block dim %d", maxBlockDim); + split.clear(); for (TJobSet::const_iterator i = jobs.begin(); i != jobs.end(); ++i) @@ -92,7 +115,22 @@ bool CCompositeGeometryManager::splitJobs(TJobSet &jobs, size_t maxSize, int div // b. split input part // c. create jobs for new (input,output) subparts - TPartList splitOutput = pOutput->split(maxSize/3, div); + TPartList splitOutput; + pOutput->splitZ(splitOutput, maxSize/3, SIZE_MAX, div); +#if 0 + TPartList splitOutput2; + for (TPartList::iterator i_out = splitOutput.begin(); i_out != splitOutput.end(); ++i_out) { + boost::shared_ptr<CPart> outputPart = *i_out; + outputPart.get()->splitX(splitOutput2, SIZE_MAX, SIZE_MAX, 1); + } + splitOutput.clear(); + for (TPartList::iterator i_out = splitOutput2.begin(); i_out != splitOutput2.end(); ++i_out) { + boost::shared_ptr<CPart> outputPart = *i_out; + outputPart.get()->splitY(splitOutput, SIZE_MAX, SIZE_MAX, 1); + } + splitOutput2.clear(); +#endif + for (TJobList::const_iterator j = L.begin(); j != L.end(); ++j) { @@ -120,8 +158,21 @@ bool CCompositeGeometryManager::splitJobs(TJobSet &jobs, size_t maxSize, int div size_t remainingSize = ( maxSize - outputPart->getSize() ) / 2; - TPartList splitInput = input->split(remainingSize, 1); + TPartList splitInput; + input->splitZ(splitInput, remainingSize, maxBlockDim, 1); delete input; + TPartList splitInput2; + for (TPartList::iterator i_in = splitInput.begin(); i_in != splitInput.end(); ++i_in) { + boost::shared_ptr<CPart> inputPart = *i_in; + inputPart.get()->splitX(splitInput2, SIZE_MAX, maxBlockDim, 1); + } + splitInput.clear(); + for (TPartList::iterator i_in = splitInput2.begin(); i_in != splitInput2.end(); ++i_in) { + boost::shared_ptr<CPart> inputPart = *i_in; + inputPart.get()->splitY(splitInput, SIZE_MAX, maxBlockDim, 1); + } + splitInput2.clear(); + ASTRA_DEBUG("Input split into %d parts", splitInput.size()); for (TPartList::iterator i_in = splitInput.begin(); @@ -305,37 +356,41 @@ CCompositeGeometryManager::CPart* CCompositeGeometryManager::CVolumePart::reduce static size_t ceildiv(size_t a, size_t b) { - return (a + b - 1) / b; + return (a + b - 1) / b; } -static size_t computeVerticalSplit(size_t maxBlock, int div, size_t sliceCount) +static size_t computeLinearSplit(size_t maxBlock, int div, size_t sliceCount) { - size_t blockSize = maxBlock; - size_t blockCount = ceildiv(sliceCount, blockSize); - - // Increase number of blocks to be divisible by div - size_t divCount = div * ceildiv(blockCount, div); - - // If divCount is above sqrt(number of slices), then - // we can't guarantee divisibility by div, but let's try anyway - if (ceildiv(sliceCount, ceildiv(sliceCount, divCount)) % div == 0) { - blockCount = divCount; - } else { - // If divisibility isn't achievable, we may want to optimize - // differently. - // TODO: Figure out how to model and optimize this. - } + size_t blockSize = maxBlock; + size_t blockCount; + if (sliceCount <= blockSize) + blockCount = 1; + else + blockCount = ceildiv(sliceCount, blockSize); + + // Increase number of blocks to be divisible by div + size_t divCount = div * ceildiv(blockCount, div); + + // If divCount is above sqrt(number of slices), then + // we can't guarantee divisibility by div, but let's try anyway + if (ceildiv(sliceCount, ceildiv(sliceCount, divCount)) % div == 0) { + blockCount = divCount; + } else { + // If divisibility isn't achievable, we may want to optimize + // differently. + // TODO: Figure out how to model and optimize this. + } - // Final adjustment to make blocks more evenly sized - // (This can't make the blocks larger) - blockSize = ceildiv(sliceCount, blockCount); + // Final adjustment to make blocks more evenly sized + // (This can't make the blocks larger) + blockSize = ceildiv(sliceCount, blockCount); - ASTRA_DEBUG("%ld %ld -> %ld * %ld", sliceCount, maxBlock, blockCount, blockSize); + ASTRA_DEBUG("%ld %ld -> %ld * %ld", sliceCount, maxBlock, blockCount, blockSize); - assert(blockSize <= maxBlock); - assert((divCount * divCount > sliceCount) || (blockCount % div) == 0); + assert(blockSize <= maxBlock); + assert((divCount * divCount > sliceCount) || (blockCount % div) == 0); - return blockSize; + return blockSize; } template<class V, class P> @@ -391,7 +446,17 @@ SPar3DProjection* getProjectionVectors(const CParallelVecProjectionGeometry3D* p template<class V> -static void translateProjectionVectors(V* pProjs, int count, double dv) +static void translateProjectionVectorsU(V* pProjs, int count, double du) +{ + for (int i = 0; i < count; ++i) { + pProjs[i].fDetSX += du * pProjs[i].fDetUX; + pProjs[i].fDetSY += du * pProjs[i].fDetUY; + pProjs[i].fDetSZ += du * pProjs[i].fDetUZ; + } +} + +template<class V> +static void translateProjectionVectorsV(V* pProjs, int count, double dv) { for (int i = 0; i < count; ++i) { pProjs[i].fDetSX += dv * pProjs[i].fDetVX; @@ -401,8 +466,58 @@ static void translateProjectionVectors(V* pProjs, int count, double dv) } +static CProjectionGeometry3D* getSubProjectionGeometryU(const CProjectionGeometry3D* pProjGeom, int u, int size) +{ + // First convert to vectors, then translate, then convert into new object + + const CConeProjectionGeometry3D* conegeom = dynamic_cast<const CConeProjectionGeometry3D*>(pProjGeom); + const CParallelProjectionGeometry3D* par3dgeom = dynamic_cast<const CParallelProjectionGeometry3D*>(pProjGeom); + const CParallelVecProjectionGeometry3D* parvec3dgeom = dynamic_cast<const CParallelVecProjectionGeometry3D*>(pProjGeom); + const CConeVecProjectionGeometry3D* conevec3dgeom = dynamic_cast<const CConeVecProjectionGeometry3D*>(pProjGeom); + + if (conegeom || conevec3dgeom) { + SConeProjection* pConeProjs; + if (conegeom) { + pConeProjs = getProjectionVectors<SConeProjection>(conegeom); + } else { + pConeProjs = getProjectionVectors<SConeProjection>(conevec3dgeom); + } + + translateProjectionVectorsU(pConeProjs, pProjGeom->getProjectionCount(), u); + + CProjectionGeometry3D* ret = new CConeVecProjectionGeometry3D(pProjGeom->getProjectionCount(), + pProjGeom->getDetectorRowCount(), + size, + pConeProjs); -static CProjectionGeometry3D* getSubProjectionGeometry(const CProjectionGeometry3D* pProjGeom, int v, int size) + + delete[] pConeProjs; + return ret; + } else { + assert(par3dgeom || parvec3dgeom); + SPar3DProjection* pParProjs; + if (par3dgeom) { + pParProjs = getProjectionVectors<SPar3DProjection>(par3dgeom); + } else { + pParProjs = getProjectionVectors<SPar3DProjection>(parvec3dgeom); + } + + translateProjectionVectorsU(pParProjs, pProjGeom->getProjectionCount(), u); + + CProjectionGeometry3D* ret = new CParallelVecProjectionGeometry3D(pProjGeom->getProjectionCount(), + pProjGeom->getDetectorRowCount(), + size, + pParProjs); + + delete[] pParProjs; + return ret; + } + +} + + + +static CProjectionGeometry3D* getSubProjectionGeometryV(const CProjectionGeometry3D* pProjGeom, int v, int size) { // First convert to vectors, then translate, then convert into new object @@ -419,7 +534,7 @@ static CProjectionGeometry3D* getSubProjectionGeometry(const CProjectionGeometry pConeProjs = getProjectionVectors<SConeProjection>(conevec3dgeom); } - translateProjectionVectors(pConeProjs, pProjGeom->getProjectionCount(), v); + translateProjectionVectorsV(pConeProjs, pProjGeom->getProjectionCount(), v); CProjectionGeometry3D* ret = new CConeVecProjectionGeometry3D(pProjGeom->getProjectionCount(), size, @@ -438,7 +553,7 @@ static CProjectionGeometry3D* getSubProjectionGeometry(const CProjectionGeometry pParProjs = getProjectionVectors<SPar3DProjection>(parvec3dgeom); } - translateProjectionVectors(pParProjs, pProjGeom->getProjectionCount(), v); + translateProjectionVectorsV(pParProjs, pProjGeom->getProjectionCount(), v); CProjectionGeometry3D* ret = new CParallelVecProjectionGeometry3D(pProjGeom->getProjectionCount(), size, @@ -457,17 +572,110 @@ static CProjectionGeometry3D* getSubProjectionGeometry(const CProjectionGeometry // - each no bigger than maxSize // - number of sub-parts is divisible by div // - maybe all approximately the same size? -CCompositeGeometryManager::TPartList CCompositeGeometryManager::CVolumePart::split(size_t maxSize, int div) +void CCompositeGeometryManager::CVolumePart::splitX(CCompositeGeometryManager::TPartList& out, size_t maxSize, size_t maxDim, int div) { - TPartList ret; + if (true) { + // Split in vertical direction only at first, until we figure out + // a model for splitting in other directions + + size_t sliceSize = ((size_t) pGeom->getGridSliceCount()) * pGeom->getGridRowCount(); + int sliceCount = pGeom->getGridColCount(); + size_t m = std::min(maxSize / sliceSize, maxDim); + size_t blockSize = computeLinearSplit(m, div, sliceCount); + + int rem = sliceCount % blockSize; + + ASTRA_DEBUG("From %d to %d step %d", -(rem / 2), sliceCount, blockSize); + + for (int x = -(rem / 2); x < sliceCount; x += blockSize) { + int newsubX = x; + if (newsubX < 0) newsubX = 0; + int endX = x + blockSize; + if (endX > sliceCount) endX = sliceCount; + int size = endX - newsubX; + + CVolumePart *sub = new CVolumePart(); + sub->subX = this->subX + newsubX; + sub->subY = this->subY; + sub->subZ = this->subZ; + + ASTRA_DEBUG("VolumePart split %d %d %d -> %p", sub->subX, sub->subY, sub->subZ, (void*)sub); + + double shift = pGeom->getPixelLengthX() * newsubX; + + sub->pData = pData; + sub->pGeom = new CVolumeGeometry3D(size, + pGeom->getGridRowCount(), + pGeom->getGridSliceCount(), + pGeom->getWindowMinX() + shift, + pGeom->getWindowMinY(), + pGeom->getWindowMinZ(), + pGeom->getWindowMinX() + shift + size * pGeom->getPixelLengthX(), + pGeom->getWindowMaxY(), + pGeom->getWindowMaxZ()); + + out.push_back(boost::shared_ptr<CPart>(sub)); + } + } +} +void CCompositeGeometryManager::CVolumePart::splitY(CCompositeGeometryManager::TPartList& out, size_t maxSize, size_t maxDim, int div) +{ + if (true) { + // Split in vertical direction only at first, until we figure out + // a model for splitting in other directions + + size_t sliceSize = ((size_t) pGeom->getGridColCount()) * pGeom->getGridSliceCount(); + int sliceCount = pGeom->getGridRowCount(); + size_t m = std::min(maxSize / sliceSize, maxDim); + size_t blockSize = computeLinearSplit(m, div, sliceCount); + + int rem = sliceCount % blockSize; + + ASTRA_DEBUG("From %d to %d step %d", -(rem / 2), sliceCount, blockSize); + + for (int y = -(rem / 2); y < sliceCount; y += blockSize) { + int newsubY = y; + if (newsubY < 0) newsubY = 0; + int endY = y + blockSize; + if (endY > sliceCount) endY = sliceCount; + int size = endY - newsubY; + + CVolumePart *sub = new CVolumePart(); + sub->subX = this->subX; + sub->subY = this->subY + newsubY; + sub->subZ = this->subZ; + + ASTRA_DEBUG("VolumePart split %d %d %d -> %p", sub->subX, sub->subY, sub->subZ, (void*)sub); + + double shift = pGeom->getPixelLengthY() * newsubY; + + sub->pData = pData; + sub->pGeom = new CVolumeGeometry3D(pGeom->getGridColCount(), + size, + pGeom->getGridSliceCount(), + pGeom->getWindowMinX(), + pGeom->getWindowMinY() + shift, + pGeom->getWindowMinZ(), + pGeom->getWindowMaxX(), + pGeom->getWindowMinY() + shift + size * pGeom->getPixelLengthY(), + pGeom->getWindowMaxZ()); + + out.push_back(boost::shared_ptr<CPart>(sub)); + } + } +} + +void CCompositeGeometryManager::CVolumePart::splitZ(CCompositeGeometryManager::TPartList& out, size_t maxSize, size_t maxDim, int div) +{ if (true) { // Split in vertical direction only at first, until we figure out // a model for splitting in other directions size_t sliceSize = ((size_t) pGeom->getGridColCount()) * pGeom->getGridRowCount(); int sliceCount = pGeom->getGridSliceCount(); - size_t blockSize = computeVerticalSplit(maxSize / sliceSize, div, sliceCount); + size_t m = std::min(maxSize / sliceSize, maxDim); + size_t blockSize = computeLinearSplit(m, div, sliceCount); int rem = sliceCount % blockSize; @@ -500,11 +708,9 @@ CCompositeGeometryManager::TPartList CCompositeGeometryManager::CVolumePart::spl pGeom->getWindowMaxY(), pGeom->getWindowMinZ() + shift + size * pGeom->getPixelLengthZ()); - ret.push_back(boost::shared_ptr<CPart>(sub)); + out.push_back(boost::shared_ptr<CPart>(sub)); } } - - return ret; } CCompositeGeometryManager::CVolumePart* CCompositeGeometryManager::CVolumePart::clone() const @@ -611,7 +817,7 @@ CCompositeGeometryManager::CPart* CCompositeGeometryManager::CProjectionPart::re if (_vmin == _vmax) { sub->pGeom = 0; } else { - sub->pGeom = getSubProjectionGeometry(pGeom, _vmin, _vmax - _vmin); + sub->pGeom = getSubProjectionGeometryV(pGeom, _vmin, _vmax - _vmin); } ASTRA_DEBUG("Reduce projection from %d - %d to %d - %d", this->subZ, this->subZ + pGeom->getDetectorRowCount(), this->subZ + _vmin, this->subZ + _vmax); @@ -620,17 +826,58 @@ CCompositeGeometryManager::CPart* CCompositeGeometryManager::CProjectionPart::re } -CCompositeGeometryManager::TPartList CCompositeGeometryManager::CProjectionPart::split(size_t maxSize, int div) +void CCompositeGeometryManager::CProjectionPart::splitX(CCompositeGeometryManager::TPartList &out, size_t maxSize, size_t maxDim, int div) { - TPartList ret; + if (true) { + // Split in vertical direction only at first, until we figure out + // a model for splitting in other directions + size_t sliceSize = ((size_t) pGeom->getDetectorRowCount()) * pGeom->getProjectionCount(); + int sliceCount = pGeom->getDetectorColCount(); + size_t m = std::min(maxSize / sliceSize, maxDim); + size_t blockSize = computeLinearSplit(m, div, sliceCount); + + int rem = sliceCount % blockSize; + + for (int x = -(rem / 2); x < sliceCount; x += blockSize) { + int newsubX = x; + if (newsubX < 0) newsubX = 0; + int endX = x + blockSize; + if (endX > sliceCount) endX = sliceCount; + int size = endX - newsubX; + + CProjectionPart *sub = new CProjectionPart(); + sub->subX = this->subX + newsubX; + sub->subY = this->subY; + sub->subZ = this->subZ; + + ASTRA_DEBUG("ProjectionPart split %d %d %d -> %p", sub->subX, sub->subY, sub->subZ, (void*)sub); + + sub->pData = pData; + + sub->pGeom = getSubProjectionGeometryU(pGeom, newsubX, size); + + out.push_back(boost::shared_ptr<CPart>(sub)); + } + } +} + +void CCompositeGeometryManager::CProjectionPart::splitY(CCompositeGeometryManager::TPartList &out, size_t maxSize, size_t maxDim, int div) +{ + // TODO + out.push_back(boost::shared_ptr<CPart>(clone())); +} + +void CCompositeGeometryManager::CProjectionPart::splitZ(CCompositeGeometryManager::TPartList &out, size_t maxSize, size_t maxDim, int div) +{ if (true) { // Split in vertical direction only at first, until we figure out // a model for splitting in other directions size_t sliceSize = ((size_t) pGeom->getDetectorColCount()) * pGeom->getProjectionCount(); int sliceCount = pGeom->getDetectorRowCount(); - size_t blockSize = computeVerticalSplit(maxSize / sliceSize, div, sliceCount); + size_t m = std::min(maxSize / sliceSize, maxDim); + size_t blockSize = computeLinearSplit(m, div, sliceCount); int rem = sliceCount % blockSize; @@ -650,14 +897,12 @@ CCompositeGeometryManager::TPartList CCompositeGeometryManager::CProjectionPart: sub->pData = pData; - sub->pGeom = getSubProjectionGeometry(pGeom, newsubZ, size); + sub->pGeom = getSubProjectionGeometryV(pGeom, newsubZ, size); - ret.push_back(boost::shared_ptr<CPart>(sub)); + out.push_back(boost::shared_ptr<CPart>(sub)); } } - return ret; - } CCompositeGeometryManager::CProjectionPart* CCompositeGeometryManager::CProjectionPart::clone() const @@ -665,13 +910,12 @@ CCompositeGeometryManager::CProjectionPart* CCompositeGeometryManager::CProjecti return new CProjectionPart(*this); } - -bool CCompositeGeometryManager::doFP(CProjector3D *pProjector, CFloat32VolumeData3DMemory *pVolData, - CFloat32ProjectionData3DMemory *pProjData) +CCompositeGeometryManager::SJob CCompositeGeometryManager::createJobFP(CProjector3D *pProjector, + CFloat32VolumeData3DMemory *pVolData, + CFloat32ProjectionData3DMemory *pProjData) { - ASTRA_DEBUG("CCompositeGeometryManager::doFP"); + ASTRA_DEBUG("CCompositeGeometryManager::createJobFP"); // Create single job for FP - // Run result CVolumePart *input = new CVolumePart(); input->pData = pVolData; @@ -696,18 +940,15 @@ bool CCompositeGeometryManager::doFP(CProjector3D *pProjector, CFloat32VolumeDat FP.eType = SJob::JOB_FP; FP.eMode = SJob::MODE_SET; - TJobList L; - L.push_back(FP); - - return doJobs(L); + return FP; } -bool CCompositeGeometryManager::doBP(CProjector3D *pProjector, CFloat32VolumeData3DMemory *pVolData, - CFloat32ProjectionData3DMemory *pProjData) +CCompositeGeometryManager::SJob CCompositeGeometryManager::createJobBP(CProjector3D *pProjector, + CFloat32VolumeData3DMemory *pVolData, + CFloat32ProjectionData3DMemory *pProjData) { - ASTRA_DEBUG("CCompositeGeometryManager::doBP"); + ASTRA_DEBUG("CCompositeGeometryManager::createJobBP"); // Create single job for BP - // Run result CProjectionPart *input = new CProjectionPart(); input->pData = pProjData; @@ -730,8 +971,23 @@ bool CCompositeGeometryManager::doBP(CProjector3D *pProjector, CFloat32VolumeDat BP.eType = SJob::JOB_BP; BP.eMode = SJob::MODE_SET; + return BP; +} + +bool CCompositeGeometryManager::doFP(CProjector3D *pProjector, CFloat32VolumeData3DMemory *pVolData, + CFloat32ProjectionData3DMemory *pProjData) +{ + TJobList L; + L.push_back(createJobFP(pProjector, pVolData, pProjData)); + + return doJobs(L); +} + +bool CCompositeGeometryManager::doBP(CProjector3D *pProjector, CFloat32VolumeData3DMemory *pVolData, + CFloat32ProjectionData3DMemory *pProjData) +{ TJobList L; - L.push_back(BP); + L.push_back(createJobBP(pProjector, pVolData, pProjData)); return doJobs(L); } @@ -848,6 +1104,260 @@ bool CCompositeGeometryManager::doBP(CProjector3D *pProjector, const std::vector +static bool doJob(const CCompositeGeometryManager::TJobSet::const_iterator& iter) +{ + CCompositeGeometryManager::CPart* output = iter->first; + const CCompositeGeometryManager::TJobList& L = iter->second; + + assert(!L.empty()); + + bool zero = L.begin()->eMode == CCompositeGeometryManager::SJob::MODE_SET; + + size_t outx, outy, outz; + output->getDims(outx, outy, outz); + + if (L.begin()->eType == CCompositeGeometryManager::SJob::JOB_NOP) { + // just zero output? + if (zero) { + for (size_t z = 0; z < outz; ++z) { + for (size_t y = 0; y < outy; ++y) { + float* ptr = output->pData->getData(); + ptr += (z + output->subX) * (size_t)output->pData->getHeight() * (size_t)output->pData->getWidth(); + ptr += (y + output->subY) * (size_t)output->pData->getWidth(); + ptr += output->subX; + memset(ptr, 0, sizeof(float) * outx); + } + } + } + return true; + } + + + astraCUDA3d::SSubDimensions3D dstdims; + dstdims.nx = output->pData->getWidth(); + dstdims.pitch = dstdims.nx; + dstdims.ny = output->pData->getHeight(); + dstdims.nz = output->pData->getDepth(); + dstdims.subnx = outx; + dstdims.subny = outy; + dstdims.subnz = outz; + ASTRA_DEBUG("dstdims: %d,%d,%d in %d,%d,%d", dstdims.subnx, dstdims.subny, dstdims.subnz, dstdims.nx, dstdims.ny, dstdims.nz); + dstdims.subx = output->subX; + dstdims.suby = output->subY; + dstdims.subz = output->subZ; + float *dst = output->pData->getData(); + + astraCUDA3d::MemHandle3D outputMem = astraCUDA3d::allocateGPUMemory(outx, outy, outz, zero ? astraCUDA3d::INIT_ZERO : astraCUDA3d::INIT_NO); + bool ok = outputMem; + + for (CCompositeGeometryManager::TJobList::const_iterator i = L.begin(); i != L.end(); ++i) { + const CCompositeGeometryManager::SJob &j = *i; + + assert(j.pInput); + + CCudaProjector3D *projector = dynamic_cast<CCudaProjector3D*>(j.pProjector); + Cuda3DProjectionKernel projKernel = ker3d_default; + int detectorSuperSampling = 1; + int voxelSuperSampling = 1; + if (projector) { + projKernel = projector->getProjectionKernel(); + detectorSuperSampling = projector->getDetectorSuperSampling(); + voxelSuperSampling = projector->getVoxelSuperSampling(); + } + + size_t inx, iny, inz; + j.pInput->getDims(inx, iny, inz); + astraCUDA3d::MemHandle3D inputMem = astraCUDA3d::allocateGPUMemory(inx, iny, inz, astraCUDA3d::INIT_NO); + + astraCUDA3d::SSubDimensions3D srcdims; + srcdims.nx = j.pInput->pData->getWidth(); + srcdims.pitch = srcdims.nx; + srcdims.ny = j.pInput->pData->getHeight(); + srcdims.nz = j.pInput->pData->getDepth(); + srcdims.subnx = inx; + srcdims.subny = iny; + srcdims.subnz = inz; + srcdims.subx = j.pInput->subX; + srcdims.suby = j.pInput->subY; + srcdims.subz = j.pInput->subZ; + const float *src = j.pInput->pData->getDataConst(); + + ok = astraCUDA3d::copyToGPUMemory(src, inputMem, srcdims); + if (!ok) ASTRA_ERROR("Error copying input data to GPU"); + + if (j.eType == CCompositeGeometryManager::SJob::JOB_FP) { + assert(dynamic_cast<CCompositeGeometryManager::CVolumePart*>(j.pInput.get())); + assert(dynamic_cast<CCompositeGeometryManager::CProjectionPart*>(j.pOutput.get())); + + ASTRA_DEBUG("CCompositeGeometryManager::doJobs: doing FP"); + + ok = astraCUDA3d::FP(((CCompositeGeometryManager::CProjectionPart*)j.pOutput.get())->pGeom, outputMem, ((CCompositeGeometryManager::CVolumePart*)j.pInput.get())->pGeom, inputMem, detectorSuperSampling, projKernel); + if (!ok) ASTRA_ERROR("Error performing sub-FP"); + ASTRA_DEBUG("CCompositeGeometryManager::doJobs: FP done"); + } else if (j.eType == CCompositeGeometryManager::SJob::JOB_BP) { + assert(dynamic_cast<CCompositeGeometryManager::CVolumePart*>(j.pOutput.get())); + assert(dynamic_cast<CCompositeGeometryManager::CProjectionPart*>(j.pInput.get())); + + ASTRA_DEBUG("CCompositeGeometryManager::doJobs: doing BP"); + + ok = astraCUDA3d::BP(((CCompositeGeometryManager::CProjectionPart*)j.pInput.get())->pGeom, inputMem, ((CCompositeGeometryManager::CVolumePart*)j.pOutput.get())->pGeom, outputMem, voxelSuperSampling); + if (!ok) ASTRA_ERROR("Error performing sub-BP"); + ASTRA_DEBUG("CCompositeGeometryManager::doJobs: BP done"); + } else { + assert(false); + } + + ok = astraCUDA3d::freeGPUMemory(inputMem); + if (!ok) ASTRA_ERROR("Error freeing GPU memory"); + + } + + ok = astraCUDA3d::copyFromGPUMemory(dst, outputMem, dstdims); + if (!ok) ASTRA_ERROR("Error copying output data from GPU"); + + ok = astraCUDA3d::freeGPUMemory(outputMem); + if (!ok) ASTRA_ERROR("Error freeing GPU memory"); + + return true; +} + + +class WorkQueue { +public: + WorkQueue(CCompositeGeometryManager::TJobSet &_jobs) : m_jobs(_jobs) { +#ifdef USE_PTHREADS + pthread_mutex_init(&m_mutex, 0); +#endif + m_iter = m_jobs.begin(); + } + bool receive(CCompositeGeometryManager::TJobSet::const_iterator &i) { + lock(); + + if (m_iter == m_jobs.end()) { + unlock(); + return false; + } + + i = m_iter++; + + unlock(); + + return true; + } +#ifdef USE_PTHREADS + void lock() { + // TODO: check mutex op return values + pthread_mutex_lock(&m_mutex); + } + void unlock() { + // TODO: check mutex op return values + pthread_mutex_unlock(&m_mutex); + } +#else + void lock() { + m_mutex.lock(); + } + void unlock() { + m_mutex.unlock(); + } +#endif + +private: + CCompositeGeometryManager::TJobSet &m_jobs; + CCompositeGeometryManager::TJobSet::const_iterator m_iter; +#ifdef USE_PTHREADS + pthread_mutex_t m_mutex; +#else + boost::mutex m_mutex; +#endif +}; + +struct WorkThreadInfo { + WorkQueue* m_queue; + unsigned int m_iGPU; +}; + +#ifndef USE_PTHREADS + +void runEntries_boost(WorkThreadInfo* info) +{ + ASTRA_DEBUG("Launching thread on GPU %d\n", info->m_iGPU); + CCompositeGeometryManager::TJobSet::const_iterator i; + while (info->m_queue->receive(i)) { + ASTRA_DEBUG("Running block on GPU %d\n", info->m_iGPU); + astraCUDA3d::setGPUIndex(info->m_iGPU); + boost::this_thread::interruption_point(); + doJob(i); + boost::this_thread::interruption_point(); + } + ASTRA_DEBUG("Finishing thread on GPU %d\n", info->m_iGPU); +} + + +#else + +void* runEntries_pthreads(void* data) { + WorkThreadInfo* info = (WorkThreadInfo*)data; + + ASTRA_DEBUG("Launching thread on GPU %d\n", info->m_iGPU); + + CCompositeGeometryManager::TJobSet::const_iterator i; + + while (info->m_queue->receive(i)) { + ASTRA_DEBUG("Running block on GPU %d\n", info->m_iGPU); + astraCUDA3d::setGPUIndex(info->m_iGPU); + pthread_testcancel(); + doJob(i); + pthread_testcancel(); + } + ASTRA_DEBUG("Finishing thread on GPU %d\n", info->m_iGPU); + + return 0; +} + +#endif + + +void runWorkQueue(WorkQueue &queue, const std::vector<int> & iGPUIndices) { + int iThreadCount = iGPUIndices.size(); + + std::vector<WorkThreadInfo> infos; +#ifdef USE_PTHREADS + std::vector<pthread_t> threads; +#else + std::vector<boost::thread*> threads; +#endif + infos.resize(iThreadCount); + threads.resize(iThreadCount); + + for (int i = 0; i < iThreadCount; ++i) { + infos[i].m_queue = &queue; + infos[i].m_iGPU = iGPUIndices[i]; +#ifdef USE_PTHREADS + pthread_create(&threads[i], 0, runEntries_pthreads, (void*)&infos[i]); +#else + threads[i] = new boost::thread(runEntries_boost, &infos[i]); +#endif + } + + // Wait for them to finish + for (int i = 0; i < iThreadCount; ++i) { +#ifdef USE_PTHREADS + pthread_join(threads[i], 0); +#else + threads[i]->join(); + delete threads[i]; + threads[i] = 0; +#endif + } +} + + +void CCompositeGeometryManager::setGPUIndices(const std::vector<int>& GPUIndices) +{ + m_GPUIndices = GPUIndices; +} + bool CCompositeGeometryManager::doJobs(TJobList &jobs) { ASTRA_DEBUG("CCompositeGeometryManager::doJobs"); @@ -859,140 +1369,53 @@ bool CCompositeGeometryManager::doJobs(TJobList &jobs) jobset[i->pOutput.get()].push_back(*i); } - size_t maxSize = astraCUDA3d::availableGPUMemory(); + size_t maxSize = m_iMaxSize; if (maxSize == 0) { - ASTRA_WARN("Unable to get available GPU memory. Defaulting to 1GB."); - maxSize = 1024 * 1024 * 1024; + // Get memory from first GPU. Not optimal... + if (!m_GPUIndices.empty()) + astraCUDA3d::setGPUIndex(m_GPUIndices[0]); + maxSize = astraCUDA3d::availableGPUMemory(); + if (maxSize == 0) { + ASTRA_WARN("Unable to get available GPU memory. Defaulting to 1GB."); + maxSize = 1024 * 1024 * 1024; + } else { + ASTRA_DEBUG("Detected %lu bytes of GPU memory", maxSize); + } } else { - ASTRA_DEBUG("Detected %lu bytes of GPU memory", maxSize); + ASTRA_DEBUG("Set to %lu bytes of GPU memory", maxSize); } maxSize = (maxSize * 9) / 10; maxSize /= sizeof(float); int div = 1; - - // TODO: Multi-GPU support + if (!m_GPUIndices.empty()) + div = m_GPUIndices.size(); // Split jobs to fit TJobSet split; splitJobs(jobset, maxSize, div, split); jobset.clear(); - // Run jobs - - for (TJobSet::iterator iter = split.begin(); iter != split.end(); ++iter) { - - CPart* output = iter->first; - TJobList& L = iter->second; + if (m_GPUIndices.size() <= 1) { - assert(!L.empty()); + // Run jobs + ASTRA_DEBUG("Running single-threaded"); - bool zero = L.begin()->eMode == SJob::MODE_SET; + if (!m_GPUIndices.empty()) + astraCUDA3d::setGPUIndex(m_GPUIndices[0]); - size_t outx, outy, outz; - output->getDims(outx, outy, outz); - - if (L.begin()->eType == SJob::JOB_NOP) { - // just zero output? - if (zero) { - for (size_t z = 0; z < outz; ++z) { - for (size_t y = 0; y < outy; ++y) { - float* ptr = output->pData->getData(); - ptr += (z + output->subX) * (size_t)output->pData->getHeight() * (size_t)output->pData->getWidth(); - ptr += (y + output->subY) * (size_t)output->pData->getWidth(); - ptr += output->subX; - memset(ptr, 0, sizeof(float) * outx); - } - } - } - continue; + for (TJobSet::const_iterator iter = split.begin(); iter != split.end(); ++iter) { + doJob(iter); } + } else { - astraCUDA3d::SSubDimensions3D dstdims; - dstdims.nx = output->pData->getWidth(); - dstdims.pitch = dstdims.nx; - dstdims.ny = output->pData->getHeight(); - dstdims.nz = output->pData->getDepth(); - dstdims.subnx = outx; - dstdims.subny = outy; - dstdims.subnz = outz; - ASTRA_DEBUG("dstdims: %d,%d,%d in %d,%d,%d", dstdims.subnx, dstdims.subny, dstdims.subnz, dstdims.nx, dstdims.ny, dstdims.nz); - dstdims.subx = output->subX; - dstdims.suby = output->subY; - dstdims.subz = output->subZ; - float *dst = output->pData->getData(); - - astraCUDA3d::MemHandle3D outputMem = astraCUDA3d::allocateGPUMemory(outx, outy, outz, zero ? astraCUDA3d::INIT_ZERO : astraCUDA3d::INIT_NO); - bool ok = outputMem; - - for (TJobList::iterator i = L.begin(); i != L.end(); ++i) { - SJob &j = *i; - - assert(j.pInput); - - CCudaProjector3D *projector = dynamic_cast<CCudaProjector3D*>(j.pProjector); - Cuda3DProjectionKernel projKernel = ker3d_default; - int detectorSuperSampling = 1; - int voxelSuperSampling = 1; - if (projector) { - projKernel = projector->getProjectionKernel(); - detectorSuperSampling = projector->getDetectorSuperSampling(); - voxelSuperSampling = projector->getVoxelSuperSampling(); - } - - size_t inx, iny, inz; - j.pInput->getDims(inx, iny, inz); - astraCUDA3d::MemHandle3D inputMem = astraCUDA3d::allocateGPUMemory(inx, iny, inz, astraCUDA3d::INIT_NO); - - astraCUDA3d::SSubDimensions3D srcdims; - srcdims.nx = j.pInput->pData->getWidth(); - srcdims.pitch = srcdims.nx; - srcdims.ny = j.pInput->pData->getHeight(); - srcdims.nz = j.pInput->pData->getDepth(); - srcdims.subnx = inx; - srcdims.subny = iny; - srcdims.subnz = inz; - srcdims.subx = j.pInput->subX; - srcdims.suby = j.pInput->subY; - srcdims.subz = j.pInput->subZ; - const float *src = j.pInput->pData->getDataConst(); - - ok = astraCUDA3d::copyToGPUMemory(src, inputMem, srcdims); - if (!ok) ASTRA_ERROR("Error copying input data to GPU"); - - if (j.eType == SJob::JOB_FP) { - assert(dynamic_cast<CVolumePart*>(j.pInput.get())); - assert(dynamic_cast<CProjectionPart*>(j.pOutput.get())); - - ASTRA_DEBUG("CCompositeGeometryManager::doJobs: doing FP"); - - ok = astraCUDA3d::FP(((CProjectionPart*)j.pOutput.get())->pGeom, outputMem, ((CVolumePart*)j.pInput.get())->pGeom, inputMem, detectorSuperSampling, projKernel); - if (!ok) ASTRA_ERROR("Error performing sub-FP"); - ASTRA_DEBUG("CCompositeGeometryManager::doJobs: FP done"); - } else if (j.eType == SJob::JOB_BP) { - assert(dynamic_cast<CVolumePart*>(j.pOutput.get())); - assert(dynamic_cast<CProjectionPart*>(j.pInput.get())); - - ASTRA_DEBUG("CCompositeGeometryManager::doJobs: doing BP"); - - ok = astraCUDA3d::BP(((CProjectionPart*)j.pInput.get())->pGeom, inputMem, ((CVolumePart*)j.pOutput.get())->pGeom, outputMem, voxelSuperSampling); - if (!ok) ASTRA_ERROR("Error performing sub-BP"); - ASTRA_DEBUG("CCompositeGeometryManager::doJobs: BP done"); - } else { - assert(false); - } + ASTRA_DEBUG("Running multi-threaded"); - ok = astraCUDA3d::freeGPUMemory(inputMem); - if (!ok) ASTRA_ERROR("Error freeing GPU memory"); + WorkQueue wq(split); - } + runWorkQueue(wq, m_GPUIndices); - ok = astraCUDA3d::copyFromGPUMemory(dst, outputMem, dstdims); - if (!ok) ASTRA_ERROR("Error copying output data from GPU"); - - ok = astraCUDA3d::freeGPUMemory(outputMem); - if (!ok) ASTRA_ERROR("Error freeing GPU memory"); } return true; @@ -1000,6 +1423,26 @@ bool CCompositeGeometryManager::doJobs(TJobList &jobs) + +//static +void CCompositeGeometryManager::setGlobalGPUParams(const SGPUParams& params) +{ + delete s_params; + + s_params = new SGPUParams; + *s_params = params; + + ASTRA_DEBUG("CompositeGeometryManager: Setting global GPU params:"); + std::ostringstream s; + s << "GPU indices:"; + for (unsigned int i = 0; i < params.GPUIndices.size(); ++i) + s << " " << params.GPUIndices[i]; + std::string ss = s.str(); + ASTRA_DEBUG(ss.c_str()); + ASTRA_DEBUG("Memory: %llu", params.memory); +} + + } #endif diff --git a/src/ConeProjectionGeometry3D.cpp b/src/ConeProjectionGeometry3D.cpp index 99b4bf4..96b04fb 100644 --- a/src/ConeProjectionGeometry3D.cpp +++ b/src/ConeProjectionGeometry3D.cpp @@ -256,9 +256,6 @@ void CConeProjectionGeometry3D::projectPoint(double fX, double fY, double fZ, // Scale fS to detector plane fU = detectorOffsetXToColIndexFloat( (fS * (m_fOriginSourceDistance + m_fOriginDetectorDistance)) / fD ); - - ASTRA_DEBUG("alpha: %f, D: %f, V: %f, S: %f, U: %f", alpha, fD, fV, fS, fU); - } void CConeProjectionGeometry3D::backprojectPointX(int iAngleIndex, double fU, double fV, |