/* ----------------------------------------------------------------------- Copyright: 2010-2016, iMinds-Vision Lab, University of Antwerp 2014-2016, CWI, Amsterdam Contact: astra@uantwerpen.be Website: http://www.astra-toolbox.com/ This file is part of the ASTRA Toolbox. The ASTRA Toolbox is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. The ASTRA Toolbox is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with the ASTRA Toolbox. If not, see <http://www.gnu.org/licenses/>. ----------------------------------------------------------------------- */ #include "astra/CompositeGeometryManager.h" #ifdef ASTRA_CUDA #include "astra/GeometryUtil3D.h" #include "astra/VolumeGeometry3D.h" #include "astra/ConeProjectionGeometry3D.h" #include "astra/ConeVecProjectionGeometry3D.h" #include "astra/ParallelProjectionGeometry3D.h" #include "astra/ParallelVecProjectionGeometry3D.h" #include "astra/Projector3D.h" #include "astra/CudaProjector3D.h" #include "astra/Float32ProjectionData3DMemory.h" #include "astra/Float32VolumeData3DMemory.h" #include "astra/Float32ProjectionData3DGPU.h" #include "astra/Float32VolumeData3DGPU.h" #include "astra/Logging.h" #include "../cuda/3d/mem3d.h" #include <cstring> #include <sstream> #include <climits> #ifndef USE_PTHREADS #include <boost/thread/mutex.hpp> #include <boost/thread.hpp> #endif namespace astra { SGPUParams* CCompositeGeometryManager::s_params = 0; CCompositeGeometryManager::CCompositeGeometryManager() { m_iMaxSize = 0; if (s_params) { m_iMaxSize = s_params->memory; m_GPUIndices = s_params->GPUIndices; } } // JOB: // // VolumePart // ProjectionPart // FP-or-BP // SET-or-ADD // Running a set of jobs: // // [ Assume OUTPUT Parts in a single JobSet don't alias?? ] // Group jobs by output Part // One thread per group? // Automatically split parts if too large // Performance model for odd-sized tasks? // Automatically split parts if not enough tasks to fill available GPUs // Splitting: // Constraints: // number of sub-parts divisible by N // max size of sub-parts // For splitting on both input and output side: // How to divide up memory? (Optimization problem; compute/benchmark) // (First approach: 0.5/0.5) class _AstraExport CFloat32CustomGPUMemory { public: astraCUDA3d::MemHandle3D hnd; // Only required to be valid between allocate/free virtual bool allocateGPUMemory(unsigned int x, unsigned int y, unsigned int z, astraCUDA3d::Mem3DZeroMode zero)=0; virtual bool copyToGPUMemory(const astraCUDA3d::SSubDimensions3D &pos)=0; virtual bool copyFromGPUMemory(const astraCUDA3d::SSubDimensions3D &pos)=0; virtual bool freeGPUMemory()=0; virtual ~CFloat32CustomGPUMemory() { } }; class CFloat32ExistingGPUMemory : public astra::CFloat32CustomGPUMemory { public: CFloat32ExistingGPUMemory(CFloat32Data3DGPU *d); virtual bool allocateGPUMemory(unsigned int x, unsigned int y, unsigned int z, astraCUDA3d::Mem3DZeroMode zero); virtual bool copyToGPUMemory(const astraCUDA3d::SSubDimensions3D &pos); virtual bool copyFromGPUMemory(const astraCUDA3d::SSubDimensions3D &pos); virtual bool freeGPUMemory(); protected: unsigned int x, y, z; }; class CFloat32DefaultGPUMemory : public astra::CFloat32CustomGPUMemory { public: CFloat32DefaultGPUMemory(CFloat32Data3DMemory* d) { ptr = d->getData(); } virtual bool allocateGPUMemory(unsigned int x, unsigned int y, unsigned int z, astraCUDA3d::Mem3DZeroMode zero) { hnd = astraCUDA3d::allocateGPUMemory(x, y, z, zero); return (bool)hnd; } virtual bool copyToGPUMemory(const astraCUDA3d::SSubDimensions3D &pos) { return astraCUDA3d::copyToGPUMemory(ptr, hnd, pos); } virtual bool copyFromGPUMemory(const astraCUDA3d::SSubDimensions3D &pos) { return astraCUDA3d::copyFromGPUMemory(ptr, hnd, pos); } virtual bool freeGPUMemory() { return astraCUDA3d::freeGPUMemory(hnd); } protected: float *ptr; }; CFloat32ExistingGPUMemory::CFloat32ExistingGPUMemory(CFloat32Data3DGPU *d) { hnd = d->getHandle(); x = d->getWidth(); y = d->getHeight(); z = d->getDepth(); } bool CFloat32ExistingGPUMemory::allocateGPUMemory(unsigned int x_, unsigned int y_, unsigned int z_, astraCUDA3d::Mem3DZeroMode zero) { assert(x_ == x); assert(y_ == y); assert(z_ == z); if (zero == astraCUDA3d::INIT_ZERO) return astraCUDA3d::zeroGPUMemory(hnd, x, y, z); else return true; } bool CFloat32ExistingGPUMemory::copyToGPUMemory(const astraCUDA3d::SSubDimensions3D &pos) { assert(pos.nx == x); assert(pos.ny == y); assert(pos.nz == z); assert(pos.pitch == x); assert(pos.subx == 0); assert(pos.suby == 0); assert(pos.subnx == x); assert(pos.subny == y); // These are less necessary than x/y, but allowing access to // subvolumes needs an interface change assert(pos.subz == 0); assert(pos.subnz == z); return true; } bool CFloat32ExistingGPUMemory::copyFromGPUMemory(const astraCUDA3d::SSubDimensions3D &pos) { assert(pos.nx == x); assert(pos.ny == y); assert(pos.nz == z); assert(pos.pitch == x); assert(pos.subx == 0); assert(pos.suby == 0); assert(pos.subnx == x); assert(pos.subny == y); // These are less necessary than x/y, but allowing access to // subvolumes needs an interface change assert(pos.subz == 0); assert(pos.subnz == z); return true; } bool CFloat32ExistingGPUMemory::freeGPUMemory() { return true; } CFloat32CustomGPUMemory * createGPUMemoryHandler(CFloat32Data3D *d) { CFloat32Data3DMemory *dMem = dynamic_cast<CFloat32Data3DMemory*>(d); CFloat32Data3DGPU *dGPU = dynamic_cast<CFloat32Data3DGPU*>(d); if (dMem) return new CFloat32DefaultGPUMemory(dMem); else return new CFloat32ExistingGPUMemory(dGPU); } bool CCompositeGeometryManager::splitJobs(TJobSet &jobs, size_t maxSize, int div, TJobSet &split) { int maxBlockDim = astraCUDA3d::maxBlockDimension(); ASTRA_DEBUG("Found max block dim %d", maxBlockDim); split.clear(); for (TJobSet::const_iterator i = jobs.begin(); i != jobs.end(); ++i) { CPart* pOutput = i->first; const TJobList &L = i->second; // 1. Split output part // 2. Per sub-part: // a. reduce input part // b. split input part // c. create jobs for new (input,output) subparts TPartList splitOutput; pOutput->splitZ(splitOutput, maxSize/3, UINT_MAX, div); #if 0 TPartList splitOutput2; for (TPartList::iterator i_out = splitOutput.begin(); i_out != splitOutput.end(); ++i_out) { boost::shared_ptr<CPart> outputPart = *i_out; outputPart.get()->splitX(splitOutput2, UINT_MAX, UINT_MAX, 1); } splitOutput.clear(); for (TPartList::iterator i_out = splitOutput2.begin(); i_out != splitOutput2.end(); ++i_out) { boost::shared_ptr<CPart> outputPart = *i_out; outputPart.get()->splitY(splitOutput, UINT_MAX, UINT_MAX, 1); } splitOutput2.clear(); #endif for (TJobList::const_iterator j = L.begin(); j != L.end(); ++j) { const SJob &job = *j; for (TPartList::iterator i_out = splitOutput.begin(); i_out != splitOutput.end(); ++i_out) { boost::shared_ptr<CPart> outputPart = *i_out; SJob newjob; newjob.pOutput = outputPart; newjob.eType = j->eType; newjob.eMode = j->eMode; newjob.pProjector = j->pProjector; newjob.FDKSettings = j->FDKSettings; CPart* input = job.pInput->reduce(outputPart.get()); if (input->getSize() == 0) { ASTRA_DEBUG("Empty input"); newjob.eType = SJob::JOB_NOP; split[outputPart.get()].push_back(newjob); continue; } size_t remainingSize = ( maxSize - outputPart->getSize() ) / 2; TPartList splitInput; input->splitZ(splitInput, remainingSize, maxBlockDim, 1); delete input; TPartList splitInput2; for (TPartList::iterator i_in = splitInput.begin(); i_in != splitInput.end(); ++i_in) { boost::shared_ptr<CPart> inputPart = *i_in; inputPart.get()->splitX(splitInput2, UINT_MAX, maxBlockDim, 1); } splitInput.clear(); for (TPartList::iterator i_in = splitInput2.begin(); i_in != splitInput2.end(); ++i_in) { boost::shared_ptr<CPart> inputPart = *i_in; inputPart.get()->splitY(splitInput, UINT_MAX, maxBlockDim, 1); } splitInput2.clear(); ASTRA_DEBUG("Input split into %d parts", splitInput.size()); for (TPartList::iterator i_in = splitInput.begin(); i_in != splitInput.end(); ++i_in) { newjob.pInput = *i_in; split[outputPart.get()].push_back(newjob); // Second and later (input) parts should always be added to // output of first (input) part. newjob.eMode = SJob::MODE_ADD; } } } } return true; } static std::pair<double, double> reduceProjectionVertical(const CVolumeGeometry3D* pVolGeom, const CProjectionGeometry3D* pProjGeom) { double vmin_g, vmax_g; // reduce self to only cover intersection with projection of VolumePart // (Project corners of volume, take bounding box) assert(pProjGeom->getProjectionCount() > 0); for (int i = 0; i < pProjGeom->getProjectionCount(); ++i) { double vol_u[8]; double vol_v[8]; double pixx = pVolGeom->getPixelLengthX(); double pixy = pVolGeom->getPixelLengthY(); double pixz = pVolGeom->getPixelLengthZ(); // TODO: Is 0.5 sufficient? double xmin = pVolGeom->getWindowMinX() - 0.5 * pixx; double xmax = pVolGeom->getWindowMaxX() + 0.5 * pixx; double ymin = pVolGeom->getWindowMinY() - 0.5 * pixy; double ymax = pVolGeom->getWindowMaxY() + 0.5 * pixy; double zmin = pVolGeom->getWindowMinZ() - 0.5 * pixz; double zmax = pVolGeom->getWindowMaxZ() + 0.5 * pixz; pProjGeom->projectPoint(xmin, ymin, zmin, i, vol_u[0], vol_v[0]); pProjGeom->projectPoint(xmin, ymin, zmax, i, vol_u[1], vol_v[1]); pProjGeom->projectPoint(xmin, ymax, zmin, i, vol_u[2], vol_v[2]); pProjGeom->projectPoint(xmin, ymax, zmax, i, vol_u[3], vol_v[3]); pProjGeom->projectPoint(xmax, ymin, zmin, i, vol_u[4], vol_v[4]); pProjGeom->projectPoint(xmax, ymin, zmax, i, vol_u[5], vol_v[5]); pProjGeom->projectPoint(xmax, ymax, zmin, i, vol_u[6], vol_v[6]); pProjGeom->projectPoint(xmax, ymax, zmax, i, vol_u[7], vol_v[7]); double vmin = vol_v[0]; double vmax = vol_v[0]; for (int j = 1; j < 8; ++j) { if (vol_v[j] < vmin) vmin = vol_v[j]; if (vol_v[j] > vmax) vmax = vol_v[j]; } if (i == 0 || vmin < vmin_g) vmin_g = vmin; if (i == 0 || vmax > vmax_g) vmax_g = vmax; } if (vmin_g < -1.0) vmin_g = -1.0; if (vmax_g > pProjGeom->getDetectorRowCount()) vmax_g = pProjGeom->getDetectorRowCount(); if (vmin_g >= vmax_g) vmin_g = vmax_g = 0.0; return std::pair<double, double>(vmin_g, vmax_g); } CCompositeGeometryManager::CPart::CPart(const CPart& other) { eType = other.eType; pData = other.pData; subX = other.subX; subY = other.subY; subZ = other.subZ; } CCompositeGeometryManager::CVolumePart::CVolumePart(const CVolumePart& other) : CPart(other) { pGeom = other.pGeom->clone(); } CCompositeGeometryManager::CVolumePart::~CVolumePart() { delete pGeom; } void CCompositeGeometryManager::CVolumePart::getDims(size_t &x, size_t &y, size_t &z) const { if (!pGeom) { x = y = z = 0; return; } x = pGeom->getGridColCount(); y = pGeom->getGridRowCount(); z = pGeom->getGridSliceCount(); } size_t CCompositeGeometryManager::CPart::getSize() const { size_t x, y, z; getDims(x, y, z); return x * y * z; } bool CCompositeGeometryManager::CPart::isFull() const { size_t x, y, z; getDims(x, y, z); return x == (size_t)pData->getWidth() && y == (size_t)pData->getHeight() && z == (size_t)pData->getDepth(); } bool CCompositeGeometryManager::CPart::canSplitAndReduce() const { return dynamic_cast<CFloat32Data3DMemory *>(pData) != 0; } static bool testVolumeRange(const std::pair<double, double>& fullRange, const CVolumeGeometry3D *pVolGeom, const CProjectionGeometry3D *pProjGeom, int zmin, int zmax) { double pixz = pVolGeom->getPixelLengthZ(); CVolumeGeometry3D test(pVolGeom->getGridColCount(), pVolGeom->getGridRowCount(), zmax - zmin, pVolGeom->getWindowMinX(), pVolGeom->getWindowMinY(), pVolGeom->getWindowMinZ() + zmin * pixz, pVolGeom->getWindowMaxX(), pVolGeom->getWindowMaxY(), pVolGeom->getWindowMinZ() + zmax * pixz); std::pair<double, double> subRange = reduceProjectionVertical(&test, pProjGeom); // empty if (subRange.first == subRange.second) return true; // fully outside of fullRange if (subRange.first >= fullRange.second || subRange.second <= fullRange.first) return true; return false; } CCompositeGeometryManager::CPart* CCompositeGeometryManager::CVolumePart::reduce(const CPart *_other) { if (!canSplitAndReduce()) return clone(); const CProjectionPart *other = dynamic_cast<const CProjectionPart *>(_other); assert(other); std::pair<double, double> fullRange = reduceProjectionVertical(pGeom, other->pGeom); int top_slice = 0, bottom_slice = 0; if (fullRange.first < fullRange.second) { // TOP SLICE int zmin = 0; int zmax = pGeom->getGridSliceCount()-1; // (Don't try empty region) // Setting top slice to zmin is always valid. while (zmin < zmax) { int zmid = (zmin + zmax + 1) / 2; bool ok = testVolumeRange(fullRange, pGeom, other->pGeom, 0, zmid); ASTRA_DEBUG("binsearch min: [%d,%d], %d, %s", zmin, zmax, zmid, ok ? "ok" : "removed too much"); if (ok) zmin = zmid; else zmax = zmid - 1; } top_slice = zmin; // BOTTOM SLICE zmin = top_slice + 1; // (Don't try empty region) zmax = pGeom->getGridSliceCount(); // Setting bottom slice to zmax is always valid while (zmin < zmax) { int zmid = (zmin + zmax) / 2; bool ok = testVolumeRange(fullRange, pGeom, other->pGeom, zmid, pGeom->getGridSliceCount()); ASTRA_DEBUG("binsearch max: [%d,%d], %d, %s", zmin, zmax, zmid, ok ? "ok" : "removed too much"); if (ok) zmax = zmid; else zmin = zmid + 1; } bottom_slice = zmax; } ASTRA_DEBUG("found extent: %d - %d", top_slice, bottom_slice); top_slice -= 1; if (top_slice < 0) top_slice = 0; bottom_slice += 1; if (bottom_slice >= pGeom->getGridSliceCount()) bottom_slice = pGeom->getGridSliceCount(); ASTRA_DEBUG("adjusted extent: %d - %d", top_slice, bottom_slice); double pixz = pGeom->getPixelLengthZ(); CVolumePart *sub = new CVolumePart(); sub->subX = this->subX; sub->subY = this->subY; sub->subZ = this->subZ + top_slice; sub->pData = pData; if (top_slice == bottom_slice) { sub->pGeom = 0; } else { sub->pGeom = new CVolumeGeometry3D(pGeom->getGridColCount(), pGeom->getGridRowCount(), bottom_slice - top_slice, pGeom->getWindowMinX(), pGeom->getWindowMinY(), pGeom->getWindowMinZ() + top_slice * pixz, pGeom->getWindowMaxX(), pGeom->getWindowMaxY(), pGeom->getWindowMinZ() + bottom_slice * pixz); } ASTRA_DEBUG("Reduce volume from %d - %d to %d - %d ( %f - %f )", this->subZ, this->subZ + pGeom->getGridSliceCount(), this->subZ + top_slice, this->subZ + bottom_slice, pGeom->getWindowMinZ() + top_slice * pixz, pGeom->getWindowMinZ() + bottom_slice * pixz); return sub; } static size_t ceildiv(size_t a, size_t b) { return (a + b - 1) / b; } static size_t computeLinearSplit(size_t maxBlock, int div, size_t sliceCount) { size_t blockSize = maxBlock; size_t blockCount; if (sliceCount <= blockSize) blockCount = 1; else blockCount = ceildiv(sliceCount, blockSize); // Increase number of blocks to be divisible by div size_t divCount = div * ceildiv(blockCount, div); // If divCount is above sqrt(number of slices), then // we can't guarantee divisibility by div, but let's try anyway if (ceildiv(sliceCount, ceildiv(sliceCount, divCount)) % div == 0) { blockCount = divCount; } else { // If divisibility isn't achievable, we may want to optimize // differently. // TODO: Figure out how to model and optimize this. } // Final adjustment to make blocks more evenly sized // (This can't make the blocks larger) blockSize = ceildiv(sliceCount, blockCount); ASTRA_DEBUG("%ld %ld -> %ld * %ld", sliceCount, maxBlock, blockCount, blockSize); assert(blockSize <= maxBlock); assert((divCount * divCount > sliceCount) || (blockCount % div) == 0); return blockSize; } template<class V, class P> static V* getProjectionVectors(const P* geom); template<> SConeProjection* getProjectionVectors(const CConeProjectionGeometry3D* pProjGeom) { return genConeProjections(pProjGeom->getProjectionCount(), pProjGeom->getDetectorColCount(), pProjGeom->getDetectorRowCount(), pProjGeom->getOriginSourceDistance(), pProjGeom->getOriginDetectorDistance(), pProjGeom->getDetectorSpacingX(), pProjGeom->getDetectorSpacingY(), pProjGeom->getProjectionAngles()); } template<> SConeProjection* getProjectionVectors(const CConeVecProjectionGeometry3D* pProjGeom) { int nth = pProjGeom->getProjectionCount(); SConeProjection* pProjs = new SConeProjection[nth]; for (int i = 0; i < nth; ++i) pProjs[i] = pProjGeom->getProjectionVectors()[i]; return pProjs; } template<> SPar3DProjection* getProjectionVectors(const CParallelProjectionGeometry3D* pProjGeom) { return genPar3DProjections(pProjGeom->getProjectionCount(), pProjGeom->getDetectorColCount(), pProjGeom->getDetectorRowCount(), pProjGeom->getDetectorSpacingX(), pProjGeom->getDetectorSpacingY(), pProjGeom->getProjectionAngles()); } template<> SPar3DProjection* getProjectionVectors(const CParallelVecProjectionGeometry3D* pProjGeom) { int nth = pProjGeom->getProjectionCount(); SPar3DProjection* pProjs = new SPar3DProjection[nth]; for (int i = 0; i < nth; ++i) pProjs[i] = pProjGeom->getProjectionVectors()[i]; return pProjs; } template<class V> static void translateProjectionVectorsU(V* pProjs, int count, double du) { for (int i = 0; i < count; ++i) { pProjs[i].fDetSX += du * pProjs[i].fDetUX; pProjs[i].fDetSY += du * pProjs[i].fDetUY; pProjs[i].fDetSZ += du * pProjs[i].fDetUZ; } } template<class V> static void translateProjectionVectorsV(V* pProjs, int count, double dv) { for (int i = 0; i < count; ++i) { pProjs[i].fDetSX += dv * pProjs[i].fDetVX; pProjs[i].fDetSY += dv * pProjs[i].fDetVY; pProjs[i].fDetSZ += dv * pProjs[i].fDetVZ; } } static CProjectionGeometry3D* getSubProjectionGeometryU(const CProjectionGeometry3D* pProjGeom, int u, int size) { // First convert to vectors, then translate, then convert into new object const CConeProjectionGeometry3D* conegeom = dynamic_cast<const CConeProjectionGeometry3D*>(pProjGeom); const CParallelProjectionGeometry3D* par3dgeom = dynamic_cast<const CParallelProjectionGeometry3D*>(pProjGeom); const CParallelVecProjectionGeometry3D* parvec3dgeom = dynamic_cast<const CParallelVecProjectionGeometry3D*>(pProjGeom); const CConeVecProjectionGeometry3D* conevec3dgeom = dynamic_cast<const CConeVecProjectionGeometry3D*>(pProjGeom); if (conegeom || conevec3dgeom) { SConeProjection* pConeProjs; if (conegeom) { pConeProjs = getProjectionVectors<SConeProjection>(conegeom); } else { pConeProjs = getProjectionVectors<SConeProjection>(conevec3dgeom); } translateProjectionVectorsU(pConeProjs, pProjGeom->getProjectionCount(), u); CProjectionGeometry3D* ret = new CConeVecProjectionGeometry3D(pProjGeom->getProjectionCount(), pProjGeom->getDetectorRowCount(), size, pConeProjs); delete[] pConeProjs; return ret; } else { assert(par3dgeom || parvec3dgeom); SPar3DProjection* pParProjs; if (par3dgeom) { pParProjs = getProjectionVectors<SPar3DProjection>(par3dgeom); } else { pParProjs = getProjectionVectors<SPar3DProjection>(parvec3dgeom); } translateProjectionVectorsU(pParProjs, pProjGeom->getProjectionCount(), u); CProjectionGeometry3D* ret = new CParallelVecProjectionGeometry3D(pProjGeom->getProjectionCount(), pProjGeom->getDetectorRowCount(), size, pParProjs); delete[] pParProjs; return ret; } } static CProjectionGeometry3D* getSubProjectionGeometryV(const CProjectionGeometry3D* pProjGeom, int v, int size) { // First convert to vectors, then translate, then convert into new object const CConeProjectionGeometry3D* conegeom = dynamic_cast<const CConeProjectionGeometry3D*>(pProjGeom); const CParallelProjectionGeometry3D* par3dgeom = dynamic_cast<const CParallelProjectionGeometry3D*>(pProjGeom); const CParallelVecProjectionGeometry3D* parvec3dgeom = dynamic_cast<const CParallelVecProjectionGeometry3D*>(pProjGeom); const CConeVecProjectionGeometry3D* conevec3dgeom = dynamic_cast<const CConeVecProjectionGeometry3D*>(pProjGeom); if (conegeom || conevec3dgeom) { SConeProjection* pConeProjs; if (conegeom) { pConeProjs = getProjectionVectors<SConeProjection>(conegeom); } else { pConeProjs = getProjectionVectors<SConeProjection>(conevec3dgeom); } translateProjectionVectorsV(pConeProjs, pProjGeom->getProjectionCount(), v); CProjectionGeometry3D* ret = new CConeVecProjectionGeometry3D(pProjGeom->getProjectionCount(), size, pProjGeom->getDetectorColCount(), pConeProjs); delete[] pConeProjs; return ret; } else { assert(par3dgeom || parvec3dgeom); SPar3DProjection* pParProjs; if (par3dgeom) { pParProjs = getProjectionVectors<SPar3DProjection>(par3dgeom); } else { pParProjs = getProjectionVectors<SPar3DProjection>(parvec3dgeom); } translateProjectionVectorsV(pParProjs, pProjGeom->getProjectionCount(), v); CProjectionGeometry3D* ret = new CParallelVecProjectionGeometry3D(pProjGeom->getProjectionCount(), size, pProjGeom->getDetectorColCount(), pParProjs); delete[] pParProjs; return ret; } } // split self into sub-parts: // - each no bigger than maxSize // - number of sub-parts is divisible by div // - maybe all approximately the same size? void CCompositeGeometryManager::CVolumePart::splitX(CCompositeGeometryManager::TPartList& out, size_t maxSize, size_t maxDim, int div) { if (canSplitAndReduce()) { // Split in vertical direction only at first, until we figure out // a model for splitting in other directions size_t sliceSize = ((size_t) pGeom->getGridSliceCount()) * pGeom->getGridRowCount(); int sliceCount = pGeom->getGridColCount(); size_t m = std::min(maxSize / sliceSize, maxDim); size_t blockSize = computeLinearSplit(m, div, sliceCount); int rem = blockSize - (sliceCount % blockSize); if ((size_t)rem == blockSize) rem = 0; ASTRA_DEBUG("From %d to %d step %d", -(rem / 2), sliceCount, blockSize); for (int x = -(rem / 2); x < sliceCount; x += blockSize) { int newsubX = x; if (newsubX < 0) newsubX = 0; int endX = x + blockSize; if (endX > sliceCount) endX = sliceCount; int size = endX - newsubX; CVolumePart *sub = new CVolumePart(); sub->subX = this->subX + newsubX; sub->subY = this->subY; sub->subZ = this->subZ; ASTRA_DEBUG("VolumePart split %d %d %d -> %p", sub->subX, sub->subY, sub->subZ, (void*)sub); double shift = pGeom->getPixelLengthX() * newsubX; sub->pData = pData; sub->pGeom = new CVolumeGeometry3D(size, pGeom->getGridRowCount(), pGeom->getGridSliceCount(), pGeom->getWindowMinX() + shift, pGeom->getWindowMinY(), pGeom->getWindowMinZ(), pGeom->getWindowMinX() + shift + size * pGeom->getPixelLengthX(), pGeom->getWindowMaxY(), pGeom->getWindowMaxZ()); out.push_back(boost::shared_ptr<CPart>(sub)); } } else { out.push_back(boost::shared_ptr<CPart>(clone())); } } void CCompositeGeometryManager::CVolumePart::splitY(CCompositeGeometryManager::TPartList& out, size_t maxSize, size_t maxDim, int div) { if (canSplitAndReduce()) { // Split in vertical direction only at first, until we figure out // a model for splitting in other directions size_t sliceSize = ((size_t) pGeom->getGridColCount()) * pGeom->getGridSliceCount(); int sliceCount = pGeom->getGridRowCount(); size_t m = std::min(maxSize / sliceSize, maxDim); size_t blockSize = computeLinearSplit(m, div, sliceCount); int rem = blockSize - (sliceCount % blockSize); if ((size_t)rem == blockSize) rem = 0; ASTRA_DEBUG("From %d to %d step %d", -(rem / 2), sliceCount, blockSize); for (int y = -(rem / 2); y < sliceCount; y += blockSize) { int newsubY = y; if (newsubY < 0) newsubY = 0; int endY = y + blockSize; if (endY > sliceCount) endY = sliceCount; int size = endY - newsubY; CVolumePart *sub = new CVolumePart(); sub->subX = this->subX; sub->subY = this->subY + newsubY; sub->subZ = this->subZ; ASTRA_DEBUG("VolumePart split %d %d %d -> %p", sub->subX, sub->subY, sub->subZ, (void*)sub); double shift = pGeom->getPixelLengthY() * newsubY; sub->pData = pData; sub->pGeom = new CVolumeGeometry3D(pGeom->getGridColCount(), size, pGeom->getGridSliceCount(), pGeom->getWindowMinX(), pGeom->getWindowMinY() + shift, pGeom->getWindowMinZ(), pGeom->getWindowMaxX(), pGeom->getWindowMinY() + shift + size * pGeom->getPixelLengthY(), pGeom->getWindowMaxZ()); out.push_back(boost::shared_ptr<CPart>(sub)); } } else { out.push_back(boost::shared_ptr<CPart>(clone())); } } void CCompositeGeometryManager::CVolumePart::splitZ(CCompositeGeometryManager::TPartList& out, size_t maxSize, size_t maxDim, int div) { if (canSplitAndReduce()) { // Split in vertical direction only at first, until we figure out // a model for splitting in other directions size_t sliceSize = ((size_t) pGeom->getGridColCount()) * pGeom->getGridRowCount(); int sliceCount = pGeom->getGridSliceCount(); size_t m = std::min(maxSize / sliceSize, maxDim); size_t blockSize = computeLinearSplit(m, div, sliceCount); int rem = blockSize - (sliceCount % blockSize); if ((size_t)rem == blockSize) rem = 0; ASTRA_DEBUG("From %d to %d step %d", -(rem / 2), sliceCount, blockSize); for (int z = -(rem / 2); z < sliceCount; z += blockSize) { int newsubZ = z; if (newsubZ < 0) newsubZ = 0; int endZ = z + blockSize; if (endZ > sliceCount) endZ = sliceCount; int size = endZ - newsubZ; CVolumePart *sub = new CVolumePart(); sub->subX = this->subX; sub->subY = this->subY; sub->subZ = this->subZ + newsubZ; ASTRA_DEBUG("VolumePart split %d %d %d -> %p", sub->subX, sub->subY, sub->subZ, (void*)sub); double shift = pGeom->getPixelLengthZ() * newsubZ; sub->pData = pData; sub->pGeom = new CVolumeGeometry3D(pGeom->getGridColCount(), pGeom->getGridRowCount(), size, pGeom->getWindowMinX(), pGeom->getWindowMinY(), pGeom->getWindowMinZ() + shift, pGeom->getWindowMaxX(), pGeom->getWindowMaxY(), pGeom->getWindowMinZ() + shift + size * pGeom->getPixelLengthZ()); out.push_back(boost::shared_ptr<CPart>(sub)); } } else { out.push_back(boost::shared_ptr<CPart>(clone())); } } CCompositeGeometryManager::CVolumePart* CCompositeGeometryManager::CVolumePart::clone() const { return new CVolumePart(*this); } CCompositeGeometryManager::CProjectionPart::CProjectionPart(const CProjectionPart& other) : CPart(other) { pGeom = other.pGeom->clone(); } CCompositeGeometryManager::CProjectionPart::~CProjectionPart() { delete pGeom; } void CCompositeGeometryManager::CProjectionPart::getDims(size_t &x, size_t &y, size_t &z) const { if (!pGeom) { x = y = z = 0; return; } x = pGeom->getDetectorColCount(); y = pGeom->getProjectionCount(); z = pGeom->getDetectorRowCount(); } CCompositeGeometryManager::CPart* CCompositeGeometryManager::CProjectionPart::reduce(const CPart *_other) { if (!canSplitAndReduce()) return clone(); const CVolumePart *other = dynamic_cast<const CVolumePart *>(_other); assert(other); std::pair<double, double> r = reduceProjectionVertical(other->pGeom, pGeom); // fprintf(stderr, "v extent: %f %f\n", r.first, r.second); int _vmin = (int)floor(r.first - 1.0); int _vmax = (int)ceil(r.second + 1.0); if (_vmin < 0) _vmin = 0; if (_vmax > pGeom->getDetectorRowCount()) _vmax = pGeom->getDetectorRowCount(); if (_vmin >= _vmax) { _vmin = _vmax = 0; } CProjectionPart *sub = new CProjectionPart(); sub->subX = this->subX; sub->subY = this->subY; sub->subZ = this->subZ + _vmin; sub->pData = pData; if (_vmin == _vmax) { sub->pGeom = 0; } else { sub->pGeom = getSubProjectionGeometryV(pGeom, _vmin, _vmax - _vmin); } ASTRA_DEBUG("Reduce projection from %d - %d to %d - %d", this->subZ, this->subZ + pGeom->getDetectorRowCount(), this->subZ + _vmin, this->subZ + _vmax); return sub; } void CCompositeGeometryManager::CProjectionPart::splitX(CCompositeGeometryManager::TPartList &out, size_t maxSize, size_t maxDim, int div) { if (canSplitAndReduce()) { // Split in vertical direction only at first, until we figure out // a model for splitting in other directions size_t sliceSize = ((size_t) pGeom->getDetectorRowCount()) * pGeom->getProjectionCount(); int sliceCount = pGeom->getDetectorColCount(); size_t m = std::min(maxSize / sliceSize, maxDim); size_t blockSize = computeLinearSplit(m, div, sliceCount); int rem = blockSize - (sliceCount % blockSize); if ((size_t)rem == blockSize) rem = 0; ASTRA_DEBUG("From %d to %d step %d", -(rem / 2), sliceCount, blockSize); for (int x = -(rem / 2); x < sliceCount; x += blockSize) { int newsubX = x; if (newsubX < 0) newsubX = 0; int endX = x + blockSize; if (endX > sliceCount) endX = sliceCount; int size = endX - newsubX; CProjectionPart *sub = new CProjectionPart(); sub->subX = this->subX + newsubX; sub->subY = this->subY; sub->subZ = this->subZ; ASTRA_DEBUG("ProjectionPart split %d %d %d -> %p", sub->subX, sub->subY, sub->subZ, (void*)sub); sub->pData = pData; sub->pGeom = getSubProjectionGeometryU(pGeom, newsubX, size); out.push_back(boost::shared_ptr<CPart>(sub)); } } else { out.push_back(boost::shared_ptr<CPart>(clone())); } } void CCompositeGeometryManager::CProjectionPart::splitY(CCompositeGeometryManager::TPartList &out, size_t maxSize, size_t maxDim, int div) { // TODO out.push_back(boost::shared_ptr<CPart>(clone())); } void CCompositeGeometryManager::CProjectionPart::splitZ(CCompositeGeometryManager::TPartList &out, size_t maxSize, size_t maxDim, int div) { if (canSplitAndReduce()) { // Split in vertical direction only at first, until we figure out // a model for splitting in other directions size_t sliceSize = ((size_t) pGeom->getDetectorColCount()) * pGeom->getProjectionCount(); int sliceCount = pGeom->getDetectorRowCount(); size_t m = std::min(maxSize / sliceSize, maxDim); size_t blockSize = computeLinearSplit(m, div, sliceCount); int rem = blockSize - (sliceCount % blockSize); if ((size_t)rem == blockSize) rem = 0; ASTRA_DEBUG("From %d to %d step %d", -(rem / 2), sliceCount, blockSize); for (int z = -(rem / 2); z < sliceCount; z += blockSize) { int newsubZ = z; if (newsubZ < 0) newsubZ = 0; int endZ = z + blockSize; if (endZ > sliceCount) endZ = sliceCount; int size = endZ - newsubZ; CProjectionPart *sub = new CProjectionPart(); sub->subX = this->subX; sub->subY = this->subY; sub->subZ = this->subZ + newsubZ; ASTRA_DEBUG("ProjectionPart split %d %d %d -> %p", sub->subX, sub->subY, sub->subZ, (void*)sub); sub->pData = pData; sub->pGeom = getSubProjectionGeometryV(pGeom, newsubZ, size); out.push_back(boost::shared_ptr<CPart>(sub)); } } else { out.push_back(boost::shared_ptr<CPart>(clone())); } } CCompositeGeometryManager::CProjectionPart* CCompositeGeometryManager::CProjectionPart::clone() const { return new CProjectionPart(*this); } CCompositeGeometryManager::SJob CCompositeGeometryManager::createJobFP(CProjector3D *pProjector, CFloat32VolumeData3D *pVolData, CFloat32ProjectionData3D *pProjData) { ASTRA_DEBUG("CCompositeGeometryManager::createJobFP"); // Create single job for FP CVolumePart *input = new CVolumePart(); input->pData = pVolData; input->subX = 0; input->subY = 0; input->subZ = 0; input->pGeom = pVolData->getGeometry()->clone(); ASTRA_DEBUG("Main FP VolumePart -> %p", (void*)input); CProjectionPart *output = new CProjectionPart(); output->pData = pProjData; output->subX = 0; output->subY = 0; output->subZ = 0; output->pGeom = pProjData->getGeometry()->clone(); ASTRA_DEBUG("Main FP ProjectionPart -> %p", (void*)output); SJob FP; FP.pInput = boost::shared_ptr<CPart>(input); FP.pOutput = boost::shared_ptr<CPart>(output); FP.pProjector = pProjector; FP.eType = SJob::JOB_FP; FP.eMode = SJob::MODE_SET; return FP; } CCompositeGeometryManager::SJob CCompositeGeometryManager::createJobBP(CProjector3D *pProjector, CFloat32VolumeData3D *pVolData, CFloat32ProjectionData3D *pProjData) { ASTRA_DEBUG("CCompositeGeometryManager::createJobBP"); // Create single job for BP CProjectionPart *input = new CProjectionPart(); input->pData = pProjData; input->subX = 0; input->subY = 0; input->subZ = 0; input->pGeom = pProjData->getGeometry()->clone(); CVolumePart *output = new CVolumePart(); output->pData = pVolData; output->subX = 0; output->subY = 0; output->subZ = 0; output->pGeom = pVolData->getGeometry()->clone(); SJob BP; BP.pInput = boost::shared_ptr<CPart>(input); BP.pOutput = boost::shared_ptr<CPart>(output); BP.pProjector = pProjector; BP.eType = SJob::JOB_BP; BP.eMode = SJob::MODE_SET; return BP; } bool CCompositeGeometryManager::doFP(CProjector3D *pProjector, CFloat32VolumeData3D *pVolData, CFloat32ProjectionData3D *pProjData) { TJobList L; L.push_back(createJobFP(pProjector, pVolData, pProjData)); return doJobs(L); } bool CCompositeGeometryManager::doBP(CProjector3D *pProjector, CFloat32VolumeData3D *pVolData, CFloat32ProjectionData3D *pProjData) { TJobList L; L.push_back(createJobBP(pProjector, pVolData, pProjData)); return doJobs(L); } bool CCompositeGeometryManager::doFDK(CProjector3D *pProjector, CFloat32VolumeData3D *pVolData, CFloat32ProjectionData3D *pProjData, bool bShortScan, const float *pfFilter) { if (!dynamic_cast<CConeProjectionGeometry3D*>(pProjData->getGeometry())) { ASTRA_ERROR("CCompositeGeometryManager::doFDK: cone geometry required"); return false; } SJob job = createJobBP(pProjector, pVolData, pProjData); job.eType = SJob::JOB_FDK; job.FDKSettings.bShortScan = bShortScan; job.FDKSettings.pfFilter = pfFilter; TJobList L; L.push_back(job); return doJobs(L); } bool CCompositeGeometryManager::doFP(CProjector3D *pProjector, const std::vector<CFloat32VolumeData3D *>& volData, const std::vector<CFloat32ProjectionData3D *>& projData) { ASTRA_DEBUG("CCompositeGeometryManager::doFP, multi-volume"); std::vector<CFloat32VolumeData3D *>::const_iterator i; std::vector<boost::shared_ptr<CPart> > inputs; for (i = volData.begin(); i != volData.end(); ++i) { CVolumePart *input = new CVolumePart(); input->pData = *i; input->subX = 0; input->subY = 0; input->subZ = 0; input->pGeom = (*i)->getGeometry()->clone(); inputs.push_back(boost::shared_ptr<CPart>(input)); } std::vector<CFloat32ProjectionData3D *>::const_iterator j; std::vector<boost::shared_ptr<CPart> > outputs; for (j = projData.begin(); j != projData.end(); ++j) { CProjectionPart *output = new CProjectionPart(); output->pData = *j; output->subX = 0; output->subY = 0; output->subZ = 0; output->pGeom = (*j)->getGeometry()->clone(); outputs.push_back(boost::shared_ptr<CPart>(output)); } std::vector<boost::shared_ptr<CPart> >::iterator i2; std::vector<boost::shared_ptr<CPart> >::iterator j2; TJobList L; for (i2 = outputs.begin(); i2 != outputs.end(); ++i2) { SJob FP; FP.eMode = SJob::MODE_SET; for (j2 = inputs.begin(); j2 != inputs.end(); ++j2) { FP.pInput = *j2; FP.pOutput = *i2; FP.pProjector = pProjector; FP.eType = SJob::JOB_FP; L.push_back(FP); // Set first, add rest FP.eMode = SJob::MODE_ADD; } } return doJobs(L); } bool CCompositeGeometryManager::doBP(CProjector3D *pProjector, const std::vector<CFloat32VolumeData3D *>& volData, const std::vector<CFloat32ProjectionData3D *>& projData) { ASTRA_DEBUG("CCompositeGeometryManager::doBP, multi-volume"); std::vector<CFloat32VolumeData3D *>::const_iterator i; std::vector<boost::shared_ptr<CPart> > outputs; for (i = volData.begin(); i != volData.end(); ++i) { CVolumePart *output = new CVolumePart(); output->pData = *i; output->subX = 0; output->subY = 0; output->subZ = 0; output->pGeom = (*i)->getGeometry()->clone(); outputs.push_back(boost::shared_ptr<CPart>(output)); } std::vector<CFloat32ProjectionData3D *>::const_iterator j; std::vector<boost::shared_ptr<CPart> > inputs; for (j = projData.begin(); j != projData.end(); ++j) { CProjectionPart *input = new CProjectionPart(); input->pData = *j; input->subX = 0; input->subY = 0; input->subZ = 0; input->pGeom = (*j)->getGeometry()->clone(); inputs.push_back(boost::shared_ptr<CPart>(input)); } std::vector<boost::shared_ptr<CPart> >::iterator i2; std::vector<boost::shared_ptr<CPart> >::iterator j2; TJobList L; for (i2 = outputs.begin(); i2 != outputs.end(); ++i2) { SJob BP; BP.eMode = SJob::MODE_SET; for (j2 = inputs.begin(); j2 != inputs.end(); ++j2) { BP.pInput = *j2; BP.pOutput = *i2; BP.pProjector = pProjector; BP.eType = SJob::JOB_BP; L.push_back(BP); // Set first, add rest BP.eMode = SJob::MODE_ADD; } } return doJobs(L); } static bool doJob(const CCompositeGeometryManager::TJobSet::const_iterator& iter) { CCompositeGeometryManager::CPart* output = iter->first; const CCompositeGeometryManager::TJobList& L = iter->second; assert(!L.empty()); bool zero = L.begin()->eMode == CCompositeGeometryManager::SJob::MODE_SET; size_t outx, outy, outz; output->getDims(outx, outy, outz); if (L.begin()->eType == CCompositeGeometryManager::SJob::JOB_NOP) { // just zero output? if (zero) { // TODO: This function shouldn't have to know about this difference // between Memory/GPU CFloat32Data3DMemory *hostMem = dynamic_cast<CFloat32Data3DMemory *>(output->pData); if (hostMem) { for (size_t z = 0; z < outz; ++z) { for (size_t y = 0; y < outy; ++y) { float* ptr = hostMem->getData(); ptr += (z + output->subX) * (size_t)output->pData->getHeight() * (size_t)output->pData->getWidth(); ptr += (y + output->subY) * (size_t)output->pData->getWidth(); ptr += output->subX; memset(ptr, 0, sizeof(float) * outx); } } } else { CFloat32Data3DGPU *gpuMem = dynamic_cast<CFloat32Data3DGPU *>(output->pData); assert(gpuMem); assert(output->isFull()); // TODO: zero subset? zeroGPUMemory(gpuMem->getHandle(), outx, outy, outz); } } return true; } astraCUDA3d::SSubDimensions3D dstdims; dstdims.nx = output->pData->getWidth(); dstdims.pitch = dstdims.nx; dstdims.ny = output->pData->getHeight(); dstdims.nz = output->pData->getDepth(); dstdims.subnx = outx; dstdims.subny = outy; dstdims.subnz = outz; ASTRA_DEBUG("dstdims: %d,%d,%d in %d,%d,%d", dstdims.subnx, dstdims.subny, dstdims.subnz, dstdims.nx, dstdims.ny, dstdims.nz); dstdims.subx = output->subX; dstdims.suby = output->subY; dstdims.subz = output->subZ; CFloat32CustomGPUMemory *dstMem = createGPUMemoryHandler(output->pData); bool ok = dstMem->allocateGPUMemory(outx, outy, outz, zero ? astraCUDA3d::INIT_ZERO : astraCUDA3d::INIT_NO); if (!ok) ASTRA_ERROR("Error allocating GPU memory"); for (CCompositeGeometryManager::TJobList::const_iterator i = L.begin(); i != L.end(); ++i) { const CCompositeGeometryManager::SJob &j = *i; assert(j.pInput); CCudaProjector3D *projector = dynamic_cast<CCudaProjector3D*>(j.pProjector); Cuda3DProjectionKernel projKernel = ker3d_default; int detectorSuperSampling = 1; int voxelSuperSampling = 1; bool densityWeighting = false; if (projector) { projKernel = projector->getProjectionKernel(); detectorSuperSampling = projector->getDetectorSuperSampling(); voxelSuperSampling = projector->getVoxelSuperSampling(); densityWeighting = projector->getDensityWeighting(); } size_t inx, iny, inz; j.pInput->getDims(inx, iny, inz); CFloat32CustomGPUMemory *srcMem = createGPUMemoryHandler(j.pInput->pData); astraCUDA3d::SSubDimensions3D srcdims; srcdims.nx = j.pInput->pData->getWidth(); srcdims.pitch = srcdims.nx; srcdims.ny = j.pInput->pData->getHeight(); srcdims.nz = j.pInput->pData->getDepth(); srcdims.subnx = inx; srcdims.subny = iny; srcdims.subnz = inz; srcdims.subx = j.pInput->subX; srcdims.suby = j.pInput->subY; srcdims.subz = j.pInput->subZ; ok = srcMem->allocateGPUMemory(inx, iny, inz, astraCUDA3d::INIT_NO); if (!ok) ASTRA_ERROR("Error allocating GPU memory"); ok = srcMem->copyToGPUMemory(srcdims); if (!ok) ASTRA_ERROR("Error copying input data to GPU"); switch (j.eType) { case CCompositeGeometryManager::SJob::JOB_FP: { assert(dynamic_cast<CCompositeGeometryManager::CVolumePart*>(j.pInput.get())); assert(dynamic_cast<CCompositeGeometryManager::CProjectionPart*>(j.pOutput.get())); ASTRA_DEBUG("CCompositeGeometryManager::doJobs: doing FP"); ok = astraCUDA3d::FP(((CCompositeGeometryManager::CProjectionPart*)j.pOutput.get())->pGeom, dstMem->hnd, ((CCompositeGeometryManager::CVolumePart*)j.pInput.get())->pGeom, srcMem->hnd, detectorSuperSampling, projKernel); if (!ok) ASTRA_ERROR("Error performing sub-FP"); ASTRA_DEBUG("CCompositeGeometryManager::doJobs: FP done"); } break; case CCompositeGeometryManager::SJob::JOB_BP: { assert(dynamic_cast<CCompositeGeometryManager::CVolumePart*>(j.pOutput.get())); assert(dynamic_cast<CCompositeGeometryManager::CProjectionPart*>(j.pInput.get())); ASTRA_DEBUG("CCompositeGeometryManager::doJobs: doing BP"); ok = astraCUDA3d::BP(((CCompositeGeometryManager::CProjectionPart*)j.pInput.get())->pGeom, srcMem->hnd, ((CCompositeGeometryManager::CVolumePart*)j.pOutput.get())->pGeom, dstMem->hnd, voxelSuperSampling, densityWeighting); if (!ok) ASTRA_ERROR("Error performing sub-BP"); ASTRA_DEBUG("CCompositeGeometryManager::doJobs: BP done"); } break; case CCompositeGeometryManager::SJob::JOB_FDK: { assert(dynamic_cast<CCompositeGeometryManager::CVolumePart*>(j.pOutput.get())); assert(dynamic_cast<CCompositeGeometryManager::CProjectionPart*>(j.pInput.get())); if (srcdims.subx || srcdims.suby) { ASTRA_ERROR("CCompositeGeometryManager::doJobs: data too large for FDK"); ok = false; } else { ASTRA_DEBUG("CCompositeGeometryManager::doJobs: doing FDK"); ok = astraCUDA3d::FDK(((CCompositeGeometryManager::CProjectionPart*)j.pInput.get())->pGeom, srcMem->hnd, ((CCompositeGeometryManager::CVolumePart*)j.pOutput.get())->pGeom, dstMem->hnd, j.FDKSettings.bShortScan, j.FDKSettings.pfFilter); if (!ok) ASTRA_ERROR("Error performing sub-FDK"); ASTRA_DEBUG("CCompositeGeometryManager::doJobs: FDK done"); } } break; default: assert(false); } ok = srcMem->freeGPUMemory(); if (!ok) ASTRA_ERROR("Error freeing GPU memory"); delete srcMem; } ok = dstMem->copyFromGPUMemory(dstdims); if (!ok) ASTRA_ERROR("Error copying output data from GPU"); ok = dstMem->freeGPUMemory(); if (!ok) ASTRA_ERROR("Error freeing GPU memory"); delete dstMem; return true; } class WorkQueue { public: WorkQueue(CCompositeGeometryManager::TJobSet &_jobs) : m_jobs(_jobs) { #ifdef USE_PTHREADS pthread_mutex_init(&m_mutex, 0); #endif m_iter = m_jobs.begin(); } bool receive(CCompositeGeometryManager::TJobSet::const_iterator &i) { lock(); if (m_iter == m_jobs.end()) { unlock(); return false; } i = m_iter++; unlock(); return true; } #ifdef USE_PTHREADS void lock() { // TODO: check mutex op return values pthread_mutex_lock(&m_mutex); } void unlock() { // TODO: check mutex op return values pthread_mutex_unlock(&m_mutex); } #else void lock() { m_mutex.lock(); } void unlock() { m_mutex.unlock(); } #endif private: CCompositeGeometryManager::TJobSet &m_jobs; CCompositeGeometryManager::TJobSet::const_iterator m_iter; #ifdef USE_PTHREADS pthread_mutex_t m_mutex; #else boost::mutex m_mutex; #endif }; struct WorkThreadInfo { WorkQueue* m_queue; unsigned int m_iGPU; }; #ifndef USE_PTHREADS void runEntries_boost(WorkThreadInfo* info) { ASTRA_DEBUG("Launching thread on GPU %d\n", info->m_iGPU); CCompositeGeometryManager::TJobSet::const_iterator i; while (info->m_queue->receive(i)) { ASTRA_DEBUG("Running block on GPU %d\n", info->m_iGPU); astraCUDA3d::setGPUIndex(info->m_iGPU); boost::this_thread::interruption_point(); doJob(i); boost::this_thread::interruption_point(); } ASTRA_DEBUG("Finishing thread on GPU %d\n", info->m_iGPU); } #else void* runEntries_pthreads(void* data) { WorkThreadInfo* info = (WorkThreadInfo*)data; ASTRA_DEBUG("Launching thread on GPU %d\n", info->m_iGPU); CCompositeGeometryManager::TJobSet::const_iterator i; while (info->m_queue->receive(i)) { ASTRA_DEBUG("Running block on GPU %d\n", info->m_iGPU); astraCUDA3d::setGPUIndex(info->m_iGPU); pthread_testcancel(); doJob(i); pthread_testcancel(); } ASTRA_DEBUG("Finishing thread on GPU %d\n", info->m_iGPU); return 0; } #endif void runWorkQueue(WorkQueue &queue, const std::vector<int> & iGPUIndices) { int iThreadCount = iGPUIndices.size(); std::vector<WorkThreadInfo> infos; #ifdef USE_PTHREADS std::vector<pthread_t> threads; #else std::vector<boost::thread*> threads; #endif infos.resize(iThreadCount); threads.resize(iThreadCount); for (int i = 0; i < iThreadCount; ++i) { infos[i].m_queue = &queue; infos[i].m_iGPU = iGPUIndices[i]; #ifdef USE_PTHREADS pthread_create(&threads[i], 0, runEntries_pthreads, (void*)&infos[i]); #else threads[i] = new boost::thread(runEntries_boost, &infos[i]); #endif } // Wait for them to finish for (int i = 0; i < iThreadCount; ++i) { #ifdef USE_PTHREADS pthread_join(threads[i], 0); #else threads[i]->join(); delete threads[i]; threads[i] = 0; #endif } } void CCompositeGeometryManager::setGPUIndices(const std::vector<int>& GPUIndices) { m_GPUIndices = GPUIndices; } bool CCompositeGeometryManager::doJobs(TJobList &jobs) { // TODO: Proper clean up if substeps fail (Or as proper as possible) ASTRA_DEBUG("CCompositeGeometryManager::doJobs"); // Sort job list into job set by output part TJobSet jobset; for (TJobList::iterator i = jobs.begin(); i != jobs.end(); ++i) { jobset[i->pOutput.get()].push_back(*i); } size_t maxSize = m_iMaxSize; if (maxSize == 0) { // Get memory from first GPU. Not optimal... if (!m_GPUIndices.empty()) astraCUDA3d::setGPUIndex(m_GPUIndices[0]); maxSize = astraCUDA3d::availableGPUMemory(); if (maxSize == 0) { ASTRA_WARN("Unable to get available GPU memory. Defaulting to 1GB."); maxSize = 1024 * 1024 * 1024; } else { ASTRA_DEBUG("Detected %lu bytes of GPU memory", maxSize); } } else { ASTRA_DEBUG("Set to %lu bytes of GPU memory", maxSize); } maxSize = (maxSize * 9) / 10; maxSize /= sizeof(float); int div = 1; if (!m_GPUIndices.empty()) div = m_GPUIndices.size(); // Split jobs to fit TJobSet split; splitJobs(jobset, maxSize, div, split); jobset.clear(); if (m_GPUIndices.size() <= 1) { // Run jobs ASTRA_DEBUG("Running single-threaded"); if (!m_GPUIndices.empty()) astraCUDA3d::setGPUIndex(m_GPUIndices[0]); for (TJobSet::const_iterator iter = split.begin(); iter != split.end(); ++iter) { doJob(iter); } } else { ASTRA_DEBUG("Running multi-threaded"); WorkQueue wq(split); runWorkQueue(wq, m_GPUIndices); } return true; } //static void CCompositeGeometryManager::setGlobalGPUParams(const SGPUParams& params) { delete s_params; s_params = new SGPUParams; *s_params = params; ASTRA_DEBUG("CompositeGeometryManager: Setting global GPU params:"); std::ostringstream s; s << "GPU indices:"; for (unsigned int i = 0; i < params.GPUIndices.size(); ++i) s << " " << params.GPUIndices[i]; std::string ss = s.str(); ASTRA_DEBUG(ss.c_str()); ASTRA_DEBUG("Memory: %llu", params.memory); } } #endif