/*
-----------------------------------------------------------------------
Copyright: 2010-2016, iMinds-Vision Lab, University of Antwerp
2014-2016, CWI, Amsterdam
Contact: astra@uantwerpen.be
Website: http://www.astra-toolbox.com/
This file is part of the ASTRA Toolbox.
The ASTRA Toolbox is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
The ASTRA Toolbox is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with the ASTRA Toolbox. If not, see .
-----------------------------------------------------------------------
*/
#include "astra/CompositeGeometryManager.h"
#ifdef ASTRA_CUDA
#include "astra/GeometryUtil3D.h"
#include "astra/VolumeGeometry3D.h"
#include "astra/ConeProjectionGeometry3D.h"
#include "astra/ConeVecProjectionGeometry3D.h"
#include "astra/ParallelProjectionGeometry3D.h"
#include "astra/ParallelVecProjectionGeometry3D.h"
#include "astra/Projector3D.h"
#include "astra/CudaProjector3D.h"
#include "astra/Float32ProjectionData3DMemory.h"
#include "astra/Float32VolumeData3DMemory.h"
#include "astra/Float32ProjectionData3DGPU.h"
#include "astra/Float32VolumeData3DGPU.h"
#include "astra/Logging.h"
#include "../cuda/3d/mem3d.h"
#include
#include
#include
#ifndef USE_PTHREADS
#include
#include
#endif
namespace astra {
SGPUParams* CCompositeGeometryManager::s_params = 0;
CCompositeGeometryManager::CCompositeGeometryManager()
{
m_iMaxSize = 0;
if (s_params) {
m_iMaxSize = s_params->memory;
m_GPUIndices = s_params->GPUIndices;
}
}
// JOB:
//
// VolumePart
// ProjectionPart
// FP-or-BP
// SET-or-ADD
// Running a set of jobs:
//
// [ Assume OUTPUT Parts in a single JobSet don't alias?? ]
// Group jobs by output Part
// One thread per group?
// Automatically split parts if too large
// Performance model for odd-sized tasks?
// Automatically split parts if not enough tasks to fill available GPUs
// Splitting:
// Constraints:
// number of sub-parts divisible by N
// max size of sub-parts
// For splitting on both input and output side:
// How to divide up memory? (Optimization problem; compute/benchmark)
// (First approach: 0.5/0.5)
class _AstraExport CFloat32CustomGPUMemory {
public:
astraCUDA3d::MemHandle3D hnd; // Only required to be valid between allocate/free
virtual bool allocateGPUMemory(unsigned int x, unsigned int y, unsigned int z, astraCUDA3d::Mem3DZeroMode zero)=0;
virtual bool copyToGPUMemory(const astraCUDA3d::SSubDimensions3D &pos)=0;
virtual bool copyFromGPUMemory(const astraCUDA3d::SSubDimensions3D &pos)=0;
virtual bool freeGPUMemory()=0;
virtual ~CFloat32CustomGPUMemory() { }
};
class CFloat32ExistingGPUMemory : public astra::CFloat32CustomGPUMemory {
public:
CFloat32ExistingGPUMemory(CFloat32Data3DGPU *d);
virtual bool allocateGPUMemory(unsigned int x, unsigned int y, unsigned int z, astraCUDA3d::Mem3DZeroMode zero);
virtual bool copyToGPUMemory(const astraCUDA3d::SSubDimensions3D &pos);
virtual bool copyFromGPUMemory(const astraCUDA3d::SSubDimensions3D &pos);
virtual bool freeGPUMemory();
protected:
unsigned int x, y, z;
};
class CFloat32DefaultGPUMemory : public astra::CFloat32CustomGPUMemory {
public:
CFloat32DefaultGPUMemory(CFloat32Data3DMemory* d) {
ptr = d->getData();
}
virtual bool allocateGPUMemory(unsigned int x, unsigned int y, unsigned int z, astraCUDA3d::Mem3DZeroMode zero) {
hnd = astraCUDA3d::allocateGPUMemory(x, y, z, zero);
return (bool)hnd;
}
virtual bool copyToGPUMemory(const astraCUDA3d::SSubDimensions3D &pos) {
return astraCUDA3d::copyToGPUMemory(ptr, hnd, pos);
}
virtual bool copyFromGPUMemory(const astraCUDA3d::SSubDimensions3D &pos) {
return astraCUDA3d::copyFromGPUMemory(ptr, hnd, pos);
}
virtual bool freeGPUMemory() {
return astraCUDA3d::freeGPUMemory(hnd);
}
protected:
float *ptr;
};
CFloat32ExistingGPUMemory::CFloat32ExistingGPUMemory(CFloat32Data3DGPU *d)
{
hnd = d->getHandle();
x = d->getWidth();
y = d->getHeight();
z = d->getDepth();
}
bool CFloat32ExistingGPUMemory::allocateGPUMemory(unsigned int x_, unsigned int y_, unsigned int z_, astraCUDA3d::Mem3DZeroMode zero) {
assert(x_ == x);
assert(y_ == y);
assert(z_ == z);
if (zero == astraCUDA3d::INIT_ZERO)
return astraCUDA3d::zeroGPUMemory(hnd, x, y, z);
else
return true;
}
bool CFloat32ExistingGPUMemory::copyToGPUMemory(const astraCUDA3d::SSubDimensions3D &pos) {
assert(pos.nx == x);
assert(pos.ny == y);
assert(pos.nz == z);
assert(pos.pitch == x);
assert(pos.subx == 0);
assert(pos.suby == 0);
assert(pos.subnx == x);
assert(pos.subny == y);
// These are less necessary than x/y, but allowing access to
// subvolumes needs an interface change
assert(pos.subz == 0);
assert(pos.subnz == z);
return true;
}
bool CFloat32ExistingGPUMemory::copyFromGPUMemory(const astraCUDA3d::SSubDimensions3D &pos) {
assert(pos.nx == x);
assert(pos.ny == y);
assert(pos.nz == z);
assert(pos.pitch == x);
assert(pos.subx == 0);
assert(pos.suby == 0);
assert(pos.subnx == x);
assert(pos.subny == y);
// These are less necessary than x/y, but allowing access to
// subvolumes needs an interface change
assert(pos.subz == 0);
assert(pos.subnz == z);
return true;
}
bool CFloat32ExistingGPUMemory::freeGPUMemory() {
return true;
}
CFloat32CustomGPUMemory * createGPUMemoryHandler(CFloat32Data3D *d) {
CFloat32Data3DMemory *dMem = dynamic_cast(d);
CFloat32Data3DGPU *dGPU = dynamic_cast(d);
if (dMem)
return new CFloat32DefaultGPUMemory(dMem);
else
return new CFloat32ExistingGPUMemory(dGPU);
}
bool CCompositeGeometryManager::splitJobs(TJobSet &jobs, size_t maxSize, int div, TJobSet &split)
{
int maxBlockDim = astraCUDA3d::maxBlockDimension();
ASTRA_DEBUG("Found max block dim %d", maxBlockDim);
split.clear();
for (TJobSet::const_iterator i = jobs.begin(); i != jobs.end(); ++i)
{
CPart* pOutput = i->first;
const TJobList &L = i->second;
// 1. Split output part
// 2. Per sub-part:
// a. reduce input part
// b. split input part
// c. create jobs for new (input,output) subparts
TPartList splitOutput;
pOutput->splitZ(splitOutput, maxSize/3, UINT_MAX, div);
#if 0
TPartList splitOutput2;
for (TPartList::iterator i_out = splitOutput.begin(); i_out != splitOutput.end(); ++i_out) {
boost::shared_ptr outputPart = *i_out;
outputPart.get()->splitX(splitOutput2, UINT_MAX, UINT_MAX, 1);
}
splitOutput.clear();
for (TPartList::iterator i_out = splitOutput2.begin(); i_out != splitOutput2.end(); ++i_out) {
boost::shared_ptr outputPart = *i_out;
outputPart.get()->splitY(splitOutput, UINT_MAX, UINT_MAX, 1);
}
splitOutput2.clear();
#endif
for (TJobList::const_iterator j = L.begin(); j != L.end(); ++j)
{
const SJob &job = *j;
for (TPartList::iterator i_out = splitOutput.begin();
i_out != splitOutput.end(); ++i_out)
{
boost::shared_ptr outputPart = *i_out;
SJob newjob;
newjob.pOutput = outputPart;
newjob.eType = j->eType;
newjob.eMode = j->eMode;
newjob.pProjector = j->pProjector;
newjob.FDKSettings = j->FDKSettings;
CPart* input = job.pInput->reduce(outputPart.get());
if (input->getSize() == 0) {
ASTRA_DEBUG("Empty input");
newjob.eType = SJob::JOB_NOP;
split[outputPart.get()].push_back(newjob);
continue;
}
size_t remainingSize = ( maxSize - outputPart->getSize() ) / 2;
TPartList splitInput;
input->splitZ(splitInput, remainingSize, maxBlockDim, 1);
delete input;
TPartList splitInput2;
for (TPartList::iterator i_in = splitInput.begin(); i_in != splitInput.end(); ++i_in) {
boost::shared_ptr inputPart = *i_in;
inputPart.get()->splitX(splitInput2, UINT_MAX, maxBlockDim, 1);
}
splitInput.clear();
for (TPartList::iterator i_in = splitInput2.begin(); i_in != splitInput2.end(); ++i_in) {
boost::shared_ptr inputPart = *i_in;
inputPart.get()->splitY(splitInput, UINT_MAX, maxBlockDim, 1);
}
splitInput2.clear();
ASTRA_DEBUG("Input split into %d parts", splitInput.size());
for (TPartList::iterator i_in = splitInput.begin();
i_in != splitInput.end(); ++i_in)
{
newjob.pInput = *i_in;
split[outputPart.get()].push_back(newjob);
// Second and later (input) parts should always be added to
// output of first (input) part.
newjob.eMode = SJob::MODE_ADD;
}
}
}
}
return true;
}
static std::pair reduceProjectionVertical(const CVolumeGeometry3D* pVolGeom, const CProjectionGeometry3D* pProjGeom)
{
double vmin_g, vmax_g;
// reduce self to only cover intersection with projection of VolumePart
// (Project corners of volume, take bounding box)
assert(pProjGeom->getProjectionCount() > 0);
for (int i = 0; i < pProjGeom->getProjectionCount(); ++i) {
double vol_u[8];
double vol_v[8];
double pixx = pVolGeom->getPixelLengthX();
double pixy = pVolGeom->getPixelLengthY();
double pixz = pVolGeom->getPixelLengthZ();
// TODO: Is 0.5 sufficient?
double xmin = pVolGeom->getWindowMinX() - 0.5 * pixx;
double xmax = pVolGeom->getWindowMaxX() + 0.5 * pixx;
double ymin = pVolGeom->getWindowMinY() - 0.5 * pixy;
double ymax = pVolGeom->getWindowMaxY() + 0.5 * pixy;
double zmin = pVolGeom->getWindowMinZ() - 0.5 * pixz;
double zmax = pVolGeom->getWindowMaxZ() + 0.5 * pixz;
pProjGeom->projectPoint(xmin, ymin, zmin, i, vol_u[0], vol_v[0]);
pProjGeom->projectPoint(xmin, ymin, zmax, i, vol_u[1], vol_v[1]);
pProjGeom->projectPoint(xmin, ymax, zmin, i, vol_u[2], vol_v[2]);
pProjGeom->projectPoint(xmin, ymax, zmax, i, vol_u[3], vol_v[3]);
pProjGeom->projectPoint(xmax, ymin, zmin, i, vol_u[4], vol_v[4]);
pProjGeom->projectPoint(xmax, ymin, zmax, i, vol_u[5], vol_v[5]);
pProjGeom->projectPoint(xmax, ymax, zmin, i, vol_u[6], vol_v[6]);
pProjGeom->projectPoint(xmax, ymax, zmax, i, vol_u[7], vol_v[7]);
double vmin = vol_v[0];
double vmax = vol_v[0];
for (int j = 1; j < 8; ++j) {
if (vol_v[j] < vmin)
vmin = vol_v[j];
if (vol_v[j] > vmax)
vmax = vol_v[j];
}
if (i == 0 || vmin < vmin_g)
vmin_g = vmin;
if (i == 0 || vmax > vmax_g)
vmax_g = vmax;
}
if (vmin_g < -1.0)
vmin_g = -1.0;
if (vmax_g > pProjGeom->getDetectorRowCount())
vmax_g = pProjGeom->getDetectorRowCount();
if (vmin_g >= vmax_g)
vmin_g = vmax_g = 0.0;
return std::pair(vmin_g, vmax_g);
}
CCompositeGeometryManager::CPart::CPart(const CPart& other)
{
eType = other.eType;
pData = other.pData;
subX = other.subX;
subY = other.subY;
subZ = other.subZ;
}
CCompositeGeometryManager::CVolumePart::CVolumePart(const CVolumePart& other)
: CPart(other)
{
pGeom = other.pGeom->clone();
}
CCompositeGeometryManager::CVolumePart::~CVolumePart()
{
delete pGeom;
}
void CCompositeGeometryManager::CVolumePart::getDims(size_t &x, size_t &y, size_t &z) const
{
if (!pGeom) {
x = y = z = 0;
return;
}
x = pGeom->getGridColCount();
y = pGeom->getGridRowCount();
z = pGeom->getGridSliceCount();
}
size_t CCompositeGeometryManager::CPart::getSize() const
{
size_t x, y, z;
getDims(x, y, z);
return x * y * z;
}
bool CCompositeGeometryManager::CPart::isFull() const
{
size_t x, y, z;
getDims(x, y, z);
return x == (size_t)pData->getWidth() &&
y == (size_t)pData->getHeight() &&
z == (size_t)pData->getDepth();
}
bool CCompositeGeometryManager::CPart::canSplitAndReduce() const
{
return dynamic_cast(pData) != 0;
}
static bool testVolumeRange(const std::pair& fullRange,
const CVolumeGeometry3D *pVolGeom,
const CProjectionGeometry3D *pProjGeom,
int zmin, int zmax)
{
double pixz = pVolGeom->getPixelLengthZ();
CVolumeGeometry3D test(pVolGeom->getGridColCount(),
pVolGeom->getGridRowCount(),
zmax - zmin,
pVolGeom->getWindowMinX(),
pVolGeom->getWindowMinY(),
pVolGeom->getWindowMinZ() + zmin * pixz,
pVolGeom->getWindowMaxX(),
pVolGeom->getWindowMaxY(),
pVolGeom->getWindowMinZ() + zmax * pixz);
std::pair subRange = reduceProjectionVertical(&test, pProjGeom);
// empty
if (subRange.first == subRange.second)
return true;
// fully outside of fullRange
if (subRange.first >= fullRange.second || subRange.second <= fullRange.first)
return true;
return false;
}
CCompositeGeometryManager::CPart* CCompositeGeometryManager::CVolumePart::reduce(const CPart *_other)
{
if (!canSplitAndReduce())
return clone();
const CProjectionPart *other = dynamic_cast(_other);
assert(other);
std::pair fullRange = reduceProjectionVertical(pGeom, other->pGeom);
int top_slice = 0, bottom_slice = 0;
if (fullRange.first < fullRange.second) {
// TOP SLICE
int zmin = 0;
int zmax = pGeom->getGridSliceCount()-1; // (Don't try empty region)
// Setting top slice to zmin is always valid.
while (zmin < zmax) {
int zmid = (zmin + zmax + 1) / 2;
bool ok = testVolumeRange(fullRange, pGeom, other->pGeom,
0, zmid);
ASTRA_DEBUG("binsearch min: [%d,%d], %d, %s", zmin, zmax, zmid, ok ? "ok" : "removed too much");
if (ok)
zmin = zmid;
else
zmax = zmid - 1;
}
top_slice = zmin;
// BOTTOM SLICE
zmin = top_slice + 1; // (Don't try empty region)
zmax = pGeom->getGridSliceCount();
// Setting bottom slice to zmax is always valid
while (zmin < zmax) {
int zmid = (zmin + zmax) / 2;
bool ok = testVolumeRange(fullRange, pGeom, other->pGeom,
zmid, pGeom->getGridSliceCount());
ASTRA_DEBUG("binsearch max: [%d,%d], %d, %s", zmin, zmax, zmid, ok ? "ok" : "removed too much");
if (ok)
zmax = zmid;
else
zmin = zmid + 1;
}
bottom_slice = zmax;
}
ASTRA_DEBUG("found extent: %d - %d", top_slice, bottom_slice);
top_slice -= 1;
if (top_slice < 0)
top_slice = 0;
bottom_slice += 1;
if (bottom_slice >= pGeom->getGridSliceCount())
bottom_slice = pGeom->getGridSliceCount();
ASTRA_DEBUG("adjusted extent: %d - %d", top_slice, bottom_slice);
double pixz = pGeom->getPixelLengthZ();
CVolumePart *sub = new CVolumePart();
sub->subX = this->subX;
sub->subY = this->subY;
sub->subZ = this->subZ + top_slice;
sub->pData = pData;
if (top_slice == bottom_slice) {
sub->pGeom = 0;
} else {
sub->pGeom = new CVolumeGeometry3D(pGeom->getGridColCount(),
pGeom->getGridRowCount(),
bottom_slice - top_slice,
pGeom->getWindowMinX(),
pGeom->getWindowMinY(),
pGeom->getWindowMinZ() + top_slice * pixz,
pGeom->getWindowMaxX(),
pGeom->getWindowMaxY(),
pGeom->getWindowMinZ() + bottom_slice * pixz);
}
ASTRA_DEBUG("Reduce volume from %d - %d to %d - %d ( %f - %f )", this->subZ, this->subZ + pGeom->getGridSliceCount(), this->subZ + top_slice, this->subZ + bottom_slice, pGeom->getWindowMinZ() + top_slice * pixz, pGeom->getWindowMinZ() + bottom_slice * pixz);
return sub;
}
static size_t ceildiv(size_t a, size_t b) {
return (a + b - 1) / b;
}
static size_t computeLinearSplit(size_t maxBlock, int div, size_t sliceCount)
{
size_t blockSize = maxBlock;
size_t blockCount;
if (sliceCount <= blockSize)
blockCount = 1;
else
blockCount = ceildiv(sliceCount, blockSize);
// Increase number of blocks to be divisible by div
size_t divCount = div * ceildiv(blockCount, div);
// If divCount is above sqrt(number of slices), then
// we can't guarantee divisibility by div, but let's try anyway
if (ceildiv(sliceCount, ceildiv(sliceCount, divCount)) % div == 0) {
blockCount = divCount;
} else {
// If divisibility isn't achievable, we may want to optimize
// differently.
// TODO: Figure out how to model and optimize this.
}
// Final adjustment to make blocks more evenly sized
// (This can't make the blocks larger)
blockSize = ceildiv(sliceCount, blockCount);
ASTRA_DEBUG("%ld %ld -> %ld * %ld", sliceCount, maxBlock, blockCount, blockSize);
assert(blockSize <= maxBlock);
assert((divCount * divCount > sliceCount) || (blockCount % div) == 0);
return blockSize;
}
template
static V* getProjectionVectors(const P* geom);
template<>
SConeProjection* getProjectionVectors(const CConeProjectionGeometry3D* pProjGeom)
{
return genConeProjections(pProjGeom->getProjectionCount(),
pProjGeom->getDetectorColCount(),
pProjGeom->getDetectorRowCount(),
pProjGeom->getOriginSourceDistance(),
pProjGeom->getOriginDetectorDistance(),
pProjGeom->getDetectorSpacingX(),
pProjGeom->getDetectorSpacingY(),
pProjGeom->getProjectionAngles());
}
template<>
SConeProjection* getProjectionVectors(const CConeVecProjectionGeometry3D* pProjGeom)
{
int nth = pProjGeom->getProjectionCount();
SConeProjection* pProjs = new SConeProjection[nth];
for (int i = 0; i < nth; ++i)
pProjs[i] = pProjGeom->getProjectionVectors()[i];
return pProjs;
}
template<>
SPar3DProjection* getProjectionVectors(const CParallelProjectionGeometry3D* pProjGeom)
{
return genPar3DProjections(pProjGeom->getProjectionCount(),
pProjGeom->getDetectorColCount(),
pProjGeom->getDetectorRowCount(),
pProjGeom->getDetectorSpacingX(),
pProjGeom->getDetectorSpacingY(),
pProjGeom->getProjectionAngles());
}
template<>
SPar3DProjection* getProjectionVectors(const CParallelVecProjectionGeometry3D* pProjGeom)
{
int nth = pProjGeom->getProjectionCount();
SPar3DProjection* pProjs = new SPar3DProjection[nth];
for (int i = 0; i < nth; ++i)
pProjs[i] = pProjGeom->getProjectionVectors()[i];
return pProjs;
}
template
static void translateProjectionVectorsU(V* pProjs, int count, double du)
{
for (int i = 0; i < count; ++i) {
pProjs[i].fDetSX += du * pProjs[i].fDetUX;
pProjs[i].fDetSY += du * pProjs[i].fDetUY;
pProjs[i].fDetSZ += du * pProjs[i].fDetUZ;
}
}
template
static void translateProjectionVectorsV(V* pProjs, int count, double dv)
{
for (int i = 0; i < count; ++i) {
pProjs[i].fDetSX += dv * pProjs[i].fDetVX;
pProjs[i].fDetSY += dv * pProjs[i].fDetVY;
pProjs[i].fDetSZ += dv * pProjs[i].fDetVZ;
}
}
static CProjectionGeometry3D* getSubProjectionGeometryU(const CProjectionGeometry3D* pProjGeom, int u, int size)
{
// First convert to vectors, then translate, then convert into new object
const CConeProjectionGeometry3D* conegeom = dynamic_cast(pProjGeom);
const CParallelProjectionGeometry3D* par3dgeom = dynamic_cast(pProjGeom);
const CParallelVecProjectionGeometry3D* parvec3dgeom = dynamic_cast(pProjGeom);
const CConeVecProjectionGeometry3D* conevec3dgeom = dynamic_cast(pProjGeom);
if (conegeom || conevec3dgeom) {
SConeProjection* pConeProjs;
if (conegeom) {
pConeProjs = getProjectionVectors(conegeom);
} else {
pConeProjs = getProjectionVectors(conevec3dgeom);
}
translateProjectionVectorsU(pConeProjs, pProjGeom->getProjectionCount(), u);
CProjectionGeometry3D* ret = new CConeVecProjectionGeometry3D(pProjGeom->getProjectionCount(),
pProjGeom->getDetectorRowCount(),
size,
pConeProjs);
delete[] pConeProjs;
return ret;
} else {
assert(par3dgeom || parvec3dgeom);
SPar3DProjection* pParProjs;
if (par3dgeom) {
pParProjs = getProjectionVectors(par3dgeom);
} else {
pParProjs = getProjectionVectors(parvec3dgeom);
}
translateProjectionVectorsU(pParProjs, pProjGeom->getProjectionCount(), u);
CProjectionGeometry3D* ret = new CParallelVecProjectionGeometry3D(pProjGeom->getProjectionCount(),
pProjGeom->getDetectorRowCount(),
size,
pParProjs);
delete[] pParProjs;
return ret;
}
}
static CProjectionGeometry3D* getSubProjectionGeometryV(const CProjectionGeometry3D* pProjGeom, int v, int size)
{
// First convert to vectors, then translate, then convert into new object
const CConeProjectionGeometry3D* conegeom = dynamic_cast(pProjGeom);
const CParallelProjectionGeometry3D* par3dgeom = dynamic_cast(pProjGeom);
const CParallelVecProjectionGeometry3D* parvec3dgeom = dynamic_cast(pProjGeom);
const CConeVecProjectionGeometry3D* conevec3dgeom = dynamic_cast(pProjGeom);
if (conegeom || conevec3dgeom) {
SConeProjection* pConeProjs;
if (conegeom) {
pConeProjs = getProjectionVectors(conegeom);
} else {
pConeProjs = getProjectionVectors(conevec3dgeom);
}
translateProjectionVectorsV(pConeProjs, pProjGeom->getProjectionCount(), v);
CProjectionGeometry3D* ret = new CConeVecProjectionGeometry3D(pProjGeom->getProjectionCount(),
size,
pProjGeom->getDetectorColCount(),
pConeProjs);
delete[] pConeProjs;
return ret;
} else {
assert(par3dgeom || parvec3dgeom);
SPar3DProjection* pParProjs;
if (par3dgeom) {
pParProjs = getProjectionVectors(par3dgeom);
} else {
pParProjs = getProjectionVectors(parvec3dgeom);
}
translateProjectionVectorsV(pParProjs, pProjGeom->getProjectionCount(), v);
CProjectionGeometry3D* ret = new CParallelVecProjectionGeometry3D(pProjGeom->getProjectionCount(),
size,
pProjGeom->getDetectorColCount(),
pParProjs);
delete[] pParProjs;
return ret;
}
}
// split self into sub-parts:
// - each no bigger than maxSize
// - number of sub-parts is divisible by div
// - maybe all approximately the same size?
void CCompositeGeometryManager::CVolumePart::splitX(CCompositeGeometryManager::TPartList& out, size_t maxSize, size_t maxDim, int div)
{
if (canSplitAndReduce()) {
// Split in vertical direction only at first, until we figure out
// a model for splitting in other directions
size_t sliceSize = ((size_t) pGeom->getGridSliceCount()) * pGeom->getGridRowCount();
int sliceCount = pGeom->getGridColCount();
size_t m = std::min(maxSize / sliceSize, maxDim);
size_t blockSize = computeLinearSplit(m, div, sliceCount);
int rem = blockSize - (sliceCount % blockSize);
if ((size_t)rem == blockSize)
rem = 0;
ASTRA_DEBUG("From %d to %d step %d", -(rem / 2), sliceCount, blockSize);
for (int x = -(rem / 2); x < sliceCount; x += blockSize) {
int newsubX = x;
if (newsubX < 0) newsubX = 0;
int endX = x + blockSize;
if (endX > sliceCount) endX = sliceCount;
int size = endX - newsubX;
CVolumePart *sub = new CVolumePart();
sub->subX = this->subX + newsubX;
sub->subY = this->subY;
sub->subZ = this->subZ;
ASTRA_DEBUG("VolumePart split %d %d %d -> %p", sub->subX, sub->subY, sub->subZ, (void*)sub);
double shift = pGeom->getPixelLengthX() * newsubX;
sub->pData = pData;
sub->pGeom = new CVolumeGeometry3D(size,
pGeom->getGridRowCount(),
pGeom->getGridSliceCount(),
pGeom->getWindowMinX() + shift,
pGeom->getWindowMinY(),
pGeom->getWindowMinZ(),
pGeom->getWindowMinX() + shift + size * pGeom->getPixelLengthX(),
pGeom->getWindowMaxY(),
pGeom->getWindowMaxZ());
out.push_back(boost::shared_ptr(sub));
}
} else {
out.push_back(boost::shared_ptr(clone()));
}
}
void CCompositeGeometryManager::CVolumePart::splitY(CCompositeGeometryManager::TPartList& out, size_t maxSize, size_t maxDim, int div)
{
if (canSplitAndReduce()) {
// Split in vertical direction only at first, until we figure out
// a model for splitting in other directions
size_t sliceSize = ((size_t) pGeom->getGridColCount()) * pGeom->getGridSliceCount();
int sliceCount = pGeom->getGridRowCount();
size_t m = std::min(maxSize / sliceSize, maxDim);
size_t blockSize = computeLinearSplit(m, div, sliceCount);
int rem = blockSize - (sliceCount % blockSize);
if ((size_t)rem == blockSize)
rem = 0;
ASTRA_DEBUG("From %d to %d step %d", -(rem / 2), sliceCount, blockSize);
for (int y = -(rem / 2); y < sliceCount; y += blockSize) {
int newsubY = y;
if (newsubY < 0) newsubY = 0;
int endY = y + blockSize;
if (endY > sliceCount) endY = sliceCount;
int size = endY - newsubY;
CVolumePart *sub = new CVolumePart();
sub->subX = this->subX;
sub->subY = this->subY + newsubY;
sub->subZ = this->subZ;
ASTRA_DEBUG("VolumePart split %d %d %d -> %p", sub->subX, sub->subY, sub->subZ, (void*)sub);
double shift = pGeom->getPixelLengthY() * newsubY;
sub->pData = pData;
sub->pGeom = new CVolumeGeometry3D(pGeom->getGridColCount(),
size,
pGeom->getGridSliceCount(),
pGeom->getWindowMinX(),
pGeom->getWindowMinY() + shift,
pGeom->getWindowMinZ(),
pGeom->getWindowMaxX(),
pGeom->getWindowMinY() + shift + size * pGeom->getPixelLengthY(),
pGeom->getWindowMaxZ());
out.push_back(boost::shared_ptr(sub));
}
} else {
out.push_back(boost::shared_ptr(clone()));
}
}
void CCompositeGeometryManager::CVolumePart::splitZ(CCompositeGeometryManager::TPartList& out, size_t maxSize, size_t maxDim, int div)
{
if (canSplitAndReduce()) {
// Split in vertical direction only at first, until we figure out
// a model for splitting in other directions
size_t sliceSize = ((size_t) pGeom->getGridColCount()) * pGeom->getGridRowCount();
int sliceCount = pGeom->getGridSliceCount();
size_t m = std::min(maxSize / sliceSize, maxDim);
size_t blockSize = computeLinearSplit(m, div, sliceCount);
int rem = blockSize - (sliceCount % blockSize);
if ((size_t)rem == blockSize)
rem = 0;
ASTRA_DEBUG("From %d to %d step %d", -(rem / 2), sliceCount, blockSize);
for (int z = -(rem / 2); z < sliceCount; z += blockSize) {
int newsubZ = z;
if (newsubZ < 0) newsubZ = 0;
int endZ = z + blockSize;
if (endZ > sliceCount) endZ = sliceCount;
int size = endZ - newsubZ;
CVolumePart *sub = new CVolumePart();
sub->subX = this->subX;
sub->subY = this->subY;
sub->subZ = this->subZ + newsubZ;
ASTRA_DEBUG("VolumePart split %d %d %d -> %p", sub->subX, sub->subY, sub->subZ, (void*)sub);
double shift = pGeom->getPixelLengthZ() * newsubZ;
sub->pData = pData;
sub->pGeom = new CVolumeGeometry3D(pGeom->getGridColCount(),
pGeom->getGridRowCount(),
size,
pGeom->getWindowMinX(),
pGeom->getWindowMinY(),
pGeom->getWindowMinZ() + shift,
pGeom->getWindowMaxX(),
pGeom->getWindowMaxY(),
pGeom->getWindowMinZ() + shift + size * pGeom->getPixelLengthZ());
out.push_back(boost::shared_ptr(sub));
}
} else {
out.push_back(boost::shared_ptr(clone()));
}
}
CCompositeGeometryManager::CVolumePart* CCompositeGeometryManager::CVolumePart::clone() const
{
return new CVolumePart(*this);
}
CCompositeGeometryManager::CProjectionPart::CProjectionPart(const CProjectionPart& other)
: CPart(other)
{
pGeom = other.pGeom->clone();
}
CCompositeGeometryManager::CProjectionPart::~CProjectionPart()
{
delete pGeom;
}
void CCompositeGeometryManager::CProjectionPart::getDims(size_t &x, size_t &y, size_t &z) const
{
if (!pGeom) {
x = y = z = 0;
return;
}
x = pGeom->getDetectorColCount();
y = pGeom->getProjectionCount();
z = pGeom->getDetectorRowCount();
}
CCompositeGeometryManager::CPart* CCompositeGeometryManager::CProjectionPart::reduce(const CPart *_other)
{
if (!canSplitAndReduce())
return clone();
const CVolumePart *other = dynamic_cast(_other);
assert(other);
std::pair r = reduceProjectionVertical(other->pGeom, pGeom);
// fprintf(stderr, "v extent: %f %f\n", r.first, r.second);
int _vmin = (int)floor(r.first - 1.0);
int _vmax = (int)ceil(r.second + 1.0);
if (_vmin < 0)
_vmin = 0;
if (_vmax > pGeom->getDetectorRowCount())
_vmax = pGeom->getDetectorRowCount();
if (_vmin >= _vmax) {
_vmin = _vmax = 0;
}
CProjectionPart *sub = new CProjectionPart();
sub->subX = this->subX;
sub->subY = this->subY;
sub->subZ = this->subZ + _vmin;
sub->pData = pData;
if (_vmin == _vmax) {
sub->pGeom = 0;
} else {
sub->pGeom = getSubProjectionGeometryV(pGeom, _vmin, _vmax - _vmin);
}
ASTRA_DEBUG("Reduce projection from %d - %d to %d - %d", this->subZ, this->subZ + pGeom->getDetectorRowCount(), this->subZ + _vmin, this->subZ + _vmax);
return sub;
}
void CCompositeGeometryManager::CProjectionPart::splitX(CCompositeGeometryManager::TPartList &out, size_t maxSize, size_t maxDim, int div)
{
if (canSplitAndReduce()) {
// Split in vertical direction only at first, until we figure out
// a model for splitting in other directions
size_t sliceSize = ((size_t) pGeom->getDetectorRowCount()) * pGeom->getProjectionCount();
int sliceCount = pGeom->getDetectorColCount();
size_t m = std::min(maxSize / sliceSize, maxDim);
size_t blockSize = computeLinearSplit(m, div, sliceCount);
int rem = blockSize - (sliceCount % blockSize);
if ((size_t)rem == blockSize)
rem = 0;
ASTRA_DEBUG("From %d to %d step %d", -(rem / 2), sliceCount, blockSize);
for (int x = -(rem / 2); x < sliceCount; x += blockSize) {
int newsubX = x;
if (newsubX < 0) newsubX = 0;
int endX = x + blockSize;
if (endX > sliceCount) endX = sliceCount;
int size = endX - newsubX;
CProjectionPart *sub = new CProjectionPart();
sub->subX = this->subX + newsubX;
sub->subY = this->subY;
sub->subZ = this->subZ;
ASTRA_DEBUG("ProjectionPart split %d %d %d -> %p", sub->subX, sub->subY, sub->subZ, (void*)sub);
sub->pData = pData;
sub->pGeom = getSubProjectionGeometryU(pGeom, newsubX, size);
out.push_back(boost::shared_ptr(sub));
}
} else {
out.push_back(boost::shared_ptr(clone()));
}
}
void CCompositeGeometryManager::CProjectionPart::splitY(CCompositeGeometryManager::TPartList &out, size_t maxSize, size_t maxDim, int div)
{
// TODO
out.push_back(boost::shared_ptr(clone()));
}
void CCompositeGeometryManager::CProjectionPart::splitZ(CCompositeGeometryManager::TPartList &out, size_t maxSize, size_t maxDim, int div)
{
if (canSplitAndReduce()) {
// Split in vertical direction only at first, until we figure out
// a model for splitting in other directions
size_t sliceSize = ((size_t) pGeom->getDetectorColCount()) * pGeom->getProjectionCount();
int sliceCount = pGeom->getDetectorRowCount();
size_t m = std::min(maxSize / sliceSize, maxDim);
size_t blockSize = computeLinearSplit(m, div, sliceCount);
int rem = blockSize - (sliceCount % blockSize);
if ((size_t)rem == blockSize)
rem = 0;
ASTRA_DEBUG("From %d to %d step %d", -(rem / 2), sliceCount, blockSize);
for (int z = -(rem / 2); z < sliceCount; z += blockSize) {
int newsubZ = z;
if (newsubZ < 0) newsubZ = 0;
int endZ = z + blockSize;
if (endZ > sliceCount) endZ = sliceCount;
int size = endZ - newsubZ;
CProjectionPart *sub = new CProjectionPart();
sub->subX = this->subX;
sub->subY = this->subY;
sub->subZ = this->subZ + newsubZ;
ASTRA_DEBUG("ProjectionPart split %d %d %d -> %p", sub->subX, sub->subY, sub->subZ, (void*)sub);
sub->pData = pData;
sub->pGeom = getSubProjectionGeometryV(pGeom, newsubZ, size);
out.push_back(boost::shared_ptr(sub));
}
} else {
out.push_back(boost::shared_ptr(clone()));
}
}
CCompositeGeometryManager::CProjectionPart* CCompositeGeometryManager::CProjectionPart::clone() const
{
return new CProjectionPart(*this);
}
CCompositeGeometryManager::SJob CCompositeGeometryManager::createJobFP(CProjector3D *pProjector,
CFloat32VolumeData3D *pVolData,
CFloat32ProjectionData3D *pProjData)
{
ASTRA_DEBUG("CCompositeGeometryManager::createJobFP");
// Create single job for FP
CVolumePart *input = new CVolumePart();
input->pData = pVolData;
input->subX = 0;
input->subY = 0;
input->subZ = 0;
input->pGeom = pVolData->getGeometry()->clone();
ASTRA_DEBUG("Main FP VolumePart -> %p", (void*)input);
CProjectionPart *output = new CProjectionPart();
output->pData = pProjData;
output->subX = 0;
output->subY = 0;
output->subZ = 0;
output->pGeom = pProjData->getGeometry()->clone();
ASTRA_DEBUG("Main FP ProjectionPart -> %p", (void*)output);
SJob FP;
FP.pInput = boost::shared_ptr(input);
FP.pOutput = boost::shared_ptr(output);
FP.pProjector = pProjector;
FP.eType = SJob::JOB_FP;
FP.eMode = SJob::MODE_SET;
return FP;
}
CCompositeGeometryManager::SJob CCompositeGeometryManager::createJobBP(CProjector3D *pProjector,
CFloat32VolumeData3D *pVolData,
CFloat32ProjectionData3D *pProjData)
{
ASTRA_DEBUG("CCompositeGeometryManager::createJobBP");
// Create single job for BP
CProjectionPart *input = new CProjectionPart();
input->pData = pProjData;
input->subX = 0;
input->subY = 0;
input->subZ = 0;
input->pGeom = pProjData->getGeometry()->clone();
CVolumePart *output = new CVolumePart();
output->pData = pVolData;
output->subX = 0;
output->subY = 0;
output->subZ = 0;
output->pGeom = pVolData->getGeometry()->clone();
SJob BP;
BP.pInput = boost::shared_ptr(input);
BP.pOutput = boost::shared_ptr(output);
BP.pProjector = pProjector;
BP.eType = SJob::JOB_BP;
BP.eMode = SJob::MODE_SET;
return BP;
}
bool CCompositeGeometryManager::doFP(CProjector3D *pProjector, CFloat32VolumeData3D *pVolData,
CFloat32ProjectionData3D *pProjData)
{
TJobList L;
L.push_back(createJobFP(pProjector, pVolData, pProjData));
return doJobs(L);
}
bool CCompositeGeometryManager::doBP(CProjector3D *pProjector, CFloat32VolumeData3D *pVolData,
CFloat32ProjectionData3D *pProjData)
{
TJobList L;
L.push_back(createJobBP(pProjector, pVolData, pProjData));
return doJobs(L);
}
bool CCompositeGeometryManager::doFDK(CProjector3D *pProjector, CFloat32VolumeData3D *pVolData,
CFloat32ProjectionData3D *pProjData, bool bShortScan,
const float *pfFilter)
{
if (!dynamic_cast(pProjData->getGeometry())) {
ASTRA_ERROR("CCompositeGeometryManager::doFDK: cone geometry required");
return false;
}
SJob job = createJobBP(pProjector, pVolData, pProjData);
job.eType = SJob::JOB_FDK;
job.FDKSettings.bShortScan = bShortScan;
job.FDKSettings.pfFilter = pfFilter;
TJobList L;
L.push_back(job);
return doJobs(L);
}
bool CCompositeGeometryManager::doFP(CProjector3D *pProjector, const std::vector& volData, const std::vector& projData)
{
ASTRA_DEBUG("CCompositeGeometryManager::doFP, multi-volume");
std::vector::const_iterator i;
std::vector > inputs;
for (i = volData.begin(); i != volData.end(); ++i) {
CVolumePart *input = new CVolumePart();
input->pData = *i;
input->subX = 0;
input->subY = 0;
input->subZ = 0;
input->pGeom = (*i)->getGeometry()->clone();
inputs.push_back(boost::shared_ptr(input));
}
std::vector::const_iterator j;
std::vector > outputs;
for (j = projData.begin(); j != projData.end(); ++j) {
CProjectionPart *output = new CProjectionPart();
output->pData = *j;
output->subX = 0;
output->subY = 0;
output->subZ = 0;
output->pGeom = (*j)->getGeometry()->clone();
outputs.push_back(boost::shared_ptr(output));
}
std::vector >::iterator i2;
std::vector >::iterator j2;
TJobList L;
for (i2 = outputs.begin(); i2 != outputs.end(); ++i2) {
SJob FP;
FP.eMode = SJob::MODE_SET;
for (j2 = inputs.begin(); j2 != inputs.end(); ++j2) {
FP.pInput = *j2;
FP.pOutput = *i2;
FP.pProjector = pProjector;
FP.eType = SJob::JOB_FP;
L.push_back(FP);
// Set first, add rest
FP.eMode = SJob::MODE_ADD;
}
}
return doJobs(L);
}
bool CCompositeGeometryManager::doBP(CProjector3D *pProjector, const std::vector& volData, const std::vector& projData)
{
ASTRA_DEBUG("CCompositeGeometryManager::doBP, multi-volume");
std::vector::const_iterator i;
std::vector > outputs;
for (i = volData.begin(); i != volData.end(); ++i) {
CVolumePart *output = new CVolumePart();
output->pData = *i;
output->subX = 0;
output->subY = 0;
output->subZ = 0;
output->pGeom = (*i)->getGeometry()->clone();
outputs.push_back(boost::shared_ptr(output));
}
std::vector::const_iterator j;
std::vector > inputs;
for (j = projData.begin(); j != projData.end(); ++j) {
CProjectionPart *input = new CProjectionPart();
input->pData = *j;
input->subX = 0;
input->subY = 0;
input->subZ = 0;
input->pGeom = (*j)->getGeometry()->clone();
inputs.push_back(boost::shared_ptr(input));
}
std::vector >::iterator i2;
std::vector >::iterator j2;
TJobList L;
for (i2 = outputs.begin(); i2 != outputs.end(); ++i2) {
SJob BP;
BP.eMode = SJob::MODE_SET;
for (j2 = inputs.begin(); j2 != inputs.end(); ++j2) {
BP.pInput = *j2;
BP.pOutput = *i2;
BP.pProjector = pProjector;
BP.eType = SJob::JOB_BP;
L.push_back(BP);
// Set first, add rest
BP.eMode = SJob::MODE_ADD;
}
}
return doJobs(L);
}
static bool doJob(const CCompositeGeometryManager::TJobSet::const_iterator& iter)
{
CCompositeGeometryManager::CPart* output = iter->first;
const CCompositeGeometryManager::TJobList& L = iter->second;
assert(!L.empty());
bool zero = L.begin()->eMode == CCompositeGeometryManager::SJob::MODE_SET;
size_t outx, outy, outz;
output->getDims(outx, outy, outz);
if (L.begin()->eType == CCompositeGeometryManager::SJob::JOB_NOP) {
// just zero output?
if (zero) {
// TODO: This function shouldn't have to know about this difference
// between Memory/GPU
CFloat32Data3DMemory *hostMem = dynamic_cast(output->pData);
if (hostMem) {
for (size_t z = 0; z < outz; ++z) {
for (size_t y = 0; y < outy; ++y) {
float* ptr = hostMem->getData();
ptr += (z + output->subX) * (size_t)output->pData->getHeight() * (size_t)output->pData->getWidth();
ptr += (y + output->subY) * (size_t)output->pData->getWidth();
ptr += output->subX;
memset(ptr, 0, sizeof(float) * outx);
}
}
} else {
CFloat32Data3DGPU *gpuMem = dynamic_cast(output->pData);
assert(gpuMem);
assert(output->isFull()); // TODO: zero subset?
zeroGPUMemory(gpuMem->getHandle(), outx, outy, outz);
}
}
return true;
}
astraCUDA3d::SSubDimensions3D dstdims;
dstdims.nx = output->pData->getWidth();
dstdims.pitch = dstdims.nx;
dstdims.ny = output->pData->getHeight();
dstdims.nz = output->pData->getDepth();
dstdims.subnx = outx;
dstdims.subny = outy;
dstdims.subnz = outz;
ASTRA_DEBUG("dstdims: %d,%d,%d in %d,%d,%d", dstdims.subnx, dstdims.subny, dstdims.subnz, dstdims.nx, dstdims.ny, dstdims.nz);
dstdims.subx = output->subX;
dstdims.suby = output->subY;
dstdims.subz = output->subZ;
CFloat32CustomGPUMemory *dstMem = createGPUMemoryHandler(output->pData);
bool ok = dstMem->allocateGPUMemory(outx, outy, outz, zero ? astraCUDA3d::INIT_ZERO : astraCUDA3d::INIT_NO);
if (!ok) ASTRA_ERROR("Error allocating GPU memory");
for (CCompositeGeometryManager::TJobList::const_iterator i = L.begin(); i != L.end(); ++i) {
const CCompositeGeometryManager::SJob &j = *i;
assert(j.pInput);
CCudaProjector3D *projector = dynamic_cast(j.pProjector);
Cuda3DProjectionKernel projKernel = ker3d_default;
int detectorSuperSampling = 1;
int voxelSuperSampling = 1;
bool densityWeighting = false;
if (projector) {
projKernel = projector->getProjectionKernel();
detectorSuperSampling = projector->getDetectorSuperSampling();
voxelSuperSampling = projector->getVoxelSuperSampling();
densityWeighting = projector->getDensityWeighting();
}
size_t inx, iny, inz;
j.pInput->getDims(inx, iny, inz);
CFloat32CustomGPUMemory *srcMem = createGPUMemoryHandler(j.pInput->pData);
astraCUDA3d::SSubDimensions3D srcdims;
srcdims.nx = j.pInput->pData->getWidth();
srcdims.pitch = srcdims.nx;
srcdims.ny = j.pInput->pData->getHeight();
srcdims.nz = j.pInput->pData->getDepth();
srcdims.subnx = inx;
srcdims.subny = iny;
srcdims.subnz = inz;
srcdims.subx = j.pInput->subX;
srcdims.suby = j.pInput->subY;
srcdims.subz = j.pInput->subZ;
ok = srcMem->allocateGPUMemory(inx, iny, inz, astraCUDA3d::INIT_NO);
if (!ok) ASTRA_ERROR("Error allocating GPU memory");
ok = srcMem->copyToGPUMemory(srcdims);
if (!ok) ASTRA_ERROR("Error copying input data to GPU");
switch (j.eType) {
case CCompositeGeometryManager::SJob::JOB_FP:
{
assert(dynamic_cast(j.pInput.get()));
assert(dynamic_cast(j.pOutput.get()));
ASTRA_DEBUG("CCompositeGeometryManager::doJobs: doing FP");
ok = astraCUDA3d::FP(((CCompositeGeometryManager::CProjectionPart*)j.pOutput.get())->pGeom, dstMem->hnd, ((CCompositeGeometryManager::CVolumePart*)j.pInput.get())->pGeom, srcMem->hnd, detectorSuperSampling, projKernel);
if (!ok) ASTRA_ERROR("Error performing sub-FP");
ASTRA_DEBUG("CCompositeGeometryManager::doJobs: FP done");
}
break;
case CCompositeGeometryManager::SJob::JOB_BP:
{
assert(dynamic_cast(j.pOutput.get()));
assert(dynamic_cast(j.pInput.get()));
ASTRA_DEBUG("CCompositeGeometryManager::doJobs: doing BP");
ok = astraCUDA3d::BP(((CCompositeGeometryManager::CProjectionPart*)j.pInput.get())->pGeom, srcMem->hnd, ((CCompositeGeometryManager::CVolumePart*)j.pOutput.get())->pGeom, dstMem->hnd, voxelSuperSampling, densityWeighting);
if (!ok) ASTRA_ERROR("Error performing sub-BP");
ASTRA_DEBUG("CCompositeGeometryManager::doJobs: BP done");
}
break;
case CCompositeGeometryManager::SJob::JOB_FDK:
{
assert(dynamic_cast(j.pOutput.get()));
assert(dynamic_cast(j.pInput.get()));
if (srcdims.subx || srcdims.suby) {
ASTRA_ERROR("CCompositeGeometryManager::doJobs: data too large for FDK");
ok = false;
} else {
ASTRA_DEBUG("CCompositeGeometryManager::doJobs: doing FDK");
ok = astraCUDA3d::FDK(((CCompositeGeometryManager::CProjectionPart*)j.pInput.get())->pGeom, srcMem->hnd, ((CCompositeGeometryManager::CVolumePart*)j.pOutput.get())->pGeom, dstMem->hnd, j.FDKSettings.bShortScan, j.FDKSettings.pfFilter);
if (!ok) ASTRA_ERROR("Error performing sub-FDK");
ASTRA_DEBUG("CCompositeGeometryManager::doJobs: FDK done");
}
}
break;
default:
assert(false);
}
ok = srcMem->freeGPUMemory();
if (!ok) ASTRA_ERROR("Error freeing GPU memory");
delete srcMem;
}
ok = dstMem->copyFromGPUMemory(dstdims);
if (!ok) ASTRA_ERROR("Error copying output data from GPU");
ok = dstMem->freeGPUMemory();
if (!ok) ASTRA_ERROR("Error freeing GPU memory");
delete dstMem;
return true;
}
class WorkQueue {
public:
WorkQueue(CCompositeGeometryManager::TJobSet &_jobs) : m_jobs(_jobs) {
#ifdef USE_PTHREADS
pthread_mutex_init(&m_mutex, 0);
#endif
m_iter = m_jobs.begin();
}
bool receive(CCompositeGeometryManager::TJobSet::const_iterator &i) {
lock();
if (m_iter == m_jobs.end()) {
unlock();
return false;
}
i = m_iter++;
unlock();
return true;
}
#ifdef USE_PTHREADS
void lock() {
// TODO: check mutex op return values
pthread_mutex_lock(&m_mutex);
}
void unlock() {
// TODO: check mutex op return values
pthread_mutex_unlock(&m_mutex);
}
#else
void lock() {
m_mutex.lock();
}
void unlock() {
m_mutex.unlock();
}
#endif
private:
CCompositeGeometryManager::TJobSet &m_jobs;
CCompositeGeometryManager::TJobSet::const_iterator m_iter;
#ifdef USE_PTHREADS
pthread_mutex_t m_mutex;
#else
boost::mutex m_mutex;
#endif
};
struct WorkThreadInfo {
WorkQueue* m_queue;
unsigned int m_iGPU;
};
#ifndef USE_PTHREADS
void runEntries_boost(WorkThreadInfo* info)
{
ASTRA_DEBUG("Launching thread on GPU %d\n", info->m_iGPU);
CCompositeGeometryManager::TJobSet::const_iterator i;
while (info->m_queue->receive(i)) {
ASTRA_DEBUG("Running block on GPU %d\n", info->m_iGPU);
astraCUDA3d::setGPUIndex(info->m_iGPU);
boost::this_thread::interruption_point();
doJob(i);
boost::this_thread::interruption_point();
}
ASTRA_DEBUG("Finishing thread on GPU %d\n", info->m_iGPU);
}
#else
void* runEntries_pthreads(void* data) {
WorkThreadInfo* info = (WorkThreadInfo*)data;
ASTRA_DEBUG("Launching thread on GPU %d\n", info->m_iGPU);
CCompositeGeometryManager::TJobSet::const_iterator i;
while (info->m_queue->receive(i)) {
ASTRA_DEBUG("Running block on GPU %d\n", info->m_iGPU);
astraCUDA3d::setGPUIndex(info->m_iGPU);
pthread_testcancel();
doJob(i);
pthread_testcancel();
}
ASTRA_DEBUG("Finishing thread on GPU %d\n", info->m_iGPU);
return 0;
}
#endif
void runWorkQueue(WorkQueue &queue, const std::vector & iGPUIndices) {
int iThreadCount = iGPUIndices.size();
std::vector infos;
#ifdef USE_PTHREADS
std::vector threads;
#else
std::vector threads;
#endif
infos.resize(iThreadCount);
threads.resize(iThreadCount);
for (int i = 0; i < iThreadCount; ++i) {
infos[i].m_queue = &queue;
infos[i].m_iGPU = iGPUIndices[i];
#ifdef USE_PTHREADS
pthread_create(&threads[i], 0, runEntries_pthreads, (void*)&infos[i]);
#else
threads[i] = new boost::thread(runEntries_boost, &infos[i]);
#endif
}
// Wait for them to finish
for (int i = 0; i < iThreadCount; ++i) {
#ifdef USE_PTHREADS
pthread_join(threads[i], 0);
#else
threads[i]->join();
delete threads[i];
threads[i] = 0;
#endif
}
}
void CCompositeGeometryManager::setGPUIndices(const std::vector& GPUIndices)
{
m_GPUIndices = GPUIndices;
}
bool CCompositeGeometryManager::doJobs(TJobList &jobs)
{
// TODO: Proper clean up if substeps fail (Or as proper as possible)
ASTRA_DEBUG("CCompositeGeometryManager::doJobs");
// Sort job list into job set by output part
TJobSet jobset;
for (TJobList::iterator i = jobs.begin(); i != jobs.end(); ++i) {
jobset[i->pOutput.get()].push_back(*i);
}
size_t maxSize = m_iMaxSize;
if (maxSize == 0) {
// Get memory from first GPU. Not optimal...
if (!m_GPUIndices.empty())
astraCUDA3d::setGPUIndex(m_GPUIndices[0]);
maxSize = astraCUDA3d::availableGPUMemory();
if (maxSize == 0) {
ASTRA_WARN("Unable to get available GPU memory. Defaulting to 1GB.");
maxSize = 1024 * 1024 * 1024;
} else {
ASTRA_DEBUG("Detected %lu bytes of GPU memory", maxSize);
}
} else {
ASTRA_DEBUG("Set to %lu bytes of GPU memory", maxSize);
}
maxSize = (maxSize * 9) / 10;
maxSize /= sizeof(float);
int div = 1;
if (!m_GPUIndices.empty())
div = m_GPUIndices.size();
// Split jobs to fit
TJobSet split;
splitJobs(jobset, maxSize, div, split);
jobset.clear();
if (m_GPUIndices.size() <= 1) {
// Run jobs
ASTRA_DEBUG("Running single-threaded");
if (!m_GPUIndices.empty())
astraCUDA3d::setGPUIndex(m_GPUIndices[0]);
for (TJobSet::const_iterator iter = split.begin(); iter != split.end(); ++iter) {
doJob(iter);
}
} else {
ASTRA_DEBUG("Running multi-threaded");
WorkQueue wq(split);
runWorkQueue(wq, m_GPUIndices);
}
return true;
}
//static
void CCompositeGeometryManager::setGlobalGPUParams(const SGPUParams& params)
{
delete s_params;
s_params = new SGPUParams;
*s_params = params;
ASTRA_DEBUG("CompositeGeometryManager: Setting global GPU params:");
std::ostringstream s;
s << "GPU indices:";
for (unsigned int i = 0; i < params.GPUIndices.size(); ++i)
s << " " << params.GPUIndices[i];
std::string ss = s.str();
ASTRA_DEBUG(ss.c_str());
ASTRA_DEBUG("Memory: %llu", params.memory);
}
}
#endif