a few more optimizations after profiling the builder
parent
db444cd87f
commit
d09655dbe6
|
@ -23,6 +23,9 @@
|
||||||
#include <boost/static_assert.hpp>
|
#include <boost/static_assert.hpp>
|
||||||
#include <boost/tuple/tuple.hpp>
|
#include <boost/tuple/tuple.hpp>
|
||||||
|
|
||||||
|
/// Activate lots of extra checks
|
||||||
|
#define MTS_KD_DEBUG 1
|
||||||
|
|
||||||
/// Compile-time KD-tree depth limit
|
/// Compile-time KD-tree depth limit
|
||||||
#define MTS_KD_MAX_DEPTH 48
|
#define MTS_KD_MAX_DEPTH 48
|
||||||
|
|
||||||
|
@ -39,7 +42,7 @@
|
||||||
#define MTS_KD_BLOCKSIZE_KD (512*1024/sizeof(KDNode))
|
#define MTS_KD_BLOCKSIZE_KD (512*1024/sizeof(KDNode))
|
||||||
#define MTS_KD_BLOCKSIZE_IDX (512*1024/sizeof(uint32_t))
|
#define MTS_KD_BLOCKSIZE_IDX (512*1024/sizeof(uint32_t))
|
||||||
|
|
||||||
#if MTS_KD_DEBUG
|
#if defined(MTS_KD_DEBUG)
|
||||||
#define KDAssert(expr) Assert(expr)
|
#define KDAssert(expr) Assert(expr)
|
||||||
#define KDAssertEx(expr, text) AssertEx(expr, text)
|
#define KDAssertEx(expr, text) AssertEx(expr, text)
|
||||||
#else
|
#else
|
||||||
|
@ -113,7 +116,7 @@ public:
|
||||||
for (std::vector<Chunk>::iterator it = m_chunks.begin();
|
for (std::vector<Chunk>::iterator it = m_chunks.begin();
|
||||||
it != m_chunks.end(); ++it) {
|
it != m_chunks.end(); ++it) {
|
||||||
Chunk &chunk = *it;
|
Chunk &chunk = *it;
|
||||||
if (chunk.getRemainder() >= size) {
|
if (chunk.remainder() >= size) {
|
||||||
T* result = reinterpret_cast<T *>(chunk.cur);
|
T* result = reinterpret_cast<T *>(chunk.cur);
|
||||||
chunk.cur += size;
|
chunk.cur += size;
|
||||||
return result;
|
return result;
|
||||||
|
@ -190,7 +193,7 @@ public:
|
||||||
/**
|
/**
|
||||||
* \brief Return the total amount of chunk memory in bytes
|
* \brief Return the total amount of chunk memory in bytes
|
||||||
*/
|
*/
|
||||||
size_t getSize() const {
|
size_t size() const {
|
||||||
size_t result = 0;
|
size_t result = 0;
|
||||||
for (std::vector<Chunk>::const_iterator it = m_chunks.begin();
|
for (std::vector<Chunk>::const_iterator it = m_chunks.begin();
|
||||||
it != m_chunks.end(); ++it)
|
it != m_chunks.end(); ++it)
|
||||||
|
@ -201,11 +204,11 @@ public:
|
||||||
/**
|
/**
|
||||||
* \brief Return the total amount of used memory in bytes
|
* \brief Return the total amount of used memory in bytes
|
||||||
*/
|
*/
|
||||||
size_t getUsed() const {
|
size_t used() const {
|
||||||
size_t result = 0;
|
size_t result = 0;
|
||||||
for (std::vector<Chunk>::const_iterator it = m_chunks.begin();
|
for (std::vector<Chunk>::const_iterator it = m_chunks.begin();
|
||||||
it != m_chunks.end(); ++it)
|
it != m_chunks.end(); ++it)
|
||||||
result += (*it).getUsed();
|
result += (*it).used();
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -226,18 +229,18 @@ private:
|
||||||
size_t size;
|
size_t size;
|
||||||
uint8_t *start, *cur;
|
uint8_t *start, *cur;
|
||||||
|
|
||||||
inline size_t getUsed() const {
|
inline size_t used() const {
|
||||||
return cur - start;
|
return cur - start;
|
||||||
}
|
}
|
||||||
|
|
||||||
inline size_t getRemainder() const {
|
inline size_t remainder() const {
|
||||||
return size - getUsed();
|
return size - used();
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string toString() const {
|
std::string toString() const {
|
||||||
return formatString("0x%llx-0x%llx (size=" SIZE_T_FMT
|
return formatString("0x%llx-0x%llx (size=" SIZE_T_FMT
|
||||||
", used=" SIZE_T_FMT ")", start, start+size,
|
", used=" SIZE_T_FMT ")", start, start+size,
|
||||||
size, getUsed());
|
size, used());
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -406,7 +409,7 @@ public:
|
||||||
return (*ptr >> shift) & 3;
|
return (*ptr >> shift) & 3;
|
||||||
}
|
}
|
||||||
|
|
||||||
inline size_t getSize() const {
|
inline size_t size() const {
|
||||||
return m_bufferSize;
|
return m_bufferSize;
|
||||||
}
|
}
|
||||||
private:
|
private:
|
||||||
|
@ -463,7 +466,7 @@ public:
|
||||||
* \brief Create a new kd-tree instance initialized with
|
* \brief Create a new kd-tree instance initialized with
|
||||||
* the default parameters.
|
* the default parameters.
|
||||||
*/
|
*/
|
||||||
GenericKDTree() : m_nodes(NULL), m_primIndices(NULL) {
|
GenericKDTree() : m_nodes(NULL), m_indices(NULL) {
|
||||||
m_traversalCost = 15;
|
m_traversalCost = 15;
|
||||||
m_intersectionCost = 20;
|
m_intersectionCost = 20;
|
||||||
m_emptySpaceBonus = 0.9f;
|
m_emptySpaceBonus = 0.9f;
|
||||||
|
@ -471,14 +474,14 @@ public:
|
||||||
m_stopPrims = 4;
|
m_stopPrims = 4;
|
||||||
m_maxBadRefines = 3;
|
m_maxBadRefines = 3;
|
||||||
m_exactPrimThreshold = 16384;
|
m_exactPrimThreshold = 16384;
|
||||||
m_maxDepth = 1024;
|
m_maxDepth = 0;
|
||||||
m_retract = true;
|
m_retract = true;
|
||||||
m_parallel = true;
|
m_parallel = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
virtual ~GenericKDTree() {
|
virtual ~GenericKDTree() {
|
||||||
if (m_primIndices)
|
if (m_indices)
|
||||||
delete[] m_primIndices;
|
delete[] m_indices;
|
||||||
if (m_nodes)
|
if (m_nodes)
|
||||||
delete[] m_nodes;
|
delete[] m_nodes;
|
||||||
}
|
}
|
||||||
|
@ -498,8 +501,8 @@ public:
|
||||||
m_maxDepth = (int) (8 + 1.3f * log2i(primCount));
|
m_maxDepth = (int) (8 + 1.3f * log2i(primCount));
|
||||||
m_maxDepth = std::min(m_maxDepth, (size_type) MTS_KD_MAX_DEPTH);
|
m_maxDepth = std::min(m_maxDepth, (size_type) MTS_KD_MAX_DEPTH);
|
||||||
|
|
||||||
Log(EDebug, "Creating a preliminary index list (%.2f KiB)",
|
Log(EDebug, "Creating a preliminary index list (%s)",
|
||||||
primCount * sizeof(index_type) / 1024.0f);
|
memString(primCount * sizeof(index_type)).c_str());
|
||||||
|
|
||||||
OrderedChunkAllocator &leftAlloc = ctx.leftAlloc;
|
OrderedChunkAllocator &leftAlloc = ctx.leftAlloc;
|
||||||
index_type *indices = leftAlloc.allocate<index_type>(primCount);
|
index_type *indices = leftAlloc.allocate<index_type>(primCount);
|
||||||
|
@ -522,7 +525,7 @@ public:
|
||||||
Log(EDebug, " Scene bounding box (min) : %s", m_aabb.min.toString().c_str());
|
Log(EDebug, " Scene bounding box (min) : %s", m_aabb.min.toString().c_str());
|
||||||
Log(EDebug, " Scene bounding box (max) : %s", m_aabb.max.toString().c_str());
|
Log(EDebug, " Scene bounding box (max) : %s", m_aabb.max.toString().c_str());
|
||||||
Log(EDebug, " Min-max bins : %i", MTS_KD_MINMAX_BINS);
|
Log(EDebug, " Min-max bins : %i", MTS_KD_MINMAX_BINS);
|
||||||
Log(EDebug, " Greedy SAH optimization : <= %i primitives", m_exactPrimThreshold);
|
Log(EDebug, " Greedy SAH optimization : use for <= %i primitives", m_exactPrimThreshold);
|
||||||
Log(EDebug, " Perfect splits : %s", m_clip ? "yes" : "no");
|
Log(EDebug, " Perfect splits : %s", m_clip ? "yes" : "no");
|
||||||
Log(EDebug, " Retract bad splits : %s", m_retract ? "yes" : "no");
|
Log(EDebug, " Retract bad splits : %s", m_retract ? "yes" : "no");
|
||||||
Log(EDebug, " Stopping primitive count : %i", m_stopPrims);
|
Log(EDebug, " Stopping primitive count : %i", m_stopPrims);
|
||||||
|
@ -545,12 +548,12 @@ public:
|
||||||
|
|
||||||
m_indirectionLock = new Mutex();
|
m_indirectionLock = new Mutex();
|
||||||
KDNode *prelimRoot = ctx.nodes.allocate(1);
|
KDNode *prelimRoot = ctx.nodes.allocate(1);
|
||||||
Float finalSAHCost = buildTreeMinMax(ctx, 1, prelimRoot,
|
buildTreeMinMax(ctx, 1, prelimRoot, m_aabb, m_aabb,
|
||||||
m_aabb, m_aabb, indices, primCount, true, 0);
|
indices, primCount, true, 0);
|
||||||
ctx.leftAlloc.release(indices);
|
ctx.leftAlloc.release(indices);
|
||||||
|
|
||||||
KDAssert(ctx.leftAlloc.getUsed() == 0);
|
KDAssert(ctx.leftAlloc.used() == 0);
|
||||||
KDAssert(ctx.rightAlloc.getUsed() == 0);
|
KDAssert(ctx.rightAlloc.used() == 0);
|
||||||
|
|
||||||
if (m_parallel) {
|
if (m_parallel) {
|
||||||
m_interface.done = true;
|
m_interface.done = true;
|
||||||
|
@ -563,14 +566,16 @@ public:
|
||||||
Log(EDebug, "");
|
Log(EDebug, "");
|
||||||
|
|
||||||
Log(EDebug, "Temporary memory statistics:");
|
Log(EDebug, "Temporary memory statistics:");
|
||||||
Log(EDebug, " Classification storage : %.2f KiB",
|
Log(EDebug, " Classification storage : %s",
|
||||||
(ctx.classStorage.getSize() * (1+procCount)) / 1024.0f);
|
memString((ctx.classStorage.size() * (1+procCount))).c_str());
|
||||||
Log(EDebug, " Indirection entries : " SIZE_T_FMT " (%.2f KiB)",
|
Log(EDebug, " Indirection entries : " SIZE_T_FMT " (%s)",
|
||||||
m_indirections.size(),m_indirections.capacity()
|
m_indirections.size(), memString(m_indirections.capacity()
|
||||||
* sizeof(KDNode *) / 1024.0f);
|
* sizeof(KDNode *)).c_str());
|
||||||
|
|
||||||
Log(EDebug, " Main thread:");
|
Log(EDebug, " Main thread:");
|
||||||
ctx.printStats();
|
ctx.printStats();
|
||||||
|
size_t totalUsage = m_indirections.capacity()
|
||||||
|
* sizeof(KDNode *) + ctx.size();
|
||||||
|
|
||||||
/// Clean up event lists and print statistics
|
/// Clean up event lists and print statistics
|
||||||
ctx.leftAlloc.cleanup();
|
ctx.leftAlloc.cleanup();
|
||||||
|
@ -579,49 +584,71 @@ public:
|
||||||
Log(EDebug, " Worker thread %i:", i+1);
|
Log(EDebug, " Worker thread %i:", i+1);
|
||||||
BuildContext &subCtx = m_builders[i]->getContext();
|
BuildContext &subCtx = m_builders[i]->getContext();
|
||||||
subCtx.printStats();
|
subCtx.printStats();
|
||||||
|
totalUsage += subCtx.size();
|
||||||
subCtx.leftAlloc.cleanup();
|
subCtx.leftAlloc.cleanup();
|
||||||
subCtx.rightAlloc.cleanup();
|
subCtx.rightAlloc.cleanup();
|
||||||
ctx.accumulateStatisticsFrom(subCtx);
|
ctx.accumulateStatisticsFrom(subCtx);
|
||||||
}
|
}
|
||||||
|
Log(EDebug, " Total: %s", memString(totalUsage).c_str());
|
||||||
|
|
||||||
Log(EDebug, "");
|
Log(EDebug, "");
|
||||||
timer->reset();
|
timer->reset();
|
||||||
Log(EDebug, "Optimizing memory layout ..");
|
Log(EDebug, "Optimizing memory layout ..");
|
||||||
|
|
||||||
std::stack<boost::tuple<const KDNode *, KDNode *, BuildContext *, AABB> > stack;
|
std::stack<boost::tuple<const KDNode *, KDNode *,
|
||||||
|
const BuildContext *, AABB> > stack;
|
||||||
|
|
||||||
Float expTraversalSteps = 0;
|
Float expTraversalSteps = 0;
|
||||||
Float expLeavesVisited = 0;
|
Float expLeavesVisited = 0;
|
||||||
Float expPrimitivesIntersected = 0;
|
Float expPrimitivesIntersected = 0;
|
||||||
|
Float sahCost = 0;
|
||||||
size_type nodePtr = 0, indexPtr = 0;
|
size_type nodePtr = 0, indexPtr = 0;
|
||||||
|
|
||||||
m_nodes = new KDNode[ctx.innerNodeCount + ctx.leafNodeCount];
|
m_nodes = new KDNode[ctx.innerNodeCount + ctx.leafNodeCount];
|
||||||
m_primIndices = new index_type[ctx.primIndexCount];
|
m_indices = new index_type[ctx.primIndexCount];
|
||||||
|
|
||||||
stack.push(boost::make_tuple(prelimRoot, &m_nodes[nodePtr++], &ctx, m_aabb));
|
stack.push(boost::make_tuple(prelimRoot, &m_nodes[nodePtr++], &ctx, m_aabb));
|
||||||
while (!stack.empty()) {
|
while (!stack.empty()) {
|
||||||
const KDNode *node = boost::get<0>(stack.top());
|
const KDNode *node = boost::get<0>(stack.top());
|
||||||
KDNode *target = boost::get<1>(stack.top());
|
KDNode *target = boost::get<1>(stack.top());
|
||||||
BuildContext *context = boost::get<2>(stack.top());
|
const BuildContext *context = boost::get<2>(stack.top());
|
||||||
AABB aabb = boost::get<3>(stack.top());
|
AABB aabb = boost::get<3>(stack.top());
|
||||||
stack.pop();
|
stack.pop();
|
||||||
|
|
||||||
if (node->isLeaf()) {
|
if (node->isLeaf()) {
|
||||||
size_t primCount = node->getPrimEnd() - node->getPrimStart();
|
size_type primStart = node->getPrimStart(),
|
||||||
expLeavesVisited += aabb.getSurfaceArea();
|
primEnd = node->getPrimEnd(),
|
||||||
expPrimitivesIntersected += aabb.getSurfaceArea() * primCount;
|
primCount = primEnd-primStart;
|
||||||
target->initLeafNode(indexPtr, primCount);
|
target->initLeafNode(indexPtr, primCount);
|
||||||
indexPtr += primCount;
|
|
||||||
|
Float sa = aabb.getSurfaceArea(), weightedSA = sa * primCount;
|
||||||
|
expLeavesVisited += aabb.getSurfaceArea();
|
||||||
|
expPrimitivesIntersected += weightedSA;
|
||||||
|
sahCost += weightedSA * m_intersectionCost;
|
||||||
|
|
||||||
|
const BlockedVector<index_type, MTS_KD_BLOCKSIZE_IDX> &indices
|
||||||
|
= context->indices;
|
||||||
|
for (size_type idx = primStart; idx<primEnd; ++idx)
|
||||||
|
m_indices[indexPtr++] = indices[idx];
|
||||||
} else {
|
} else {
|
||||||
expTraversalSteps += aabb.getSurfaceArea();
|
typename std::map<const KDNode *, index_type>::const_iterator it
|
||||||
|
= m_interface.threadMap.find(node);
|
||||||
|
// Check if we're switching to a subtree built by a worker thread
|
||||||
|
if (it != m_interface.threadMap.end())
|
||||||
|
context = &m_builders[(*it).second]->getContext();
|
||||||
|
|
||||||
|
Float sa = aabb.getSurfaceArea();
|
||||||
|
expTraversalSteps += sa;
|
||||||
|
sahCost += sa * m_traversalCost;
|
||||||
const KDNode *left;
|
const KDNode *left;
|
||||||
if (EXPECT_TAKEN(!node->isIndirection()))
|
if (EXPECT_TAKEN(!node->isIndirection()))
|
||||||
left = node->getLeft();
|
left = node->getLeft();
|
||||||
else
|
else
|
||||||
left = m_indirections[node->getIndirectionIndex()];
|
left = m_indirections[node->getIndirectionIndex()];
|
||||||
|
|
||||||
KDNode *children = &m_nodes[nodePtr];
|
KDNode *children = &m_nodes[nodePtr];
|
||||||
nodePtr += 2;
|
nodePtr += 2;
|
||||||
uint8_t axis = node->getAxis();
|
int axis = node->getAxis();
|
||||||
float split = node->getSplit();
|
float split = node->getSplit();
|
||||||
bool result = target->initInnerNode(axis, split, children - target);
|
bool result = target->initInnerNode(axis, split, children - target);
|
||||||
if (!result)
|
if (!result)
|
||||||
|
@ -664,21 +691,26 @@ public:
|
||||||
expTraversalSteps /= rootSA;
|
expTraversalSteps /= rootSA;
|
||||||
expLeavesVisited /= rootSA;
|
expLeavesVisited /= rootSA;
|
||||||
expPrimitivesIntersected /= rootSA;
|
expPrimitivesIntersected /= rootSA;
|
||||||
|
sahCost /= rootSA;
|
||||||
|
|
||||||
Log(EDebug, "Detailed kd-tree statistics:");
|
Log(EDebug, "Detailed kd-tree statistics:");
|
||||||
Log(EDebug, " Inner nodes : %i", ctx.innerNodeCount);
|
Log(EDebug, " Inner nodes : %i", ctx.innerNodeCount);
|
||||||
Log(EDebug, " Leaf nodes : %i", ctx.leafNodeCount);
|
Log(EDebug, " Leaf nodes : %i", ctx.leafNodeCount);
|
||||||
Log(EDebug, " Nonempty leaf nodes : %i", ctx.nonemptyLeafNodeCount);
|
Log(EDebug, " Nonempty leaf nodes : %i", ctx.nonemptyLeafNodeCount);
|
||||||
Log(EDebug, " Node storage cost : %.2f KiB",
|
Log(EDebug, " Node storage cost : %s",
|
||||||
(nodePtr * sizeof(KDNode)) / 1024.0f);
|
memString(nodePtr * sizeof(KDNode)).c_str());
|
||||||
Log(EDebug, " Index storage cost : %.2f KiB",
|
Log(EDebug, " Index storage cost : %s",
|
||||||
(indexPtr * sizeof(index_type)) / 1024.0f);
|
memString(indexPtr * sizeof(index_type)).c_str());
|
||||||
|
Log(EDebug, " Parallel work units : " SIZE_T_FMT,
|
||||||
|
m_interface.threadMap.size());
|
||||||
Log(EDebug, " Retracted splits : %i", ctx.retractedSplits);
|
Log(EDebug, " Retracted splits : %i", ctx.retractedSplits);
|
||||||
Log(EDebug, " Pruned primitives : %i", ctx.pruned);
|
Log(EDebug, " Pruned primitives : %i", ctx.pruned);
|
||||||
Log(EDebug, " Exp. traversals/ray : %.2f", expTraversalSteps);
|
Log(EDebug, " Avg. prims/nonempty leaf : %.2f",
|
||||||
Log(EDebug, " Exp. leaf visits/ray : %.2f", expLeavesVisited);
|
ctx.primIndexCount / (Float) ctx.nonemptyLeafNodeCount);
|
||||||
Log(EDebug, " Exp. prim. visits/ray : %.2f", expPrimitivesIntersected);
|
Log(EDebug, " Expected traversals/ray : %.2f", expTraversalSteps);
|
||||||
Log(EDebug, " Final SAH cost : %.2f", finalSAHCost);
|
Log(EDebug, " Expected leaf visits/ray : %.2f", expLeavesVisited);
|
||||||
|
Log(EDebug, " Expected prim. visits/ray : %.2f", expPrimitivesIntersected);
|
||||||
|
Log(EDebug, " Final SAH cost : %.2f", sahCost);
|
||||||
Log(EDebug, "");
|
Log(EDebug, "");
|
||||||
|
|
||||||
|
|
||||||
|
@ -713,7 +745,7 @@ protected:
|
||||||
inline EdgeEvent() { }
|
inline EdgeEvent() { }
|
||||||
|
|
||||||
/// Create a new edge event
|
/// Create a new edge event
|
||||||
inline EdgeEvent(uint16_t type, uint16_t axis, float pos, index_type index)
|
inline EdgeEvent(uint16_t type, int axis, float pos, index_type index)
|
||||||
: pos(pos), index(index), type(type), axis(axis) { }
|
: pos(pos), index(index), type(type), axis(axis) { }
|
||||||
|
|
||||||
/// Return a string representation
|
/// Return a string representation
|
||||||
|
@ -742,9 +774,9 @@ protected:
|
||||||
/// Primitive index
|
/// Primitive index
|
||||||
index_type index;
|
index_type index;
|
||||||
/// Event type: end/planar/start
|
/// Event type: end/planar/start
|
||||||
uint16_t type;
|
unsigned int type:2;
|
||||||
/// Event axis
|
/// Event axis
|
||||||
uint16_t axis;
|
unsigned int axis:2;
|
||||||
};
|
};
|
||||||
|
|
||||||
BOOST_STATIC_ASSERT(sizeof(EdgeEvent) == 12);
|
BOOST_STATIC_ASSERT(sizeof(EdgeEvent) == 12);
|
||||||
|
@ -817,15 +849,22 @@ protected:
|
||||||
pruned = 0;
|
pruned = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
size_t size() {
|
||||||
|
return leftAlloc.size() + rightAlloc.size()
|
||||||
|
+ nodes.capacity() * sizeof(KDNode)
|
||||||
|
+ indices.capacity() * sizeof(index_type)
|
||||||
|
+ classStorage.size();
|
||||||
|
}
|
||||||
|
|
||||||
void printStats() {
|
void printStats() {
|
||||||
Log(EDebug, " Left events : " SIZE_T_FMT " chunks (%.2f KiB)",
|
Log(EDebug, " Left events : " SIZE_T_FMT " chunks (%s)",
|
||||||
leftAlloc.getChunkCount(), leftAlloc.getSize() / 1024.0f);
|
leftAlloc.getChunkCount(), memString(leftAlloc.size()).c_str());
|
||||||
Log(EDebug, " Right events : " SIZE_T_FMT " chunks (%.2f KiB)",
|
Log(EDebug, " Right events : " SIZE_T_FMT " chunks (%s)",
|
||||||
rightAlloc.getChunkCount(), rightAlloc.getSize() / 1024.0f);
|
rightAlloc.getChunkCount(), memString(rightAlloc.size()).c_str());
|
||||||
Log(EDebug, " kd-tree nodes : " SIZE_T_FMT " entries, " SIZE_T_FMT " blocks (%.2f KiB)",
|
Log(EDebug, " kd-tree nodes : " SIZE_T_FMT " entries, " SIZE_T_FMT " blocks (%s)",
|
||||||
nodes.size(), nodes.blockCount(), (nodes.capacity() * sizeof(KDNode)) / 1024.0f);
|
nodes.size(), nodes.blockCount(), memString(nodes.capacity() * sizeof(KDNode)).c_str());
|
||||||
Log(EDebug, " Indices : " SIZE_T_FMT " entries, " SIZE_T_FMT " blocks (%.2f KiB)",
|
Log(EDebug, " Indices : " SIZE_T_FMT " entries, " SIZE_T_FMT " blocks (%s)",
|
||||||
indices.size(), indices.blockCount(), (indices.capacity() * sizeof(index_type)) / 1024.0f);
|
indices.size(), indices.blockCount(), memString(indices.capacity() * sizeof(index_type)).c_str());
|
||||||
}
|
}
|
||||||
|
|
||||||
void accumulateStatisticsFrom(const BuildContext &ctx) {
|
void accumulateStatisticsFrom(const BuildContext &ctx) {
|
||||||
|
@ -846,6 +885,7 @@ protected:
|
||||||
/* Communcation */
|
/* Communcation */
|
||||||
ref<Mutex> mutex;
|
ref<Mutex> mutex;
|
||||||
ref<ConditionVariable> cond, condJobTaken;
|
ref<ConditionVariable> cond, condJobTaken;
|
||||||
|
std::map<const KDNode *, index_type> threadMap;
|
||||||
bool done;
|
bool done;
|
||||||
|
|
||||||
/* Job description for building a subtree */
|
/* Job description for building a subtree */
|
||||||
|
@ -997,8 +1037,9 @@ protected:
|
||||||
*/
|
*/
|
||||||
class SAHTreeBuilder : public Thread {
|
class SAHTreeBuilder : public Thread {
|
||||||
public:
|
public:
|
||||||
SAHTreeBuilder(size_type idx, GenericKDTree *parent)
|
SAHTreeBuilder(index_type id, GenericKDTree *parent)
|
||||||
: Thread(formatString("bld%i", idx+1)),
|
: Thread(formatString("bld%i", id)),
|
||||||
|
m_id(id),
|
||||||
m_parent(parent),
|
m_parent(parent),
|
||||||
m_context(parent->cast()->getPrimitiveCount()),
|
m_context(parent->cast()->getPrimitiveCount()),
|
||||||
m_interface(parent->m_interface) {
|
m_interface(parent->m_interface) {
|
||||||
|
@ -1006,8 +1047,8 @@ protected:
|
||||||
}
|
}
|
||||||
|
|
||||||
~SAHTreeBuilder() {
|
~SAHTreeBuilder() {
|
||||||
KDAssert(m_context.leftAlloc.getUsed() == 0);
|
KDAssert(m_context.leftAlloc.used() == 0);
|
||||||
KDAssert(m_context.rightAlloc.getUsed() == 0);
|
KDAssert(m_context.rightAlloc.used() == 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
void run() {
|
void run() {
|
||||||
|
@ -1029,6 +1070,7 @@ protected:
|
||||||
EdgeEvent *eventStart = leftAlloc.allocate<EdgeEvent>(eventCount),
|
EdgeEvent *eventStart = leftAlloc.allocate<EdgeEvent>(eventCount),
|
||||||
*eventEnd = eventStart + eventCount;
|
*eventEnd = eventStart + eventCount;
|
||||||
memcpy(eventStart, m_interface.eventStart, eventCount * sizeof(EdgeEvent));
|
memcpy(eventStart, m_interface.eventStart, eventCount * sizeof(EdgeEvent));
|
||||||
|
m_interface.threadMap[node] = m_id;
|
||||||
m_interface.node = NULL;
|
m_interface.node = NULL;
|
||||||
m_interface.condJobTaken->signal();
|
m_interface.condJobTaken->signal();
|
||||||
m_interface.mutex->unlock();
|
m_interface.mutex->unlock();
|
||||||
|
@ -1045,6 +1087,7 @@ protected:
|
||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
index_type m_id;
|
||||||
GenericKDTree *m_parent;
|
GenericKDTree *m_parent;
|
||||||
BuildContext m_context;
|
BuildContext m_context;
|
||||||
BuildInterface &m_interface;
|
BuildInterface &m_interface;
|
||||||
|
@ -1060,6 +1103,19 @@ protected:
|
||||||
return static_cast<Derived *>(this);
|
return static_cast<Derived *>(this);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Turn a memory size into a human-readable string
|
||||||
|
inline static std::string memString(size_t size) {
|
||||||
|
Float value = (Float) size;
|
||||||
|
const char *prefixes[] = {
|
||||||
|
"B", "KiB", "MiB", "GiB", "TiB"
|
||||||
|
};
|
||||||
|
int prefix = 0;
|
||||||
|
while (prefix < 4 && value > 1024.0f) {
|
||||||
|
value /= 1024.0f; ++prefix;
|
||||||
|
}
|
||||||
|
return formatString("%.2f %s", value, prefixes[prefix]);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* \brief Create an edge event list for a given list of primitives.
|
* \brief Create an edge event list for a given list of primitives.
|
||||||
*
|
*
|
||||||
|
@ -1181,9 +1237,11 @@ protected:
|
||||||
node->initLeafNode(start, primCount);
|
node->initLeafNode(start, primCount);
|
||||||
size_t actualCount = ctx.indices.size() - start;
|
size_t actualCount = ctx.indices.size() - start;
|
||||||
|
|
||||||
|
if (primCount > 0)
|
||||||
|
ctx.nonemptyLeafNodeCount++;
|
||||||
|
|
||||||
if (actualCount != primCount) {
|
if (actualCount != primCount) {
|
||||||
KDAssert(primCount > 0);
|
KDAssert(primCount > 0);
|
||||||
ctx.nonemptyLeafNodeCount++;
|
|
||||||
OrderedChunkAllocator &alloc = ctx.leftAlloc;
|
OrderedChunkAllocator &alloc = ctx.leftAlloc;
|
||||||
|
|
||||||
/* A temporary list is allocated to do the sorting (the indices
|
/* A temporary list is allocated to do the sorting (the indices
|
||||||
|
@ -1271,6 +1329,7 @@ protected:
|
||||||
m_interface.condJobTaken->wait();
|
m_interface.condJobTaken->wait();
|
||||||
m_interface.mutex->unlock();
|
m_interface.mutex->unlock();
|
||||||
|
|
||||||
|
|
||||||
// Never tear down this subtree (return a SAH cost of -infinity)
|
// Never tear down this subtree (return a SAH cost of -infinity)
|
||||||
sahCost = -std::numeric_limits<Float>::infinity();
|
sahCost = -std::numeric_limits<Float>::infinity();
|
||||||
} else {
|
} else {
|
||||||
|
@ -1376,7 +1435,6 @@ protected:
|
||||||
} else {
|
} else {
|
||||||
/* In the end, splitting didn't help to reduce the SAH cost.
|
/* In the end, splitting didn't help to reduce the SAH cost.
|
||||||
Tear up everything below this node and create a leaf */
|
Tear up everything below this node and create a leaf */
|
||||||
|
|
||||||
ctx.nodes.resize(nodePosBeforeSplit);
|
ctx.nodes.resize(nodePosBeforeSplit);
|
||||||
ctx.retractedSplits++;
|
ctx.retractedSplits++;
|
||||||
ctx.leafNodeCount = leafNodeCountBeforeSplit;
|
ctx.leafNodeCount = leafNodeCountBeforeSplit;
|
||||||
|
@ -1427,7 +1485,6 @@ protected:
|
||||||
}
|
}
|
||||||
|
|
||||||
SplitCandidate bestSplit;
|
SplitCandidate bestSplit;
|
||||||
Float invSA = 1.0f / nodeAABB.getSurfaceArea();
|
|
||||||
|
|
||||||
/* ==================================================================== */
|
/* ==================================================================== */
|
||||||
/* Split candidate search */
|
/* Split candidate search */
|
||||||
|
@ -1440,7 +1497,6 @@ protected:
|
||||||
/* Initially, the split plane is placed left of the scene
|
/* Initially, the split plane is placed left of the scene
|
||||||
and thus all geometry is on its right side */
|
and thus all geometry is on its right side */
|
||||||
size_type numLeft[3], numRight[3];
|
size_type numLeft[3], numRight[3];
|
||||||
AABB aabb(nodeAABB);
|
|
||||||
for (int i=0; i<3; ++i) {
|
for (int i=0; i<3; ++i) {
|
||||||
numLeft[i] = 0;
|
numLeft[i] = 0;
|
||||||
numRight[i] = primCount;
|
numRight[i] = primCount;
|
||||||
|
@ -1449,28 +1505,43 @@ protected:
|
||||||
int eventsByAxisCtr = 1;
|
int eventsByAxisCtr = 1;
|
||||||
eventsByAxis[0] = eventStart;
|
eventsByAxis[0] = eventStart;
|
||||||
|
|
||||||
|
const Vector extents(nodeAABB.getExtents());
|
||||||
|
const Float invSA = 0.5f / (extents.x * extents.y
|
||||||
|
+ extents.y*extents.z + extents.x*extents.z);
|
||||||
|
const Vector temp0 = Vector(
|
||||||
|
(extents[1] * extents[2]),
|
||||||
|
(extents[0] * extents[2]),
|
||||||
|
(extents[0] * extents[1])) * 2 * invSA;
|
||||||
|
|
||||||
|
const Vector temp1 = Vector(
|
||||||
|
(extents[1] + extents[2]),
|
||||||
|
(extents[0] + extents[2]),
|
||||||
|
(extents[0] + extents[1])) * 2 * invSA;
|
||||||
|
|
||||||
/* Iterate over all events on the current axis */
|
/* Iterate over all events on the current axis */
|
||||||
for (EdgeEvent *event = eventStart; event < eventEnd; ) {
|
for (EdgeEvent *event = eventStart; event < eventEnd; ) {
|
||||||
/* Record the current position and count all
|
/* Record the current position and count the number
|
||||||
other events, which are also here */
|
and type of remaining events, which are also here.
|
||||||
uint16_t axis = event->axis;
|
Due to the sort ordering, there is no need to worry
|
||||||
|
about an axis change in the loops below */
|
||||||
|
int axis = event->axis;
|
||||||
float pos = event->pos;
|
float pos = event->pos;
|
||||||
size_type numStart = 0, numEnd = 0, numPlanar = 0;
|
size_type numStart = 0, numEnd = 0, numPlanar = 0;
|
||||||
|
|
||||||
/* Count "end" events */
|
/* Count "end" events */
|
||||||
while (event < eventEnd && event->pos == pos && event->axis == axis
|
while (event < eventEnd && event->pos == pos
|
||||||
&& event->type == EdgeEvent::EEdgeEnd) {
|
&& event->type == EdgeEvent::EEdgeEnd) {
|
||||||
++numEnd; ++event;
|
++numEnd; ++event;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Count "planar" events */
|
/* Count "planar" events */
|
||||||
while (event < eventEnd && event->pos == pos && event->axis == axis
|
while (event < eventEnd && event->pos == pos
|
||||||
&& event->type == EdgeEvent::EEdgePlanar) {
|
&& event->type == EdgeEvent::EEdgePlanar) {
|
||||||
++numPlanar; ++event;
|
++numPlanar; ++event;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Count "start" events */
|
/* Count "start" events */
|
||||||
while (event < eventEnd && event->pos == pos && event->axis == axis
|
while (event < eventEnd && event->pos == pos
|
||||||
&& event->type == EdgeEvent::EEdgeStart) {
|
&& event->type == EdgeEvent::EEdgeStart) {
|
||||||
++numStart; ++event;
|
++numStart; ++event;
|
||||||
}
|
}
|
||||||
|
@ -1487,24 +1558,33 @@ protected:
|
||||||
|
|
||||||
/* Calculate a score using the surface area heuristic */
|
/* Calculate a score using the surface area heuristic */
|
||||||
if (EXPECT_TAKEN(pos >= nodeAABB.min[axis] && pos <= nodeAABB.max[axis])) {
|
if (EXPECT_TAKEN(pos >= nodeAABB.min[axis] && pos <= nodeAABB.max[axis])) {
|
||||||
size_type nL = numLeft[axis], nR = numRight[axis];
|
const size_type nL = numLeft[axis], nR = numRight[axis];
|
||||||
Float tmp = nodeAABB.max[axis];
|
const Float nLF = (Float) nL, nRF = (Float) nR;
|
||||||
aabb.max[axis] = pos;
|
|
||||||
Float pLeft = invSA * aabb.getSurfaceArea();
|
Float pLeft = temp0[axis] + temp1[axis] * (pos - nodeAABB.min[axis]);
|
||||||
aabb.max[axis] = tmp;
|
Float pRight = temp0[axis] + temp1[axis] * (nodeAABB.max[axis] - pos);
|
||||||
tmp = aabb.min[axis];
|
|
||||||
aabb.min[axis] = pos;
|
if (numPlanar == 0) {
|
||||||
Float pRight = invSA * aabb.getSurfaceArea();
|
Float sahCost = m_intersectionCost + m_traversalCost
|
||||||
aabb.min[axis] = tmp;
|
* (pLeft * nLF + pRight * nRF);
|
||||||
Float sahCostPlanarLeft = m_traversalCost + m_intersectionCost
|
if (nL == 0 || nR == 0)
|
||||||
* (pLeft * (nL + numPlanar) + pRight * nR);
|
sahCost *= m_emptySpaceBonus;
|
||||||
Float sahCostPlanarRight = m_traversalCost + m_intersectionCost
|
if (sahCost < bestSplit.sahCost) {
|
||||||
* (pLeft * nL + pRight * (nR + numPlanar));
|
bestSplit.pos = pos;
|
||||||
|
bestSplit.axis = axis;
|
||||||
|
bestSplit.sahCost = sahCost;
|
||||||
|
bestSplit.numLeft = nL;
|
||||||
|
bestSplit.numRight = nR;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
Float sahCostPlanarLeft = m_intersectionCost + m_traversalCost
|
||||||
|
* (pLeft * (nL+numPlanar) + pRight * nRF);
|
||||||
|
Float sahCostPlanarRight = m_intersectionCost + m_traversalCost
|
||||||
|
* (pLeft * nLF + pRight * (nR+numPlanar));
|
||||||
if (nL + numPlanar == 0 || nR == 0)
|
if (nL + numPlanar == 0 || nR == 0)
|
||||||
sahCostPlanarLeft *= m_emptySpaceBonus;
|
sahCostPlanarLeft *= m_emptySpaceBonus;
|
||||||
if (nL == 0 || nR + numPlanar == 0)
|
if (nL == 0 || nR + numPlanar == 0)
|
||||||
sahCostPlanarRight *= m_emptySpaceBonus;
|
sahCostPlanarRight *= m_emptySpaceBonus;
|
||||||
|
|
||||||
if (sahCostPlanarLeft < bestSplit.sahCost || sahCostPlanarRight < bestSplit.sahCost) {
|
if (sahCostPlanarLeft < bestSplit.sahCost || sahCostPlanarRight < bestSplit.sahCost) {
|
||||||
bestSplit.pos = pos;
|
bestSplit.pos = pos;
|
||||||
bestSplit.axis = axis;
|
bestSplit.axis = axis;
|
||||||
|
@ -1520,6 +1600,7 @@ protected:
|
||||||
bestSplit.planarLeft = false;
|
bestSplit.planarLeft = false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
/* When primitive clipping is active, this should
|
/* When primitive clipping is active, this should
|
||||||
never happen! */
|
never happen! */
|
||||||
|
@ -2083,7 +2164,7 @@ protected:
|
||||||
|
|
||||||
private:
|
private:
|
||||||
KDNode *m_nodes;
|
KDNode *m_nodes;
|
||||||
index_type *m_primIndices;
|
index_type *m_indices;
|
||||||
Float m_traversalCost;
|
Float m_traversalCost;
|
||||||
Float m_intersectionCost;
|
Float m_intersectionCost;
|
||||||
Float m_emptySpaceBonus;
|
Float m_emptySpaceBonus;
|
||||||
|
|
|
@ -112,7 +112,7 @@ public:
|
||||||
|
|
||||||
void test02_buildSimple() {
|
void test02_buildSimple() {
|
||||||
Properties bunnyProps("ply");
|
Properties bunnyProps("ply");
|
||||||
bunnyProps.setString("filename", "tools/tests/happy.ply");
|
bunnyProps.setString("filename", "tools/tests/xyzrgb_statuette.ply");
|
||||||
|
|
||||||
ref<TriMesh> mesh = static_cast<TriMesh *> (PluginManager::getInstance()->
|
ref<TriMesh> mesh = static_cast<TriMesh *> (PluginManager::getInstance()->
|
||||||
createObject(TriMesh::m_theClass, bunnyProps));
|
createObject(TriMesh::m_theClass, bunnyProps));
|
||||||
|
|
Loading…
Reference in New Issue