/*
    This file is part of Mitsuba, a physically based rendering system.

    Copyright (c) 2007-2011 by Wenzel Jakob and others.

    Mitsuba is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License Version 3
    as published by the Free Software Foundation.

    Mitsuba is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program. If not, see <http://www.gnu.org/licenses/>.
*/

#if !defined(__KDTREE_GENERIC_H)
#define __KDTREE_GENERIC_H

#include <mitsuba/core/timer.h>
#include <mitsuba/core/random.h>
#include <mitsuba/core/lock.h>
#include <boost/static_assert.hpp>
#include <boost/tuple/tuple.hpp>
#include <stack>

/// Activate lots of extra checks
// #define MTS_KD_DEBUG 1

/** Compile-time kd-tree depth limit. This allows certain
    data structures to be kept on the stack */
#define MTS_KD_MAXDEPTH 48

/// OrderedChunkAllocator: don't create chunks smaller than 512 KiB
#define MTS_KD_MIN_ALLOC 512*1024

/// Allocate nodes & index lists in blocks of 512 KiB
#define MTS_KD_BLOCKSIZE_KD  (512*1024/sizeof(KDNode))
#define MTS_KD_BLOCKSIZE_IDX (512*1024/sizeof(uint32_t))

#if defined(MTS_KD_DEBUG)
#define KDAssert(expr) SAssert(expr)
#define KDAssertEx(expr, text) SAssertEx(expr, text)
#else
#define KDAssert(expr)
#define KDAssertEx(expr, text)
#endif

MTS_NAMESPACE_BEGIN

/**
 * \brief Special "ordered" memory allocator
 *
 * During kd-tree construction, large amounts of memory are required
 * to temporarily hold index and edge event lists. When not implemented
 * properly, these allocations can become a critical bottleneck.
 * The class \ref OrderedChunkAllocator provides a specialized
 * memory allocator, which reserves memory in chunks of at least
 * 512 KiB (see \c MTS_KD_MIN_ALLOC). An important assumption made
 * by the allocator is that memory will be released in the exact
 * same order in which it was previously allocated. This makes it
 * possible to create an implementation with a very low memory
 * overhead. Note that no locking is done, hence each thread will
 * need its own allocator.
 *
 * \author Wenzel Jakob
 */
class OrderedChunkAllocator {
public:
    inline OrderedChunkAllocator(size_t minAllocation = MTS_KD_MIN_ALLOC)
            : m_minAllocation(minAllocation) {
        m_chunks.reserve(16);
    }

    ~OrderedChunkAllocator() {
        cleanup();
    }

    /**
     * \brief Release all memory used by the allocator
     */
    void cleanup() {
        for (std::vector<Chunk>::iterator it = m_chunks.begin();
                it != m_chunks.end(); ++it)
            freeAligned((*it).start);
        m_chunks.clear();
    }

    /**
     * \brief Merge the chunks of another allocator into this one
     */
    void merge(const OrderedChunkAllocator &other) {
        m_chunks.reserve(m_chunks.size() + other.m_chunks.size());
        m_chunks.insert(m_chunks.end(), other.m_chunks.begin(),
            other.m_chunks.end());
    }

    /**
     * \brief Forget about all chunks without actually freeing them.
     * This is useful when the chunks have been merged into another
     * allocator.
     */
    void forget() {
        m_chunks.clear();
    }

    /**
     * \brief Request a block of memory from the allocator
     *
     * Walks through the list of chunks to find one with enough
     * free memory. If no chunk could be found, a new one is created.
     */
    template <typename T> T * __restrict allocate(size_t size) {
        size *= sizeof(T);
        for (std::vector<Chunk>::iterator it = m_chunks.begin();
                it != m_chunks.end(); ++it) {
            Chunk &chunk = *it;
            if (chunk.remainder() >= size) {
                T* result = reinterpret_cast<T *>(chunk.cur);
                chunk.cur += size;
                return result;
            }
        }

        /* No chunk had enough free memory */
        size_t allocSize = std::max(size,
            m_minAllocation);

        Chunk chunk;
        chunk.start = (uint8_t *) allocAligned(allocSize);
        chunk.cur = chunk.start + size;
        chunk.size = allocSize;
        m_chunks.push_back(chunk);

        return reinterpret_cast<T *>(chunk.start);
    }

    template <typename T> void release(T *ptr) {
        for (std::vector<Chunk>::iterator it = m_chunks.begin();
                it != m_chunks.end(); ++it) {
            Chunk &chunk = *it;
            if ((uint8_t *) ptr >= chunk.start &&
                    (uint8_t *) ptr < chunk.start + chunk.size) {
                chunk.cur = (uint8_t *) ptr;
                return;
            }
        }
#if defined(MTS_KD_DEBUG)
        /* Uh oh, allocation could not be found. Check if it has size == 0 */
        for (std::vector<Chunk>::iterator it = m_chunks.begin();
                it != m_chunks.end(); ++it) {
            const Chunk &chunk = *it;
            if ((uint8_t *) ptr == chunk.start + chunk.size)
                return;
        }
        SLog(EError, "OrderedChunkAllocator: Internal error while"
            " releasing memory");
#endif
    }

    /**
     * \brief Shrink the size of the last allocated chunk
     */
    template <typename T> void shrinkAllocation(T *ptr, size_t newSize) {
        newSize *= sizeof(T);
        for (std::vector<Chunk>::iterator it = m_chunks.begin();
                it != m_chunks.end(); ++it) {
            Chunk &chunk = *it;
            if ((uint8_t *) ptr >= chunk.start &&
                    (uint8_t *) ptr < chunk.start + chunk.size) {
                chunk.cur = (uint8_t *) ptr + newSize;
                return;
            }
        }
#if defined(MTS_KD_DEBUG)
        /* Uh oh, allocation could not be found. Check if it has size == 0 */
        if (newSize == 0) {
            for (std::vector<Chunk>::iterator it = m_chunks.begin();
                    it != m_chunks.end(); ++it) {
                const Chunk &chunk = *it;
                if ((uint8_t *) ptr == chunk.start + chunk.size)
                    return;
            }
        }
        SLog(EError, "OrderedChunkAllocator: Internal error while"
            " releasing memory");
#endif
    }

    inline size_t getChunkCount() const { return m_chunks.size(); }

    /**
     * \brief Return the total amount of chunk memory in bytes
     */
    size_t size() const {
        size_t result = 0;
        for (std::vector<Chunk>::const_iterator it = m_chunks.begin();
                it != m_chunks.end(); ++it)
            result += (*it).size;
        return result;
    }

    /**
     * \brief Return the total amount of used memory in bytes
     */
    size_t used() const {
        size_t result = 0;
        for (std::vector<Chunk>::const_iterator it = m_chunks.begin();
                it != m_chunks.end(); ++it)
            result += (*it).used();
        return result;
    }

    /**
     * \brief Return a string representation of the chunks
     */
    std::string toString() const {
        std::ostringstream oss;
        oss << "OrderedChunkAllocator[" << endl;
        for (size_t i=0; i<m_chunks.size(); ++i)
            oss << "    Chunk " << i << ": " << m_chunks[i].toString() << endl;
        oss << "]";
        return oss.str();
    }

private:
    struct Chunk {
        size_t size;
        uint8_t *start, *cur;

        inline size_t used() const {
            return cur - start;
        }

        inline size_t remainder() const {
            return size - used();
        }

        std::string toString() const {
            return formatString("0x%llx-0x%llx (size=" SIZE_T_FMT
                ", used=" SIZE_T_FMT ")", start, start+size,
                size, used());
        }
    };

    size_t m_minAllocation;
    std::vector<Chunk> m_chunks;
};
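
/* Illustrative usage sketch (editorial note, not part of the original
   file): the allocator depends on the strict LIFO release discipline
   described in the class documentation above, e.g.

       OrderedChunkAllocator alloc;
       uint32_t *indices = alloc.allocate<uint32_t>(1000);
       float *temp = alloc.allocate<float>(500);
       // ... use both buffers during one build phase ...
       alloc.release(temp);    // most recent allocation first
       alloc.release(indices); // then the older one

   Releasing out of order would rewind a chunk's 'cur' pointer past a
   still-live allocation; builds with MTS_KD_DEBUG catch such mistakes. */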

/**
 * \brief Basic vector implementation, which stores all data
 * in a list of fixed-sized blocks.
 *
 * This leads to a more conservative memory usage when the
 * final size of a (possibly very large) growing vector is
 * unknown. Also, frequent reallocations & copies are avoided.
 *
 * \author Wenzel Jakob
 */
template <typename T, size_t BlockSize> class BlockedVector {
public:
    BlockedVector() : m_pos(0) {}

    ~BlockedVector() {
        clear();
    }

    /**
     * \brief Append an element to the end
     */
    inline void push_back(const T &value) {
        size_t blockIdx = m_pos / BlockSize;
        size_t offset = m_pos % BlockSize;
        if (blockIdx == m_blocks.size())
            m_blocks.push_back(new T[BlockSize]);
        m_blocks[blockIdx][offset] = value;
        m_pos++;
    }

    /**
     * \brief Allocate a certain number of elements and
     * return a pointer to the first one.
     *
     * The implementation will ensure that they lie
     * contiguous in memory -- note that this can potentially
     * create unused elements in the previous block if a new
     * one has to be allocated.
     */
    inline T * __restrict allocate(size_t size) {
#if defined(MTS_KD_DEBUG)
        SAssert(size <= BlockSize);
#endif
        size_t blockIdx = m_pos / BlockSize;
        size_t offset = m_pos % BlockSize;
        T *result;
        if (EXPECT_TAKEN(offset + size <= BlockSize)) {
            if (blockIdx == m_blocks.size())
                m_blocks.push_back(new T[BlockSize]);
            result = m_blocks[blockIdx] + offset;
            m_pos += size;
        } else {
            ++blockIdx;
            if (blockIdx == m_blocks.size())
                m_blocks.push_back(new T[BlockSize]);
            result = m_blocks[blockIdx];
            m_pos += BlockSize - offset + size;
        }
        return result;
    }

    inline T &operator[](size_t index) {
        return *(m_blocks[index / BlockSize] +
            (index % BlockSize));
    }

    inline const T &operator[](size_t index) const {
        return *(m_blocks[index / BlockSize] +
            (index % BlockSize));
    }

    /**
     * \brief Return the currently used number of items
     */
    inline size_t size() const {
        return m_pos;
    }

    /**
     * \brief Return the number of allocated blocks
     */
    inline size_t blockCount() const {
        return m_blocks.size();
    }

    /**
     * \brief Return the total capacity
     */
    inline size_t capacity() const {
        return m_blocks.size() * BlockSize;
    }

    /**
     * \brief Resize the vector to the given size.
     *
     * Note: this implementation doesn't support
     * enlarging the vector and simply changes the
     * last item pointer.
     */
    inline void resize(size_t pos) {
#if defined(MTS_KD_DEBUG)
        SAssert(pos <= capacity());
#endif
        m_pos = pos;
    }

    /**
     * \brief Release all memory
     */
    void clear() {
        for (typename std::vector<T *>::iterator it = m_blocks.begin();
                it != m_blocks.end(); ++it)
            delete[] *it;
        m_blocks.clear();
        m_pos = 0;
    }
private:
    std::vector<T *> m_blocks;
    size_t m_pos;
};
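
/* Illustrative usage sketch (editorial note, not part of the original
   file): blocks are never reallocated, so pointers handed out by
   allocate() remain valid while the vector keeps growing.

       BlockedVector<KDNode, MTS_KD_BLOCKSIZE_KD> nodes;
       KDNode *pair = nodes.allocate(2); // two contiguous sibling nodes
       nodes.push_back(KDNode());        // later growth leaves 'pair' valid

   Since allocate() may skip the tail of a partially filled block to keep
   the returned range contiguous, size() counts reserved slots rather
   than live elements. */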

/**
 * \brief Compact storage for primitive classification
 *
 * When classifying primitives with respect to a split plane,
 * a data structure is needed to hold the ternary result of
 * this operation. This class implements a compact storage
 * (2 bits per entry) in the spirit of the std::vector<bool>
 * specialization.
 *
 * \author Wenzel Jakob
 */
class ClassificationStorage {
public:
    ClassificationStorage(size_t size = 0)
        : m_buffer(NULL), m_bufferSize(0) { }

    ~ClassificationStorage() {
        if (m_buffer)
            delete[] m_buffer;
    }

    void setPrimitiveCount(size_t size) {
        if (m_buffer)
            delete[] m_buffer;
        if (size > 0) {
            m_bufferSize = size/4 + ((size % 4) > 0 ? 1 : 0);
            m_buffer = new uint8_t[m_bufferSize];
        } else {
            m_buffer = NULL;
        }
    }

    inline void set(uint32_t index, int value) {
        uint8_t *ptr = m_buffer + (index >> 2);
        uint8_t shift = (index & 3) << 1;
        *ptr = (*ptr & ~(3 << shift)) | (value << shift);
    }

    inline int get(uint32_t index) const {
        uint8_t *ptr = m_buffer + (index >> 2);
        uint8_t shift = (index & 3) << 1;
        return (*ptr >> shift) & 3;
    }

    inline size_t size() const {
        return m_bufferSize;
    }
private:
    uint8_t *m_buffer;
    size_t m_bufferSize;
};
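
/* Illustrative usage sketch (editorial note, not part of the original
   file): four 2-bit entries are packed per byte, so classifying one
   million primitives needs only ~250 KiB.

       ClassificationStorage storage;
       storage.setPrimitiveCount(primCount);
       storage.set(42, 0);        // e.g. EBothSides -- see the
       int cls = storage.get(42); // EClassificationResult enum below

   The stored values are the EClassificationResult constants defined
   inside GenericKDTree further below. */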

/**
 * \brief Base class of all kd-trees.
 *
 * This class defines the byte layout for KD-tree nodes and
 * provides methods for querying the tree structure.
 */
template <typename AABBType> class KDTreeBase : public Object {
public:
    /// Index number format (max 2^32 prims)
    typedef uint32_t index_type;

    /// Size number format
    typedef uint32_t size_type;

    /**
     * \brief KD-tree node in 8 bytes.
     */
    struct KDNode {
        union {
            /* Inner node */
            struct {
                /* Bit layout:
                   31   : False (inner node)
                   30   : Indirection node flag
                   29-2 : Offset to the left child
                          or indirection table entry
                   1-0  : Split axis
                 */
                uint32_t combined;

                /// Split plane coordinate
                float split;
            } inner;

            /* Leaf node */
            struct {
                /* Bit layout:
                   31   : True (leaf node)
                   30-0 : Offset to the node's primitive list
                 */
                uint32_t combined;

                /// End offset of the primitive list
                uint32_t end;
            } leaf;
        };

        enum EMask {
            ETypeMask = 1 << 31,
            EIndirectionMask = 1 << 30,
            ELeafOffsetMask = ~ETypeMask,
            EInnerAxisMask = 0x3,
            EInnerOffsetMask = ~(EInnerAxisMask + EIndirectionMask),
            ERelOffsetLimit = (1<<28) - 1
        };

        /// Initialize a leaf kd-tree node
        inline void initLeafNode(unsigned int offset, unsigned int numPrims) {
            leaf.combined = ETypeMask | offset;
            leaf.end = offset + numPrims;
        }

        /**
         * Initialize an interior kd-tree node. Reports a failure if the
         * relative offset to the left child node is too large.
         */
        inline bool initInnerNode(int axis, float split, ptrdiff_t relOffset) {
            if (relOffset < 0 || relOffset > ERelOffsetLimit)
                return false;
            inner.combined = axis | ((uint32_t) relOffset << 2);
            inner.split = split;
            return true;
        }

        /**
         * \brief Initialize an interior indirection node.
         *
         * Indirections are necessary whenever the children cannot be
         * referenced using a relative pointer, which can happen when
         * they lie in different memory chunks. In this case, the node
         * stores an index into a globally shared pointer list.
         */
        inline void initIndirectionNode(int axis, float split,
                uint32_t indirectionEntry) {
            inner.combined = EIndirectionMask
                | ((uint32_t) indirectionEntry << 2)
                | axis;
            inner.split = split;
        }

        /// Is this a leaf node?
        FINLINE bool isLeaf() const {
            return leaf.combined & ETypeMask;
        }

        /// Is this an indirection node?
        FINLINE bool isIndirection() const {
            return leaf.combined & EIndirectionMask;
        }

        /// Assuming this is a leaf node, return the first primitive index
        FINLINE index_type getPrimStart() const {
            return leaf.combined & ELeafOffsetMask;
        }

        /// Assuming this is a leaf node, return the last primitive index
        FINLINE index_type getPrimEnd() const {
            return leaf.end;
        }

        /// Return the index of an indirection node
        FINLINE index_type getIndirectionIndex() const {
            return (inner.combined & EInnerOffsetMask) >> 2;
        }

        /// Return the left child (assuming that this is an interior node)
        FINLINE const KDNode * __restrict getLeft() const {
            return this +
                ((inner.combined & EInnerOffsetMask) >> 2);
        }

        /// Return the sibling of the current node
        FINLINE const KDNode * __restrict getSibling() const {
            return (const KDNode *) ((ptrdiff_t) this ^ (ptrdiff_t) 8);
        }
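
        /* Editorial note (not part of the original file): the XOR trick
           above works because sibling nodes are always allocated as one
           consecutive 8-byte pair starting at a 16-byte-aligned address
           (the '+1' shift applied to m_nodes arranges this), so flipping
           bit 3 of a node's address switches between the two siblings:

               node at base+0 -> sibling at base+8
               node at base+8 -> sibling at base+0
        */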

        /// Return the left child (assuming that this is an interior node)
        FINLINE KDNode * __restrict getLeft() {
            return this +
                ((inner.combined & EInnerOffsetMask) >> 2);
        }

        /// Return the right child (assuming that this is an interior node)
        FINLINE const KDNode * __restrict getRight() const {
            return getLeft() + 1;
        }

        /**
         * \brief Return the split plane location (assuming that this
         * is an interior node)
         */
        FINLINE float getSplit() const {
            return inner.split;
        }

        /// Return the split axis (assuming that this is an interior node)
        FINLINE int getAxis() const {
            return inner.combined & EInnerAxisMask;
        }

        /// Return a string representation
        std::string toString() const {
            std::ostringstream oss;
            if (isLeaf()) {
                oss << "KDNode[leaf, primStart=" << getPrimStart()
                    << ", primCount=" << getPrimEnd()-getPrimStart() << "]";
            } else {
                oss << "KDNode[interior, axis=" << getAxis()
                    << ", split=" << getSplit()
                    << ", leftOffset="
                    << ((inner.combined & EInnerOffsetMask) >> 2)
                    << "]";
            }
            return oss.str();
        }
    };

    BOOST_STATIC_ASSERT(sizeof(KDNode) == 8);

    /// Return the root node of the kd-tree
    inline const KDNode *getRoot() const {
        return m_nodes;
    }

    /// Return whether or not the kd-tree has been built
    inline bool isBuilt() const {
        return m_nodes != NULL;
    }

    /// Return a (slightly enlarged) axis-aligned bounding box containing all primitives
    inline const AABBType &getAABB() const { return m_aabb; }

    /// Return a tight axis-aligned bounding box containing all primitives
    inline const AABBType &getTightAABB() const { return m_tightAABB; }

    MTS_DECLARE_CLASS()
protected:
    virtual ~KDTreeBase() { }
protected:
    KDNode *m_nodes;
    AABBType m_aabb, m_tightAABB;
};

#if defined(WIN32)
/* Use strict IEEE 754 floating point computations
   for the following kd-tree building code */
MTS_NAMESPACE_END
#pragma float_control(precise, on)
MTS_NAMESPACE_BEGIN
#endif

#define KDLog(level, fmt, ...) Thread::getThread()->getLogger()->log(\
    level, KDTreeBase<AABB>::m_theClass, __FILE__, __LINE__, \
    fmt, ## __VA_ARGS__)

/**
 * \brief Optimized KD-tree acceleration data structure for
 * n-dimensional (n<=4) shapes and various queries on them.
 *
 * Note that this class mainly concerns itself with data that cover a
 * region of space. For point data, other implementations will be more
 * suitable. The most important application in Mitsuba is the fast
 * construction of high-quality trees for ray tracing. See the class
 * \ref SAHKDTree for this specialization.
 *
 * The code in this class is a fully generic kd-tree implementation, which
 * can theoretically support any kind of shape. However, subclasses still
 * need to provide the following signatures for a functional implementation:
 *
 * \code
 * /// Return the total number of primitives
 * inline size_type getPrimitiveCount() const;
 *
 * /// Return the axis-aligned bounding box of a certain primitive
 * inline AABB getAABB(index_type primIdx) const;
 *
 * /// Return the AABB of a primitive when clipped to another AABB
 * inline AABB getClippedAABB(index_type primIdx, const AABBType &aabb) const;
 * \endcode
 *
 * This class follows the "curiously recurring template" design pattern
 * so that the above functions can be inlined (in particular, no virtual
 * calls will be necessary!).
 *
 * When the kd-tree is initially built, this class optimizes a cost
 * heuristic every time a split plane has to be chosen. For ray tracing,
 * the heuristic is usually the surface area heuristic (SAH), but other
 * choices are possible as well. The tree construction heuristic must be
 * passed as a template argument, which can use a supplied AABB and
 * split candidate to compute approximate probabilities of recursing into
 * the left and right subtrees during a typical kd-tree query operation.
 * See \ref SurfaceAreaHeuristic for an example of the interface that
 * must be implemented.
 *
 * The kd-tree construction algorithm creates 'perfect split' trees as
 * outlined in the paper "On Building fast kd-Trees for Ray Tracing, and on
 * doing that in O(N log N)" by Ingo Wald and Vlastimil Havran. This works
 * even when the tree is not meant to be used for ray tracing.
 * For polygonal meshes, the involved Sutherland-Hodgman iterations can be
 * quite expensive in terms of the overall construction time. The
 * \ref setClip method can be used to deactivate perfect splits at the
 * cost of a lower-quality tree.
 *
 * Because the O(N log N) construction algorithm tends to cause many
 * incoherent memory accesses, a fast approximate technique (min-max
 * binning) is used near the top of the tree, which significantly reduces
 * cache misses. Once the input data has been narrowed down to a
 * reasonable amount, the implementation switches over to the O(N log N)
 * builder. When multiple processors are available, the build process runs
 * in parallel.
 *
 * \author Wenzel Jakob
 */
template <typename AABBType, typename TreeConstructionHeuristic, typename Derived>
class GenericKDTree : public KDTreeBase<AABBType> {
protected:
    // Some forward declarations
    struct MinMaxBins;
    struct EdgeEvent;
    struct EdgeEventOrdering;

public:
    typedef KDTreeBase<AABBType>           Parent;
    typedef typename Parent::size_type     size_type;
    typedef typename Parent::index_type    index_type;
    typedef typename Parent::KDNode        KDNode;
    typedef typename AABBType::value_type  value_type;
    typedef typename AABBType::point_type  point_type;
    typedef typename AABBType::vector_type vector_type;

    using Parent::m_nodes;
    using Parent::m_aabb;
    using Parent::m_tightAABB;
    using Parent::isBuilt;

    /**
     * \brief Create a new kd-tree instance initialized with
     * the default parameters.
     */
    GenericKDTree() : m_indices(NULL) {
        m_nodes = NULL;
        m_traversalCost = 15;
        m_queryCost = 20;
        m_emptySpaceBonus = 0.9f;
        m_clip = true;
        m_stopPrims = 6;
        m_maxBadRefines = 3;
        m_exactPrimThreshold = 65536;
        m_maxDepth = 0;
        m_retract = true;
        m_parallelBuild = true;
        m_minMaxBins = 128;
    }

    /**
     * \brief Release all memory
     */
    virtual ~GenericKDTree() {
        if (m_indices)
            delete[] m_indices;
        if (m_nodes)
            freeAligned(m_nodes-1); // undo alignment shift
    }

    /**
     * \brief Set the traversal cost used by the tree construction heuristic
     */
    inline void setTraversalCost(Float traversalCost) {
        m_traversalCost = traversalCost;
    }

    /**
     * \brief Return the traversal cost used by the tree construction heuristic
     */
    inline Float getTraversalCost() const {
        return m_traversalCost;
    }

    /**
     * \brief Set the query cost used by the tree construction heuristic
     * (This is the average cost for testing a contained shape against
     * a kd-tree search query)
     */
    inline void setQueryCost(Float queryCost) {
        m_queryCost = queryCost;
    }

    /**
     * \brief Return the query cost used by the tree construction heuristic
     * (This is the average cost for testing a contained shape against
     * a kd-tree search query)
     */
    inline Float getQueryCost() const {
        return m_queryCost;
    }

    /**
     * \brief Set the bonus factor for empty space used by the
     * tree construction heuristic
     */
    inline void setEmptySpaceBonus(Float emptySpaceBonus) {
        m_emptySpaceBonus = emptySpaceBonus;
    }

    /**
     * \brief Return the bonus factor for empty space used by the
     * tree construction heuristic
     */
    inline Float getEmptySpaceBonus() const {
        return m_emptySpaceBonus;
    }

    /**
     * \brief Set the maximum tree depth (0 = use heuristic)
     */
    inline void setMaxDepth(size_type maxDepth) {
        m_maxDepth = maxDepth;
    }

    /**
     * \brief Set the number of bins used for Min-Max binning
     */
    inline void setMinMaxBins(size_type minMaxBins) {
        m_minMaxBins = minMaxBins;
    }

    /**
     * \brief Return the number of bins used for Min-Max binning
     */
    inline size_type getMinMaxBins() const {
        return m_minMaxBins;
    }

    /**
     * \brief Return the maximum tree depth (0 = use heuristic)
     */
    inline size_type getMaxDepth() const {
        return m_maxDepth;
    }

    /**
     * \brief Specify whether or not primitive clipping will
     * be used in the tree construction.
     */
    inline void setClip(bool clip) {
        m_clip = clip;
    }

    /**
     * \brief Return whether or not primitive clipping will
     * be used in the tree construction.
     */
    inline bool getClip() const {
        return m_clip;
    }

    /**
     * \brief Specify whether or not bad splits can be "retracted".
     */
    inline void setRetract(bool retract) {
        m_retract = retract;
    }

    /**
     * \brief Return whether or not bad splits can be "retracted".
     */
    inline bool getRetract() const {
        return m_retract;
    }

    /**
     * \brief Set the number of bad refines allowed to happen
     * in succession before a leaf node will be created.
     */
    inline void setMaxBadRefines(size_type maxBadRefines) {
        m_maxBadRefines = maxBadRefines;
    }

    /**
     * \brief Return the number of bad refines allowed to happen
     * in succession before a leaf node will be created.
     */
    inline size_type getMaxBadRefines() const {
        return m_maxBadRefines;
    }

    /**
     * \brief Set the number of primitives at which recursion will
     * stop when building the tree.
     */
    inline void setStopPrims(size_type stopPrims) {
        m_stopPrims = stopPrims;
    }

    /**
     * \brief Return the number of primitives at which recursion will
     * stop when building the tree.
     */
    inline size_type getStopPrims() const {
        return m_stopPrims;
    }

    /**
     * \brief Specify whether or not tree construction
     * should run in parallel.
     */
    inline void setParallelBuild(bool parallel) {
        m_parallelBuild = parallel;
    }

    /**
     * \brief Return whether or not tree construction
     * will run in parallel.
     */
    inline bool getParallelBuild() const {
        return m_parallelBuild;
    }

    /**
     * \brief Specify the number of primitives at which the builder will
     * switch from (approximate) Min-Max binning to the accurate
     * O(n log n) optimization method.
     */
    inline void setExactPrimitiveThreshold(size_type exactPrimThreshold) {
        m_exactPrimThreshold = exactPrimThreshold;
    }

    /**
     * \brief Return the number of primitives at which the builder will
     * switch from (approximate) Min-Max binning to the accurate
     * O(n log n) optimization method.
     */
    inline size_type getExactPrimitiveThreshold() const {
        return m_exactPrimThreshold;
    }
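
    /* Illustrative configuration sketch (editorial note, not part of the
       original file): a derived class would typically adjust these
       parameters before triggering the build, e.g.

           tree->setTraversalCost(15);
           tree->setQueryCost(20);         // relative primitive test cost
           tree->setEmptySpaceBonus(0.9f); // favor cutting off empty space
           tree->setClip(false);           // faster build, lower-quality tree

       followed by a call to buildInternal() from the subclass. */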
protected:
    /**
     * \brief Build a KD-tree over the supplied geometry
     *
     * To be called by the subclass.
     */
    void buildInternal() {
        /* Some sanity checks */
        if (isBuilt())
            KDLog(EError, "The kd-tree has already been built!");
        if (m_traversalCost <= 0)
            KDLog(EError, "The traversal cost must be > 0");
        if (m_queryCost <= 0)
            KDLog(EError, "The query cost must be > 0");
        if (m_emptySpaceBonus <= 0 || m_emptySpaceBonus > 1)
            KDLog(EError, "The empty space bonus must be in (0, 1]");
        if (m_stopPrims < 0)
            KDLog(EError, "The stopping primitive count must be >= 0");
        if (m_exactPrimThreshold < 0)
            KDLog(EError, "The exact primitive threshold must be >= 0");
        if (m_minMaxBins <= 1)
            KDLog(EError, "The number of min-max bins must be >= 2");

        size_type primCount = cast()->getPrimitiveCount();
        if (primCount == 0) {
            KDLog(EWarn, "kd-tree contains no geometry!");
            // +1 shift is for alignment purposes (see KDNode::getSibling)
            m_nodes = static_cast<KDNode *>(allocAligned(sizeof(KDNode) * 2))+1;
            m_nodes[0].initLeafNode(0, 0);
            return;
        }

        if (primCount <= m_exactPrimThreshold)
            m_parallelBuild = false;

        BuildContext ctx(primCount, m_minMaxBins);

        /* Establish an ad-hoc depth cutoff value (Formula from PBRT) */
        if (m_maxDepth == 0)
            m_maxDepth = (int) (8 + 1.3f * log2i(primCount));
        m_maxDepth = std::min(m_maxDepth, (size_type) MTS_KD_MAXDEPTH);

        KDLog(EDebug, "Creating a preliminary index list (%s)",
            memString(primCount * sizeof(index_type)).c_str());

        OrderedChunkAllocator &leftAlloc = ctx.leftAlloc;
        index_type *indices = leftAlloc.allocate<index_type>(primCount);

        ref<Timer> timer = new Timer();
        AABBType &aabb = m_aabb;
        aabb.reset();
        for (index_type i=0; i<primCount; ++i) {
            aabb.expandBy(cast()->getAABB(i));
            indices[i] = i;
        }

        KDLog(EDebug, "Computed scene bounds in %i ms",
            timer->getMilliseconds());
        KDLog(EDebug, "");

        KDLog(EDebug, "kd-tree configuration:");
        KDLog(EDebug, "   Traversal cost           : %.2f", m_traversalCost);
        KDLog(EDebug, "   Query cost               : %.2f", m_queryCost);
        KDLog(EDebug, "   Empty space bonus        : %.2f", m_emptySpaceBonus);
        KDLog(EDebug, "   Max. tree depth          : %i", m_maxDepth);
        KDLog(EDebug, "   Scene bounding box (min) : %s",
            aabb.min.toString().c_str());
        KDLog(EDebug, "   Scene bounding box (max) : %s",
            aabb.max.toString().c_str());
        KDLog(EDebug, "   Min-max bins             : %i", m_minMaxBins);
        KDLog(EDebug, "   O(n log n) method        : use for <= %i primitives",
            m_exactPrimThreshold);
        KDLog(EDebug, "   Perfect splits           : %s", m_clip ? "yes" : "no");
        KDLog(EDebug, "   Retract bad splits       : %s",
            m_retract ? "yes" : "no");
        KDLog(EDebug, "   Stopping primitive count : %i", m_stopPrims);
        KDLog(EDebug, "   Build tree in parallel   : %s",
            m_parallelBuild ? "yes" : "no");
        KDLog(EDebug, "");

        size_type procCount = getProcessorCount();
        if (procCount == 1)
            m_parallelBuild = false;

        if (m_parallelBuild) {
            m_builders.resize(procCount);
            for (size_type i=0; i<procCount; ++i) {
                m_builders[i] = new TreeBuilder(i, this);
                m_builders[i]->incRef();
                m_builders[i]->start();
            }
        }

        m_indirectionLock = new Mutex();
        KDNode *prelimRoot = ctx.nodes.allocate(1);
        buildTreeMinMax(ctx, 1, prelimRoot, aabb, aabb,
            indices, primCount, true, 0);
        ctx.leftAlloc.release(indices);

        KDAssert(ctx.leftAlloc.used() == 0);
        KDAssert(ctx.rightAlloc.used() == 0);

        if (m_parallelBuild) {
            m_interface.mutex->lock();
            m_interface.done = true;
            m_interface.cond->broadcast();
            m_interface.mutex->unlock();
            for (size_type i=0; i<m_builders.size(); ++i)
                m_builders[i]->join();
        }

        KDLog(EInfo, "Finished -- took %i ms.", timer->getMilliseconds());
        KDLog(EDebug, "");

        KDLog(EDebug, "Temporary memory statistics:");
        KDLog(EDebug, "   Classification storage : %s",
            memString((ctx.classStorage.size() * (1+procCount))).c_str());
        KDLog(EDebug, "   Indirection entries    : " SIZE_T_FMT " (%s)",
            m_indirections.size(), memString(m_indirections.capacity()
            * sizeof(KDNode *)).c_str());

        KDLog(EDebug, "   Main thread:");
        ctx.printStats();
        size_t totalUsage = m_indirections.capacity()
            * sizeof(KDNode *) + ctx.size();

        /// Clean up event lists and print statistics
        ctx.leftAlloc.cleanup();
        ctx.rightAlloc.cleanup();
        for (size_type i=0; i<m_builders.size(); ++i) {
            KDLog(EDebug, "   Worker thread %i:", i+1);
            BuildContext &subCtx = m_builders[i]->getContext();
            subCtx.printStats();
            totalUsage += subCtx.size();
            subCtx.leftAlloc.cleanup();
            subCtx.rightAlloc.cleanup();
            ctx.accumulateStatisticsFrom(subCtx);
        }
        KDLog(EDebug, "   Total: %s", memString(totalUsage).c_str());

        KDLog(EDebug, "");
        timer->reset();
        KDLog(EDebug, "Optimizing memory layout ..");

        std::stack<boost::tuple<const KDNode *, KDNode *,
                const BuildContext *, AABBType> > stack;

        Float expTraversalSteps = 0;
        Float expLeavesVisited = 0;
        Float expPrimitivesIntersected = 0;
        Float heuristicCost = 0;

        size_type nodePtr = 0, indexPtr = 0;
        size_type maxPrimsInLeaf = 0;
        const size_type primBucketCount = 16;
        size_type primBuckets[primBucketCount];
        memset(primBuckets, 0, sizeof(size_type)*primBucketCount);
        m_nodeCount = ctx.innerNodeCount + ctx.leafNodeCount;
        m_indexCount = ctx.primIndexCount;

        // +1 shift is for alignment purposes (see KDNode::getSibling)
        m_nodes = static_cast<KDNode *> (allocAligned(
                sizeof(KDNode) * (m_nodeCount+1)))+1;
        m_indices = new index_type[m_indexCount];

        /* The following code rewrites all tree nodes with proper relative
         * indices. It also computes the final tree cost and some other
         * useful heuristics */
        stack.push(boost::make_tuple(prelimRoot, &m_nodes[nodePtr++],
                &ctx, aabb));
        while (!stack.empty()) {
            const KDNode *node = boost::get<0>(stack.top());
            KDNode *target = boost::get<1>(stack.top());
            const BuildContext *context = boost::get<2>(stack.top());
            AABBType aabb = boost::get<3>(stack.top());
            stack.pop();
            typename std::map<const KDNode *, index_type>::const_iterator it
                = m_interface.threadMap.find(node);
            // Check if we're switching to a subtree built by a worker thread
            if (it != m_interface.threadMap.end())
                context = &m_builders[(*it).second]->getContext();

            if (node->isLeaf()) {
                size_type primStart = node->getPrimStart(),
                          primEnd = node->getPrimEnd(),
                          primsInLeaf = primEnd-primStart;
                target->initLeafNode(indexPtr, primsInLeaf);

                Float quantity = TreeConstructionHeuristic::getQuantity(aabb),
                      weightedQuantity = quantity * primsInLeaf;
                expLeavesVisited += quantity;
                expPrimitivesIntersected += weightedQuantity;
                heuristicCost += weightedQuantity * m_queryCost;
                if (primsInLeaf < primBucketCount)
                    primBuckets[primsInLeaf]++;
                if (primsInLeaf > maxPrimsInLeaf)
                    maxPrimsInLeaf = primsInLeaf;

                const BlockedVector<index_type, MTS_KD_BLOCKSIZE_IDX> &indices
                    = context->indices;
                for (size_type idx = primStart; idx<primEnd; ++idx) {
                    KDAssert(indices[idx] >= 0 && indices[idx] < primCount);
                    m_indices[indexPtr++] = indices[idx];
                }
            } else {
                Float quantity = TreeConstructionHeuristic::getQuantity(aabb);
                expTraversalSteps += quantity;
                heuristicCost += quantity * m_traversalCost;

                const KDNode *left;
                if (EXPECT_TAKEN(!node->isIndirection()))
                    left = node->getLeft();
                else
                    left = m_indirections[node->getIndirectionIndex()];

                KDNode *children = &m_nodes[nodePtr];
                nodePtr += 2;
                int axis = node->getAxis();
                float split = node->getSplit();
                bool result = target->initInnerNode(axis, split, children - target);
                if (!result)
                    KDLog(EError, "Cannot represent relative pointer -- "
                        "too many primitives?");

                Float tmp = aabb.min[axis];
                aabb.min[axis] = split;
                stack.push(boost::make_tuple(left+1, children+1, context, aabb));
                aabb.min[axis] = tmp;
                aabb.max[axis] = split;
                stack.push(boost::make_tuple(left, children, context, aabb));
            }
        }

        KDAssert(nodePtr == ctx.innerNodeCount + ctx.leafNodeCount);
        KDAssert(indexPtr == m_indexCount);

        KDLog(EDebug, "Finished -- took %i ms.", timer->getMilliseconds());

        /* Free some more memory */
        ctx.nodes.clear();
        ctx.indices.clear();
        for (size_type i=0; i<m_builders.size(); ++i) {
            BuildContext &subCtx = m_builders[i]->getContext();
            subCtx.nodes.clear();
            subCtx.indices.clear();
        }
        m_indirectionLock = NULL;
        std::vector<KDNode *>().swap(m_indirections);

        if (m_builders.size() > 0) {
            for (size_type i=0; i<m_builders.size(); ++i)
                m_builders[i]->decRef();
            m_builders.clear();
        }

        KDLog(EDebug, "");

        Float rootQuantity = TreeConstructionHeuristic::getQuantity(aabb);
        expTraversalSteps /= rootQuantity;
        expLeavesVisited /= rootQuantity;
        expPrimitivesIntersected /= rootQuantity;
        heuristicCost /= rootQuantity;

        /* Slightly enlarge the bounding box
           (necessary e.g. when the scene is planar) */
        m_tightAABB = aabb;
        aabb.min -= (aabb.max-aabb.min) * Epsilon
            + vector_type(Epsilon);
        aabb.max += (aabb.max-aabb.min) * Epsilon
            + vector_type(Epsilon);

        KDLog(EDebug, "Structural kd-tree statistics:");
        KDLog(EDebug, "   Parallel work units         : " SIZE_T_FMT,
            m_interface.threadMap.size());
        KDLog(EDebug, "   Node storage cost           : %s",
            memString(nodePtr * sizeof(KDNode)).c_str());
        KDLog(EDebug, "   Index storage cost          : %s",
            memString(indexPtr * sizeof(index_type)).c_str());
        KDLog(EDebug, "   Inner nodes                 : %i", ctx.innerNodeCount);
        KDLog(EDebug, "   Leaf nodes                  : %i", ctx.leafNodeCount);
        KDLog(EDebug, "   Nonempty leaf nodes         : %i",
            ctx.nonemptyLeafNodeCount);
        std::ostringstream oss;
        oss << "   Leaf node histogram         : ";
        for (size_type i=0; i<primBucketCount; i++) {
            oss << i << "(" << primBuckets[i] << ") ";
            if ((i+1)%4==0 && i+1<primBucketCount) {
                KDLog(EDebug, "%s", oss.str().c_str());
                oss.str("");
                oss << "                                 ";
            }
        }
        KDLog(EDebug, "%s", oss.str().c_str());
        KDLog(EDebug, "");
        KDLog(EDebug, "Qualitative kd-tree statistics:");
        KDLog(EDebug, "   Retracted splits            : %i", ctx.retractedSplits);
        KDLog(EDebug, "   Pruned primitives           : %i", ctx.pruned);
        KDLog(EDebug, "   Largest leaf node           : %i primitives",
            maxPrimsInLeaf);
        KDLog(EDebug, "   Avg. prims/nonempty leaf    : %.2f",
            ctx.primIndexCount / (Float) ctx.nonemptyLeafNodeCount);
        KDLog(EDebug, "   Expected traversals/query   : %.2f", expTraversalSteps);
        KDLog(EDebug, "   Expected leaf visits/query  : %.2f", expLeavesVisited);
        KDLog(EDebug, "   Expected prim. visits/query : %.2f",
            expPrimitivesIntersected);
        KDLog(EDebug, "   Final cost                  : %.2f", heuristicCost);
        KDLog(EDebug, "");
    }

protected:
    /// Primitive classification during tree-construction
    enum EClassificationResult {
        /// Straddling primitive
        EBothSides = 0,
        /// Primitive is entirely on the left side of the split
        ELeftSide = 1,
        /// Primitive is entirely on the right side of the split
        ERightSide = 2,
        /// Edge events have been generated for the straddling primitive
        EBothSidesProcessed = 3
    };

    /**
     * \brief Describes the beginning or end of a primitive
     * when projected onto a certain dimension.
     */
    struct EdgeEvent {
        /// Possible event types
        enum EEventType {
            EEdgeEnd = 0,
            EEdgePlanar = 1,
            EEdgeStart = 2
        };

        /// Dummy constructor
        inline EdgeEvent() { }

        /// Create a new edge event
        inline EdgeEvent(uint16_t type, int axis, float pos, index_type index)
            : pos(pos), index(index), type(type), axis(axis) { }

        /// Return a string representation
        std::string toString() const {
            std::ostringstream oss;
            oss << "EdgeEvent[" << endl
                << "  pos = " << pos << "," << endl
                << "  index = " << index << "," << endl
                << "  type = ";
            if (type == EEdgeEnd)
                oss << "end";
            else if (type == EEdgePlanar)
                oss << "planar";
            else if (type == EEdgeStart)
                oss << "start";
            else
                oss << "unknown!";
            oss << "," << endl
                << "  axis = " << axis << endl
                << "]";
            return oss.str();
        }

        /// Plane position
        float pos;
        /// Primitive index
        index_type index;
        /// Event type: end/planar/start
        unsigned int type:2;
        /// Event axis
        unsigned int axis:2;
    };

    BOOST_STATIC_ASSERT(sizeof(EdgeEvent) == 12);
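
    /* Worked example (editorial note, not part of the original file): a
       primitive i spanning [2, 5] on the x-axis produces the two events
       EdgeEvent(EEdgeStart, 0, 2, i) and EdgeEvent(EEdgeEnd, 0, 5, i);
       a degenerate primitive with min == max instead produces a single
       EEdgePlanar event. Sorting with EdgeEventOrdering (below) groups
       events by axis, then by position, and orders ties as
       end < planar < start, which is the order expected by the
       plane-sweep cost evaluation. */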

    /// Edge event comparison functor
    struct EdgeEventOrdering : public std::binary_function<EdgeEvent, EdgeEvent, bool> {
        inline bool operator()(const EdgeEvent &a, const EdgeEvent &b) const {
            if (a.axis != b.axis)
                return a.axis < b.axis;
            if (a.pos != b.pos)
                return a.pos < b.pos;
            return a.type < b.type;
        }
    };

    /**
     * \brief Data type for split candidates computed by
     * the O(n log n) greedy optimization method.
     */
    struct SplitCandidate {
        Float cost;
        float pos;
        int axis;
        size_type numLeft, numRight;
        bool planarLeft;

        inline SplitCandidate() :
            cost(std::numeric_limits<Float>::infinity()),
            pos(0), axis(0), numLeft(0), numRight(0), planarLeft(false) {
        }

        std::string toString() const {
            std::ostringstream oss;
            oss << "SplitCandidate[" << endl
                << "  cost=" << cost << "," << endl
                << "  pos=" << pos << "," << endl
                << "  axis=" << axis << "," << endl
                << "  numLeft=" << numLeft << "," << endl
                << "  numRight=" << numRight << "," << endl
                << "  planarLeft=" << (planarLeft ? "yes" : "no") << endl
                << "]";
            return oss.str();
        }
    };

    /**
     * \brief Per-thread context used to manage memory allocations;
     * it also records some useful statistics.
     */
    struct BuildContext {
        OrderedChunkAllocator leftAlloc, rightAlloc;
        BlockedVector<KDNode, MTS_KD_BLOCKSIZE_KD> nodes;
        BlockedVector<index_type, MTS_KD_BLOCKSIZE_IDX> indices;
        ClassificationStorage classStorage;
        MinMaxBins minMaxBins;

        size_type leafNodeCount;
        size_type nonemptyLeafNodeCount;
        size_type innerNodeCount;
        size_type primIndexCount;
        size_type retractedSplits;
        size_type pruned;

        BuildContext(size_type primCount, size_type binCount)
                : classStorage(primCount), minMaxBins(binCount) {
            classStorage.setPrimitiveCount(primCount);
            leafNodeCount = 0;
            nonemptyLeafNodeCount = 0;
            innerNodeCount = 0;
            primIndexCount = 0;
            retractedSplits = 0;
            pruned = 0;
        }

        size_t size() {
            return leftAlloc.size() + rightAlloc.size()
                + nodes.capacity() * sizeof(KDNode)
                + indices.capacity() * sizeof(index_type)
                + classStorage.size();
        }

        void printStats() {
            KDLog(EDebug, "      Left events   : " SIZE_T_FMT " chunks (%s)",
                leftAlloc.getChunkCount(),
                memString(leftAlloc.size()).c_str());
            KDLog(EDebug, "      Right events  : " SIZE_T_FMT " chunks (%s)",
                rightAlloc.getChunkCount(),
                memString(rightAlloc.size()).c_str());
            KDLog(EDebug, "      kd-tree nodes : " SIZE_T_FMT " entries, "
                SIZE_T_FMT " blocks (%s)", nodes.size(), nodes.blockCount(),
                memString(nodes.capacity() * sizeof(KDNode)).c_str());
            KDLog(EDebug, "      Indices       : " SIZE_T_FMT " entries, "
                SIZE_T_FMT " blocks (%s)", indices.size(),
                indices.blockCount(), memString(indices.capacity()
                * sizeof(index_type)).c_str());
        }

        void accumulateStatisticsFrom(const BuildContext &ctx) {
            leafNodeCount += ctx.leafNodeCount;
            nonemptyLeafNodeCount += ctx.nonemptyLeafNodeCount;
            innerNodeCount += ctx.innerNodeCount;
            primIndexCount += ctx.primIndexCount;
            retractedSplits += ctx.retractedSplits;
            pruned += ctx.pruned;
        }
    };

    /**
     * \brief Communication data structure used to pass jobs to
     * kd-tree builder threads
     */
    struct BuildInterface {
        /* Communication */
        ref<Mutex> mutex;
        ref<ConditionVariable> cond, condJobTaken;
        std::map<const KDNode *, index_type> threadMap;
        bool done;

        /* Job description for building a subtree */
        int depth;
        KDNode *node;
        AABBType nodeAABB;
        EdgeEvent *eventStart, *eventEnd;
        size_type primCount;
        int badRefines;

        inline BuildInterface() {
            mutex = new Mutex();
            cond = new ConditionVariable(mutex);
            condJobTaken = new ConditionVariable(mutex);
            node = NULL;
            done = false;
        }
    };

    /**
     * \brief kd-tree builder thread
     */
    class TreeBuilder : public Thread {
    public:
        TreeBuilder(index_type id, GenericKDTree *parent)
            : Thread(formatString("bld%i", id)),
            m_id(id),
            m_parent(parent),
            m_context(parent->cast()->getPrimitiveCount(),
                      parent->getMinMaxBins()),
            m_interface(parent->m_interface) {
            setCritical(true);
        }

        ~TreeBuilder() {
            KDAssert(m_context.leftAlloc.used() == 0);
            KDAssert(m_context.rightAlloc.used() == 0);
        }

        void run() {
            OrderedChunkAllocator &leftAlloc = m_context.leftAlloc;
            while (true) {
                m_interface.mutex->lock();
                while (!m_interface.done && !m_interface.node)
                    m_interface.cond->wait();
                if (m_interface.done) {
                    m_interface.mutex->unlock();
                    break;
                }
                int depth = m_interface.depth;
                KDNode *node = m_interface.node;
                AABBType nodeAABB = m_interface.nodeAABB;
                size_t eventCount = m_interface.eventEnd - m_interface.eventStart;
                size_type primCount = m_interface.primCount;
                int badRefines = m_interface.badRefines;
                EdgeEvent *eventStart = leftAlloc.allocate<EdgeEvent>(eventCount),
                    *eventEnd = eventStart + eventCount;
                memcpy(eventStart, m_interface.eventStart,
                    eventCount * sizeof(EdgeEvent));
                m_interface.threadMap[node] = m_id;
                m_interface.node = NULL;
                m_interface.condJobTaken->signal();
                m_interface.mutex->unlock();

                std::sort(eventStart, eventEnd, EdgeEventOrdering());
                m_parent->buildTree(m_context, depth, node,
                    nodeAABB, eventStart, eventEnd, primCount, true, badRefines);
                leftAlloc.release(eventStart);
            }
        }

        inline BuildContext &getContext() {
            return m_context;
        }

    private:
        index_type m_id;
        GenericKDTree *m_parent;
        BuildContext m_context;
        BuildInterface &m_interface;
    };

    /// Cast to the derived class
    inline Derived *cast() {
        return static_cast<Derived *>(this);
    }

    /// Cast to the derived class (const version)
    inline const Derived *cast() const {
        return static_cast<const Derived *>(this);
    }
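
    /* Illustrative CRTP sketch (editorial note, not part of the original
       file): a hypothetical derived class only needs the three methods
       listed in the class documentation above, e.g.

           class MyKDTree : public GenericKDTree<AABB,
                   SurfaceAreaHeuristic, MyKDTree> {
           public:
               inline size_type getPrimitiveCount() const { return m_count; }
               inline AABB getAABB(index_type i) const { return m_aabbs[i]; }
               inline AABB getClippedAABB(index_type i, const AABB &box) const {
                   AABB result = m_aabbs[i];
                   result.clip(box); // assumes a clip helper exists
                   return result;
               }
               void build() { buildInternal(); }
           };

       cast() then resolves to MyKDTree at compile time, so these calls
       are inlined instead of dispatched virtually. */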

    /**
     * \brief Create an edge event list for a given list of primitives.
     *
     * This is necessary when passing from Min-Max binning to the more
     * accurate O(n log n) optimizer.
     */
    boost::tuple<EdgeEvent *, EdgeEvent *, size_type> createEventList(
            OrderedChunkAllocator &alloc, const AABBType &nodeAABB,
            index_type *prims, size_type primCount) {
        size_type initialSize = primCount * 2 * point_type::dim, actualPrimCount = 0;
        EdgeEvent *eventStart = alloc.allocate<EdgeEvent>(initialSize);
        EdgeEvent *eventEnd = eventStart;

        for (size_type i=0; i<primCount; ++i) {
            index_type index = prims[i];
            AABBType aabb;
            if (m_clip) {
                aabb = cast()->getClippedAABB(index, nodeAABB);
                if (!aabb.isValid() || aabb.getSurfaceArea() == 0)
                    continue;
            } else {
                aabb = cast()->getAABB(index);
            }

            for (int axis=0; axis<point_type::dim; ++axis) {
                float min = (float) aabb.min[axis], max = (float) aabb.max[axis];

                if (min == max) {
                    *eventEnd++ = EdgeEvent(EdgeEvent::EEdgePlanar, axis,
                        min, index);
                } else {
                    *eventEnd++ = EdgeEvent(EdgeEvent::EEdgeStart, axis,
                        min, index);
                    *eventEnd++ = EdgeEvent(EdgeEvent::EEdgeEnd, axis,
                        max, index);
                }
            }
            ++actualPrimCount;
        }

        size_type newSize = (size_type) (eventEnd - eventStart);
        if (newSize != initialSize)
            alloc.shrinkAllocation<EdgeEvent>(eventStart, newSize);

        return boost::make_tuple(eventStart, eventEnd, actualPrimCount);
    }

    /**
     * \brief Leaf node creation helper function
     *
     * \param ctx
     *     Thread-specific build context containing allocators etc.
     * \param node
     *     KD-tree node entry to be filled
     * \param eventStart
     *     Start pointer of an edge event list
     * \param eventEnd
     *     End pointer of an edge event list
     * \param primCount
     *     Total primitive count for the current node
     */
    void createLeaf(BuildContext &ctx, KDNode *node, EdgeEvent *eventStart,
            EdgeEvent *eventEnd, size_type primCount) {
        node->initLeafNode((size_type) ctx.indices.size(), primCount);
        if (primCount > 0) {
            size_type seenPrims = 0;
            ctx.nonemptyLeafNodeCount++;

            for (EdgeEvent *event = eventStart; event != eventEnd
                    && event->axis == 0; ++event) {
                if (event->type == EdgeEvent::EEdgeStart ||
                        event->type == EdgeEvent::EEdgePlanar) {
                    ctx.indices.push_back(event->index);
                    seenPrims++;
                }
            }
            KDAssert(seenPrims == primCount);
            ctx.primIndexCount += primCount;
        }
        ctx.leafNodeCount++;
    }

    /**
     * \brief Leaf node creation helper function
     *
     * \param ctx
     *     Thread-specific build context containing allocators etc.
     * \param node
     *     KD-tree node entry to be filled
     * \param indices
     *     Start pointer of an index list
     * \param primCount
     *     Total primitive count for the current node
     */
    void createLeaf(BuildContext &ctx, KDNode *node, size_type *indices,
            size_type primCount) {
        node->initLeafNode((size_type) ctx.indices.size(), primCount);
        if (primCount > 0) {
            ctx.nonemptyLeafNodeCount++;
            for (size_type i=0; i<primCount; ++i)
                ctx.indices.push_back(indices[i]);
            ctx.primIndexCount += primCount;
        }
        ctx.leafNodeCount++;
    }

    /**
     * \brief Leaf node creation helper function.
     *
     * Creates a unique index list by collapsing
     * a subtree with a bad cost.
     *
     * \param ctx
     *     Thread-specific build context containing allocators etc.
     * \param node
     *     KD-tree node entry to be filled
     * \param start
     *     Start pointer of the subtree indices
     */
    void createLeafAfterRetraction(BuildContext &ctx, KDNode *node, size_type start) {
        size_type indexCount = (size_type) (ctx.indices.size() - start);
        SAssert(indexCount > 0);

        OrderedChunkAllocator &alloc = ctx.leftAlloc;

        /* A temporary list is allocated to do the sorting (the indices
           are not guaranteed to be contiguous in memory) */
        index_type *tempStart = alloc.allocate<index_type>(indexCount),
            *tempEnd = tempStart + indexCount,
            *ptr = tempStart;

        for (size_type i=start, end = start + indexCount; i<end; ++i)
            *ptr++ = ctx.indices[i];

        /* Generate an index list without duplicate entries */
        std::sort(tempStart, tempEnd, std::less<index_type>());
        ptr = tempStart;

        int idx = start;
        while (ptr < tempEnd) {
            ctx.indices[idx] = *ptr++;
            while (ptr < tempEnd && *ptr == ctx.indices[idx])
                ++ptr;
            idx++;
        }

        int nSeen = idx-start;
        ctx.primIndexCount = ctx.primIndexCount - indexCount + nSeen;
        ctx.indices.resize(idx);
        alloc.release(tempStart);
        node->initLeafNode(start, nSeen);
        ctx.nonemptyLeafNodeCount++;
        ctx.leafNodeCount++;
    }

    /**
     * \brief Implements the transition from min-max binning to the
     * O(n log n) optimization.
     *
     * \param ctx
     *     Thread-specific build context containing allocators etc.
     * \param depth
     *     Current tree depth (1 == root node)
     * \param node
     *     KD-tree node entry to be filled
     * \param nodeAABB
     *     Axis-aligned bounding box of the current node
     * \param indices
     *     Index list of all triangles in the current node (for min-max binning)
     * \param primCount
     *     Total primitive count for the current node
     * \param isLeftChild
     *     Is this node the left child of its parent? This is important for
     *     memory management using the \ref OrderedChunkAllocator.
     * \param badRefines
     *     Number of "probable bad refines" further up the tree. This makes
     *     it possible to split along an initially bad-looking candidate in
     *     the hope that the cost was significantly overestimated. The
     *     counter makes sure that only a limited number of such splits can
     *     happen in succession.
     * \returns
     *     Final cost of the node
     */
    inline Float transitionToNLogN(BuildContext &ctx, unsigned int depth, KDNode *node,
            const AABBType &nodeAABB, index_type *indices,
            size_type primCount, bool isLeftChild, size_type badRefines) {
        OrderedChunkAllocator &alloc = isLeftChild
            ? ctx.leftAlloc : ctx.rightAlloc;
        boost::tuple<EdgeEvent *, EdgeEvent *, size_type> events
            = createEventList(alloc, nodeAABB, indices, primCount);
        Float cost;
        if (m_parallelBuild) {
            m_interface.mutex->lock();
            m_interface.depth = depth;
            m_interface.node = node;
            m_interface.nodeAABB = nodeAABB;
            m_interface.eventStart = boost::get<0>(events);
            m_interface.eventEnd = boost::get<1>(events);
            m_interface.primCount = boost::get<2>(events);
            m_interface.badRefines = badRefines;
            m_interface.cond->signal();

            /* Wait for a worker thread to take this job */
            while (m_interface.node)
                m_interface.condJobTaken->wait();
            m_interface.mutex->unlock();

            // Never tear down this subtree (return a cost of -infinity)
            cost = -std::numeric_limits<Float>::infinity();
        } else {
            std::sort(boost::get<0>(events), boost::get<1>(events),
                EdgeEventOrdering());

            cost = buildTree(ctx, depth, node, nodeAABB,
                boost::get<0>(events), boost::get<1>(events),
                boost::get<2>(events), isLeftChild, badRefines);
        }
        alloc.release(boost::get<0>(events));
        return cost;
    }
|
|
|
|
/**
|
|
* \brief Build helper function (min-max binning)
|
|
*
|
|
* \param ctx
|
|
* Thread-specific build context containing allocators etc.
|
|
* \param depth
|
|
* Current tree depth (1 == root node)
|
|
* \param node
|
|
* KD-tree node entry to be filled
|
|
* \param nodeAABB
|
|
* Axis-aligned bounding box of the current node
|
|
* \param tightAABB
|
|
* Tight bounding box of the contained geometry (for min-max binning)
|
|
* \param indices
|
|
* Index list of all triangles in the current node (for min-max binning)
|
|
* \param primCount
|
|
* Total primitive count for the current node
|
|
* \param isLeftChild
|
|
* Is this node the left child of its parent? This is important for
|
|
* memory management using the \ref OrderedChunkAllocator.
|
|
* \param badRefines
|
|
* Number of "probable bad refines" further up the tree. This makes
|
|
* it possible to split along an initially bad-looking candidate in
|
|
* the hope that the cost was significantly overestimated. The
|
|
* counter makes sure that only a limited number of such splits can
|
|
* happen in succession.
|
|
* \returns
|
|
* Final cost of the node
|
|
*/
|
|
Float buildTreeMinMax(BuildContext &ctx, unsigned int depth, KDNode *node,
|
|
const AABBType &nodeAABB, const AABBType &tightAABB, index_type *indices,
|
|
size_type primCount, bool isLeftChild, size_type badRefines) {
|
|
KDAssert(nodeAABB.contains(tightAABB));
|
|
|
|
Float leafCost = primCount * m_queryCost;
|
|
if (primCount <= m_stopPrims || depth >= m_maxDepth) {
|
|
createLeaf(ctx, node, indices, primCount);
|
|
return leafCost;
|
|
}
|
|
|
|
if (primCount <= m_exactPrimThreshold)
|
|
return transitionToNLogN(ctx, depth, node, nodeAABB, indices,
|
|
primCount, isLeftChild, badRefines);
|
|
|
|
/* ==================================================================== */
|
|
/* Binning */
|
|
/* ==================================================================== */
|
|
|
|
ctx.minMaxBins.setAABB(tightAABB);
|
|
ctx.minMaxBins.bin(cast(), indices, primCount);
|
|
|
|
/* ==================================================================== */
|
|
/* Split candidate search */
|
|
/* ==================================================================== */
|
|
SplitCandidate bestSplit = ctx.minMaxBins.minimizeCost(m_traversalCost,
|
|
m_queryCost);
|
|
|
|
if (bestSplit.cost == std::numeric_limits<Float>::infinity()) {
|
|
/* This is bad: we have either run out of floating point precision to
|
|
accurately represent split planes (e.g. 'tightAABB' is almost collapsed
|
|
along an axis), or the compiler made overly liberal use of floating point
|
|
optimizations, causing the two stages of the min-max binning code to
|
|
become inconsistent. The two ways to proceed at this point are to
|
|
either create a leaf (bad) or switch over to the O(n log n) greedy
|
|
optimization, which is done below */
|
|
KDLog(EWarn, "Min-max binning was unable to split %i primitives with %s "
|
|
"-- retrying with the O(n log n) greedy optimization",
|
|
primCount, tightAABB.toString().c_str());
|
|
return transitionToNLogN(ctx, depth, node, nodeAABB, indices,
|
|
primCount, isLeftChild, badRefines);
|
|
}
|
|
|
|
/* "Bad refines" heuristic from PBRT */
|
|
if (bestSplit.cost >= leafCost) {
|
|
if ((bestSplit.cost > 4 * leafCost && primCount < 16)
|
|
|| badRefines >= m_maxBadRefines) {
|
|
createLeaf(ctx, node, indices, primCount);
|
|
return leafCost;
|
|
}
|
|
++badRefines;
|
|
}

		/* ==================================================================== */
		/*                            Partitioning                              */
		/* ==================================================================== */

		boost::tuple<AABBType, index_type *, AABBType, index_type *> partition =
			ctx.minMaxBins.partition(ctx, cast(), indices, bestSplit,
			isLeftChild, m_traversalCost, m_queryCost);

		/* ==================================================================== */
		/*                              Recursion                               */
		/* ==================================================================== */

		KDNode *children = ctx.nodes.allocate(2);

		size_type nodePosBeforeSplit = (size_type) ctx.nodes.size();
		size_type indexPosBeforeSplit = (size_type) ctx.indices.size();
		size_type leafNodeCountBeforeSplit = ctx.leafNodeCount;
		size_type nonemptyLeafNodeCountBeforeSplit = ctx.nonemptyLeafNodeCount;
		size_type innerNodeCountBeforeSplit = ctx.innerNodeCount;

		if (!node->initInnerNode(bestSplit.axis, bestSplit.pos, children-node)) {
			m_indirectionLock->lock();
			size_type indirectionIdx = (size_type) m_indirections.size();
			m_indirections.push_back(children);
			/* Unable to store relative offset -- create an indirection
			   table entry */
			node->initIndirectionNode(bestSplit.axis, bestSplit.pos,
				indirectionIdx);
			m_indirectionLock->unlock();
		}
		ctx.innerNodeCount++;

		AABBType childAABB(nodeAABB);
		childAABB.max[bestSplit.axis] = bestSplit.pos;

		Float leftCost = buildTreeMinMax(ctx, depth+1, children,
			childAABB, boost::get<0>(partition), boost::get<1>(partition),
			bestSplit.numLeft, true, badRefines);

		childAABB.min[bestSplit.axis] = bestSplit.pos;
		childAABB.max[bestSplit.axis] = nodeAABB.max[bestSplit.axis];

		Float rightCost = buildTreeMinMax(ctx, depth+1, children + 1,
			childAABB, boost::get<2>(partition), boost::get<3>(partition),
			bestSplit.numRight, false, badRefines);

		TreeConstructionHeuristic tch(nodeAABB);
		std::pair<Float, Float> prob = tch(bestSplit.axis,
			bestSplit.pos - nodeAABB.min[bestSplit.axis],
			nodeAABB.max[bestSplit.axis] - bestSplit.pos);

		/* Compute the final cost given the updated cost
		   values received from the children */
		Float finalCost = m_traversalCost +
			(prob.first * leftCost + prob.second * rightCost);

		/* Release the index lists not needed by the children anymore */
		if (isLeftChild)
			ctx.rightAlloc.release(boost::get<3>(partition));
		else
			ctx.leftAlloc.release(boost::get<1>(partition));

		/* ==================================================================== */
		/*                           Final decision                             */
		/* ==================================================================== */

		if (!m_retract || finalCost < primCount * m_queryCost) {
			return finalCost;
		} else {
			/* In the end, splitting didn't help to reduce the cost.
			   Tear up everything below this node and create a leaf */
			ctx.nodes.resize(nodePosBeforeSplit);
			ctx.retractedSplits++;
			ctx.leafNodeCount = leafNodeCountBeforeSplit;
			ctx.nonemptyLeafNodeCount = nonemptyLeafNodeCountBeforeSplit;
			ctx.innerNodeCount = innerNodeCountBeforeSplit;
			createLeafAfterRetraction(ctx, node, indexPosBeforeSplit);
			return leafCost;
		}
	}
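
	/* A minimal illustrative sketch (not called by the builder itself) of
	   the cost model that both build functions minimize: the expected cost
	   of an inner node is the traversal cost plus the query cost of each
	   child, weighted by the probability that a query reaches it. The name
	   'sahCostSketch' is ours; 'probLeft'/'probRight' stand in for the pair
	   returned by TreeConstructionHeuristic for a given split position. */
	inline Float sahCostSketch(Float probLeft, Float probRight,
			size_type numLeft, size_type numRight) const {
		/* Expected cost = one traversal step, plus the probability-weighted
		   cost of intersecting the primitives referenced by each child */
		return m_traversalCost + m_queryCost
			* (probLeft * numLeft + probRight * numRight);
	}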

	/**
	 * \brief Build helper function (greedy O(n log n) optimization)
	 *
	 * \param ctx
	 *     Thread-specific build context containing allocators etc.
	 * \param depth
	 *     Current tree depth (1 == root node)
	 * \param node
	 *     KD-tree node entry to be filled
	 * \param nodeAABB
	 *     Axis-aligned bounding box of the current node
	 * \param eventStart
	 *     Pointer to the beginning of a sorted edge event list
	 * \param eventEnd
	 *     Pointer to the end of a sorted edge event list
	 * \param primCount
	 *     Total primitive count for the current node
	 * \param isLeftChild
	 *     Is this node the left child of its parent? This is important for
	 *     memory management using the \ref OrderedChunkAllocator.
	 * \param badRefines
	 *     Number of "probable bad refines" further up the tree. This makes
	 *     it possible to split along an initially bad-looking candidate in
	 *     the hope that the cost was significantly overestimated. The
	 *     counter makes sure that only a limited number of such splits can
	 *     happen in succession.
	 * \returns
	 *     Final cost of the node
	 */
	Float buildTree(BuildContext &ctx, unsigned int depth, KDNode *node,
			const AABBType &nodeAABB, EdgeEvent *eventStart, EdgeEvent *eventEnd,
			size_type primCount, bool isLeftChild, size_type badRefines) {

		Float leafCost = primCount * m_queryCost;
		if (primCount <= m_stopPrims || depth >= m_maxDepth) {
			createLeaf(ctx, node, eventStart, eventEnd, primCount);
			return leafCost;
		}

		SplitCandidate bestSplit;

		/* ==================================================================== */
		/*                        Split candidate search                        */
		/* ==================================================================== */

		/* First, find the optimal splitting plane according to the
		   tree construction heuristic. To do this in O(n), the search is
		   implemented as a sweep over the edge events */

		/* Initially, the split plane is placed left of the scene
		   and thus all geometry is on its right side */
		size_type numLeft[point_type::dim],
				  numRight[point_type::dim];

		for (int i=0; i<point_type::dim; ++i) {
			numLeft[i] = 0;
			numRight[i] = primCount;
		}

		EdgeEvent *eventsByAxis[point_type::dim];
		int eventsByAxisCtr = 1;
		eventsByAxis[0] = eventStart;
		TreeConstructionHeuristic tch(nodeAABB);
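
		/* The sweep below relies on the sort order established by
		   EdgeEventOrdering: events are ordered by axis, then position,
		   and at equal positions "end" events precede "planar" events,
		   which precede "start" events. This is why the three counting
		   loops can simply run back-to-back at each position. */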

		/* Iterate over all events on the current axis */
		for (EdgeEvent *event = eventStart; event < eventEnd; ) {
			/* Record the current position and count the number
			   and type of remaining events, which are also here.
			   Due to the sort ordering, there is no need to worry
			   about an axis change in the loops below */
			int axis = event->axis;
			float pos = event->pos;
			size_type numStart = 0, numEnd = 0, numPlanar = 0;

			/* Count "end" events */
			while (event < eventEnd && event->pos == pos
					&& event->axis == axis
					&& event->type == EdgeEvent::EEdgeEnd) {
				++numEnd; ++event;
			}

			/* Count "planar" events */
			while (event < eventEnd && event->pos == pos
					&& event->axis == axis
					&& event->type == EdgeEvent::EEdgePlanar) {
				++numPlanar; ++event;
			}

			/* Count "start" events */
			while (event < eventEnd && event->pos == pos
					&& event->axis == axis
					&& event->type == EdgeEvent::EEdgeStart) {
				++numStart; ++event;
			}

			/* Keep track of the beginning of dimensions */
			if (event < eventEnd && event->axis != axis) {
				KDAssert(eventsByAxisCtr < point_type::dim);
				eventsByAxis[eventsByAxisCtr++] = event;
			}

			/* The split plane can now be moved onto 't'. Accordingly, all planar
			   and ending primitives are removed from the right side */
			numRight[axis] -= numPlanar + numEnd;

			/* Calculate a score using the tree construction heuristic */
			if (EXPECT_TAKEN(pos > nodeAABB.min[axis] && pos < nodeAABB.max[axis])) {
				const size_type nL = numLeft[axis], nR = numRight[axis];
				const Float nLF = (Float) nL, nRF = (Float) nR;

				std::pair<Float, Float> prob = tch(axis,
					pos - nodeAABB.min[axis],
					nodeAABB.max[axis] - pos);

				if (numPlanar == 0) {
					Float cost = m_traversalCost + m_queryCost
						* (prob.first * nLF + prob.second * nRF);
					if (nL == 0 || nR == 0)
						cost *= m_emptySpaceBonus;
					if (cost < bestSplit.cost) {
						bestSplit.pos = pos;
						bestSplit.axis = axis;
						bestSplit.cost = cost;
						bestSplit.numLeft = nL;
						bestSplit.numRight = nR;
					}
				} else {
					Float costPlanarLeft = m_traversalCost
						+ m_queryCost * (prob.first * (nL+numPlanar) + prob.second * nRF);
					Float costPlanarRight = m_traversalCost
						+ m_queryCost * (prob.first * nLF + prob.second * (nR+numPlanar));

					if (nL + numPlanar == 0 || nR == 0)
						costPlanarLeft *= m_emptySpaceBonus;
					if (nL == 0 || nR + numPlanar == 0)
						costPlanarRight *= m_emptySpaceBonus;

					if (costPlanarLeft < bestSplit.cost ||
						costPlanarRight < bestSplit.cost) {
						bestSplit.pos = pos;
						bestSplit.axis = axis;

						if (costPlanarLeft < costPlanarRight) {
							bestSplit.cost = costPlanarLeft;
							bestSplit.numLeft = nL + numPlanar;
							bestSplit.numRight = nR;
							bestSplit.planarLeft = true;
						} else {
							bestSplit.cost = costPlanarRight;
							bestSplit.numLeft = nL;
							bestSplit.numRight = nR + numPlanar;
							bestSplit.planarLeft = false;
						}
					}
				}
			} else {
				#if defined(MTS_KD_DEBUG)
				if (m_clip && (pos < nodeAABB.min[axis]
						|| pos > nodeAABB.max[axis])) {
					/* When primitive clipping is active, this should never happen! */
					KDLog(EError, "Internal error: edge event is out of bounds");
				}
				#endif
			}

			/* The split plane is moved past 't'. All prims,
			   which were planar on 't', are moved to the left
			   side. Also, starting prims are now also left of
			   the split plane. */
			numLeft[axis] += numStart + numPlanar;
		}

		#if defined(MTS_KD_DEBUG)
		/* Sanity checks. Everything should now be left of the split plane */
		for (int i=0; i<point_type::dim; ++i)
			KDAssert(numRight[i] == 0 && numLeft[i] == primCount);
		for (int i=1; i<point_type::dim; ++i)
			KDAssert(eventsByAxis[i]->axis == i && (eventsByAxis[i]-1)->axis == i-1);
		#endif

		/* "Bad refines" heuristic from PBRT */
		if (bestSplit.cost >= leafCost) {
			if ((bestSplit.cost > 4 * leafCost && primCount < 16)
				|| badRefines >= m_maxBadRefines
				|| bestSplit.cost == std::numeric_limits<Float>::infinity()) {
				createLeaf(ctx, node, eventStart, eventEnd, primCount);
				return leafCost;
			}
			++badRefines;
		}

		/* ==================================================================== */
		/*                      Primitive Classification                        */
		/* ==================================================================== */

		ClassificationStorage &storage = ctx.classStorage;

		/* Initially mark all prims as being located on both sides */
		for (EdgeEvent *event = eventsByAxis[bestSplit.axis];
			 event < eventEnd && event->axis == bestSplit.axis; ++event)
			storage.set(event->index, EBothSides);

		size_type primsLeft = 0, primsRight = 0, primsBoth = primCount;
		/* Sweep over all edge events and classify the primitives wrt. the split */
		for (EdgeEvent *event = eventsByAxis[bestSplit.axis];
			 event < eventEnd && event->axis == bestSplit.axis; ++event) {
			if (event->type == EdgeEvent::EEdgeEnd && event->pos <= bestSplit.pos) {
				/* The primitive's interval ends before or on the split plane
				   -> classify to the left side */
				KDAssert(storage.get(event->index) == EBothSides);
				storage.set(event->index, ELeftSide);
				primsBoth--;
				primsLeft++;
			} else if (event->type == EdgeEvent::EEdgeStart
					&& event->pos >= bestSplit.pos) {
				/* The primitive's interval starts after or on the split plane
				   -> classify to the right side */
				KDAssert(storage.get(event->index) == EBothSides);
				storage.set(event->index, ERightSide);
				primsBoth--;
				primsRight++;
			} else if (event->type == EdgeEvent::EEdgePlanar) {
				/* If the planar primitive is not on the split plane, the
				   classification is easy. Otherwise, place it on the side with
				   the lower cost */
				KDAssert(storage.get(event->index) == EBothSides);
				if (event->pos < bestSplit.pos || (event->pos == bestSplit.pos
						&& bestSplit.planarLeft)) {
					storage.set(event->index, ELeftSide);
					primsBoth--;
					primsLeft++;
				} else if (event->pos > bestSplit.pos
						|| (event->pos == bestSplit.pos && !bestSplit.planarLeft)) {
					storage.set(event->index, ERightSide);
					primsBoth--;
					primsRight++;
				} else {
					KDAssertEx(false, "Internal error!");
				}
			}
		}

		/* Some sanity checks */
		KDAssert(primsLeft + primsRight + primsBoth == primCount);
		KDAssert(primsLeft + primsBoth == bestSplit.numLeft);
		KDAssert(primsRight + primsBoth == bestSplit.numRight);

		OrderedChunkAllocator &leftAlloc = ctx.leftAlloc,
			&rightAlloc = ctx.rightAlloc;

		EdgeEvent *leftEventsStart, *rightEventsStart;
		if (isLeftChild) {
			leftEventsStart = eventStart;
			rightEventsStart = rightAlloc.allocate<EdgeEvent>(bestSplit.numRight * 2 * point_type::dim);
		} else {
			leftEventsStart = leftAlloc.allocate<EdgeEvent>(bestSplit.numLeft * 2 * point_type::dim);
			rightEventsStart = eventStart;
		}

		EdgeEvent *leftEventsEnd = leftEventsStart, *rightEventsEnd = rightEventsStart;

		AABBType leftNodeAABB = nodeAABB, rightNodeAABB = nodeAABB;
		leftNodeAABB.max[bestSplit.axis] = bestSplit.pos;
		rightNodeAABB.min[bestSplit.axis] = bestSplit.pos;

		size_type prunedLeft = 0, prunedRight = 0;

		/* ==================================================================== */
		/*                            Partitioning                              */
		/* ==================================================================== */

		if (m_clip) {
			EdgeEvent
			  *leftEventsTempStart = leftAlloc.allocate<EdgeEvent>(primsLeft * 2 * point_type::dim),
			  *rightEventsTempStart = rightAlloc.allocate<EdgeEvent>(primsRight * 2 * point_type::dim),
			  *newEventsLeftStart = leftAlloc.allocate<EdgeEvent>(primsBoth * 2 * point_type::dim),
			  *newEventsRightStart = rightAlloc.allocate<EdgeEvent>(primsBoth * 2 * point_type::dim);

			EdgeEvent *leftEventsTempEnd = leftEventsTempStart,
				*rightEventsTempEnd = rightEventsTempStart,
				*newEventsLeftEnd = newEventsLeftStart,
				*newEventsRightEnd = newEventsRightStart;

			for (EdgeEvent *event = eventStart; event<eventEnd; ++event) {
				int classification = storage.get(event->index);

				if (classification == ELeftSide) {
					/* Left-only primitive. Move to the left list and advance */
					*leftEventsTempEnd++ = *event;
				} else if (classification == ERightSide) {
					/* Right-only primitive. Move to the right list and advance */
					*rightEventsTempEnd++ = *event;
				} else if (classification == EBothSides) {
					/* The primitive overlaps the split plane. Re-clip and
					   generate new events for each side */
					const index_type index = event->index;

					AABBType clippedLeft = cast()->getClippedAABB(index, leftNodeAABB);
					AABBType clippedRight = cast()->getClippedAABB(index, rightNodeAABB);

					KDAssert(leftNodeAABB.contains(clippedLeft));
					KDAssert(rightNodeAABB.contains(clippedRight));

					if (clippedLeft.isValid() && clippedLeft.getSurfaceArea() > 0) {
						for (int axis=0; axis<point_type::dim; ++axis) {
							float min = (float) clippedLeft.min[axis],
								  max = (float) clippedLeft.max[axis];

							if (min == max) {
								*newEventsLeftEnd++ = EdgeEvent(
									EdgeEvent::EEdgePlanar,
									axis, min, index);
							} else {
								*newEventsLeftEnd++ = EdgeEvent(
									EdgeEvent::EEdgeStart,
									axis, min, index);
								*newEventsLeftEnd++ = EdgeEvent(
									EdgeEvent::EEdgeEnd,
									axis, max, index);
							}
						}
					} else {
						prunedLeft++;
					}

					if (clippedRight.isValid() && clippedRight.getSurfaceArea() > 0) {
						for (int axis=0; axis<point_type::dim; ++axis) {
							float min = (float) clippedRight.min[axis],
								  max = (float) clippedRight.max[axis];

							if (min == max) {
								*newEventsRightEnd++ = EdgeEvent(
									EdgeEvent::EEdgePlanar,
									axis, min, index);
							} else {
								*newEventsRightEnd++ = EdgeEvent(
									EdgeEvent::EEdgeStart,
									axis, min, index);
								*newEventsRightEnd++ = EdgeEvent(
									EdgeEvent::EEdgeEnd,
									axis, max, index);
							}
						}
					} else {
						prunedRight++;
					}

					/* Mark this primitive as processed so that clipping
					   is only done once */
					storage.set(index, EBothSidesProcessed);
				}
			}

			KDAssert((size_type) (leftEventsTempEnd - leftEventsTempStart) <= primsLeft * 2 * point_type::dim);
			KDAssert((size_type) (rightEventsTempEnd - rightEventsTempStart) <= primsRight * 2 * point_type::dim);
			KDAssert((size_type) (newEventsLeftEnd - newEventsLeftStart) <= primsBoth * 2 * point_type::dim);
			KDAssert((size_type) (newEventsRightEnd - newEventsRightStart) <= primsBoth * 2 * point_type::dim);
			ctx.pruned += prunedLeft + prunedRight;

			/* Sort the events from overlapping prims */
			std::sort(newEventsLeftStart, newEventsLeftEnd, EdgeEventOrdering());
			std::sort(newEventsRightStart, newEventsRightEnd, EdgeEventOrdering());

			/* Merge the left list */
			leftEventsEnd = std::merge(leftEventsTempStart,
				leftEventsTempEnd, newEventsLeftStart, newEventsLeftEnd,
				leftEventsStart, EdgeEventOrdering());

			/* Merge the right list */
			rightEventsEnd = std::merge(rightEventsTempStart,
				rightEventsTempEnd, newEventsRightStart, newEventsRightEnd,
				rightEventsStart, EdgeEventOrdering());

			/* Release temporary memory */
			leftAlloc.release(newEventsLeftStart);
			leftAlloc.release(leftEventsTempStart);
			rightAlloc.release(newEventsRightStart);
			rightAlloc.release(rightEventsTempStart);
		} else {
			for (EdgeEvent *event = eventStart; event < eventEnd; ++event) {
				int classification = storage.get(event->index);

				if (classification == ELeftSide) {
					/* Left-only primitive. Move to the left list and advance */
					*leftEventsEnd++ = *event;
				} else if (classification == ERightSide) {
					/* Right-only primitive. Move to the right list and advance */
					*rightEventsEnd++ = *event;
				} else if (classification == EBothSides) {
					/* The primitive overlaps the split plane. Its edge events
					   must be added to both lists. */
					*leftEventsEnd++ = *event;
					*rightEventsEnd++ = *event;
				}
			}
			KDAssert((size_type) (leftEventsEnd - leftEventsStart) <= bestSplit.numLeft * 2 * point_type::dim);
			KDAssert((size_type) (rightEventsEnd - rightEventsStart) <= bestSplit.numRight * 2 * point_type::dim);
		}

		/* Shrink the edge event storage now that we know exactly how
		   many are on each side */
		ctx.leftAlloc.shrinkAllocation(leftEventsStart,
			leftEventsEnd - leftEventsStart);

		ctx.rightAlloc.shrinkAllocation(rightEventsStart,
			rightEventsEnd - rightEventsStart);

		/* ==================================================================== */
		/*                              Recursion                               */
		/* ==================================================================== */

		KDNode *children = ctx.nodes.allocate(2);

		size_type nodePosBeforeSplit = (size_type) ctx.nodes.size();
		size_type indexPosBeforeSplit = (size_type) ctx.indices.size();
		size_type leafNodeCountBeforeSplit = ctx.leafNodeCount;
		size_type nonemptyLeafNodeCountBeforeSplit = ctx.nonemptyLeafNodeCount;
		size_type innerNodeCountBeforeSplit = ctx.innerNodeCount;

		if (!node->initInnerNode(bestSplit.axis, bestSplit.pos, children-node)) {
			m_indirectionLock->lock();
			size_type indirectionIdx = (size_type) m_indirections.size();
			m_indirections.push_back(children);
			/* Unable to store relative offset -- create an indirection
			   table entry */
			node->initIndirectionNode(bestSplit.axis, bestSplit.pos,
				indirectionIdx);
			m_indirectionLock->unlock();
		}
		ctx.innerNodeCount++;

		Float leftCost = buildTree(ctx, depth+1, children,
			leftNodeAABB, leftEventsStart, leftEventsEnd,
			bestSplit.numLeft - prunedLeft, true, badRefines);

		Float rightCost = buildTree(ctx, depth+1, children+1,
			rightNodeAABB, rightEventsStart, rightEventsEnd,
			bestSplit.numRight - prunedRight, false, badRefines);

		std::pair<Float, Float> prob = tch(bestSplit.axis,
			bestSplit.pos - nodeAABB.min[bestSplit.axis],
			nodeAABB.max[bestSplit.axis] - bestSplit.pos);

		/* Compute the final cost given the updated cost
		   values received from the children */
		Float finalCost = m_traversalCost +
			(prob.first * leftCost + prob.second * rightCost);

		/* Release the index lists not needed by the children anymore */
		if (isLeftChild)
			ctx.rightAlloc.release(rightEventsStart);
		else
			ctx.leftAlloc.release(leftEventsStart);

		/* ==================================================================== */
		/*                           Final decision                             */
		/* ==================================================================== */

		if (!m_retract || finalCost < primCount * m_queryCost) {
			return finalCost;
		} else {
			/* In the end, splitting didn't help to reduce the SAH cost.
			   Tear up everything below this node and create a leaf */
			ctx.nodes.resize(nodePosBeforeSplit);
			ctx.retractedSplits++;
			ctx.leafNodeCount = leafNodeCountBeforeSplit;
			ctx.nonemptyLeafNodeCount = nonemptyLeafNodeCountBeforeSplit;
			ctx.innerNodeCount = innerNodeCountBeforeSplit;
			createLeafAfterRetraction(ctx, node, indexPosBeforeSplit);
			return leafCost;
		}
	}
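
	/* Note on complexity: each invocation of buildTree() performs O(n) work
	   on its event list -- one sweep to find the best split, one pass to
	   classify the primitives, and a sort/merge step that keeps the child
	   lists ordered without re-sorting them from scratch. Assuming
	   reasonably balanced splits, the full recursion therefore runs in
	   O(n log n) overall. */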

	/**
	 * \brief Min-max binning as described in
	 * "Highly Parallel Fast KD-tree Construction for Interactive
	 * Ray Tracing of Dynamic Scenes"
	 * by M. Shevtsov, A. Soupikov and A. Kapustin
	 */
	struct MinMaxBins {
		MinMaxBins(size_type nBins) : m_binCount(nBins) {
			m_minBins = new size_type[m_binCount*point_type::dim];
			m_maxBins = new size_type[m_binCount*point_type::dim];
		}

		~MinMaxBins() {
			delete[] m_minBins;
			delete[] m_maxBins;
		}

		/**
		 * \brief Prepare to bin for the specified bounds
		 */
		void setAABB(const AABBType &aabb) {
			m_aabb = aabb;
			m_binSize = m_aabb.getExtents() / (Float) m_binCount;
			for (int axis=0; axis<point_type::dim; ++axis)
				m_invBinSize[axis] = 1/m_binSize[axis];
		}
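
		/* For instance (hypothetical numbers, for illustration only): with
		   m_aabb.min[axis] = 0, an extent of 64 and m_binCount = 32, we get
		   m_binSize[axis] = 2 and m_invBinSize[axis] = 0.5, so a coordinate
		   x lands in bin (int) (x * 0.5), clamped to [0, 31] by \ref bin()
		   below. */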

		/**
		 * \brief Run min-max binning
		 *
		 * \param derived Derived class to be used to determine the AABB for
		 *     a given list of primitives
		 * \param indices Primitive indirection list
		 * \param primCount Specifies the length of \a indices
		 */
		void bin(const Derived *derived, index_type *indices,
				size_type primCount) {
			m_primCount = primCount;
			memset(m_minBins, 0, sizeof(size_type) * point_type::dim * m_binCount);
			memset(m_maxBins, 0, sizeof(size_type) * point_type::dim * m_binCount);
			const int64_t maxBin = m_binCount-1;

			for (size_type i=0; i<m_primCount; ++i) {
				const AABBType aabb = derived->getAABB(indices[i]);
				for (int axis=0; axis<point_type::dim; ++axis) {
					int64_t minIdx = (int64_t) ((aabb.min[axis] - m_aabb.min[axis])
						* m_invBinSize[axis]);
					int64_t maxIdx = (int64_t) ((aabb.max[axis] - m_aabb.min[axis])
						* m_invBinSize[axis]);
					m_maxBins[axis * m_binCount
						+ std::max((int64_t) 0, std::min(maxIdx, maxBin))]++;
					m_minBins[axis * m_binCount
						+ std::max((int64_t) 0, std::min(minIdx, maxBin))]++;
				}
			}
		}
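
		/* The per-bin counts turn into split statistics via running sums:
		   after scanning bins 0..i on an axis, the number of primitives
		   whose AABB begins left of bin boundary i+1 (i.e. that overlap the
		   left side) is the accumulated min-bin count, and the number whose
		   AABB ends right of it is m_primCount minus the accumulated
		   max-bin count -- exactly the 'numLeft'/'numRight' values that
		   \ref minimizeCost() maintains below. */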

		/**
		 * \brief Evaluate the tree construction heuristic at each bin boundary
		 * and return the minimizer for the given cost constants. Min-max
		 * binning uses no "empty space bonus" since it cannot create such
		 * splits.
		 */
		SplitCandidate minimizeCost(Float traversalCost, Float queryCost) {
			SplitCandidate candidate;
			int binIdx = 0, leftBin = 0;
			TreeConstructionHeuristic tch(m_aabb);

			for (int axis=0; axis<point_type::dim; ++axis) {
				vector_type extents = m_aabb.getExtents();
				size_type numLeft = 0, numRight = m_primCount;
				Float leftWidth = 0, rightWidth = extents[axis];
				const Float binSize = m_binSize[axis];

				for (int i=0; i<m_binCount-1; ++i) {
					numLeft += m_minBins[binIdx];
					numRight -= m_maxBins[binIdx];
					leftWidth += binSize;
					rightWidth -= binSize;
					std::pair<Float, Float> prob =
						tch(axis, leftWidth, rightWidth);

					Float cost = traversalCost + queryCost
						* (prob.first * numLeft + prob.second * numRight);

					if (cost < candidate.cost) {
						candidate.cost = cost;
						candidate.axis = axis;
						candidate.numLeft = numLeft;
						candidate.numRight = numRight;
						leftBin = i;
					}

					binIdx++;
				}
				binIdx++;
			}

			KDAssert(candidate.cost != std::numeric_limits<Float>::infinity());

			const int axis = candidate.axis;
			const Float min = m_aabb.min[axis];

			/* The following part may seem a bit paranoid. It ensures that the
			 * returned split plane is consistent with the floating point
			 * calculations done by the binning code in \ref bin(). Since
			 * reciprocals and various floating point roundoff errors are
			 * involved, simply setting
			 *
			 * candidate.pos = m_aabb.min[axis] + (leftBin+1) * m_binSize[axis];
			 *
			 * can potentially lead to a slightly different number of primitives
			 * being classified to the left and right if we were to check each
			 * primitive against this split position. We can't have that, however,
			 * since the partitioning code assumes that these numbers are correct.
			 * This lets it avoid doing another costly sweep, hence all the
			 * floating point madness below.
			 */
			Float invBinSize = m_invBinSize[axis];
			float split = min + (leftBin + 1) * m_binSize[axis];
			float splitNext = nextafterf(split,
				std::numeric_limits<float>::max());
			int idx = (int) ((split - min) * invBinSize);
			int idxNext = (int) ((splitNext - min) * invBinSize);

			/* The split plane should pass through the last discrete floating
			   point value that would still be classified into the left bin.
			   If this was not computed correctly, do a binary search. */
			if (!(idx == leftBin && idxNext == leftBin+1)) {
				float left = m_aabb.min[axis];
				float right = m_aabb.max[axis];
				int it = 0;
				while (true) {
					split = left + (right-left)/2;
					splitNext = nextafterf(split,
						std::numeric_limits<float>::max());
					idx = (int) ((split - min) * invBinSize);
					idxNext = (int) ((splitNext - min) * invBinSize);

					if (idx == leftBin && idxNext == leftBin+1) {
						/* Got it! */
						break;
					} else if (std::abs(idx-idxNext) > 1 || ++it > 50) {
						/* Insufficient floating point resolution
						   -> a leaf will be created. */
						candidate.cost = std::numeric_limits<Float>::infinity();
						break;
					}

					if (idx <= leftBin)
						left = split;
					else
						right = split;
				}
			}

			if (split <= m_aabb.min[axis] || split >= m_aabb.max[axis]) {
				/* Insufficient floating point resolution
				   -> a leaf will be created. */
				candidate.cost = std::numeric_limits<Float>::infinity();
			}

			candidate.pos = split;

			return candidate;
		}
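
		/* In the common case the closed-form split position maps straight
		   back to 'leftBin' and the binary search in \ref minimizeCost()
		   never runs; it exists purely as a safety net for pathological
		   floating point configurations. When even the search fails, the
		   infinite cost returned above makes \ref buildTreeMinMax() retry
		   with the exact O(n log n) builder instead. */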

		/**
		 * \brief Given a suitable split candidate, compute tight bounding
		 * boxes for the left and right subtrees and return associated
		 * primitive lists.
		 */
		boost::tuple<AABBType, index_type *, AABBType, index_type *> partition(
				BuildContext &ctx, const Derived *derived, index_type *primIndices,
				SplitCandidate &split, bool isLeftChild, Float traversalCost,
				Float queryCost) {
			const float splitPos = split.pos;
			const int axis = split.axis;
			size_type numLeft = 0, numRight = 0;
			AABBType leftBounds, rightBounds;

			index_type *leftIndices, *rightIndices;
			if (isLeftChild) {
				OrderedChunkAllocator &rightAlloc = ctx.rightAlloc;
				leftIndices = primIndices;
				rightIndices = rightAlloc.allocate<index_type>(split.numRight);
			} else {
				OrderedChunkAllocator &leftAlloc = ctx.leftAlloc;
				leftIndices = leftAlloc.allocate<index_type>(split.numLeft);
				rightIndices = primIndices;
			}

			for (size_type i=0; i<m_primCount; ++i) {
				const index_type primIndex = primIndices[i];
				const AABBType aabb = derived->getAABB(primIndex);

				if (aabb.max[axis] <= splitPos) {
					KDAssert(numLeft < split.numLeft);
					leftBounds.expandBy(aabb);
					leftIndices[numLeft++] = primIndex;
				} else if (aabb.min[axis] > splitPos) {
					KDAssert(numRight < split.numRight);
					rightBounds.expandBy(aabb);
					rightIndices[numRight++] = primIndex;
				} else {
					leftBounds.expandBy(aabb);
					rightBounds.expandBy(aabb);
					KDAssert(numLeft < split.numLeft);
					KDAssert(numRight < split.numRight);
					leftIndices[numLeft++] = primIndex;
					rightIndices[numRight++] = primIndex;
				}
			}

			leftBounds.clip(m_aabb);
			rightBounds.clip(m_aabb);

			KDAssert(numLeft == split.numLeft);
			KDAssert(numRight == split.numRight);

			/* Release the unused memory regions */
			if (isLeftChild)
				ctx.leftAlloc.shrinkAllocation(leftIndices, split.numLeft);
			else
				ctx.rightAlloc.shrinkAllocation(rightIndices, split.numRight);

			leftBounds.max[axis] = std::min(leftBounds.max[axis], (Float) splitPos);
			rightBounds.min[axis] = std::max(rightBounds.min[axis], (Float) splitPos);

			if (leftBounds.max[axis] != rightBounds.min[axis]) {
				/* There is some space between the child nodes -- move
				   the split plane onto one of the AABBs so that the
				   heuristic cost is minimized */
				TreeConstructionHeuristic tch(m_aabb);

				std::pair<Float, Float> prob1 = tch(axis,
					leftBounds.max[axis] - m_aabb.min[axis],
					m_aabb.max[axis] - leftBounds.max[axis]);
				std::pair<Float, Float> prob2 = tch(axis,
					rightBounds.min[axis] - m_aabb.min[axis],
					m_aabb.max[axis] - rightBounds.min[axis]);
				Float cost1 = traversalCost + queryCost
					* (prob1.first * numLeft + prob1.second * numRight);
				Float cost2 = traversalCost + queryCost
					* (prob2.first * numLeft + prob2.second * numRight);

				if (cost1 <= cost2) {
					split.cost = cost1;
					split.pos = leftBounds.max[axis];
				} else {
					split.cost = cost2;
					split.pos = rightBounds.min[axis];
				}

				leftBounds.max[axis] = std::min(leftBounds.max[axis],
					(Float) split.pos);
				rightBounds.min[axis] = std::max(rightBounds.min[axis],
					(Float) split.pos);
			}

			return boost::make_tuple(leftBounds, leftIndices,
				rightBounds, rightIndices);
		}
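
		/* Memory discipline: when the node being partitioned is itself a
		   left child, its index list already lives in ctx.leftAlloc and is
		   reused in place for the left subtree, so only the right list is
		   freshly allocated (and vice versa for right children). Together
		   with the matching release() calls in the build functions, this
		   preserves the strict ordering that \ref OrderedChunkAllocator
		   requires: memory is released in the same order in which it was
		   allocated. */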
	private:
		size_type *m_minBins;
		size_type *m_maxBins;
		size_type m_primCount;
		int m_binCount;
		AABBType m_aabb;
		vector_type m_binSize;
		vector_type m_invBinSize;
	};
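
	/* Typical usage, as in \ref buildTreeMinMax() above (sketch only):

	       ctx.minMaxBins.setAABB(tightAABB);              // fix the binning grid
	       ctx.minMaxBins.bin(cast(), indices, primCount); // fill min/max bins
	       SplitCandidate best = ctx.minMaxBins.minimizeCost(
	           m_traversalCost, m_queryCost);
	       if (best.cost != std::numeric_limits<Float>::infinity())
	           ... = ctx.minMaxBins.partition(ctx, cast(), indices, best,
	               isLeftChild, m_traversalCost, m_queryCost);

	   An infinite cost signals that no numerically consistent split plane
	   could be found. */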

protected:
	index_type *m_indices;
	Float m_traversalCost;
	Float m_queryCost;
	Float m_emptySpaceBonus;
	bool m_clip, m_retract, m_parallelBuild;
	size_type m_maxDepth;
	size_type m_stopPrims;
	size_type m_maxBadRefines;
	size_type m_exactPrimThreshold;
	size_type m_minMaxBins;
	size_type m_nodeCount;
	size_type m_indexCount;
	std::vector<TreeBuilder *> m_builders;
	std::vector<KDNode *> m_indirections;
	ref<Mutex> m_indirectionLock;
	BuildInterface m_interface;
};

#if defined(WIN32)
/* Revert back to fast / non-strict IEEE 754
   floating point computations */
MTS_NAMESPACE_END
#pragma float_control(precise, off)
MTS_NAMESPACE_BEGIN
#endif

template <typename AABBType>
Class *KDTreeBase<AABBType>::m_theClass
	= new Class("KDTreeBase", true, "Object");

template <typename AABBType>
const Class *KDTreeBase<AABBType>::getClass() const {
	return m_theClass;
}

MTS_NAMESPACE_END

#endif /* __KDTREE_GENERIC_H */