work on the min-max binning split method

metadata
Wenzel Jakob 2010-10-07 18:38:06 +02:00
parent 76a501b291
commit effb3ebfb0
9 changed files with 301 additions and 51 deletions

View File

@ -277,12 +277,15 @@ libcore_objects = [
'src/libcore/zstream.cpp', 'src/libcore/shvector.cpp',
'src/libcore/fresolver.cpp'
]
if sys.platform == 'darwin':
coreEnv_osx = coreEnv.Clone();
coreEnv_osx['CXXFLAGS'].remove('-fstrict-aliasing');
coreEnv_osx['CXXFLAGS'].remove('-ftree-vectorize');
coreEnv_osx['CXXFLAGS'].append('-fno-strict-aliasing');
libcore_objects += coreEnv_osx.SharedObject('src/libcore/darwin.mm')
libcore_objects += coreEnv_osx.SharedObject('src/libcore/platform_darwin.mm')
elif sys.platform == 'win32':
libcore_objects += coreEnv_osx.SharedObject('src/libcore/platform_win32.cpp')
libcore = coreEnv.SharedLibrary('src/libcore/mitsuba-core', libcore_objects);

View File

@ -31,6 +31,8 @@ Some minor adjustments may have to be made to this file based on your configurat
You may also set adjust certain compilation flags here:
\begin{description}
\item[\texttt{MTS\_DEBUG}] Enable assertions etc. Usually a good idea.
\item[\texttt{MTS\_KD\_DEBUG}] Enable additional checks in the kd-Tree. This
is quite slow and mainly useful to track down bugs when they are suspected.
\item[\texttt{SINGLE\_PRECISION}] Do all computation in single precision. This is usually sufficient.
\item[\texttt{DOUBLE\_PRECISION}] Do all computation in double precision. Incompatible with
\texttt{MTS\_SSE}, \texttt{MTS\_HAS\_COHERENT\_RT}, and \texttt{MTS\_DEBUG\_FP}.

View File

@ -168,6 +168,8 @@ inline bool ubi_isnan(double f) {
int classification = ::_fpclass(f);
return classification == _FPCLASS_QNAN || classification == _FPCLASS_SNAN;
}
extern float nextafterf(float x, float y);
#else
inline bool ubi_isnan(float f) {
return std::fpclassify(f) == FP_NAN;

View File

@ -21,11 +21,9 @@
#include <mitsuba/core/timer.h>
#include <boost/static_assert.hpp>
#include <boost/tuple/tuple.hpp>
#define MTS_KD_MAX_DEPTH 48 ///< Compile-time KD-tree depth limit
#define MTS_KD_STATISTICS 1 ///< Collect statistics during building/traversal
#define MTS_KD_DEBUG ///< Enable serious KD-tree debugging (slow)
#define MTS_KD_MINMAX_BINS 32
MTS_NAMESPACE_BEGIN
@ -38,13 +36,23 @@ MTS_NAMESPACE_BEGIN
*/
template <typename ShapeType> class GenericKDTree : public Object {
public:
/// Data type of split candidates computed by the SAH optimization routines
typedef boost::tuple<Float, Float, int> split_candidate;
/// Index number format (max 2^32 prims)
typedef uint32_t index_type;
/// Size number format
typedef uint32_t size_type;
/**
* \brief Data type for split candidates computed by
* the SAH optimization routines.
* */
struct SplitCandidate {
Float cost;
float pos;
int axis;
int numLeft, numRight;
bool planarLeft;
};
GenericKDTree() {
m_traversalCost = 15;
m_intersectionCost = 20;
@ -55,15 +63,15 @@ public:
* \brief Build a KD-tree over the supplied axis-aligned bounding
* boxes.
*/
template <typename AABBFunctor>
void build(const AABBFunctor &aabbFunctor) {
template <typename Functor>
void build(const Functor &functor) {
/* Establish an ad-hoc depth cutoff value (Formula from PBRT) */
m_maxDepth = std::min((int) (8 + 1.3f * log2i(aabbFunctor.getPrimitiveCount())),
m_maxDepth = std::min((int) (8 + 1.3f * log2i(functor.getPrimitiveCount())),
MTS_KD_MAX_DEPTH);
m_aabb.reset();
for (size_type i=0; i<aabbFunctor.getPrimitiveCount(); ++i)
m_aabb.expandBy(aabbFunctor(i));
for (size_type i=0; i<functor.getPrimitiveCount(); ++i)
m_aabb.expandBy(functor.getAABB(i));
Log(EDebug, "kd-tree configuration:");
Log(EDebug, " Traversal cost : %.2f", m_traversalCost);
@ -74,23 +82,31 @@ public:
Log(EDebug, " Scene bounding box (max) : %s", m_aabb.max.toString().c_str());
Log(EInfo, "Constructing a SAH kd-tree (%i primitives) ..",
aabbFunctor.getPrimitiveCount());
functor.getPrimitiveCount());
ref<Timer> timer = new Timer();
Log(EInfo, "Performing initial clustering ..");
MinMaxBins<MTS_KD_MINMAX_BINS> bins(m_aabb);
bins.bin(aabbFunctor);
split_candidate candidate = bins.maximizeSAH(m_traversalCost,
bins.bin(functor);
SplitCandidate candidate = bins.maximizeSAH(m_traversalCost,
m_intersectionCost, m_emptySpaceBonus);
cout << "originalCost = " << aabbFunctor.getPrimitiveCount() * m_intersectionCost << endl;
AABB leftAABB, rightAABB;
bins.partition(functor, candidate, leftAABB, rightAABB,
m_traversalCost, m_intersectionCost);
cout << "originalCost = " << functor.getPrimitiveCount() * m_intersectionCost << endl;
cout << "bestCost = "
<< boost::get<0>(candidate)
<< candidate.cost
<< ", bestPos = "
<< boost::get<1>(candidate)
<< candidate.pos
<< ", bestAxis = "
<< boost::get<2>(candidate)
<< candidate.axis
<< ", numLeft = "
<< candidate.numLeft
<< ", numRight = "
<< candidate.numRight
<< endl;
Log(EInfo, "Initial clustering took %i ms", timer->getMilliseconds());
}
@ -111,23 +127,24 @@ protected:
inline EdgeEvent() { }
/// Create a new edge event
inline EdgeEvent(uint8_t type, Float t, index_type index)
: t(t), index(index), type(type) {
}
inline EdgeEvent(int type, float t, index_type index)
: t(t), index(index), type(type) { }
/* Plane position */
Float t;
/* Primitive index */
/// Plane position
float t;
/// Primitive index
index_type index;
/* Event type: end/planar/start */
uint8_t type;
/// Event type: end/planar/start
int type;
};
BOOST_STATIC_ASSERT(sizeof(EdgeEvent) == 12);
typedef std::vector<EdgeEvent> edge_event_vector;
typedef edge_event_vector edge_event_vector3[3];
/// Edge event comparison functor
struct EdgeEventSorter : public std::binary_function<EdgeEvent, EdgeEvent, bool> {
struct EdgeEventOrdering : public std::binary_function<EdgeEvent, EdgeEvent, bool> {
inline bool operator()(const EdgeEvent &a, const EdgeEvent &b) const {
if (a.t != b.t)
return a.t < b.t;
@ -215,20 +232,18 @@ protected:
}
/// Return the split plane location (assuming that this is an interior node)
inline float getSplit() const {
FINLINE float getSplit() const {
return inner.split;
}
/// Return the split axis (assuming that this is an interior node)
inline int getAxis() const {
FINLINE int getAxis() const {
return inner.combined & EInnerAxisMask;
}
};
BOOST_STATIC_ASSERT(sizeof(KDNode) == 8);
/**
* \brief Min-max binning as described in
* "Highly Parallel Fast KD-tree Construction for Interactive
@ -249,21 +264,26 @@ protected:
* a given list of primitives
* \param count Number of primitives
*/
template <typename AABBFunctor> void bin(
const AABBFunctor &functor) {
template <typename Functor> void bin(
const Functor &functor) {
m_primCount = functor.getPrimitiveCount();
memset(m_minBins, 0, sizeof(size_type) * 3 * BinCount);
memset(m_maxBins, 0, sizeof(size_type) * 3 * BinCount);
Vector invBinSize;
for (int axis=0; axis<3; ++axis)
invBinSize[axis] = 1/m_binSize[axis];
for (size_type i=0; i<m_primCount; ++i) {
const AABB aabb = functor(i);
const AABB aabb = functor.getAABB(i);
for (int axis=0; axis<3; ++axis) {
int minIdx = (aabb.min[axis] - aabb.min[axis]) * invBinSize[axis];
int maxIdx = (aabb.max[axis] - aabb.min[axis]) * invBinSize[axis];
m_maxBins[axis * BinCount + std::min(std::max(maxIdx, 0), BinCount-1)]++;
m_minBins[axis * BinCount + std::min(std::max(minIdx, 0), BinCount-1)]++;
int minIdx = (int) ((aabb.min[axis] - m_aabb.min[axis])
* invBinSize[axis]);
int maxIdx = (int) ((aabb.max[axis] - m_aabb.min[axis])
* invBinSize[axis]);
Assert(minIdx >= 0 && maxIdx >= 0);
m_maxBins[axis * BinCount + std::min(maxIdx, BinCount-1)]++;
m_minBins[axis * BinCount + std::min(minIdx, BinCount-1)]++;
}
}
}
@ -274,6 +294,7 @@ protected:
* processors in a SMP machine.
*/
MinMaxBins& operator+=(const MinMaxBins &otherBins) {
m_primCount += otherBins.primCount;
for (int i=0; i<3*BinCount; ++i) {
m_minBins[i] += otherBins.m_minBins[i];
m_maxBins[i] += otherBins.m_maxBins[i];
@ -285,11 +306,13 @@ protected:
* \brief Evaluate the surface area heuristic at each bin boundary
* and return the maximizer for the given cost constants.
*/
split_candidate maximizeSAH(Float traversalCost,
SplitCandidate maximizeSAH(Float traversalCost,
Float intersectionCost, Float emptySpaceBonus) {
Float bestCost = std::numeric_limits<Float>::infinity(), bestPos = 0;
SplitCandidate candidate;
Float normalization = 2.0f / m_aabb.getSurfaceArea();
int binIdx = 0, bestAxis = -1;
int binIdx = 0, leftBin = 0;
candidate.planarLeft = false;
candidate.cost = std::numeric_limits<Float>::infinity();
for (int axis=0; axis<3; ++axis) {
Vector extents = m_aabb.getExtents();
@ -317,20 +340,152 @@ protected:
if (numLeft == 0 || numRight == 0)
cost *= emptySpaceBonus;
if (cost < bestCost) {
bestCost = cost;
bestAxis = axis;
bestPos = m_aabb.min[axis] + leftWidth;
if (cost < candidate.cost) {
candidate.cost = cost;
candidate.axis = axis;
candidate.numLeft = numLeft;
candidate.numRight = numRight;
leftBin = i;
}
binIdx++;
}
binIdx++;
}
Assert(candidate.cost != std::numeric_limits<Float>::infinity());
Assert(bestCost != std::numeric_limits<Float>::infinity());
const int axis = candidate.axis;
const float min = m_aabb.min[axis];
return boost::make_tuple(bestCost, bestPos, bestAxis);
/* This part is ensures that the returned split plane is consistent
* with the floating point calculations done by the binning code
* in \ref bin(). Since reciprocals and various floating point
* roundoff errors are involved, simply setting
*
* candidate.pos = m_aabb.min[axis] + (leftBin+1) * m_binSize[axis];
*
* will potentially lead to a different number primitives being
* classified to the left and right compared to the numbers stored
* in candidate.numLeft and candidate.numRight. We can't have that,
* however, since the partitioning code assumes that these
* numbers are correct. This removes the need for an extra sweep
* through the whole primitive list.
*/
float invBinSize = 1/m_binSize[axis],
split = min + (leftBin + 1) * m_binSize[axis];
float splitNext = nextafterf(split,
std::numeric_limits<float>::max());
int idx = (int) ((split - min) * invBinSize);
int idxNext = (int) ((splitNext - min) * invBinSize);
/**
* The split plane should be along the last discrete floating
* floating position, which would still be classified into
* the left bin.
*/
if (!(idx == leftBin && idxNext > leftBin)) {
float direction;
/* First, determine the search direction */
if (idx > leftBin)
direction = -std::numeric_limits<float>::max();
else
direction = std::numeric_limits<float>::max();
while (true) {
split = nextafterf(split, direction);
splitNext = nextafterf(split,
std::numeric_limits<float>::max());
idx = (int) ((split - min) * invBinSize);
idxNext = (int) ((splitNext - min) * invBinSize);
if (idx == leftBin && idxNext > leftBin)
break;
}
}
candidate.pos = split;
return candidate;
}
/**
* \brief Given a suitable split candiate, compute tight bounding
* boxes for the left and right subtrees and return associated
* primitive lists.
*/
template <typename Functor> void partition(
const Functor &functor, SplitCandidate &split,
AABB &left, AABB &right, Float traversalCost,
Float intersectionCost) {
const float splitPos = split.pos;
const int axis = split.axis;
int numLeft = 0, numRight = 0;
Assert(m_primCount == functor.getPrimitiveCount());
for (size_type i=0; i<m_primCount; ++i) {
const AABB aabb = functor.getAABB(i);
if (aabb.max[axis] <= splitPos) {
left.expandBy(aabb);
numLeft++;
} else if (aabb.min[axis] >= splitPos) {
right.expandBy(aabb);
numRight++;
} else {
left.expandBy(aabb);
right.expandBy(aabb);
numLeft++;
numRight++;
}
}
Assert(numLeft == split.numLeft);
Assert(numRight == split.numRight);
left.max[axis] = std::min(left.max[axis], (Float) splitPos);
right.min[axis] = std::max(right.min[axis], (Float) splitPos);
if (left.max[axis] != right.min[axis]) {
/* There is some space between the child nodes -- move the
split plane onto one of the AABBs to minimize the SAH */
Float normalization = 2.0f / m_aabb.getSurfaceArea();
Vector extents = m_aabb.getExtents();
cout << "There is some space (split=" << splitPos << ") :" << endl;
cout << m_aabb.toString() << endl;
cout << left.toString() << endl;
cout << right.toString() << endl;
extents[axis] = left.max[axis] - m_aabb.min[axis];
Float pLeft1 = normalization * (extents.x*extents.y
+ extents.x*extents.z + extents.y*extents.z);
extents[axis] = m_aabb.max[axis] - left.max[axis];
Float pRight1 = normalization * (extents.x*extents.y
+ extents.x*extents.z + extents.y*extents.z);
Float sahCost1 = traversalCost + intersectionCost
* (pLeft1 * numLeft + pRight1 * numRight);
extents[axis] = right.min[axis] - m_aabb.min[axis];
Float pLeft2 = normalization * (extents.x*extents.y
+ extents.x*extents.z + extents.y*extents.z);
extents[axis] = m_aabb.max[axis] - right.min[axis];
Float pRight2 = normalization * (extents.x*extents.y
+ extents.x*extents.z + extents.y*extents.z);
Float sahCost2 = traversalCost + intersectionCost
* (pLeft2 * numLeft + pRight2 * numRight);
if (sahCost1 <= sahCost2) {
cout << "Adjusting cost from " << split.cost << " to " << sahCost1 << endl;
split.cost = sahCost1;
split.pos = left.max[axis];
} else {
cout << "Adjusting cost from " << split.cost << " to " << sahCost2 << endl;
split.cost = sahCost2;
split.pos = right.min[axis];
}
}
}
private:
size_type m_minBins[3*BinCount], m_maxBins[3*BinCount];

View File

@ -172,8 +172,6 @@ int ubi_main(int argc, char **argv) {
Thread::getThread()->getLogger()->setLogLevel(EInfo);
FileResolver *fileResolver = Thread::getThread()->getFileResolver();
#if !defined(WIN32)
/* Correct number parsing on some locales (e.g. ru_RU) */
setlocale(LC_NUMERIC, "C");

View File

@ -0,0 +1,77 @@
/* s_nextafterf.c -- float version of s_nextafter.c.
* Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com.
*/
/*
* ====================================================
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
*
* Developed at SunPro, a Sun Microsystems, Inc. business.
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
* is preserved.
* ====================================================
*/
#include <mitsuba/mitsuba.h>
typedef union {
float value;
uint32_t word;
} ieee_float_shape_type;
#define GET_FLOAT_WORD(i,d) \
do { \
ieee_float_shape_type gf_u; \
gf_u.value = (d); \
(i) = gf_u.word; \
} while (0)
#define SET_FLOAT_WORD(d,i) \
do { \
ieee_float_shape_type sf_u; \
sf_u.word = (i); \
(d) = sf_u.value; \
} while (0)
float nextafterf(float x, float y) {
int32_t hx, hy, ix, iy;
GET_FLOAT_WORD(hx, x);
GET_FLOAT_WORD(hy, y);
ix = hx & 0x7fffffff; /* |x| */
iy = hy & 0x7fffffff; /* |y| */
/* x is nan or y is nan? */
if ((ix > 0x7f800000) || (iy > 0x7f800000))
return x + y;
if (x == y)
return y;
if (ix == 0) { /* x == 0? */
SET_FLOAT_WORD(x, (hy & 0x80000000) | 1);
return x;
}
if (hx >= 0) { /* x > 0 */
if (hx > hy) { /* x > y: x -= ulp */
hx -= 1;
} else { /* x < y: x += ulp */
hx += 1;
}
} else { /* x < 0 */
if (hy >= 0 || hx > hy) { /* x < y: x -= ulp */
hx -= 1;
} else { /* x > y: x += ulp */
hx += 1;
}
}
hy = hx & 0x7f800000;
if (hy >= 0x7f800000) {
x = x + x; /* overflow */
return x; /* overflow */
}
SET_FLOAT_WORD(x, hx);
return x;
}

View File

@ -136,8 +136,17 @@ AABB Triangle::getClippedAABB(const Vertex *buffer, const AABB &aabb) const {
Point vertices1[MAX_VERTS], vertices2[MAX_VERTS];
int nVertices = 3;
for (int i=0; i<3; ++i)
#if defined(MTS_DEBUG_KD)
AABB origAABB;
#endif
for (int i=0; i<3; ++i) {
vertices1[i] = buffer[idx[i]].p;
#if defined(MTS_DEBUG_KD)
origAABB.expandBy(vertices1[i]);
#endif
}
for (int axis=0; axis<3; ++axis) {
nVertices = sutherlandHodgman(vertices1, nVertices, vertices2, axis, aabb.min[axis], true);
@ -156,7 +165,11 @@ AABB Triangle::getClippedAABB(const Vertex *buffer, const AABB &aabb) const {
result.clip(aabb);
#endif
#if defined(MTS_DEBUG_KD)
SAssert(aabb.contains(result));
SAssert(origAABB.contains(result));
#endif
return result;
}

View File

@ -95,7 +95,7 @@ public:
return m_primitiveCount;
}
inline AABB operator()(uint32_t idx) const {
inline AABB getAABB(uint32_t idx) const {
return m_triangles[idx].getAABB(m_vertexBuffer);
}