From effb3ebfb079e085bf3e3b725ffa8c1e03403814 Mon Sep 17 00:00:00 2001 From: Wenzel Jakob Date: Thu, 7 Oct 2010 18:38:06 +0200 Subject: [PATCH] work on the min-max binning split method --- SConstruct | 5 +- doc/compiling.tex | 2 + include/mitsuba/core/stl.h | 2 + include/mitsuba/render/gkdtree.h | 247 ++++++++++++++---- src/converter/mtsimport.cpp | 2 - src/libcore/{darwin.mm => platform_darwin.mm} | 0 src/libcore/platform_win32.cpp | 77 ++++++ src/libcore/triangle.cpp | 15 +- src/tests/test_kd.cpp | 2 +- 9 files changed, 301 insertions(+), 51 deletions(-) rename src/libcore/{darwin.mm => platform_darwin.mm} (100%) create mode 100644 src/libcore/platform_win32.cpp diff --git a/SConstruct b/SConstruct index 39788005..ed9c0cbc 100644 --- a/SConstruct +++ b/SConstruct @@ -277,12 +277,15 @@ libcore_objects = [ 'src/libcore/zstream.cpp', 'src/libcore/shvector.cpp', 'src/libcore/fresolver.cpp' ] + if sys.platform == 'darwin': coreEnv_osx = coreEnv.Clone(); coreEnv_osx['CXXFLAGS'].remove('-fstrict-aliasing'); coreEnv_osx['CXXFLAGS'].remove('-ftree-vectorize'); coreEnv_osx['CXXFLAGS'].append('-fno-strict-aliasing'); - libcore_objects += coreEnv_osx.SharedObject('src/libcore/darwin.mm') + libcore_objects += coreEnv_osx.SharedObject('src/libcore/platform_darwin.mm') +elif sys.platform == 'win32': + libcore_objects += coreEnv_osx.SharedObject('src/libcore/platform_win32.cpp') libcore = coreEnv.SharedLibrary('src/libcore/mitsuba-core', libcore_objects); diff --git a/doc/compiling.tex b/doc/compiling.tex index 11b4d4e1..cad46220 100644 --- a/doc/compiling.tex +++ b/doc/compiling.tex @@ -31,6 +31,8 @@ Some minor adjustments may have to be made to this file based on your configurat You may also set adjust certain compilation flags here: \begin{description} \item[\texttt{MTS\_DEBUG}] Enable assertions etc. Usually a good idea. +\item[\texttt{MTS\_KD\_DEBUG}] Enable additional checks in the kd-Tree. This +is quite slow and mainly useful to track down bugs when they are suspected. \item[\texttt{SINGLE\_PRECISION}] Do all computation in single precision. This is usually sufficient. \item[\texttt{DOUBLE\_PRECISION}] Do all computation in double precision. Incompatible with \texttt{MTS\_SSE}, \texttt{MTS\_HAS\_COHERENT\_RT}, and \texttt{MTS\_DEBUG\_FP}. diff --git a/include/mitsuba/core/stl.h b/include/mitsuba/core/stl.h index 2278d982..39e2a574 100644 --- a/include/mitsuba/core/stl.h +++ b/include/mitsuba/core/stl.h @@ -168,6 +168,8 @@ inline bool ubi_isnan(double f) { int classification = ::_fpclass(f); return classification == _FPCLASS_QNAN || classification == _FPCLASS_SNAN; } + +extern float nextafterf(float x, float y); #else inline bool ubi_isnan(float f) { return std::fpclassify(f) == FP_NAN; diff --git a/include/mitsuba/render/gkdtree.h b/include/mitsuba/render/gkdtree.h index 6c2d0a4f..149c333e 100644 --- a/include/mitsuba/render/gkdtree.h +++ b/include/mitsuba/render/gkdtree.h @@ -21,11 +21,9 @@ #include #include -#include #define MTS_KD_MAX_DEPTH 48 ///< Compile-time KD-tree depth limit #define MTS_KD_STATISTICS 1 ///< Collect statistics during building/traversal -#define MTS_KD_DEBUG ///< Enable serious KD-tree debugging (slow) #define MTS_KD_MINMAX_BINS 32 MTS_NAMESPACE_BEGIN @@ -38,13 +36,23 @@ MTS_NAMESPACE_BEGIN */ template class GenericKDTree : public Object { public: - /// Data type of split candidates computed by the SAH optimization routines - typedef boost::tuple split_candidate; /// Index number format (max 2^32 prims) typedef uint32_t index_type; /// Size number format typedef uint32_t size_type; + /** + * \brief Data type for split candidates computed by + * the SAH optimization routines. + * */ + struct SplitCandidate { + Float cost; + float pos; + int axis; + int numLeft, numRight; + bool planarLeft; + }; + GenericKDTree() { m_traversalCost = 15; m_intersectionCost = 20; @@ -55,15 +63,15 @@ public: * \brief Build a KD-tree over the supplied axis-aligned bounding * boxes. */ - template - void build(const AABBFunctor &aabbFunctor) { + template + void build(const Functor &functor) { /* Establish an ad-hoc depth cutoff value (Formula from PBRT) */ - m_maxDepth = std::min((int) (8 + 1.3f * log2i(aabbFunctor.getPrimitiveCount())), + m_maxDepth = std::min((int) (8 + 1.3f * log2i(functor.getPrimitiveCount())), MTS_KD_MAX_DEPTH); m_aabb.reset(); - for (size_type i=0; i timer = new Timer(); Log(EInfo, "Performing initial clustering .."); MinMaxBins bins(m_aabb); - bins.bin(aabbFunctor); - split_candidate candidate = bins.maximizeSAH(m_traversalCost, + bins.bin(functor); + SplitCandidate candidate = bins.maximizeSAH(m_traversalCost, m_intersectionCost, m_emptySpaceBonus); - - cout << "originalCost = " << aabbFunctor.getPrimitiveCount() * m_intersectionCost << endl; + + AABB leftAABB, rightAABB; + bins.partition(functor, candidate, leftAABB, rightAABB, + m_traversalCost, m_intersectionCost); + + cout << "originalCost = " << functor.getPrimitiveCount() * m_intersectionCost << endl; cout << "bestCost = " - << boost::get<0>(candidate) + << candidate.cost << ", bestPos = " - << boost::get<1>(candidate) + << candidate.pos << ", bestAxis = " - << boost::get<2>(candidate) + << candidate.axis + << ", numLeft = " + << candidate.numLeft + << ", numRight = " + << candidate.numRight << endl; Log(EInfo, "Initial clustering took %i ms", timer->getMilliseconds()); } @@ -111,23 +127,24 @@ protected: inline EdgeEvent() { } /// Create a new edge event - inline EdgeEvent(uint8_t type, Float t, index_type index) - : t(t), index(index), type(type) { - } + inline EdgeEvent(int type, float t, index_type index) + : t(t), index(index), type(type) { } - /* Plane position */ - Float t; - /* Primitive index */ + /// Plane position + float t; + /// Primitive index index_type index; - /* Event type: end/planar/start */ - uint8_t type; + /// Event type: end/planar/start + int type; }; + BOOST_STATIC_ASSERT(sizeof(EdgeEvent) == 12); + typedef std::vector edge_event_vector; typedef edge_event_vector edge_event_vector3[3]; /// Edge event comparison functor - struct EdgeEventSorter : public std::binary_function { + struct EdgeEventOrdering : public std::binary_function { inline bool operator()(const EdgeEvent &a, const EdgeEvent &b) const { if (a.t != b.t) return a.t < b.t; @@ -215,20 +232,18 @@ protected: } /// Return the split plane location (assuming that this is an interior node) - inline float getSplit() const { + FINLINE float getSplit() const { return inner.split; } /// Return the split axis (assuming that this is an interior node) - inline int getAxis() const { + FINLINE int getAxis() const { return inner.combined & EInnerAxisMask; } }; BOOST_STATIC_ASSERT(sizeof(KDNode) == 8); - - /** * \brief Min-max binning as described in * "Highly Parallel Fast KD-tree Construction for Interactive @@ -249,21 +264,26 @@ protected: * a given list of primitives * \param count Number of primitives */ - template void bin( - const AABBFunctor &functor) { + template void bin( + const Functor &functor) { m_primCount = functor.getPrimitiveCount(); memset(m_minBins, 0, sizeof(size_type) * 3 * BinCount); memset(m_maxBins, 0, sizeof(size_type) * 3 * BinCount); Vector invBinSize; + for (int axis=0; axis<3; ++axis) invBinSize[axis] = 1/m_binSize[axis]; + for (size_type i=0; i= 0 && maxIdx >= 0); + m_maxBins[axis * BinCount + std::min(maxIdx, BinCount-1)]++; + m_minBins[axis * BinCount + std::min(minIdx, BinCount-1)]++; } } } @@ -274,6 +294,7 @@ protected: * processors in a SMP machine. */ MinMaxBins& operator+=(const MinMaxBins &otherBins) { + m_primCount += otherBins.primCount; for (int i=0; i<3*BinCount; ++i) { m_minBins[i] += otherBins.m_minBins[i]; m_maxBins[i] += otherBins.m_maxBins[i]; @@ -285,11 +306,13 @@ protected: * \brief Evaluate the surface area heuristic at each bin boundary * and return the maximizer for the given cost constants. */ - split_candidate maximizeSAH(Float traversalCost, + SplitCandidate maximizeSAH(Float traversalCost, Float intersectionCost, Float emptySpaceBonus) { - Float bestCost = std::numeric_limits::infinity(), bestPos = 0; + SplitCandidate candidate; Float normalization = 2.0f / m_aabb.getSurfaceArea(); - int binIdx = 0, bestAxis = -1; + int binIdx = 0, leftBin = 0; + candidate.planarLeft = false; + candidate.cost = std::numeric_limits::infinity(); for (int axis=0; axis<3; ++axis) { Vector extents = m_aabb.getExtents(); @@ -317,20 +340,152 @@ protected: if (numLeft == 0 || numRight == 0) cost *= emptySpaceBonus; - if (cost < bestCost) { - bestCost = cost; - bestAxis = axis; - bestPos = m_aabb.min[axis] + leftWidth; + if (cost < candidate.cost) { + candidate.cost = cost; + candidate.axis = axis; + candidate.numLeft = numLeft; + candidate.numRight = numRight; + leftBin = i; } binIdx++; } binIdx++; } + + Assert(candidate.cost != std::numeric_limits::infinity()); - Assert(bestCost != std::numeric_limits::infinity()); + const int axis = candidate.axis; + const float min = m_aabb.min[axis]; - return boost::make_tuple(bestCost, bestPos, bestAxis); + /* This part is ensures that the returned split plane is consistent + * with the floating point calculations done by the binning code + * in \ref bin(). Since reciprocals and various floating point + * roundoff errors are involved, simply setting + * + * candidate.pos = m_aabb.min[axis] + (leftBin+1) * m_binSize[axis]; + * + * will potentially lead to a different number primitives being + * classified to the left and right compared to the numbers stored + * in candidate.numLeft and candidate.numRight. We can't have that, + * however, since the partitioning code assumes that these + * numbers are correct. This removes the need for an extra sweep + * through the whole primitive list. + */ + float invBinSize = 1/m_binSize[axis], + split = min + (leftBin + 1) * m_binSize[axis]; + float splitNext = nextafterf(split, + std::numeric_limits::max()); + int idx = (int) ((split - min) * invBinSize); + int idxNext = (int) ((splitNext - min) * invBinSize); + + /** + * The split plane should be along the last discrete floating + * floating position, which would still be classified into + * the left bin. + */ + if (!(idx == leftBin && idxNext > leftBin)) { + float direction; + + /* First, determine the search direction */ + if (idx > leftBin) + direction = -std::numeric_limits::max(); + else + direction = std::numeric_limits::max(); + + while (true) { + split = nextafterf(split, direction); + splitNext = nextafterf(split, + std::numeric_limits::max()); + idx = (int) ((split - min) * invBinSize); + idxNext = (int) ((splitNext - min) * invBinSize); + if (idx == leftBin && idxNext > leftBin) + break; + } + } + + candidate.pos = split; + + return candidate; + } + + /** + * \brief Given a suitable split candiate, compute tight bounding + * boxes for the left and right subtrees and return associated + * primitive lists. + */ + template void partition( + const Functor &functor, SplitCandidate &split, + AABB &left, AABB &right, Float traversalCost, + Float intersectionCost) { + const float splitPos = split.pos; + const int axis = split.axis; + int numLeft = 0, numRight = 0; + + Assert(m_primCount == functor.getPrimitiveCount()); + + for (size_type i=0; i= splitPos) { + right.expandBy(aabb); + numRight++; + } else { + left.expandBy(aabb); + right.expandBy(aabb); + numLeft++; + numRight++; + } + } + + Assert(numLeft == split.numLeft); + Assert(numRight == split.numRight); + + left.max[axis] = std::min(left.max[axis], (Float) splitPos); + right.min[axis] = std::max(right.min[axis], (Float) splitPos); + + if (left.max[axis] != right.min[axis]) { + /* There is some space between the child nodes -- move the + split plane onto one of the AABBs to minimize the SAH */ + Float normalization = 2.0f / m_aabb.getSurfaceArea(); + Vector extents = m_aabb.getExtents(); + + cout << "There is some space (split=" << splitPos << ") :" << endl; + cout << m_aabb.toString() << endl; + cout << left.toString() << endl; + cout << right.toString() << endl; + + extents[axis] = left.max[axis] - m_aabb.min[axis]; + Float pLeft1 = normalization * (extents.x*extents.y + + extents.x*extents.z + extents.y*extents.z); + extents[axis] = m_aabb.max[axis] - left.max[axis]; + Float pRight1 = normalization * (extents.x*extents.y + + extents.x*extents.z + extents.y*extents.z); + Float sahCost1 = traversalCost + intersectionCost + * (pLeft1 * numLeft + pRight1 * numRight); + + extents[axis] = right.min[axis] - m_aabb.min[axis]; + Float pLeft2 = normalization * (extents.x*extents.y + + extents.x*extents.z + extents.y*extents.z); + extents[axis] = m_aabb.max[axis] - right.min[axis]; + Float pRight2 = normalization * (extents.x*extents.y + + extents.x*extents.z + extents.y*extents.z); + Float sahCost2 = traversalCost + intersectionCost + * (pLeft2 * numLeft + pRight2 * numRight); + + if (sahCost1 <= sahCost2) { + cout << "Adjusting cost from " << split.cost << " to " << sahCost1 << endl; + split.cost = sahCost1; + split.pos = left.max[axis]; + } else { + cout << "Adjusting cost from " << split.cost << " to " << sahCost2 << endl; + split.cost = sahCost2; + split.pos = right.min[axis]; + } + } } private: size_type m_minBins[3*BinCount], m_maxBins[3*BinCount]; diff --git a/src/converter/mtsimport.cpp b/src/converter/mtsimport.cpp index af0d4ce1..31126d01 100644 --- a/src/converter/mtsimport.cpp +++ b/src/converter/mtsimport.cpp @@ -172,8 +172,6 @@ int ubi_main(int argc, char **argv) { Thread::getThread()->getLogger()->setLogLevel(EInfo); - FileResolver *fileResolver = Thread::getThread()->getFileResolver(); - #if !defined(WIN32) /* Correct number parsing on some locales (e.g. ru_RU) */ setlocale(LC_NUMERIC, "C"); diff --git a/src/libcore/darwin.mm b/src/libcore/platform_darwin.mm similarity index 100% rename from src/libcore/darwin.mm rename to src/libcore/platform_darwin.mm diff --git a/src/libcore/platform_win32.cpp b/src/libcore/platform_win32.cpp new file mode 100644 index 00000000..f0c7ef6a --- /dev/null +++ b/src/libcore/platform_win32.cpp @@ -0,0 +1,77 @@ +/* s_nextafterf.c -- float version of s_nextafter.c. + * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com. + */ + +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunPro, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ + +#include + +typedef union { + float value; + uint32_t word; +} ieee_float_shape_type; + +#define GET_FLOAT_WORD(i,d) \ +do { \ + ieee_float_shape_type gf_u; \ + gf_u.value = (d); \ + (i) = gf_u.word; \ +} while (0) + +#define SET_FLOAT_WORD(d,i) \ +do { \ + ieee_float_shape_type sf_u; \ + sf_u.word = (i); \ + (d) = sf_u.value; \ +} while (0) + +float nextafterf(float x, float y) { + int32_t hx, hy, ix, iy; + + GET_FLOAT_WORD(hx, x); + GET_FLOAT_WORD(hy, y); + ix = hx & 0x7fffffff; /* |x| */ + iy = hy & 0x7fffffff; /* |y| */ + + /* x is nan or y is nan? */ + if ((ix > 0x7f800000) || (iy > 0x7f800000)) + return x + y; + + if (x == y) + return y; + + if (ix == 0) { /* x == 0? */ + SET_FLOAT_WORD(x, (hy & 0x80000000) | 1); + return x; + } + + if (hx >= 0) { /* x > 0 */ + if (hx > hy) { /* x > y: x -= ulp */ + hx -= 1; + } else { /* x < y: x += ulp */ + hx += 1; + } + } else { /* x < 0 */ + if (hy >= 0 || hx > hy) { /* x < y: x -= ulp */ + hx -= 1; + } else { /* x > y: x += ulp */ + hx += 1; + } + } + hy = hx & 0x7f800000; + if (hy >= 0x7f800000) { + x = x + x; /* overflow */ + return x; /* overflow */ + } + SET_FLOAT_WORD(x, hx); + return x; +} diff --git a/src/libcore/triangle.cpp b/src/libcore/triangle.cpp index 98b890a3..fc5b006d 100644 --- a/src/libcore/triangle.cpp +++ b/src/libcore/triangle.cpp @@ -136,8 +136,17 @@ AABB Triangle::getClippedAABB(const Vertex *buffer, const AABB &aabb) const { Point vertices1[MAX_VERTS], vertices2[MAX_VERTS]; int nVertices = 3; - for (int i=0; i<3; ++i) +#if defined(MTS_DEBUG_KD) + AABB origAABB; +#endif + + + for (int i=0; i<3; ++i) { vertices1[i] = buffer[idx[i]].p; +#if defined(MTS_DEBUG_KD) + origAABB.expandBy(vertices1[i]); +#endif + } for (int axis=0; axis<3; ++axis) { nVertices = sutherlandHodgman(vertices1, nVertices, vertices2, axis, aabb.min[axis], true); @@ -156,7 +165,11 @@ AABB Triangle::getClippedAABB(const Vertex *buffer, const AABB &aabb) const { result.clip(aabb); #endif + +#if defined(MTS_DEBUG_KD) SAssert(aabb.contains(result)); + SAssert(origAABB.contains(result)); +#endif return result; } diff --git a/src/tests/test_kd.cpp b/src/tests/test_kd.cpp index 67dc329a..f8b439ea 100644 --- a/src/tests/test_kd.cpp +++ b/src/tests/test_kd.cpp @@ -95,7 +95,7 @@ public: return m_primitiveCount; } - inline AABB operator()(uint32_t idx) const { + inline AABB getAABB(uint32_t idx) const { return m_triangles[idx].getAABB(m_vertexBuffer); }