From b8480a3d6087afdad8cd25b970c9a8d33339600a Mon Sep 17 00:00:00 2001 From: Wenzel Jakob Date: Wed, 5 Nov 2014 19:39:53 +0100 Subject: [PATCH] Fixes for several subtle kd-tree construction issues - fixed several bugs that occasionally broke double-precision builds - rewrote min/max binning code to make it tolerant to precision issues. Previously, it gave up in such cases, which lead to a slow build with the O(n log n) method. --- include/mitsuba/core/math.h | 39 ++++-- include/mitsuba/render/gkdtree.h | 206 ++++++++----------------------- src/libcore/CMakeLists.txt | 2 +- src/libcore/SConscript | 1 - src/libcore/platform_win32.cpp | 177 -------------------------- src/libcore/triangle.cpp | 25 +--- 6 files changed, 86 insertions(+), 364 deletions(-) delete mode 100644 src/libcore/platform_win32.cpp diff --git a/include/mitsuba/core/math.h b/include/mitsuba/core/math.h index 539a4214..1215052e 100644 --- a/include/mitsuba/core/math.h +++ b/include/mitsuba/core/math.h @@ -276,15 +276,40 @@ inline size_t roundToPowerOfTwo(size_t value) { return copysign((double) 1.0, value); #endif } + + /// Cast to single precision and round up if not exactly representable (passthrough) + inline float castflt_up(float val) { return val; } + + /// Cast to single precision and round up if not exactly representable + inline float castflt_up(double val) { + union { + float a; + int b; + }; + + a = (float) val; + if ((double) a < val) + b += a < 0 ? -1 : 1; + return a; + } + + /// Cast to single precision and round down if not exactly representable (passthrough) + inline float castflt_down(float val) { return val; } + + /// Cast to single precision and round down if not exactly representable + inline float castflt_down(double val) { + union { + float a; + int b; + }; + + a = (float) val; + if ((double) a > val) + b += a > 0 ? -1 : 1; + return a; + } }; /* namespace math */ MTS_NAMESPACE_END -#if defined(_MSC_VER) && _MSC_VER < 1800 -extern "C" { - extern MTS_EXPORT_CORE float nextafterf(float x, float y); - extern MTS_EXPORT_CORE double nextafter(double x, double y); -}; -#endif - #endif /* __MITSUBA_CORE_MATH_H_ */ diff --git a/include/mitsuba/render/gkdtree.h b/include/mitsuba/render/gkdtree.h index 5fda0753..20f3f8c9 100644 --- a/include/mitsuba/render/gkdtree.h +++ b/include/mitsuba/render/gkdtree.h @@ -59,6 +59,7 @@ MTS_NAMESPACE_BEGIN + /** * \brief Special "ordered" memory allocator * @@ -997,10 +998,8 @@ protected: #if defined(DOUBLE_PRECISION) for (int i=0; i<3; ++i) { - aabb.min[i] = nextafterf((float) aabb.min[i], - -std::numeric_limits::max()); - aabb.max[i] = nextafterf((float) aabb.max[i], - std::numeric_limits::max()); + aabb.min[i] = math::castflt_down(aabb.min[i]); + aabb.max[i] = math::castflt_up(aabb.max[i]); } #endif @@ -1346,10 +1345,11 @@ protected: int axis; SizeType numLeft, numRight; bool planarLeft; + int leftBin; inline SplitCandidate() : cost(std::numeric_limits::infinity()), - pos(0), axis(0), numLeft(0), numRight(0), planarLeft(false) { + pos(0), axis(0), numLeft(0), numRight(0), planarLeft(false), leftBin(-1) { } std::string toString() const { @@ -1360,6 +1360,7 @@ protected: << " axis=" << axis << "," << endl << " numLeft=" << numLeft << "," << endl << " numRight=" << numRight << "," << endl + << " leftBin=" << leftBin << "," << endl << " planarLeft=" << (planarLeft ? "yes" : "no") << endl << "]"; return oss.str(); @@ -1561,7 +1562,8 @@ protected: } for (int axis=0; axis::max()); - leftNodeAABB_enlarged.max[axis0] = rightNodeAABB_enlarged.max[axis0] = - nextafterf((float) nodeAABB.max[axis0], std::numeric_limits::max()); - leftNodeAABB_enlarged.min[axis1] = rightNodeAABB_enlarged.min[axis1] = - nextafterf((float) nodeAABB.min[axis1], -std::numeric_limits::max()); - leftNodeAABB_enlarged.max[axis1] = rightNodeAABB_enlarged.max[axis1] = - nextafterf((float) nodeAABB.max[axis1], std::numeric_limits::max()); - #endif - for (EdgeEvent *event = eventStart; eventindex); @@ -2227,16 +2214,16 @@ protected: generate new events for each side */ const IndexType index = event->index; - AABBType clippedLeft = cast()->getClippedAABB(index, leftNodeAABB_enlarged); - AABBType clippedRight = cast()->getClippedAABB(index, rightNodeAABB_enlarged); + AABBType clippedLeft = cast()->getClippedAABB(index, leftNodeAABB); + AABBType clippedRight = cast()->getClippedAABB(index, rightNodeAABB); - KDAssert(leftNodeAABB_enlarged.contains(clippedLeft)); - KDAssert(rightNodeAABB_enlarged.contains(clippedRight)); + KDAssert(leftNodeAABB.contains(clippedLeft)); + KDAssert(rightNodeAABB.contains(clippedRight)); if (clippedLeft.isValid() && clippedLeft.getSurfaceArea() > 0) { for (int axis=0; axis 0) { for (int axis=0; axisgetAABB(indices[i]); for (int axis=0; axis::infinity()); - - const int axis = candidate.axis; - const Float min = m_aabb.min[axis]; - - /* The following part may seem a bit paranoid. It is ensures that the - * returned split plane is consistent with the floating point calculations - * done by the binning code in \ref bin(). Since reciprocals and - * various floating point roundoff errors are involved, simply setting - * - * candidate.pos = m_aabb.min[axis] + (leftBin+1) * m_binSize[axis]; - * - * can potentially lead to a slightly different number primitives being - * classified to the left and right if we were to do check each - * primitive against this split position. We can't have that, however, - * since the partitioning code assumes that these numbers are correct. - * This lets it avoid doing another costly sweep, hence all the - * floating point madness below. - */ - Float invBinSize = m_invBinSize[axis]; - float split = (float) (min + (leftBin + 1) * m_binSize[axis]); - float splitNext = nextafterf(split, - std::numeric_limits::max()); - int idx = (int) ((split - min) * invBinSize); - int idxNext = (int) ((splitNext - min) * invBinSize); - - /** - * The split plane should pass through the last discrete floating - * floating value, which would still be classified into - * the left bin. If this is not computed correctly, do binary - * search. - */ - if (!(idx == leftBin && idxNext == leftBin+1)) { - float left = (float) m_aabb.min[axis]; - float right = (float) m_aabb.max[axis]; - int it = 0; - while (true) { - split = left + (right-left)/2; - splitNext = nextafterf(split, - std::numeric_limits::max()); - idx = (int) ((split - min) * invBinSize); - idxNext = (int) ((splitNext - min) * invBinSize); - - if (idx == leftBin && idxNext == leftBin+1) { - /* Got it! */ - break; - } else if (std::abs(idx-idxNext) > 1 || ++it > 50) { - /* Insufficient floating point resolution - -> a leaf will be created. */ - candidate.cost = std::numeric_limits::infinity(); - break; - } - - if (idx <= leftBin) - left = split; - else - right = split; - } - } - - if (split <= m_aabb.min[axis] || split >= m_aabb.max[axis]) { - /* Insufficient floating point resolution - -> a leaf will be created. */ - candidate.cost = std::numeric_limits::infinity(); - } - - candidate.pos = split; + KDAssert(candidate.cost != std::numeric_limits::infinity() && + candidate.leftBin >= 0); return candidate; } @@ -2594,10 +2518,9 @@ protected: BuildContext &ctx, const Derived *derived, IndexType *primIndices, SplitCandidate &split, bool isLeftChild, Float traversalCost, Float queryCost) { - const float splitPos = split.pos; - const int axis = split.axis; SizeType numLeft = 0, numRight = 0; AABBType leftBounds, rightBounds; + const int axis = split.axis; IndexType *leftIndices, *rightIndices; if (isLeftChild) { @@ -2613,12 +2536,14 @@ protected: for (SizeType i=0; igetAABB(primIndex); + int startIdx = computeIndex(math::castflt_down(aabb.min[axis]), axis); + int endIdx = computeIndex(math::castflt_up (aabb.max[axis]), axis); - if (aabb.max[axis] <= splitPos) { + if (endIdx <= split.leftBin) { KDAssert(numLeft < split.numLeft); leftBounds.expandBy(aabb); leftIndices[numLeft++] = primIndex; - } else if (aabb.min[axis] > splitPos) { + } else if (startIdx > split.leftBin) { KDAssert(numRight < split.numRight); rightBounds.expandBy(aabb); rightIndices[numRight++] = primIndex; @@ -2631,9 +2556,11 @@ protected: rightIndices[numRight++] = primIndex; } } - leftBounds.clip(m_aabb); rightBounds.clip(m_aabb); + split.pos = m_min[axis] + m_binSize[axis] * (split.leftBin + 1); + leftBounds.max[axis] = std::min(leftBounds.max[axis], (Float) split.pos); + rightBounds.min[axis] = std::max(rightBounds.min[axis], (Float) split.pos); KDAssert(numLeft == split.numLeft); KDAssert(numRight == split.numRight); @@ -2644,40 +2571,6 @@ protected: else ctx.rightAlloc.shrinkAllocation(rightIndices, split.numRight); - leftBounds.max[axis] = std::min(leftBounds.max[axis], (Float) splitPos); - rightBounds.min[axis] = std::max(rightBounds.min[axis], (Float) splitPos); - - if (leftBounds.max[axis] != rightBounds.min[axis]) { - /* There is some space between the child nodes -- move - the split plane onto one of the AABBs so that the - heuristic cost is minimized */ - TreeConstructionHeuristic tch(m_aabb); - - std::pair prob1 = tch(axis, - leftBounds.max[axis] - m_aabb.min[axis], - m_aabb.max[axis] - leftBounds.max[axis]); - std::pair prob2 = tch(axis, - rightBounds.min[axis] - m_aabb.min[axis], - m_aabb.max[axis] - rightBounds.min[axis]); - Float cost1 = traversalCost + queryCost - * (prob1.first * numLeft + prob1.second * numRight); - Float cost2 = traversalCost + queryCost - * (prob2.first * numLeft + prob2.second * numRight); - - if (cost1 <= cost2) { - split.cost = cost1; - split.pos = (float) leftBounds.max[axis]; - } else { - split.cost = cost2; - split.pos = (float) rightBounds.min[axis]; - } - - leftBounds.max[axis] = std::min(leftBounds.max[axis], - (Float) split.pos); - rightBounds.min[axis] = std::max(rightBounds.min[axis], - (Float) split.pos); - } - return Partition(leftBounds, leftIndices, rightBounds, rightIndices); } @@ -2686,9 +2579,10 @@ protected: SizeType *m_maxBins; SizeType m_primCount; int m_binCount; + float m_min[PointType::dim]; + float m_binSize[PointType::dim]; + float m_invBinSize[PointType::dim]; AABBType m_aabb; - VectorType m_binSize; - VectorType m_invBinSize; }; protected: diff --git a/src/libcore/CMakeLists.txt b/src/libcore/CMakeLists.txt index 11524a73..1c99503e 100644 --- a/src/libcore/CMakeLists.txt +++ b/src/libcore/CMakeLists.txt @@ -135,7 +135,7 @@ if (APPLE) list(APPEND SRCS platform_darwin.mm) elseif (WIN32) list(APPEND HDRS ${INCLUDE_DIR}/getopt.h) - list(APPEND SRCS getopt.c platform_win32.cpp) + list(APPEND SRCS getopt.c) endif() include_directories( diff --git a/src/libcore/SConscript b/src/libcore/SConscript index b6a741fa..7da954c6 100644 --- a/src/libcore/SConscript +++ b/src/libcore/SConscript @@ -55,7 +55,6 @@ if sys.platform == 'darwin': libcore_objects += coreEnv_osx.SharedObject('platform_darwin.mm') elif sys.platform == 'win32': libcore_objects += coreEnv.SharedObject('getopt.c') - libcore_objects += coreEnv.SharedObject('platform_win32.cpp') libcore = coreEnv.SharedLibrary('mitsuba-core', libcore_objects) diff --git a/src/libcore/platform_win32.cpp b/src/libcore/platform_win32.cpp deleted file mode 100644 index 3ee90ad6..00000000 --- a/src/libcore/platform_win32.cpp +++ /dev/null @@ -1,177 +0,0 @@ -/* s_nextafterf.c -- float version of s_nextafter.c. - * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com. - */ - -/* - * ==================================================== - * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. - * - * Developed at SunPro, a Sun Microsystems, Inc. business. - * Permission to use, copy, modify, and distribute this - * software is freely granted, provided that this notice - * is preserved. - * ==================================================== - */ - -#include - -#if defined(_MSC_VER) && _MSC_VER < 1800 - -/* Use strict IEEE 754 floating point computations - for the following code */ -#pragma float_control(precise, on) - -typedef union { - float value; - uint32_t word; -} ieee_float_shape_type; - -typedef union { - double value; - struct { - uint32_t lsw; - uint32_t msw; - } parts; -} ieee_double_shape_type; - -#define GET_FLOAT_WORD(i,d) \ -do { \ - ieee_float_shape_type gf_u; \ - gf_u.value = (d); \ - (i) = gf_u.word; \ -} while (0) - -#define SET_FLOAT_WORD(d,i) \ -do { \ - ieee_float_shape_type sf_u; \ - sf_u.word = (i); \ - (d) = sf_u.value; \ -} while (0) - - -#define EXTRACT_WORDS(ix0,ix1,d) \ -do { \ - ieee_double_shape_type ew_u; \ - ew_u.value = (d); \ - (ix0) = ew_u.parts.msw; \ - (ix1) = ew_u.parts.lsw; \ -} while (0) - - -#define INSERT_WORDS(d,ix0,ix1) \ -do { \ - ieee_double_shape_type iw_u; \ - iw_u.parts.msw = (ix0); \ - iw_u.parts.lsw = (ix1); \ - (d) = iw_u.value; \ -} while (0) - -float nextafterf(float x, float y) { - int32_t hx, hy, ix, iy; - - GET_FLOAT_WORD(hx, x); - GET_FLOAT_WORD(hy, y); - ix = hx & 0x7fffffff; /* |x| */ - iy = hy & 0x7fffffff; /* |y| */ - - /* x is nan or y is nan? */ - if ((ix > 0x7f800000) || (iy > 0x7f800000)) - return x + y; - - if (x == y) - return y; - - if (ix == 0) { /* x == 0? */ - SET_FLOAT_WORD(x, (hy & 0x80000000) | 1); - return x; - } - - if (hx >= 0) { /* x > 0 */ - if (hx > hy) { /* x > y: x -= ulp */ - hx -= 1; - } else { /* x < y: x += ulp */ - hx += 1; - } - } else { /* x < 0 */ - if (hy >= 0 || hx > hy) { /* x < y: x -= ulp */ - hx -= 1; - } else { /* x > y: x += ulp */ - hx += 1; - } - } - hy = hx & 0x7f800000; - if (hy >= 0x7f800000) { - x = x + x; /* overflow */ - return x; /* overflow */ - } - SET_FLOAT_WORD(x, hx); - return x; -} - -double nextafter(double x, double y) { - int32_t hx,hy,ix,iy; - uint32_t lx,ly; - - EXTRACT_WORDS(hx,lx,x); - EXTRACT_WORDS(hy,ly,y); - - ix = hx & 0x7fffffff; /* |x| */ - iy = hy & 0x7fffffff; /* |y| */ - - if (((ix>=0x7ff00000) && ((ix-0x7ff00000)|lx) != 0) || /* x is nan */ - ((iy>=0x7ff00000) && ((iy-0x7ff00000)|ly) != 0)) /* y is nan */ - return x+y; - - if (x==y) - return x; /* x=y, return x */ - - if ((ix|lx) == 0) { /* x == 0 */ - INSERT_WORDS(x,hy & 0x80000000,1); /* return +-minsubnormal */ - y = x*x; - if (y==x) - return y; - else - return x; /* raise underflow flag */ - } - - if (hx>=0) { /* x > 0 */ - if (hx>hy || ((hx==hy) && (lx>ly))) { /* x > y, x -= ulp */ - if (lx==0) - hx -= 1; - lx -= 1; - } else { /* x < y, x += ulp */ - lx += 1; - if (lx==0) - hx += 1; - } - } else { /* x < 0 */ - if (hy>=0 || hx>hy || ((hx==hy) && (lx>ly))) { /* x < y, x -= ulp */ - if (lx==0) - hx -= 1; - lx -= 1; - } else { /* x > y, x += ulp */ - lx += 1; - if (lx==0) - hx += 1; - } - } - - hy = hx & 0x7ff00000; - - if (hy >= 0x7ff00000) - return x+x; /* overflow */ - - if (hy < 0x00100000) { /* underflow */ - y = x*x; - if (y!=x) { - /* raise underflow flag */ - INSERT_WORDS(y,hx,lx); - return y; - } - } - - INSERT_WORDS(x,hx,lx); - - return x; -} -#endif diff --git a/src/libcore/triangle.cpp b/src/libcore/triangle.cpp index c1ec44ae..0fdd282d 100644 --- a/src/libcore/triangle.cpp +++ b/src/libcore/triangle.cpp @@ -136,28 +136,9 @@ AABB Triangle::getClippedAABB(const Point *positions, const AABB &aabb) const { AABB result; for (int i=0; i::infinity()); - } else if (pos_f > pos_d) { - /* Float value is too large */ - pos_roundedUp = pos_f; - pos_roundedDown = nextafterf(pos_f, - -std::numeric_limits::infinity()); - } else { - /* Double value is exactly representable */ - pos_roundedDown = pos_roundedUp = pos_f; - } - - result.min[j] = std::min(result.min[j], pos_roundedDown); - result.max[j] = std::max(result.max[j], pos_roundedUp); + double pos = vertices1[i][j]; + result.min[j] = std::min(result.min[j], (Float) math::castflt_down(pos)); + result.max[j] = std::max(result.max[j], (Float) math::castflt_up(pos)); } } result.clip(aabb);