work on the min-max binning split method

2010-10-07 18:38:06 +02:00 · 2010-10-07 18:38:06 +02:00 · effb3ebfb0
parent 76a501b291
commit effb3ebfb0
9 changed files with 301 additions and 51 deletions
--- a/5
+++ b/5
@ -277,12 +277,15 @@ libcore_objects = [
 	'src/libcore/zstream.cpp', 'src/libcore/shvector.cpp',
 	'src/libcore/fresolver.cpp'
 ]
 if sys.platform == 'darwin':
 	coreEnv_osx = coreEnv.Clone();
 	coreEnv_osx['CXXFLAGS'].remove('-fstrict-aliasing');
 	coreEnv_osx['CXXFLAGS'].remove('-ftree-vectorize');
 	coreEnv_osx['CXXFLAGS'].append('-fno-strict-aliasing');
-	libcore_objects += coreEnv_osx.SharedObject('src/libcore/darwin.mm')
+	libcore_objects += coreEnv_osx.SharedObject('src/libcore/platform_darwin.mm')
 elif sys.platform == 'win32':
 	libcore_objects += coreEnv_osx.SharedObject('src/libcore/platform_win32.cpp')
 libcore = coreEnv.SharedLibrary('src/libcore/mitsuba-core', libcore_objects);
--- a/doc/compiling.tex
+++ b/doc/compiling.tex
@ -31,6 +31,8 @@ Some minor adjustments may have to be made to this file based on your configurat
 You may also set adjust certain compilation flags here:
 \begin{description}
 \item[\texttt{MTS\_DEBUG}] Enable assertions etc. Usually a good idea.
 \item[\texttt{MTS\_KD\_DEBUG}] Enable additional checks in the kd-Tree. This
 is quite slow and mainly useful to track down bugs when they are suspected.
 \item[\texttt{SINGLE\_PRECISION}] Do all computation in single precision. This is usually sufficient.
 \item[\texttt{DOUBLE\_PRECISION}] Do all computation in double precision. Incompatible with
 \texttt{MTS\_SSE}, \texttt{MTS\_HAS\_COHERENT\_RT}, and \texttt{MTS\_DEBUG\_FP}.
--- a/include/mitsuba/core/stl.h
+++ b/include/mitsuba/core/stl.h
@ -168,6 +168,8 @@ inline bool ubi_isnan(double f) {
 	int classification = ::_fpclass(f);
 	return classification == _FPCLASS_QNAN || classification == _FPCLASS_SNAN;
 }
 extern float nextafterf(float x, float y);
 #else
 inline bool ubi_isnan(float f) {
 	return std::fpclassify(f) == FP_NAN;
--- a/include/mitsuba/render/gkdtree.h
+++ b/include/mitsuba/render/gkdtree.h
@ -21,11 +21,9 @@
 #include <mitsuba/core/timer.h>
 #include <boost/static_assert.hpp>
 #include <boost/tuple/tuple.hpp>
 #define MTS_KD_MAX_DEPTH 48   ///< Compile-time KD-tree depth limit
 #define MTS_KD_STATISTICS 1   ///< Collect statistics during building/traversal 
 #define MTS_KD_DEBUG          ///< Enable serious KD-tree debugging (slow)
 #define MTS_KD_MINMAX_BINS 32
 MTS_NAMESPACE_BEGIN
@ -38,13 +36,23 @@ MTS_NAMESPACE_BEGIN
 */
 template <typename ShapeType> class GenericKDTree : public Object {
 public:
 	/// Data type of split candidates computed by the SAH optimization routines
 	typedef boost::tuple<Float, Float, int> split_candidate;
 	/// Index number format (max 2^32 prims)
 	typedef uint32_t index_type;
 	/// Size number format
 	typedef uint32_t size_type;
 	/**
 	 * \brief Data type for split candidates computed by 
 	 * the SAH optimization routines.
 	 * */
 	struct SplitCandidate {
 		Float cost;
 		float pos;
 		int axis;
 		int numLeft, numRight;
 		bool planarLeft;
 	};
 	GenericKDTree() {
 		m_traversalCost = 15;
 		m_intersectionCost = 20;
@ -55,15 +63,15 @@ public:
 	 * \brief Build a KD-tree over the supplied axis-aligned bounding
 	 * boxes.
 	 */
-	template <typename AABBFunctor>
+	template <typename Functor>
-	void build(const AABBFunctor &aabbFunctor) {
+	void build(const Functor &functor) {
 		/* Establish an ad-hoc depth cutoff value (Formula from PBRT) */
-		m_maxDepth = std::min((int) (8 + 1.3f * log2i(aabbFunctor.getPrimitiveCount())),
+		m_maxDepth = std::min((int) (8 + 1.3f * log2i(functor.getPrimitiveCount())),
 			MTS_KD_MAX_DEPTH);
 		m_aabb.reset();
-		for (size_type i=0; i<aabbFunctor.getPrimitiveCount(); ++i)
+		for (size_type i=0; i<functor.getPrimitiveCount(); ++i)
-			m_aabb.expandBy(aabbFunctor(i));
+			m_aabb.expandBy(functor.getAABB(i));
 		Log(EDebug, "kd-tree configuration:");
 		Log(EDebug, "   Traversal cost           : %.2f", m_traversalCost);
@ -74,23 +82,31 @@ public:
 		Log(EDebug, "   Scene bounding box (max) : %s", m_aabb.max.toString().c_str());
 		Log(EInfo, "Constructing a SAH kd-tree (%i primitives) ..", 
-				aabbFunctor.getPrimitiveCount());
+				functor.getPrimitiveCount());
 		ref<Timer> timer = new Timer();
 		Log(EInfo, "Performing initial clustering ..");
 		MinMaxBins<MTS_KD_MINMAX_BINS> bins(m_aabb);
-		bins.bin(aabbFunctor);
+		bins.bin(functor);
-		split_candidate candidate = bins.maximizeSAH(m_traversalCost,
+		SplitCandidate candidate = bins.maximizeSAH(m_traversalCost,
 			m_intersectionCost, m_emptySpaceBonus);
-		cout << "originalCost = " << aabbFunctor.getPrimitiveCount() * m_intersectionCost << endl;
+		AABB leftAABB, rightAABB;
 		bins.partition(functor, candidate, leftAABB, rightAABB,
 				m_traversalCost, m_intersectionCost);
 		cout << "originalCost = " << functor.getPrimitiveCount() * m_intersectionCost << endl;
 		cout << "bestCost = " 
-			<< boost::get<0>(candidate) 
+			<< candidate.cost
 			<< ", bestPos = "
-			<< boost::get<1>(candidate) 
+			<< candidate.pos
 			<< ", bestAxis = "
-			<< boost::get<2>(candidate) 
+			<< candidate.axis
 			<< ", numLeft = "
 			<< candidate.numLeft
 			<< ", numRight = "
 			<< candidate.numRight
 			<< endl;
 		Log(EInfo, "Initial clustering took %i ms", timer->getMilliseconds());
 	}
@ -111,23 +127,24 @@ protected:
 		inline EdgeEvent() { }
 		/// Create a new edge event
-		inline EdgeEvent(uint8_t type, Float t, index_type index)
+		inline EdgeEvent(int type, float t, index_type index)
-		 : t(t), index(index), type(type) {
+		 : t(t), index(index), type(type) { }
 		 }
-		/* Plane position */
+		/// Plane position
-		Float t;
+		float t;
-		/* Primitive index */
+		/// Primitive index
 		index_type index;
-		/* Event type: end/planar/start */
+		/// Event type: end/planar/start
-		uint8_t type;
+		int type;
 	};
 	BOOST_STATIC_ASSERT(sizeof(EdgeEvent) == 12);
 	typedef std::vector<EdgeEvent> edge_event_vector;
 	typedef edge_event_vector edge_event_vector3[3];
 	/// Edge event comparison functor
-	struct EdgeEventSorter : public std::binary_function<EdgeEvent, EdgeEvent, bool> {
+	struct EdgeEventOrdering : public std::binary_function<EdgeEvent, EdgeEvent, bool> {
 		inline bool operator()(const EdgeEvent &a, const EdgeEvent &b) const {
 			if (a.t != b.t)
 				return a.t < b.t;
@ -215,20 +232,18 @@ protected:
 		}
 		/// Return the split plane location (assuming that this is an interior node)
-		inline float getSplit() const {
+		FINLINE float getSplit() const {
 			return inner.split;
 		}
 		/// Return the split axis (assuming that this is an interior node)
-		inline int getAxis() const {
+		FINLINE int getAxis() const {
 			return inner.combined & EInnerAxisMask;
 		}
 	};
 	BOOST_STATIC_ASSERT(sizeof(KDNode) == 8);
 	/**
 	 * \brief Min-max binning as described in
 	 * "Highly Parallel Fast KD-tree Construction for Interactive
@ -249,21 +264,26 @@ protected:
 		 *     a given list of primitives
 		 * \param count Number of primitives
 		 */
-		template <typename AABBFunctor> void bin(
+		template <typename Functor> void bin(
-				const AABBFunctor &functor) {
+				const Functor &functor) {
 			m_primCount = functor.getPrimitiveCount();
 			memset(m_minBins, 0, sizeof(size_type) * 3 * BinCount);
 			memset(m_maxBins, 0, sizeof(size_type) * 3 * BinCount);
 			Vector invBinSize;
 			for (int axis=0; axis<3; ++axis) 
 				invBinSize[axis] = 1/m_binSize[axis];
 			for (size_type i=0; i<m_primCount; ++i) {
-				const AABB aabb = functor(i);
+				const AABB aabb = functor.getAABB(i);
 				for (int axis=0; axis<3; ++axis) {
-					int minIdx = (aabb.min[axis] - aabb.min[axis]) * invBinSize[axis];
+					int minIdx = (int) ((aabb.min[axis] - m_aabb.min[axis]) 
-					int maxIdx = (aabb.max[axis] - aabb.min[axis]) * invBinSize[axis];
+							* invBinSize[axis]);
-					m_maxBins[axis * BinCount + std::min(std::max(maxIdx, 0), BinCount-1)]++;
+					int maxIdx = (int) ((aabb.max[axis] - m_aabb.min[axis]) 
-					m_minBins[axis * BinCount + std::min(std::max(minIdx, 0), BinCount-1)]++;
+							* invBinSize[axis]);
 					Assert(minIdx >= 0 && maxIdx >= 0);
 					m_maxBins[axis * BinCount + std::min(maxIdx, BinCount-1)]++;
 					m_minBins[axis * BinCount + std::min(minIdx, BinCount-1)]++;
 				}
 			}
 		}
@ -274,6 +294,7 @@ protected:
 		 *    processors in a SMP machine.
 		 */
 		MinMaxBins& operator+=(const MinMaxBins &otherBins) {
 			m_primCount += otherBins.primCount;
 			for (int i=0; i<3*BinCount; ++i) {
 				m_minBins[i] += otherBins.m_minBins[i];
 				m_maxBins[i] += otherBins.m_maxBins[i];
@ -285,11 +306,13 @@ protected:
 		 * \brief Evaluate the surface area heuristic at each bin boundary
 		 * and return the maximizer for the given cost constants.
 		 */
-		split_candidate maximizeSAH(Float traversalCost,
+		SplitCandidate maximizeSAH(Float traversalCost,
 				Float intersectionCost, Float emptySpaceBonus) {
-			Float bestCost = std::numeric_limits<Float>::infinity(), bestPos = 0;
+			SplitCandidate candidate;
 			Float normalization = 2.0f / m_aabb.getSurfaceArea();
-			int binIdx = 0, bestAxis = -1;
+			int binIdx = 0, leftBin = 0;
 			candidate.planarLeft = false;
 			candidate.cost = std::numeric_limits<Float>::infinity();
 			for (int axis=0; axis<3; ++axis) {
 				Vector extents = m_aabb.getExtents();
@ -317,10 +340,12 @@ protected:
 					if (numLeft == 0 || numRight == 0)
 						cost *= emptySpaceBonus;
-					if (cost < bestCost) {
+					if (cost < candidate.cost) {
-						bestCost = cost;
+						candidate.cost = cost;
-						bestAxis = axis;
+						candidate.axis = axis;
-						bestPos = m_aabb.min[axis] + leftWidth;
+						candidate.numLeft = numLeft;
 						candidate.numRight = numRight;
 						leftBin = i;
 					}
 					binIdx++;
@ -328,9 +353,139 @@ protected:
 				binIdx++;
 			}
-			Assert(bestCost != std::numeric_limits<Float>::infinity());
+			Assert(candidate.cost != std::numeric_limits<Float>::infinity());
-			return boost::make_tuple(bestCost, bestPos, bestAxis);
+			const int axis = candidate.axis;
 			const float min = m_aabb.min[axis];
 			/* This part is ensures that the returned split plane is consistent
 			 * with the floating point calculations done by the binning code 
 			 * in \ref bin(). Since reciprocals and various floating point 
 			 * roundoff errors are involved, simply setting
 			 *
 			 * candidate.pos = m_aabb.min[axis] + (leftBin+1) * m_binSize[axis];
 			 *
 			 * will potentially lead to a different number primitives being
 			 * classified to the left and right compared to the numbers stored
 			 * in candidate.numLeft and candidate.numRight. We can't have that,
 			 * however, since the partitioning code assumes that these 
 			 * numbers are correct. This removes the need for an extra sweep
 			 * through the whole primitive list.
 			 */
 			float invBinSize = 1/m_binSize[axis],
 				  split = min + (leftBin + 1) * m_binSize[axis];
 			float splitNext = nextafterf(split, 
 				  std::numeric_limits<float>::max());
 			int idx     = (int) ((split - min) * invBinSize);
 			int idxNext = (int) ((splitNext - min) * invBinSize);
 			/**
 			 * The split plane should be along the last discrete floating
 			 * floating position, which would still be classified into
 			 * the left bin.
 			 */
 			if (!(idx == leftBin && idxNext > leftBin)) {
 				float direction;
 				/* First, determine the search direction */
 				if (idx > leftBin)
 					direction = -std::numeric_limits<float>::max();
 				else
 					direction = std::numeric_limits<float>::max();
 				while (true) {
 					split     = nextafterf(split, direction);
 					splitNext = nextafterf(split, 
 							std::numeric_limits<float>::max());
 					idx     = (int) ((split - min) * invBinSize);
 					idxNext = (int) ((splitNext - min) * invBinSize);
 					if (idx == leftBin && idxNext > leftBin)
 						break;
 				}
 			}
 			candidate.pos = split;
 			return candidate;
 		}
 		/**
 		 * \brief Given a suitable split candiate, compute tight bounding
 		 * boxes for the left and right subtrees and return associated
 		 * primitive lists.
 		 */
 		template <typename Functor> void partition(
 				const Functor &functor, SplitCandidate &split, 
 				AABB &left, AABB &right, Float traversalCost,
 				Float intersectionCost) {
 			const float splitPos = split.pos;
 			const int axis = split.axis;
 			int numLeft = 0, numRight = 0;
 			Assert(m_primCount == functor.getPrimitiveCount());
 			for (size_type i=0; i<m_primCount; ++i) {
 				const AABB aabb = functor.getAABB(i);
 				if (aabb.max[axis] <= splitPos) {
 					left.expandBy(aabb);
 					numLeft++;
 				} else if (aabb.min[axis] >= splitPos) {
 					right.expandBy(aabb);
 					numRight++;
 				} else {
 					left.expandBy(aabb);
 					right.expandBy(aabb);
 					numLeft++;
 					numRight++;
 				}
 			}
 			Assert(numLeft == split.numLeft);
 			Assert(numRight == split.numRight);
 			left.max[axis] = std::min(left.max[axis], (Float) splitPos);
 			right.min[axis] = std::max(right.min[axis], (Float) splitPos);
 			if (left.max[axis] != right.min[axis]) {
 				/* There is some space between the child nodes -- move the
 				   split plane onto one of the AABBs to minimize the SAH */
 				Float normalization = 2.0f / m_aabb.getSurfaceArea();
 				Vector extents = m_aabb.getExtents();
 				cout << "There is some space (split=" << splitPos << ") :" << endl;
 				cout << m_aabb.toString() << endl;
 				cout << left.toString() << endl;
 				cout << right.toString() << endl;
 				extents[axis] = left.max[axis] - m_aabb.min[axis];
 				Float pLeft1 = normalization * (extents.x*extents.y 
 							+ extents.x*extents.z + extents.y*extents.z);
 				extents[axis] = m_aabb.max[axis] - left.max[axis];
 				Float pRight1 = normalization * (extents.x*extents.y 
 							+ extents.x*extents.z + extents.y*extents.z);
 				Float sahCost1 = traversalCost + intersectionCost 
 					* (pLeft1 * numLeft + pRight1 * numRight);
 				extents[axis] = right.min[axis] - m_aabb.min[axis];
 				Float pLeft2 = normalization * (extents.x*extents.y 
 							+ extents.x*extents.z + extents.y*extents.z);
 				extents[axis] = m_aabb.max[axis] - right.min[axis];
 				Float pRight2 = normalization * (extents.x*extents.y 
 							+ extents.x*extents.z + extents.y*extents.z);
 				Float sahCost2 = traversalCost + intersectionCost 
 					* (pLeft2 * numLeft + pRight2 * numRight);
 				if (sahCost1 <= sahCost2) {
 					cout << "Adjusting cost from " << split.cost << " to " << sahCost1 << endl;
 					split.cost = sahCost1;
 					split.pos = left.max[axis];
 				} else {
 					cout << "Adjusting cost from " << split.cost << " to " << sahCost2 << endl;
 					split.cost = sahCost2;
 					split.pos = right.min[axis];
 				}
 			}
 		}
 	private:
 		size_type m_minBins[3*BinCount], m_maxBins[3*BinCount];
--- a/src/converter/mtsimport.cpp
+++ b/src/converter/mtsimport.cpp
@ -172,8 +172,6 @@ int ubi_main(int argc, char **argv) {
 	Thread::getThread()->getLogger()->setLogLevel(EInfo);
 	FileResolver *fileResolver = Thread::getThread()->getFileResolver();
 #if !defined(WIN32)
 	/* Correct number parsing on some locales (e.g. ru_RU) */
 	setlocale(LC_NUMERIC, "C");
--- a/src/libcore/platform_darwin.mm
+++ b/src/libcore/platform_darwin.mm
--- a/src/libcore/platform_win32.cpp
+++ b/src/libcore/platform_win32.cpp
@ -0,0 +1,77 @@
 /* s_nextafterf.c -- float version of s_nextafter.c.
 * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com.
 */
 /*
 * ====================================================
 * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
 *
 * Developed at SunPro, a Sun Microsystems, Inc. business.
 * Permission to use, copy, modify, and distribute this
 * software is freely granted, provided that this notice
 * is preserved.
 * ====================================================
 */
 #include <mitsuba/mitsuba.h>
 typedef union {
  float value;
  uint32_t word;
 } ieee_float_shape_type;
 #define GET_FLOAT_WORD(i,d) \
 do { \
  ieee_float_shape_type gf_u; \
  gf_u.value = (d); \
  (i) = gf_u.word; \
 } while (0)
 #define SET_FLOAT_WORD(d,i) \
 do { \
  ieee_float_shape_type sf_u; \
  sf_u.word = (i); \
  (d) = sf_u.value; \
 } while (0)
 float nextafterf(float x, float y) {
 	int32_t hx, hy, ix, iy;
 	GET_FLOAT_WORD(hx, x);
 	GET_FLOAT_WORD(hy, y);
 	ix = hx & 0x7fffffff;		/* |x| */
 	iy = hy & 0x7fffffff;		/* |y| */
 	/* x is nan or y is nan? */
 	if ((ix > 0x7f800000) || (iy > 0x7f800000))
 		return x + y;
 	if (x == y)
 		return y;
 	if (ix == 0) { /* x == 0? */
 		SET_FLOAT_WORD(x, (hy & 0x80000000) | 1);
 		return x;
 	}
 	if (hx >= 0) { /* x > 0 */
 		if (hx > hy) { /* x > y: x -= ulp */
 			hx -= 1;
 		} else { /* x < y: x += ulp */
 			hx += 1;
 		}
 	} else { /* x < 0 */
 		if (hy >= 0 || hx > hy) { /* x < y: x -= ulp */
 			hx -= 1;
 		} else { /* x > y: x += ulp */
 			hx += 1;
 		}
 	}
 	hy = hx & 0x7f800000;
 	if (hy >= 0x7f800000) {
 		x = x + x; /* overflow */
 		return x; /* overflow */
 	}
 	SET_FLOAT_WORD(x, hx);
 	return x;
 }
--- a/src/libcore/triangle.cpp
+++ b/src/libcore/triangle.cpp
@ -136,8 +136,17 @@ AABB Triangle::getClippedAABB(const Vertex *buffer, const AABB &aabb) const {
 	Point vertices1[MAX_VERTS], vertices2[MAX_VERTS];
 	int nVertices = 3;
-	for (int i=0; i<3; ++i)
+#if defined(MTS_DEBUG_KD)
 	AABB origAABB;
 #endif
 	for (int i=0; i<3; ++i) {
 		vertices1[i] = buffer[idx[i]].p;
 #if defined(MTS_DEBUG_KD)
 		origAABB.expandBy(vertices1[i]);
 #endif
 	}
 	for (int axis=0; axis<3; ++axis) {
 		nVertices = sutherlandHodgman(vertices1, nVertices, vertices2, axis, aabb.min[axis], true);
@ -156,7 +165,11 @@ AABB Triangle::getClippedAABB(const Vertex *buffer, const AABB &aabb) const {
 	result.clip(aabb);
 #endif
 #if defined(MTS_DEBUG_KD)
 	SAssert(aabb.contains(result));
 	SAssert(origAABB.contains(result));
 #endif
 	return result;
 }
--- a/src/tests/test_kd.cpp
+++ b/src/tests/test_kd.cpp
@ -95,7 +95,7 @@ public:
 				return m_primitiveCount;
 			}
-			inline AABB operator()(uint32_t idx) const {
+			inline AABB getAABB(uint32_t idx) const {
 				return m_triangles[idx].getAABB(m_vertexBuffer);
 			}