From 577a732cd0a69998380d9b665383225ea2f66465 Mon Sep 17 00:00:00 2001 From: Wenzel Jakob Date: Sun, 10 Oct 2010 12:05:32 +0200 Subject: [PATCH] SAH costs seem more reasonable now --- include/mitsuba/render/gkdtree.h | 160 +++++++++++++++++++++++++------ 1 file changed, 129 insertions(+), 31 deletions(-) diff --git a/include/mitsuba/render/gkdtree.h b/include/mitsuba/render/gkdtree.h index 64bc6594..a259baed 100644 --- a/include/mitsuba/render/gkdtree.h +++ b/include/mitsuba/render/gkdtree.h @@ -23,10 +23,10 @@ #include #include -#define MTS_KD_MAX_DEPTH 48 ///< Compile-time KD-tree depth limit -#define MTS_KD_STATISTICS 1 ///< Collect statistics during building/traversal -#define MTS_KD_MINMAX_BINS 32 ///< Min-max bin count -#define MTS_KD_MIN_ALLOC 128 ///< Allocate memory in 128 KB chunks +#define MTS_KD_MAX_DEPTH 48 ///< Compile-time KD-tree depth limit +#define MTS_KD_STATISTICS 1 ///< Collect statistics during building/traversal +#define MTS_KD_MINMAX_BINS 1024 ///< Min-max bin count +#define MTS_KD_MIN_ALLOC 128 ///< Allocate memory in 128 KB chunks MTS_NAMESPACE_BEGIN @@ -53,11 +53,20 @@ public: /** * \brief Release all memory used by the allocator */ - inline void cleanup() { + void cleanup() { for (std::vector::iterator it = m_chunks.begin(); it != m_chunks.end(); ++it) freeAligned((*it).start); } + + /** + * \brief Merge the chunks of another allocator into this one + */ + void merge(const OrderedChunkAllocator &other) { + m_chunks.reserve(m_chunks.size() + other.m_chunks.size()); + m_chunks.insert(m_chunks.end(), other.m_chunks.begin(), + other.m_chunks.end()); + } /** * \brief Request a block of memory from the allocator @@ -65,7 +74,7 @@ public: * Walks through the list of chunks to find one with enough * free memory. If no chunk could be found, a new one is created. */ - template T *allocate(size_t size) { + template T * __restrict__ allocate(size_t size) { size *= sizeof(T); for (std::vector::iterator it = m_chunks.begin(); it != m_chunks.end(); ++it) { @@ -118,7 +127,7 @@ public: return; } } - SLog(EError, "OrderedChunkAllocator: Internal error in shrinkLast"); + SLog(EError, "OrderedChunkAllocator: Internal error in shrinkAllocation"); } inline size_t getChunkCount() const { return m_chunks.size(); } @@ -188,14 +197,12 @@ public: } inline void set(uint32_t index, uint8_t value) { - SAssert((index >> 2) < m_bufferSize); uint8_t *ptr = m_buffer + (index >> 2); uint8_t shift = (index & 3) << 1; *ptr = (*ptr & ~(3 << shift)) | (value << shift); } inline uint8_t get(uint32_t index) const { - SAssert((index >> 2) < m_bufferSize); uint8_t *ptr = m_buffer + (index >> 2); uint8_t shift = (index & 3) << 1; return (*ptr >> shift) & 3; @@ -218,6 +225,7 @@ template class GenericKDTree : public Object { protected: struct KDNode; struct EdgeEvent; + struct EdgeEventOrdering; public: /// Index number format (max 2^32 prims) @@ -238,9 +246,9 @@ public: m_stopPrims = 2; m_maxBadRefines = 3; m_exactDepth = 1; - m_maxDepth = 0; + m_maxDepth = 7; } - + /** * \brief Build a KD-tree over supplied geometry */ @@ -253,7 +261,8 @@ public: /* Establish an ad-hoc depth cutoff value (Formula from PBRT) */ if (m_maxDepth == 0) - m_maxDepth = std::min((int) (8 + 1.3f * log2i(primCount)), MTS_KD_MAX_DEPTH); + m_maxDepth = (int) (8 + 1.3f * log2i(primCount)); + m_maxDepth = std::min(m_maxDepth, (size_type) MTS_KD_MAX_DEPTH); Log(EDebug, "Creating a preliminary index list (%.2f KiB)", primCount * sizeof(index_type) / 1024.0f); @@ -288,6 +297,7 @@ public: m_builders.resize(procCount); for (size_type i=0; iincRef(); m_builders[i]->start(); } @@ -301,12 +311,12 @@ public: Assert(ctx.leftAlloc.getUsed() == 0); Assert(ctx.rightAlloc.getUsed() == 0); - Log(EInfo, "Finished -- took %i ms.", timer->getMilliseconds()); m_interface.done = true; m_interface.cond->broadcast(); - for (size_type i=0; ijoin(); Log(EDebug, ""); + Log(EInfo, "Finished -- took %i ms.", timer->getMilliseconds()); Log(EDebug, "Memory allocation statistics:"); Log(EDebug, " Classification storage : %.2f KiB", @@ -318,9 +328,12 @@ public: for (size_type i=0; igetContext(); - ctx.accumulateStatistics(subCtx); subCtx.printStats(); + ctx.accumulateStatistics(subCtx); + ctx.nodeAlloc.merge(subCtx.nodeAlloc); + m_builders[i]->decRef(); } + m_builders.clear(); Log(EDebug, ""); Float rootSA = m_aabb.getSurfaceArea(); @@ -374,6 +387,27 @@ protected: inline EdgeEvent(uint16_t type, uint16_t axis, float pos, index_type index) : pos(pos), index(index), type(type), axis(axis) { } + /// Return a string representation + std::string toString() const { + std::ostringstream oss; + oss << "EdgeEvent[" << endl + << " pos = " << pos << "," << endl + << " index = " << index << "," << endl + << " type = "; + if (type == EEdgeEnd) + oss << "end"; + else if (type == EEdgePlanar) + oss << "planar"; + else if (type == EEdgeStart) + oss << "start"; + else + oss << "unknown!"; + oss << "," << endl + << " axis = " << axis << endl + <<"]"; + return oss.str(); + } + /// Plane position float pos; /// Primitive index @@ -412,6 +446,19 @@ protected: sahCost(std::numeric_limits::infinity()), pos(0), axis(0), numLeft(0), numRight(0), planarLeft(false) { } + + std::string toString() const { + std::ostringstream oss; + oss << "SplitCandidate[" << endl + << " sahCost=" << sahCost << "," << endl + << " pos=" << pos << "," << endl + << " axis=" << axis << "," << endl + << " numLeft=" << numLeft << "," << endl + << " numRight=" << numRight << "," << endl + << " planarLeft=" << (planarLeft ? "yes" : "no") << endl + << "]"; + return oss.str(); + } }; /** @@ -783,7 +830,13 @@ protected: } ++badRefines; } - + + + cout << "Depth " << depth << endl; + cout << "AABB: " << nodeAABB.toString() << endl; + cout << "SAH cost: " << leafCost << " -> " << bestSplit.toString() << endl; + cout << endl; + /* ==================================================================== */ /* Partitioning */ /* ==================================================================== */ @@ -888,7 +941,6 @@ protected: Float buildTreeSAH(BuildContext &ctx, unsigned int depth, KDNode *node, const AABB &nodeAABB, EdgeEvent *eventStart, EdgeEvent *eventEnd, size_type primCount, bool isLeftChild, size_type badRefines) { - cout << "Depth: " << depth << endl; Float leafCost = primCount * m_intersectionCost; if (primCount <= m_stopPrims || depth >= m_maxDepth) { @@ -896,6 +948,33 @@ protected: return leafCost; } +#if 0 + EdgeEventOrdering ord; + int primCounts[3]; + primCounts[0] = 0; + primCounts[1] = 0; + primCounts[2] = 0; + for (EdgeEvent *event = eventStart; event < eventEnd; ++event) { + Assert(event->axis >= 0 && event->axis < 3); + Assert(event->type >= 0 && event->type < 3); + if (event->type == EdgeEvent::EEdgePlanar + || event->type == EdgeEvent::EEdgeStart) + primCounts[event->axis]++; + EdgeEvent *next = event+1; + if (next < eventEnd) { + if (!ord(*event, *next)) { + cout << event->toString() << endl; + cout << next->toString() << endl; + Assert(false); + } + } + } + + Assert(primCounts[0] == primCount); + Assert(primCounts[1] == primCount); + Assert(primCounts[2] == primCount); +#endif + SplitCandidate bestSplit; Float invSA = 1.0f / nodeAABB.getSurfaceArea(); @@ -928,19 +1007,19 @@ protected: size_type numStart = 0, numEnd = 0, numPlanar = 0; /* Count "end" events */ - while (event != eventEnd && event->pos == pos && event->axis == axis + while (event < eventEnd && event->pos == pos && event->axis == axis && event->type == EdgeEvent::EEdgeEnd) { ++numEnd; ++event; } /* Count "planar" events */ - while (event != eventEnd && event->pos == pos && event->axis == axis + while (event < eventEnd && event->pos == pos && event->axis == axis && event->type == EdgeEvent::EEdgePlanar) { ++numPlanar; ++event; } /* Count "start" events */ - while (event != eventEnd && event->pos == pos && event->axis == axis + while (event < eventEnd && event->pos == pos && event->axis == axis && event->type == EdgeEvent::EEdgeStart) { ++numStart; ++event; } @@ -957,7 +1036,7 @@ protected: /* Calculate a score using the surface area heuristic */ if (EXPECT_TAKEN(pos >= nodeAABB.min[axis] && pos <= nodeAABB.max[axis])) { - Float tmp = m_aabb.max[axis]; + Float tmp = nodeAABB.max[axis]; aabb.max[axis] = pos; Float pLeft = invSA * aabb.getSurfaceArea(); aabb.max[axis] = tmp; @@ -1008,6 +1087,11 @@ protected: Assert(bestSplit.sahCost != std::numeric_limits::infinity()); + cout << "Depth " << depth << endl; + cout << "AABB: " << nodeAABB.toString() << endl; + cout << "SAH cost: " << leafCost << " -> " << bestSplit.toString() << endl; + cout << endl; + /* "Bad refines" heuristic from PBRT */ if (bestSplit.sahCost >= leafCost) { if ((bestSplit.sahCost > 4 * leafCost && primCount < 16) @@ -1015,6 +1099,7 @@ protected: createLeaf(ctx, node, nodeAABB, primCount); return leafCost; } + cout << "Increasing bad refines " << primCount << ", leafCost=" << leafCost << ", sahCost=" << bestSplit.sahCost << endl; ++badRefines; } @@ -1188,31 +1273,44 @@ protected: rightAlloc.release(newEventsRightStart); rightAlloc.release(rightEventsTempStart); } else { - for (EdgeEvent *event = eventStart; eventindex); if (classification == ELeftSide) { /* Left-only primitive. Move to the left list and advance */ - *leftEventsEnd++ = *event; + if (leftEventsEnd == event) + leftEventsEnd++; + else + *leftEventsEnd++ = *event; } else if (classification == ERightSide) { /* Right-only primitive. Move to the right list and advance */ - *rightEventsEnd++ = *event; + if (rightEventsEnd == event) + rightEventsEnd++; + else + *rightEventsEnd++ = *event; } else if (classification == EBothSides) { /* The primitive overlaps the split plane. Its edge events must be added to both lists. */ - *leftEventsEnd++ = *event; - *rightEventsEnd++ = *event; + if (leftEventsEnd == event) + leftEventsEnd++; + else + *leftEventsEnd++ = *event; + if (rightEventsEnd == event) + rightEventsEnd++; + else + *rightEventsEnd++ = *event; } } + Assert(leftEventsEnd - leftEventsStart <= bestSplit.numLeft * 6); + Assert(rightEventsEnd - rightEventsStart <= bestSplit.numRight * 6); } - /* Shrink the edge event storage now that we know exactly how many are on each side */ - ctx.leftAlloc.shrinkAllocation(leftEventsStart, - leftEventsEnd - leftEventsStart); +// ctx.leftAlloc.shrinkAllocation(leftEventsStart, +// leftEventsEnd - leftEventsStart); - ctx.rightAlloc.shrinkAllocation(rightEventsStart, - rightEventsEnd - rightEventsStart); +// ctx.rightAlloc.shrinkAllocation(rightEventsStart, +// rightEventsEnd - rightEventsStart); /* ==================================================================== */ /* Recursion */