From 5feb7753d87d385707194b455535b7fc98d619f0 Mon Sep 17 00:00:00 2001 From: Wenzel Jakob Date: Tue, 12 Oct 2010 23:47:15 +0200 Subject: [PATCH] collect some more statistics, code for fitting SAH cost constants to empirical measurements --- include/mitsuba/core/transform.h | 28 +++ include/mitsuba/render/gkdtree.h | 302 +++++++++++++++++++++++--- include/mitsuba/render/kdtree.h | 6 +- include/mitsuba/render/triaccel.h | 40 +++- include/mitsuba/render/triaccel_sse.h | 2 +- src/libcore/transform.cpp | 6 + src/tests/test_kd.cpp | 187 +++++++++------- 7 files changed, 451 insertions(+), 120 deletions(-) diff --git a/include/mitsuba/core/transform.h b/include/mitsuba/core/transform.h index b1ae6d52..4f1617fc 100644 --- a/include/mitsuba/core/transform.h +++ b/include/mitsuba/core/transform.h @@ -33,6 +33,9 @@ public: /// Initialize with the identity matrix Matrix4x4(); + /// Initialize the matrix with constant entries + Matrix4x4(Float value); + /// Unserialize a matrix from a stream Matrix4x4(Stream *stream); @@ -228,6 +231,31 @@ public: dest.z = m_invTransform->m[0][2] * v.x + m_invTransform->m[1][2] * v.y + m_invTransform->m[2][2] * v.z; } + + /// 4D matrix-vector multiplication + inline Vector4 operator()(const Vector4 &v) const { + Float x = m_transform->m[0][0] * v.x + m_transform->m[0][1] * v.y + + m_transform->m[0][2] * v.z + m_transform->m[0][3] * v.w; + Float y = m_transform->m[1][0] * v.x + m_transform->m[1][1] * v.y + + m_transform->m[1][2] * v.z + m_transform->m[1][3] * v.w; + Float z = m_transform->m[2][0] * v.x + m_transform->m[2][1] * v.y + + m_transform->m[2][2] * v.z + m_transform->m[2][3] * v.w; + Float w = m_transform->m[3][0] * v.x + m_transform->m[3][1] * v.y + + m_transform->m[3][2] * v.z + m_transform->m[3][3] * v.w; + return Vector4(x,y,z,w); + } + + /// 4D matrix-vector multiplication + inline void operator()(const Vector4 &v, Vector4 &dest) const { + dest.x = m_transform->m[0][0] * v.x + m_transform->m[0][1] * v.y + + m_transform->m[0][2] * v.z + m_transform->m[0][3] * v.w; + dest.y = m_transform->m[1][0] * v.x + m_transform->m[1][1] * v.y + + m_transform->m[1][2] * v.z + m_transform->m[1][3] * v.w; + dest.z = m_transform->m[2][0] * v.x + m_transform->m[2][1] * v.y + + m_transform->m[2][2] * v.z + m_transform->m[2][3] * v.w; + dest.w = m_transform->m[3][0] * v.x + m_transform->m[3][1] * v.y + + m_transform->m[3][2] * v.z + m_transform->m[3][3] * v.w; + } /// Transform a ray. Assumes that there is no scaling inline void operator()(const Ray &a, Ray &b) const { diff --git a/include/mitsuba/render/gkdtree.h b/include/mitsuba/render/gkdtree.h index 670b8415..e665ffcf 100644 --- a/include/mitsuba/render/gkdtree.h +++ b/include/mitsuba/render/gkdtree.h @@ -24,7 +24,7 @@ #include /// Activate lots of extra checks -#define MTS_KD_DEBUG 1 +//#define MTS_KD_DEBUG 1 /** Compile-time KD-tree depth limit. Allows to put certain data structures on the stack */ @@ -584,7 +584,7 @@ public: inline size_type getMaxDepth() const { return m_maxDepth; } - + /** * \brief Specify whether or not to use primitive clipping will * be used in the tree construction. @@ -680,7 +680,7 @@ public: inline bool getExactPrimitiveThreshold() const { return m_exactPrimThreshold; } - + /** * \brief Return whether or not the kd-tree has been built */ @@ -689,9 +689,29 @@ public: } /** - * \brief Build a KD-tree over supplied geometry + * \brief Intersect a ray against all primitives stored in the kd-tree */ - void build() { + bool rayIntersect(const Ray &ray, Intersection &its) const; + + /** + * \brief Test a ray for intersection against all primitives stored in the kd-tree + */ + bool rayIntersect(const Ray &ray) const; + + /** + * \brief Empirically find the best traversal and intersection cost values + * + * This is done by running the traversal code on random rays and fitting + * the SAH cost model to the collected statistics. + */ + void findCosts(Float &traversalCost, Float &intersectionCost); +protected: + /** + * \brief Build a KD-tree over the supplied geometry + * + * To be called by the subclass. + */ + void buildInternal() { if (isBuilt()) Log(EError, "The kd-tree has already been built!"); @@ -806,6 +826,10 @@ public: Float expPrimitivesIntersected = 0; Float sahCost = 0; size_type nodePtr = 0, indexPtr = 0; + size_type maxPrimsInLeaf = 0; + const size_type primBucketCount = 16; + size_type primBuckets[primBucketCount]; + memset(primBuckets, 0, sizeof(size_type)*primBucketCount); m_nodes = static_cast (allocAligned( sizeof(KDNode) * (ctx.innerNodeCount + ctx.leafNodeCount))); @@ -829,7 +853,11 @@ public: expLeavesVisited += aabb.getSurfaceArea(); expPrimitivesIntersected += weightedSA; sahCost += weightedSA * m_intersectionCost; - + if (primCount < primBucketCount) + primBuckets[primCount]++; + if (primCount > maxPrimsInLeaf) + maxPrimsInLeaf = primCount; + const BlockedVector &indices = context->indices; for (size_type idx = primStart; idx(this); } - + /** * \brief Create an edge event list for a given list of primitives. * @@ -2449,8 +2482,6 @@ protected: template FINLINE bool rayIntersectHavran(const Ray &ray, Float mint, Float maxt, Float &t, void *temp) const { KDStackEntryHavran stack[MTS_KD_MAXDEPTH]; - const KDNode * __restrict farChild, - * __restrict currNode = m_nodes; #if 0 static const int prevAxisTable[] = { 2, 0, 1 }; static const int nextAxisTable[] = { 1, 2, 0 }; @@ -2470,11 +2501,13 @@ protected: stack[exPt].t = maxt; stack[exPt].p = ray(maxt); stack[exPt].node = NULL; - + + const KDNode * __restrict currNode = m_nodes; while (currNode != NULL) { while (EXPECT_TAKEN(!currNode->isLeaf())) { const Float splitVal = (Float) currNode->getSplit(); const int axis = currNode->getAxis(); + const KDNode * __restrict farChild; if (stack[enPt].p[axis] <= splitVal) { if (stack[exPt].p[axis] <= splitVal) { @@ -2558,7 +2591,7 @@ protected: EIntersectionResult result = cast()->intersect(ray, primIdx, searchStart, searchEnd, t, temp); - + if (result == EYes) { if (shadowRay) return true; @@ -2584,6 +2617,131 @@ protected: return false; } + /** + * \brief Internal kd-tree traversal implementation (Havran variant) + * + * This method is almost identical to \ref rayIntersectHavran, except + * that it additionally returns statistics on the number of traversed + * nodes, intersected shapes, as well as the time taken to do this + * (measured using rtdsc). + */ + FINLINE boost::tuple + rayIntersectHavranCollectStatistics(const Ray &ray, + Float mint, Float maxt, Float &t, void *temp) const { + KDStackEntryHavran stack[MTS_KD_MAXDEPTH]; + + /* Set up the entry point */ + uint32_t enPt = 0; + stack[enPt].t = mint; + stack[enPt].p = ray(mint); + + /* Set up the exit point */ + uint32_t exPt = 1; + stack[exPt].t = maxt; + stack[exPt].p = ray(maxt); + stack[exPt].node = NULL; + + uint32_t numTraversals = 0; + uint32_t numIntersections = 0; + uint64_t timer = rdtsc(); + + const KDNode * __restrict currNode = m_nodes; + while (currNode != NULL) { + while (EXPECT_TAKEN(!currNode->isLeaf())) { + const Float splitVal = (Float) currNode->getSplit(); + const int axis = currNode->getAxis(); + const KDNode * __restrict farChild; + + ++numTraversals; + if (stack[enPt].p[axis] <= splitVal) { + if (stack[exPt].p[axis] <= splitVal) { + /* Cases N1, N2, N3, P5, Z2 and Z3 (see thesis) */ + currNode = currNode->getLeft(); + continue; + } + + /* Typo in Havran's thesis: + (it specifies "stack[exPt].p == splitVal", which + is clearly incorrect) */ + if (stack[enPt].p[axis] == splitVal) { + /* Case Z1 */ + currNode = currNode->getRight(); + continue; + } + + /* Case N4 */ + currNode = currNode->getLeft(); + farChild = currNode + 1; // getRight() + } else { /* stack[enPt].p[axis] > splitVal */ + if (splitVal < stack[exPt].p[axis]) { + /* Cases P1, P2, P3 and N5 */ + currNode = currNode->getRight(); + continue; + } + /* Case P4 */ + farChild = currNode->getLeft(); + currNode = farChild + 1; // getRight() + } + + /* Cases P4 and N4 -- calculate the distance to the split plane */ + t = (splitVal - ray.o[axis]) * ray.dRcp[axis]; + + /* Set up a new exit point */ + const uint32_t tmp = exPt++; + if (exPt == enPt) /* Do not overwrite the entry point */ + ++exPt; + + KDAssert(exPt < MTS_KD_MAX_DEPTH); + stack[exPt].prev = tmp; + stack[exPt].t = t; + stack[exPt].node = farChild; + stack[exPt].p = ray(t); + stack[exPt].p[axis] = splitVal; + } + + #if defined(SINGLE_PRECISION) + const Float eps = 1e-3; + #else + const Float eps = 1e-5; + #endif + const Float m_eps = 1-eps, p_eps = 1+eps; + + /* Floating-point arithmetic.. - use both absolute and relative + epsilons when looking for intersections in the subinterval */ + const Float searchStart = std::max(mint, stack[enPt].t * m_eps - eps); + Float searchEnd = std::min(maxt, stack[exPt].t * p_eps + eps); + + /* Reached a leaf node */ + bool foundIntersection = false; + for (unsigned int entry=currNode->getPrimStart(), + last = currNode->getPrimEnd(); entry != last; entry++) { + const index_type primIdx = m_indices[entry]; + + ++numIntersections; + EIntersectionResult result = cast()->intersect(ray, + primIdx, searchStart, searchEnd, t, temp); + + if (result == EYes) { + searchEnd = t; + foundIntersection = true; + } + } + + if (foundIntersection) + return boost::make_tuple(true, numTraversals, + numIntersections, rdtsc() - timer); + + + /* Pop from the stack and advance to the next node on the interval */ + enPt = exPt; + currNode = stack[exPt].node; + exPt = stack[enPt].prev; + } + + return boost::make_tuple(false, numTraversals, + numIntersections, rdtsc() - timer); + } + struct KDStackEntry { const KDNode * __restrict node; Float mint, maxt; @@ -2735,6 +2893,94 @@ template bool GenericKDTree::rayIntersect( } return false; } + +template void GenericKDTree::findCosts( + Float &traversalCost, Float &intersectionCost) { + ref random = new Random(); + uint32_t temp[MTS_KD_INTERSECTION_TEMP]; + BSphere bsphere = m_aabb.getBSphere(); + const size_t nRays = 10000000, warmup = nRays/4; + Vector *A = new Vector[nRays-warmup]; + Float *b = new Float[nRays-warmup]; + size_t nIntersections = 0, idx = 0; + + for (size_t i=0; inextFloat(), random->nextFloat()), + sample2(random->nextFloat(), random->nextFloat()); + Point p1 = bsphere.center + squareToSphere(sample1) * bsphere.radius; + Point p2 = bsphere.center + squareToSphere(sample2) * bsphere.radius; + Ray ray(p1, normalize(p2-p1)); + Float mint, maxt, t; + if (ray.mint > mint) mint = ray.mint; + if (ray.maxt < maxt) maxt = ray.maxt; + if (m_aabb.rayIntersect(ray, mint, maxt)) { + if (EXPECT_TAKEN(maxt > mint)) { + boost::tuple statistics = + rayIntersectHavranCollectStatistics(ray, mint, maxt, t, temp); + if (boost::get<0>(statistics)) + nIntersections++; + if (i > warmup) { + A[idx].x = 1; + A[idx].y = boost::get<1>(statistics); + A[idx].z = boost::get<2>(statistics); + b[idx] = boost::get<3>(statistics); + idx++; + } + } + } + } + + Log(EDebug, "Fitting to " SIZE_T_FMT " samples (" SIZE_T_FMT + " intersections)", idx, nIntersections); + + /* Solve using normal equations */ + ref M = new Matrix4x4(0.0f); + Vector4 rhs(0.0f), x; + + for (size_t i=0; i<3; ++i) { + for (size_t j=0; j<3; ++j) + for (size_t k=0; km[i][j] += A[k][i]*A[k][j]; + for (size_t k=0; km[3][3] = 1.0f; + + Transform(M->inverse())(rhs, x); + + Float avgRdtsc = 0, avgResidual = 0; + for (size_t i=0; i maxt) return false; @@ -115,8 +146,7 @@ FINLINE bool TriAccel::rayIntersect(const Ray &ray, Float mint, Float maxt, /* In barycentric coordinates */ u = hv * b_nu + hu * b_nv; v = hu * c_nu + hv * c_nv; - - return (u >= 0 && v >= 0 && u+v <= 1.0f); + return u >= 0 && v >= 0 && u+v <= 1.0f; } MTS_NAMESPACE_END diff --git a/include/mitsuba/render/triaccel_sse.h b/include/mitsuba/render/triaccel_sse.h index d791e27d..5a0638bf 100644 --- a/include/mitsuba/render/triaccel_sse.h +++ b/include/mitsuba/render/triaccel_sse.h @@ -57,7 +57,7 @@ struct TriAccel4 { FINLINE __m128 TriAccel::rayIntersectPacket(const RayPacket4 &packet, __m128 mint, __m128 maxt, __m128 inactive, Intersection4 &its) const { - static const int waldModulo[4] = { 1, 2, 0, 1 }; + static const MM_ALIGN16 int waldModulo[4] = { 1, 2, 0, 1 }; const int ku = waldModulo[k], kv = waldModulo[k+1]; /* Get the u and v components */ diff --git a/src/libcore/transform.cpp b/src/libcore/transform.cpp index 0a8eb850..f08b86a5 100644 --- a/src/libcore/transform.cpp +++ b/src/libcore/transform.cpp @@ -232,6 +232,12 @@ Matrix4x4::Matrix4x4() { m[i][j] = (i == j) ? 1.0f : 0.0f; } +Matrix4x4::Matrix4x4(Float value) { + for (int i=0; i<4; i++) + for (int j=0; j<4; j++) + m[i][j] = value; +} + Matrix4x4::Matrix4x4(Float _m[4][4]) { memcpy(m, _m, sizeof(Float) * 16); } diff --git a/src/tests/test_kd.cpp b/src/tests/test_kd.cpp index 7bf8bab6..a9d29afb 100644 --- a/src/tests/test_kd.cpp +++ b/src/tests/test_kd.cpp @@ -21,6 +21,103 @@ #include MTS_NAMESPACE_BEGIN + +class TriKDTree : public GenericKDTree { +public: + TriKDTree(const Triangle *triangles, + const Vertex *vertexBuffer, + size_type triangleCount) + : m_triangles(triangles), + m_vertexBuffer(vertexBuffer), + m_triangleCount(triangleCount) { + } + + FINLINE AABB getAABB(index_type idx) const { + return m_triangles[idx].getAABB(m_vertexBuffer); + } + + FINLINE AABB getClippedAABB(index_type idx, const AABB &aabb) const { + return m_triangles[idx].getClippedAABB(m_vertexBuffer, aabb); + } + + FINLINE size_type getPrimitiveCount() const { + return m_triangleCount; + } + + FINLINE EIntersectionResult intersect(const Ray &ray, index_type idx, + Float mint, Float maxt, Float &t, void *tmp) const { + Float tempT, tempU, tempV; +#if 0 + if (m_triangles[idx].rayIntersect(m_vertexBuffer, ray, tempU, tempV, tempT)) { + if (tempT < mint && tempT > maxt) + return ENo; +#else + if (m_triAccel[idx].rayIntersect(ray, mint, maxt, tempU, tempV, tempT)) { +#endif + index_type *indexPtr = reinterpret_cast(tmp); + Float *floatPtr = reinterpret_cast(indexPtr + 1); + + t = tempT; + *indexPtr = idx; + *floatPtr++ = tempU; + *floatPtr++ = tempV; + + return EYes; + } + return ENo; + } + + void build() { + buildInternal(); + Log(EInfo, "Precomputing triangle intersection information (%s)", + memString(sizeof(TriAccel)*m_triangleCount).c_str()); + m_triAccel = static_cast(allocAligned(m_triangleCount * sizeof(TriAccel))); + for (size_t i=0; i(tmp); + //const Float *floatPtr = reinterpret_cast(indexPtr + 1); + + //const Triangle &tri = m_triangles[*indexPtr]; + //const Vertex &v0 = m_vertexBuffer[tri.idx[0]]; + //const Vertex &v1 = m_vertexBuffer[tri.idx[1]]; + //const Vertex &v2 = m_vertexBuffer[tri.idx[2]]; + + //const Float u = *floatPtr++, v = *floatPtr++; + //const Vector b(1 - u - v, u, v); +/* + its.uv = v0.uv * b.x + v1.uv * b.y + v2.uv * b.z; + its.dpdu = v0.dpdu * b.x + v1.dpdu * b.y + v2.dpdu * b.z; + its.dpdv = v0.dpdv * b.x + v1.dpdv * b.y + v2.dpdv * b.z; + + its.geoFrame = Frame(normalize(cross(v1.p-v0.p, v2.p-v0.p))); + its.shFrame.n = normalize(v0.n * b.x + v1.n * b.y + v2.n * b.z); + its.shFrame.s = normalize(its.dpdu - its.shFrame.n + * dot(its.shFrame.n, its.dpdu)); + its.shFrame.t = cross(its.shFrame.n, its.shFrame.s); + its.wi = its.toLocal(-ray.d); +*/ + its.hasUVPartials = false; + } + +private: + const Triangle *m_triangles; + const Vertex *m_vertexBuffer; + TriAccel *m_triAccel; + size_type m_triangleCount; +}; + class TestKDTree : public TestCase { public: @@ -81,85 +178,6 @@ public: assertEquals(Point(1, 1, 0), clippedAABB.max); } - class TriKDTree : public GenericKDTree { - public: - TriKDTree(const Triangle *triangles, - const Vertex *vertexBuffer, - size_type triangleCount) - : m_triangles(triangles), - m_vertexBuffer(vertexBuffer), - m_triangleCount(triangleCount) { - } - - FINLINE AABB getAABB(index_type idx) const { - return m_triangles[idx].getAABB(m_vertexBuffer); - } - - FINLINE AABB getClippedAABB(index_type idx, const AABB &aabb) const { - return m_triangles[idx].getClippedAABB(m_vertexBuffer, aabb); - } - - FINLINE size_type getPrimitiveCount() const { - return m_triangleCount; - } - - FINLINE EIntersectionResult intersect(const Ray &ray, index_type idx, - Float mint, Float maxt, Float &t, void *tmp) const { - Float tempT, tempU, tempV; - if (m_triangles[idx].rayIntersect(m_vertexBuffer, ray, tempU, tempV, tempT)) { - if (tempT >= mint && tempT <= maxt) { - index_type *indexPtr = reinterpret_cast(tmp); - Float *floatPtr = reinterpret_cast(indexPtr + 1); - - t = tempT; - *indexPtr = idx; - *floatPtr++ = tempU; - *floatPtr++ = tempV; - - return EYes; - } - return ENo; - } else { - return ENever; - } - } - - FINLINE void fillIntersectionDetails(const Ray &ray, - Float t, const void *tmp, Intersection &its) const { - its.p = ray(t); - - //const index_type *indexPtr = reinterpret_cast(tmp); - //const Float *floatPtr = reinterpret_cast(indexPtr + 1); - - //const Triangle &tri = m_triangles[*indexPtr]; - //const Vertex &v0 = m_vertexBuffer[tri.idx[0]]; - //const Vertex &v1 = m_vertexBuffer[tri.idx[1]]; - //const Vertex &v2 = m_vertexBuffer[tri.idx[2]]; - - //const Float u = *floatPtr++, v = *floatPtr++; - //const Vector b(1 - u - v, u, v); -/* - its.uv = v0.uv * b.x + v1.uv * b.y + v2.uv * b.z; - its.dpdu = v0.dpdu * b.x + v1.dpdu * b.y + v2.dpdu * b.z; - its.dpdv = v0.dpdv * b.x + v1.dpdv * b.y + v2.dpdv * b.z; - - its.geoFrame = Frame(normalize(cross(v1.p-v0.p, v2.p-v0.p))); - its.shFrame.n = normalize(v0.n * b.x + v1.n * b.y + v2.n * b.z); - its.shFrame.s = normalize(its.dpdu - its.shFrame.n - * dot(its.shFrame.n, its.dpdu)); - its.shFrame.t = cross(its.shFrame.n, its.shFrame.s); - its.wi = its.toLocal(-ray.d); -*/ - its.hasUVPartials = false; - } - - private: - const Triangle *m_triangles; - const Vertex *m_vertexBuffer; - size_type m_triangleCount; - }; - - void test02_buildSimple() { Properties bunnyProps("ply"); bunnyProps.setString("filename", "tools/tests/bunny.ply"); @@ -169,13 +187,17 @@ public: mesh->configure(); TriKDTree tree(mesh->getTriangles(), mesh->getVertexBuffer(), mesh->getTriangleCount()); + tree.setTraversalCost(10); + tree.setIntersectionCost(17); tree.build(); BSphere bsphere(mesh->getBSphere()); - /* + Float intersectionCost, traversalCost; + tree.findCosts(intersectionCost, traversalCost); + ref oldTree = new KDTree(); oldTree->addShape(mesh); - oldTree->build(); */ + oldTree->build(); for (int j=0; j<3; ++j) { ref random = new Random(); @@ -203,7 +225,7 @@ public: nIntersections, timer->getMilliseconds()); Log(EInfo, "New: %.3f MRays/s", nRays / (timer->getMilliseconds() * (Float) 1000)); -/* + random = new Random(); timer->reset(); nIntersections=0; @@ -224,7 +246,6 @@ public: nIntersections, timer->getMilliseconds()); Log(EInfo, "Old: %.3f MRays/s", nRays / (timer->getMilliseconds() * (Float) 1000)); -*/ } } };