From a1b8d2266daa12383819df09b10b172933a06a10 Mon Sep 17 00:00:00 2001 From: Wenzel Jakob Date: Thu, 14 Oct 2010 01:00:42 +0200 Subject: [PATCH] copied over coherent RT code from the previous KDTree impl. --- include/mitsuba/render/gkdtree.h | 10 +- include/mitsuba/render/kdtree.h | 51 +++++++- src/libhw/vpl.cpp | 2 +- src/librender/kdtree.cpp | 204 +++++++++++++++++++++++++++++++ 4 files changed, 263 insertions(+), 4 deletions(-) diff --git a/include/mitsuba/render/gkdtree.h b/include/mitsuba/render/gkdtree.h index b2137f9b..53554527 100644 --- a/include/mitsuba/render/gkdtree.h +++ b/include/mitsuba/render/gkdtree.h @@ -530,7 +530,7 @@ public: if (m_indices) delete[] m_indices; if (m_nodes) - freeAligned(m_nodes); + freeAligned(m_nodes-1); // undo alignment shift } /** @@ -840,8 +840,9 @@ protected: memset(primBuckets, 0, sizeof(size_type)*primBucketCount); m_nodeCount = ctx.innerNodeCount + ctx.leafNodeCount; + // +1 shift is for alignment purposes (see KDNode::getSibling) m_nodes = static_cast (allocAligned( - sizeof(KDNode) * m_nodeCount)); + sizeof(KDNode) * (m_nodeCount+1)))+1; m_indices = new index_type[ctx.primIndexCount]; stack.push(boost::make_tuple(prelimRoot, &m_nodes[nodePtr++], &ctx, m_aabb)); @@ -1278,6 +1279,11 @@ protected: ((inner.combined & EInnerOffsetMask) >> 2); } + /// Return the sibling of the current node + FINLINE const KDNode * __restrict getSibling() const { + return (const KDNode *) ((ptrdiff_t) this ^ (ptrdiff_t) 8); + } + /// Return the left child (assuming that this is an interior node) FINLINE KDNode * __restrict getLeft() { return this + diff --git a/include/mitsuba/render/kdtree.h b/include/mitsuba/render/kdtree.h index c4797592..36e3e66e 100644 --- a/include/mitsuba/render/kdtree.h +++ b/include/mitsuba/render/kdtree.h @@ -23,7 +23,11 @@ #include #include -#define MTS_KD_CONSERVE_MEMORY 1 +#if defined(MTS_KD_CONSERVE_MEMORY) +#if defined(MTS_HAS_COHERENT_RT) +#error MTS_KD_CONSERVE_MEMORY & MTS_HAS_COHERENT_RT are incompatible +#endif +#endif MTS_NAMESPACE_BEGIN @@ -80,6 +84,31 @@ public: */ bool rayIntersect(const Ray &ray) const; + /** + * \brief Intersect four rays with the stored triangle meshes while making + * use of ray coherence to do this very efficiently. + * + * If the coherent ray tracing compile-time flag is disabled, this function + * simply does four separate mono-ray traversals. + */ + void rayIntersectPacket(const Ray *rays, Intersection *its) const; + +#if defined(MTS_HAS_COHERENT_RT) + /** + * \brief Intersect four rays with the stored triangle meshes while making + * use of ray coherence to do this very efficiently. Requires SSE. + */ + void rayIntersectPacket(const RayPacket4 &packet, + const RayInterval4 &interval, Intersection4 &its) const; + + /** + * \brief Fallback for incoherent rays + * \sa rayIntesectPacket + */ + void rayIntersectPacketIncoherent(const RayPacket4 &packet, + const RayInterval4 &interval, Intersection4 &its) const; +#endif + MTS_DECLARE_CLASS() protected: /** @@ -178,6 +207,26 @@ protected: return ENo; } +#if defined(MTS_HAS_COHERENT_RT) + /// Ray traversal stack entry for uncoherent ray tracing + struct CoherentKDStackEntry { + /* Current ray interval */ + RayInterval4 MM_ALIGN16 interval; + /* Pointer to the far child */ + const KDNode * __restrict node; + }; +#endif + + /** + * Fallback for incoherent rays + */ + inline void rayIntersectPacketIncoherent(const Ray *rays, Intersection *its) const { + for (int i=0; i<4; i++) { + if (!rayIntersect(rays[i], its[i])) + its[i].t = std::numeric_limits::infinity(); + } + } + /// Virtual destructor virtual ~KDTree(); private: diff --git a/src/libhw/vpl.cpp b/src/libhw/vpl.cpp index a99c00f6..5343bb8d 100644 --- a/src/libhw/vpl.cpp +++ b/src/libhw/vpl.cpp @@ -204,7 +204,7 @@ void VPLShaderManager::setVPL(const VPL &vpl) { m_minDist = nearClip + (farClip - nearClip) * m_clamping; - nearClip = std::min(nearClip, (Float) 0.05f); + nearClip = std::min(nearClip, (Float) 0.001f); farClip = std::min(farClip * 1.5f, m_maxClipDist); if (farClip < 0 || nearClip >= farClip) { diff --git a/src/librender/kdtree.cpp b/src/librender/kdtree.cpp index 881731f4..6d728159 100644 --- a/src/librender/kdtree.cpp +++ b/src/librender/kdtree.cpp @@ -17,6 +17,7 @@ */ #include +#include MTS_NAMESPACE_BEGIN @@ -34,6 +35,9 @@ KDTree::~KDTree() { #endif } +static StatsCounter raysTraced("General", "Normal rays traced"); +static StatsCounter shadowRaysTraced("General", "Shadow rays traced"); + void KDTree::addShape(const Shape *shape) { Assert(!isBuilt()); if (shape->isCompound()) @@ -99,6 +103,7 @@ bool KDTree::rayIntersect(const Ray &ray, Intersection &its) const { its.t = std::numeric_limits::infinity(); Float mint, maxt; + ++raysTraced; if (m_aabb.rayIntersect(ray, mint, maxt)) { /* Use an adaptive ray epsilon */ Float rayMinT = ray.mint; @@ -150,6 +155,7 @@ bool KDTree::rayIntersect(const Ray &ray) const { uint8_t temp[MTS_KD_INTERSECTION_TEMP]; Float mint, maxt, t = std::numeric_limits::infinity(); + ++shadowRaysTraced; if (m_aabb.rayIntersect(ray, mint, maxt)) { /* Use an adaptive ray epsilon */ Float rayMinT = ray.mint; @@ -167,5 +173,203 @@ bool KDTree::rayIntersect(const Ray &ray) const { return false; } + +#if defined(MTS_HAS_COHERENT_RT) +static StatsCounter coherentPackets("General", "Coherent ray packets"); +static StatsCounter incoherentPackets("General", "Incoherent ray packets"); + +void KDTree::rayIntersectPacket(const RayPacket4 &packet, + const RayInterval4 &rayInterval, Intersection4 &its) const { + CoherentKDStackEntry MM_ALIGN16 stack[MTS_KD_MAXDEPTH]; + RayInterval4 MM_ALIGN16 interval; + + const KDNode * __restrict currNode = m_nodes; + int stackIndex = 0; + + ++coherentPackets; + + /* First, intersect with the kd-tree AABB to determine + the intersection search intervals */ + if (!m_aabb.rayIntersectPacket(packet, interval)) + return; + + interval.mint.ps = _mm_max_ps(interval.mint.ps, rayInterval.mint.ps); + interval.maxt.ps = _mm_min_ps(interval.maxt.ps, rayInterval.maxt.ps); + + __m128 itsFound = _mm_cmpgt_ps(interval.mint.ps, interval.maxt.ps), + masked = itsFound; + if (_mm_movemask_ps(itsFound) == 0xF) + return; + + while (currNode != NULL) { + while (EXPECT_TAKEN(!currNode->isLeaf())) { + const uint8_t axis = currNode->getAxis(); + + /* Calculate the plane intersection */ + const __m128 + splitVal = _mm_set1_ps(currNode->getSplit()), + t = _mm_mul_ps(_mm_sub_ps(splitVal, packet.o[axis].ps), + packet.dRcp[axis].ps); + + const __m128 + startsAfterSplit = _mm_or_ps(masked, + _mm_cmplt_ps(t, interval.mint.ps)), + endsBeforeSplit = _mm_or_ps(masked, + _mm_cmpgt_ps(t, interval.maxt.ps)); + + currNode = currNode->getLeft() + packet.signs[axis][0]; + + /* The interval completely completely lies on one side + of the split plane */ + if (EXPECT_TAKEN(_mm_movemask_ps(startsAfterSplit) == 15)) { + currNode = currNode->getSibling(); + continue; + } + + if (EXPECT_TAKEN(_mm_movemask_ps(endsBeforeSplit) == 15)) + continue; + + stack[stackIndex].node = currNode->getSibling(); + stack[stackIndex].interval.maxt = interval.maxt; + stack[stackIndex].interval.mint.ps = _mm_max_ps(t, interval.mint.ps); + interval.maxt.ps = _mm_min_ps(t, interval.maxt.ps); + masked = _mm_or_ps(masked, + _mm_cmpgt_ps(interval.mint.ps, interval.maxt.ps)); + stackIndex++; + } + + /* Arrived at a leaf node - intersect against primitives */ + const index_type primStart = currNode->getPrimStart(); + const index_type primEnd = currNode->getPrimEnd(); + + if (EXPECT_NOT_TAKEN(primStart != primEnd)) { +#ifdef MTS_USE_TRIACCEL4 + const int count = m_packedTriangles[primStart].indirectionCount; + primStart = m_packedTriangles[primStart].indirectionIndex; + primEnd = primStart + count; +#endif + __m128 + searchStart = _mm_max_ps(rayInterval.mint.ps, + _mm_mul_ps(interval.mint.ps, SSEConstants::om_eps.ps)), + searchEnd = _mm_min_ps(rayInterval.maxt.ps, + _mm_mul_ps(interval.maxt.ps, SSEConstants::op_eps.ps)); + + for (index_type entry=primStart; entry != primEnd; entry++) { + const TriAccel &kdTri = m_triAccel[m_indices[entry]]; + if (EXPECT_TAKEN(kdTri.k != KNoTriangleFlag)) { + itsFound = _mm_or_ps(itsFound, + kdTri.rayIntersectPacket(packet, searchStart, searchEnd, masked, its)); + } else { + /* Not a triangle - invoke the shape's intersection routine */ + __m128 hasIts = m_shapes[kdTri.shapeIndex]->rayIntersectPacket(packet, + searchStart, searchEnd, masked, its); + itsFound = _mm_or_ps(itsFound, hasIts); + its.primIndex.pi = mux_epi32(pstoepi32(hasIts), + load1_epi32(kdTri.index), its.primIndex.pi); + its.shapeIndex.pi = mux_epi32(pstoepi32(hasIts), + load1_epi32(kdTri.shapeIndex), its.shapeIndex.pi); + } + searchEnd = _mm_min_ps(searchEnd, its.t.ps); + } + } + + /* Abort if the tree has been traversed or if + intersections have been found for all four rays */ + if (_mm_movemask_ps(itsFound) == 0xF || --stackIndex < 0) + break; + + /* Pop from the stack */ + currNode = stack[stackIndex].node; + interval = stack[stackIndex].interval; + masked = _mm_or_ps(itsFound, + _mm_cmpgt_ps(interval.mint.ps, interval.maxt.ps)); + } +} + +void KDTree::rayIntersectPacket(const Ray *rays, Intersection *its) const { + RayPacket4 MM_ALIGN16 packet; + RayInterval4 MM_ALIGN16 interval(rays); + Intersection4 MM_ALIGN16 its4; + + if (packet.load(rays)) { + rayIntersectPacket(packet, interval, its4); + + for (int i=0; i<4; i++) { + Intersection &it = its[i]; + + it.t = its4.t.f[i]; + + if (it.t != std::numeric_limits::infinity()) { + const uint32_t shapeIndex = its4.shapeIndex.i[i]; + const uint32_t primIndex = its4.primIndex.i[i]; + const Shape *shape = m_shapes[shapeIndex]; + + if (EXPECT_TAKEN(primIndex != KNoTriangleFlag)) { + const TriMesh *triMesh = static_cast(m_shapes[shapeIndex]); + const Triangle &t = triMesh->getTriangles()[primIndex]; + const Vertex &v0 = triMesh->getVertexBuffer()[t.idx[0]]; + const Vertex &v1 = triMesh->getVertexBuffer()[t.idx[1]]; + const Vertex &v2 = triMesh->getVertexBuffer()[t.idx[2]]; + const Vector b(1 - its4.u.f[i] - its4.v.f[i], + its4.u.f[i], its4.v.f[i]); + const Vector &rayD = rays[i].d; + it.p = rays[i].o + it.t * rayD; + + Normal faceNormal(normalize(cross(v1.p-v0.p, v2.p-v0.p))); + it.geoFrame.n = faceNormal; + coordinateSystem(it.geoFrame.n, it.geoFrame.s, it.geoFrame.t); + + it.uv = v0.uv * b.x + v1.uv * b.y + v2.uv * b.z; + it.dpdu = v0.dpdu * b.x + v1.dpdu * b.y + v2.dpdu * b.z; + it.dpdv = v0.dpdv * b.x + v1.dpdv * b.y + v2.dpdv * b.z; + it.shFrame.n = normalize(v0.n * b.x + v1.n * b.y + v2.n * b.z); + + it.shFrame.s = normalize(it.dpdu - it.shFrame.n + * dot(it.shFrame.n, it.dpdu)); + it.geoFrame.t = cross(it.shFrame.n, it.shFrame.s); + it.wi = it.toLocal(-rayD); + it.hasUVPartials = false; + it.shape = shape; + } else { + /* Non-triangle shape: intersect again to fill in details */ + shape->rayIntersect(rays[i], it); + } + } + } + } else { + rayIntersectPacketIncoherent(rays, its); + } +} + +void KDTree::rayIntersectPacketIncoherent(const RayPacket4 &packet, + const RayInterval4 &rayInterval, Intersection4 &its4) const { + uint8_t temp[MTS_KD_INTERSECTION_TEMP]; + + ++incoherentPackets; + for (int i=0; i<4; i++) { + Ray ray; + for (int axis=0; axis<3; axis++) { + ray.o[axis] = packet.o[axis].f[i]; + ray.d[axis] = packet.d[axis].f[i]; + ray.dRcp[axis] = packet.dRcp[axis].f[i]; + } + ray.mint = rayInterval.mint.f[i]; + ray.maxt = rayInterval.maxt.f[i]; + if (ray.mint < ray.maxt && rayIntersectHavran(ray, ray.mint, ray.maxt, its4.t.f[i], temp)) { + const IntersectionCache *cache = reinterpret_cast(temp); + its4.u.f[i] = cache->u; + its4.v.f[i] = cache->u; + its4.shapeIndex.i[i] = cache->shapeIndex; + its4.primIndex.i[i] = cache->index; + } + } +} + +#else // !MTS_HAS_COHERENT_RT +void KDTree::rayIntersectPacket(const Ray *rays, Intersection *its) const { + rayIntersectPacketIncoherent(rays, its); +} +#endif + MTS_IMPLEMENT_CLASS(KDTree, false, GenericKDTree) MTS_NAMESPACE_END