copied over coherent RT code from the previous KDTree impl.
parent
01aac828a9
commit
a1b8d2266d
|
@ -530,7 +530,7 @@ public:
|
||||||
if (m_indices)
|
if (m_indices)
|
||||||
delete[] m_indices;
|
delete[] m_indices;
|
||||||
if (m_nodes)
|
if (m_nodes)
|
||||||
freeAligned(m_nodes);
|
freeAligned(m_nodes-1); // undo alignment shift
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -840,8 +840,9 @@ protected:
|
||||||
memset(primBuckets, 0, sizeof(size_type)*primBucketCount);
|
memset(primBuckets, 0, sizeof(size_type)*primBucketCount);
|
||||||
m_nodeCount = ctx.innerNodeCount + ctx.leafNodeCount;
|
m_nodeCount = ctx.innerNodeCount + ctx.leafNodeCount;
|
||||||
|
|
||||||
|
// +1 shift is for alignment purposes (see KDNode::getSibling)
|
||||||
m_nodes = static_cast<KDNode *> (allocAligned(
|
m_nodes = static_cast<KDNode *> (allocAligned(
|
||||||
sizeof(KDNode) * m_nodeCount));
|
sizeof(KDNode) * (m_nodeCount+1)))+1;
|
||||||
m_indices = new index_type[ctx.primIndexCount];
|
m_indices = new index_type[ctx.primIndexCount];
|
||||||
|
|
||||||
stack.push(boost::make_tuple(prelimRoot, &m_nodes[nodePtr++], &ctx, m_aabb));
|
stack.push(boost::make_tuple(prelimRoot, &m_nodes[nodePtr++], &ctx, m_aabb));
|
||||||
|
@ -1278,6 +1279,11 @@ protected:
|
||||||
((inner.combined & EInnerOffsetMask) >> 2);
|
((inner.combined & EInnerOffsetMask) >> 2);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Return the sibling of the current node
|
||||||
|
FINLINE const KDNode * __restrict getSibling() const {
|
||||||
|
return (const KDNode *) ((ptrdiff_t) this ^ (ptrdiff_t) 8);
|
||||||
|
}
|
||||||
|
|
||||||
/// Return the left child (assuming that this is an interior node)
|
/// Return the left child (assuming that this is an interior node)
|
||||||
FINLINE KDNode * __restrict getLeft() {
|
FINLINE KDNode * __restrict getLeft() {
|
||||||
return this +
|
return this +
|
||||||
|
|
|
@ -23,7 +23,11 @@
|
||||||
#include <mitsuba/render/gkdtree.h>
|
#include <mitsuba/render/gkdtree.h>
|
||||||
#include <mitsuba/render/triaccel.h>
|
#include <mitsuba/render/triaccel.h>
|
||||||
|
|
||||||
#define MTS_KD_CONSERVE_MEMORY 1
|
#if defined(MTS_KD_CONSERVE_MEMORY)
|
||||||
|
#if defined(MTS_HAS_COHERENT_RT)
|
||||||
|
#error MTS_KD_CONSERVE_MEMORY & MTS_HAS_COHERENT_RT are incompatible
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
MTS_NAMESPACE_BEGIN
|
MTS_NAMESPACE_BEGIN
|
||||||
|
|
||||||
|
@ -80,6 +84,31 @@ public:
|
||||||
*/
|
*/
|
||||||
bool rayIntersect(const Ray &ray) const;
|
bool rayIntersect(const Ray &ray) const;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* \brief Intersect four rays with the stored triangle meshes while making
|
||||||
|
* use of ray coherence to do this very efficiently.
|
||||||
|
*
|
||||||
|
* If the coherent ray tracing compile-time flag is disabled, this function
|
||||||
|
* simply does four separate mono-ray traversals.
|
||||||
|
*/
|
||||||
|
void rayIntersectPacket(const Ray *rays, Intersection *its) const;
|
||||||
|
|
||||||
|
#if defined(MTS_HAS_COHERENT_RT)
|
||||||
|
/**
|
||||||
|
* \brief Intersect four rays with the stored triangle meshes while making
|
||||||
|
* use of ray coherence to do this very efficiently. Requires SSE.
|
||||||
|
*/
|
||||||
|
void rayIntersectPacket(const RayPacket4 &packet,
|
||||||
|
const RayInterval4 &interval, Intersection4 &its) const;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* \brief Fallback for incoherent rays
|
||||||
|
* \sa rayIntesectPacket
|
||||||
|
*/
|
||||||
|
void rayIntersectPacketIncoherent(const RayPacket4 &packet,
|
||||||
|
const RayInterval4 &interval, Intersection4 &its) const;
|
||||||
|
#endif
|
||||||
|
|
||||||
MTS_DECLARE_CLASS()
|
MTS_DECLARE_CLASS()
|
||||||
protected:
|
protected:
|
||||||
/**
|
/**
|
||||||
|
@ -178,6 +207,26 @@ protected:
|
||||||
return ENo;
|
return ENo;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#if defined(MTS_HAS_COHERENT_RT)
|
||||||
|
/// Ray traversal stack entry for uncoherent ray tracing
|
||||||
|
struct CoherentKDStackEntry {
|
||||||
|
/* Current ray interval */
|
||||||
|
RayInterval4 MM_ALIGN16 interval;
|
||||||
|
/* Pointer to the far child */
|
||||||
|
const KDNode * __restrict node;
|
||||||
|
};
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Fallback for incoherent rays
|
||||||
|
*/
|
||||||
|
inline void rayIntersectPacketIncoherent(const Ray *rays, Intersection *its) const {
|
||||||
|
for (int i=0; i<4; i++) {
|
||||||
|
if (!rayIntersect(rays[i], its[i]))
|
||||||
|
its[i].t = std::numeric_limits<float>::infinity();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// Virtual destructor
|
/// Virtual destructor
|
||||||
virtual ~KDTree();
|
virtual ~KDTree();
|
||||||
private:
|
private:
|
||||||
|
|
|
@ -204,7 +204,7 @@ void VPLShaderManager::setVPL(const VPL &vpl) {
|
||||||
|
|
||||||
m_minDist = nearClip + (farClip - nearClip) * m_clamping;
|
m_minDist = nearClip + (farClip - nearClip) * m_clamping;
|
||||||
|
|
||||||
nearClip = std::min(nearClip, (Float) 0.05f);
|
nearClip = std::min(nearClip, (Float) 0.001f);
|
||||||
farClip = std::min(farClip * 1.5f, m_maxClipDist);
|
farClip = std::min(farClip * 1.5f, m_maxClipDist);
|
||||||
|
|
||||||
if (farClip < 0 || nearClip >= farClip) {
|
if (farClip < 0 || nearClip >= farClip) {
|
||||||
|
|
|
@ -17,6 +17,7 @@
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include <mitsuba/render/kdtree.h>
|
#include <mitsuba/render/kdtree.h>
|
||||||
|
#include <mitsuba/core/statistics.h>
|
||||||
|
|
||||||
MTS_NAMESPACE_BEGIN
|
MTS_NAMESPACE_BEGIN
|
||||||
|
|
||||||
|
@ -34,6 +35,9 @@ KDTree::~KDTree() {
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static StatsCounter raysTraced("General", "Normal rays traced");
|
||||||
|
static StatsCounter shadowRaysTraced("General", "Shadow rays traced");
|
||||||
|
|
||||||
void KDTree::addShape(const Shape *shape) {
|
void KDTree::addShape(const Shape *shape) {
|
||||||
Assert(!isBuilt());
|
Assert(!isBuilt());
|
||||||
if (shape->isCompound())
|
if (shape->isCompound())
|
||||||
|
@ -99,6 +103,7 @@ bool KDTree::rayIntersect(const Ray &ray, Intersection &its) const {
|
||||||
its.t = std::numeric_limits<Float>::infinity();
|
its.t = std::numeric_limits<Float>::infinity();
|
||||||
Float mint, maxt;
|
Float mint, maxt;
|
||||||
|
|
||||||
|
++raysTraced;
|
||||||
if (m_aabb.rayIntersect(ray, mint, maxt)) {
|
if (m_aabb.rayIntersect(ray, mint, maxt)) {
|
||||||
/* Use an adaptive ray epsilon */
|
/* Use an adaptive ray epsilon */
|
||||||
Float rayMinT = ray.mint;
|
Float rayMinT = ray.mint;
|
||||||
|
@ -150,6 +155,7 @@ bool KDTree::rayIntersect(const Ray &ray) const {
|
||||||
uint8_t temp[MTS_KD_INTERSECTION_TEMP];
|
uint8_t temp[MTS_KD_INTERSECTION_TEMP];
|
||||||
Float mint, maxt, t = std::numeric_limits<Float>::infinity();
|
Float mint, maxt, t = std::numeric_limits<Float>::infinity();
|
||||||
|
|
||||||
|
++shadowRaysTraced;
|
||||||
if (m_aabb.rayIntersect(ray, mint, maxt)) {
|
if (m_aabb.rayIntersect(ray, mint, maxt)) {
|
||||||
/* Use an adaptive ray epsilon */
|
/* Use an adaptive ray epsilon */
|
||||||
Float rayMinT = ray.mint;
|
Float rayMinT = ray.mint;
|
||||||
|
@ -167,5 +173,203 @@ bool KDTree::rayIntersect(const Ray &ray) const {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
#if defined(MTS_HAS_COHERENT_RT)
|
||||||
|
static StatsCounter coherentPackets("General", "Coherent ray packets");
|
||||||
|
static StatsCounter incoherentPackets("General", "Incoherent ray packets");
|
||||||
|
|
||||||
|
void KDTree::rayIntersectPacket(const RayPacket4 &packet,
|
||||||
|
const RayInterval4 &rayInterval, Intersection4 &its) const {
|
||||||
|
CoherentKDStackEntry MM_ALIGN16 stack[MTS_KD_MAXDEPTH];
|
||||||
|
RayInterval4 MM_ALIGN16 interval;
|
||||||
|
|
||||||
|
const KDNode * __restrict currNode = m_nodes;
|
||||||
|
int stackIndex = 0;
|
||||||
|
|
||||||
|
++coherentPackets;
|
||||||
|
|
||||||
|
/* First, intersect with the kd-tree AABB to determine
|
||||||
|
the intersection search intervals */
|
||||||
|
if (!m_aabb.rayIntersectPacket(packet, interval))
|
||||||
|
return;
|
||||||
|
|
||||||
|
interval.mint.ps = _mm_max_ps(interval.mint.ps, rayInterval.mint.ps);
|
||||||
|
interval.maxt.ps = _mm_min_ps(interval.maxt.ps, rayInterval.maxt.ps);
|
||||||
|
|
||||||
|
__m128 itsFound = _mm_cmpgt_ps(interval.mint.ps, interval.maxt.ps),
|
||||||
|
masked = itsFound;
|
||||||
|
if (_mm_movemask_ps(itsFound) == 0xF)
|
||||||
|
return;
|
||||||
|
|
||||||
|
while (currNode != NULL) {
|
||||||
|
while (EXPECT_TAKEN(!currNode->isLeaf())) {
|
||||||
|
const uint8_t axis = currNode->getAxis();
|
||||||
|
|
||||||
|
/* Calculate the plane intersection */
|
||||||
|
const __m128
|
||||||
|
splitVal = _mm_set1_ps(currNode->getSplit()),
|
||||||
|
t = _mm_mul_ps(_mm_sub_ps(splitVal, packet.o[axis].ps),
|
||||||
|
packet.dRcp[axis].ps);
|
||||||
|
|
||||||
|
const __m128
|
||||||
|
startsAfterSplit = _mm_or_ps(masked,
|
||||||
|
_mm_cmplt_ps(t, interval.mint.ps)),
|
||||||
|
endsBeforeSplit = _mm_or_ps(masked,
|
||||||
|
_mm_cmpgt_ps(t, interval.maxt.ps));
|
||||||
|
|
||||||
|
currNode = currNode->getLeft() + packet.signs[axis][0];
|
||||||
|
|
||||||
|
/* The interval completely completely lies on one side
|
||||||
|
of the split plane */
|
||||||
|
if (EXPECT_TAKEN(_mm_movemask_ps(startsAfterSplit) == 15)) {
|
||||||
|
currNode = currNode->getSibling();
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (EXPECT_TAKEN(_mm_movemask_ps(endsBeforeSplit) == 15))
|
||||||
|
continue;
|
||||||
|
|
||||||
|
stack[stackIndex].node = currNode->getSibling();
|
||||||
|
stack[stackIndex].interval.maxt = interval.maxt;
|
||||||
|
stack[stackIndex].interval.mint.ps = _mm_max_ps(t, interval.mint.ps);
|
||||||
|
interval.maxt.ps = _mm_min_ps(t, interval.maxt.ps);
|
||||||
|
masked = _mm_or_ps(masked,
|
||||||
|
_mm_cmpgt_ps(interval.mint.ps, interval.maxt.ps));
|
||||||
|
stackIndex++;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Arrived at a leaf node - intersect against primitives */
|
||||||
|
const index_type primStart = currNode->getPrimStart();
|
||||||
|
const index_type primEnd = currNode->getPrimEnd();
|
||||||
|
|
||||||
|
if (EXPECT_NOT_TAKEN(primStart != primEnd)) {
|
||||||
|
#ifdef MTS_USE_TRIACCEL4
|
||||||
|
const int count = m_packedTriangles[primStart].indirectionCount;
|
||||||
|
primStart = m_packedTriangles[primStart].indirectionIndex;
|
||||||
|
primEnd = primStart + count;
|
||||||
|
#endif
|
||||||
|
__m128
|
||||||
|
searchStart = _mm_max_ps(rayInterval.mint.ps,
|
||||||
|
_mm_mul_ps(interval.mint.ps, SSEConstants::om_eps.ps)),
|
||||||
|
searchEnd = _mm_min_ps(rayInterval.maxt.ps,
|
||||||
|
_mm_mul_ps(interval.maxt.ps, SSEConstants::op_eps.ps));
|
||||||
|
|
||||||
|
for (index_type entry=primStart; entry != primEnd; entry++) {
|
||||||
|
const TriAccel &kdTri = m_triAccel[m_indices[entry]];
|
||||||
|
if (EXPECT_TAKEN(kdTri.k != KNoTriangleFlag)) {
|
||||||
|
itsFound = _mm_or_ps(itsFound,
|
||||||
|
kdTri.rayIntersectPacket(packet, searchStart, searchEnd, masked, its));
|
||||||
|
} else {
|
||||||
|
/* Not a triangle - invoke the shape's intersection routine */
|
||||||
|
__m128 hasIts = m_shapes[kdTri.shapeIndex]->rayIntersectPacket(packet,
|
||||||
|
searchStart, searchEnd, masked, its);
|
||||||
|
itsFound = _mm_or_ps(itsFound, hasIts);
|
||||||
|
its.primIndex.pi = mux_epi32(pstoepi32(hasIts),
|
||||||
|
load1_epi32(kdTri.index), its.primIndex.pi);
|
||||||
|
its.shapeIndex.pi = mux_epi32(pstoepi32(hasIts),
|
||||||
|
load1_epi32(kdTri.shapeIndex), its.shapeIndex.pi);
|
||||||
|
}
|
||||||
|
searchEnd = _mm_min_ps(searchEnd, its.t.ps);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Abort if the tree has been traversed or if
|
||||||
|
intersections have been found for all four rays */
|
||||||
|
if (_mm_movemask_ps(itsFound) == 0xF || --stackIndex < 0)
|
||||||
|
break;
|
||||||
|
|
||||||
|
/* Pop from the stack */
|
||||||
|
currNode = stack[stackIndex].node;
|
||||||
|
interval = stack[stackIndex].interval;
|
||||||
|
masked = _mm_or_ps(itsFound,
|
||||||
|
_mm_cmpgt_ps(interval.mint.ps, interval.maxt.ps));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void KDTree::rayIntersectPacket(const Ray *rays, Intersection *its) const {
|
||||||
|
RayPacket4 MM_ALIGN16 packet;
|
||||||
|
RayInterval4 MM_ALIGN16 interval(rays);
|
||||||
|
Intersection4 MM_ALIGN16 its4;
|
||||||
|
|
||||||
|
if (packet.load(rays)) {
|
||||||
|
rayIntersectPacket(packet, interval, its4);
|
||||||
|
|
||||||
|
for (int i=0; i<4; i++) {
|
||||||
|
Intersection &it = its[i];
|
||||||
|
|
||||||
|
it.t = its4.t.f[i];
|
||||||
|
|
||||||
|
if (it.t != std::numeric_limits<float>::infinity()) {
|
||||||
|
const uint32_t shapeIndex = its4.shapeIndex.i[i];
|
||||||
|
const uint32_t primIndex = its4.primIndex.i[i];
|
||||||
|
const Shape *shape = m_shapes[shapeIndex];
|
||||||
|
|
||||||
|
if (EXPECT_TAKEN(primIndex != KNoTriangleFlag)) {
|
||||||
|
const TriMesh *triMesh = static_cast<const TriMesh *>(m_shapes[shapeIndex]);
|
||||||
|
const Triangle &t = triMesh->getTriangles()[primIndex];
|
||||||
|
const Vertex &v0 = triMesh->getVertexBuffer()[t.idx[0]];
|
||||||
|
const Vertex &v1 = triMesh->getVertexBuffer()[t.idx[1]];
|
||||||
|
const Vertex &v2 = triMesh->getVertexBuffer()[t.idx[2]];
|
||||||
|
const Vector b(1 - its4.u.f[i] - its4.v.f[i],
|
||||||
|
its4.u.f[i], its4.v.f[i]);
|
||||||
|
const Vector &rayD = rays[i].d;
|
||||||
|
it.p = rays[i].o + it.t * rayD;
|
||||||
|
|
||||||
|
Normal faceNormal(normalize(cross(v1.p-v0.p, v2.p-v0.p)));
|
||||||
|
it.geoFrame.n = faceNormal;
|
||||||
|
coordinateSystem(it.geoFrame.n, it.geoFrame.s, it.geoFrame.t);
|
||||||
|
|
||||||
|
it.uv = v0.uv * b.x + v1.uv * b.y + v2.uv * b.z;
|
||||||
|
it.dpdu = v0.dpdu * b.x + v1.dpdu * b.y + v2.dpdu * b.z;
|
||||||
|
it.dpdv = v0.dpdv * b.x + v1.dpdv * b.y + v2.dpdv * b.z;
|
||||||
|
it.shFrame.n = normalize(v0.n * b.x + v1.n * b.y + v2.n * b.z);
|
||||||
|
|
||||||
|
it.shFrame.s = normalize(it.dpdu - it.shFrame.n
|
||||||
|
* dot(it.shFrame.n, it.dpdu));
|
||||||
|
it.geoFrame.t = cross(it.shFrame.n, it.shFrame.s);
|
||||||
|
it.wi = it.toLocal(-rayD);
|
||||||
|
it.hasUVPartials = false;
|
||||||
|
it.shape = shape;
|
||||||
|
} else {
|
||||||
|
/* Non-triangle shape: intersect again to fill in details */
|
||||||
|
shape->rayIntersect(rays[i], it);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
rayIntersectPacketIncoherent(rays, its);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void KDTree::rayIntersectPacketIncoherent(const RayPacket4 &packet,
|
||||||
|
const RayInterval4 &rayInterval, Intersection4 &its4) const {
|
||||||
|
uint8_t temp[MTS_KD_INTERSECTION_TEMP];
|
||||||
|
|
||||||
|
++incoherentPackets;
|
||||||
|
for (int i=0; i<4; i++) {
|
||||||
|
Ray ray;
|
||||||
|
for (int axis=0; axis<3; axis++) {
|
||||||
|
ray.o[axis] = packet.o[axis].f[i];
|
||||||
|
ray.d[axis] = packet.d[axis].f[i];
|
||||||
|
ray.dRcp[axis] = packet.dRcp[axis].f[i];
|
||||||
|
}
|
||||||
|
ray.mint = rayInterval.mint.f[i];
|
||||||
|
ray.maxt = rayInterval.maxt.f[i];
|
||||||
|
if (ray.mint < ray.maxt && rayIntersectHavran<false>(ray, ray.mint, ray.maxt, its4.t.f[i], temp)) {
|
||||||
|
const IntersectionCache *cache = reinterpret_cast<const IntersectionCache *>(temp);
|
||||||
|
its4.u.f[i] = cache->u;
|
||||||
|
its4.v.f[i] = cache->u;
|
||||||
|
its4.shapeIndex.i[i] = cache->shapeIndex;
|
||||||
|
its4.primIndex.i[i] = cache->index;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#else // !MTS_HAS_COHERENT_RT
|
||||||
|
void KDTree::rayIntersectPacket(const Ray *rays, Intersection *its) const {
|
||||||
|
rayIntersectPacketIncoherent(rays, its);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
MTS_IMPLEMENT_CLASS(KDTree, false, GenericKDTree)
|
MTS_IMPLEMENT_CLASS(KDTree, false, GenericKDTree)
|
||||||
MTS_NAMESPACE_END
|
MTS_NAMESPACE_END
|
||||||
|
|
Loading…
Reference in New Issue