copied over coherent RT code from the previous KDTree impl.
parent
01aac828a9
commit
a1b8d2266d
|
@ -530,7 +530,7 @@ public:
|
|||
if (m_indices)
|
||||
delete[] m_indices;
|
||||
if (m_nodes)
|
||||
freeAligned(m_nodes);
|
||||
freeAligned(m_nodes-1); // undo alignment shift
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -840,8 +840,9 @@ protected:
|
|||
memset(primBuckets, 0, sizeof(size_type)*primBucketCount);
|
||||
m_nodeCount = ctx.innerNodeCount + ctx.leafNodeCount;
|
||||
|
||||
// +1 shift is for alignment purposes (see KDNode::getSibling)
|
||||
m_nodes = static_cast<KDNode *> (allocAligned(
|
||||
sizeof(KDNode) * m_nodeCount));
|
||||
sizeof(KDNode) * (m_nodeCount+1)))+1;
|
||||
m_indices = new index_type[ctx.primIndexCount];
|
||||
|
||||
stack.push(boost::make_tuple(prelimRoot, &m_nodes[nodePtr++], &ctx, m_aabb));
|
||||
|
@ -1278,6 +1279,11 @@ protected:
|
|||
((inner.combined & EInnerOffsetMask) >> 2);
|
||||
}
|
||||
|
||||
/// Return the sibling of the current node
|
||||
FINLINE const KDNode * __restrict getSibling() const {
|
||||
return (const KDNode *) ((ptrdiff_t) this ^ (ptrdiff_t) 8);
|
||||
}
|
||||
|
||||
/// Return the left child (assuming that this is an interior node)
|
||||
FINLINE KDNode * __restrict getLeft() {
|
||||
return this +
|
||||
|
|
|
@ -23,7 +23,11 @@
|
|||
#include <mitsuba/render/gkdtree.h>
|
||||
#include <mitsuba/render/triaccel.h>
|
||||
|
||||
#define MTS_KD_CONSERVE_MEMORY 1
|
||||
#if defined(MTS_KD_CONSERVE_MEMORY)
|
||||
#if defined(MTS_HAS_COHERENT_RT)
|
||||
#error MTS_KD_CONSERVE_MEMORY & MTS_HAS_COHERENT_RT are incompatible
|
||||
#endif
|
||||
#endif
|
||||
|
||||
MTS_NAMESPACE_BEGIN
|
||||
|
||||
|
@ -80,6 +84,31 @@ public:
|
|||
*/
|
||||
bool rayIntersect(const Ray &ray) const;
|
||||
|
||||
/**
|
||||
* \brief Intersect four rays with the stored triangle meshes while making
|
||||
* use of ray coherence to do this very efficiently.
|
||||
*
|
||||
* If the coherent ray tracing compile-time flag is disabled, this function
|
||||
* simply does four separate mono-ray traversals.
|
||||
*/
|
||||
void rayIntersectPacket(const Ray *rays, Intersection *its) const;
|
||||
|
||||
#if defined(MTS_HAS_COHERENT_RT)
|
||||
/**
|
||||
* \brief Intersect four rays with the stored triangle meshes while making
|
||||
* use of ray coherence to do this very efficiently. Requires SSE.
|
||||
*/
|
||||
void rayIntersectPacket(const RayPacket4 &packet,
|
||||
const RayInterval4 &interval, Intersection4 &its) const;
|
||||
|
||||
/**
|
||||
* \brief Fallback for incoherent rays
|
||||
* \sa rayIntesectPacket
|
||||
*/
|
||||
void rayIntersectPacketIncoherent(const RayPacket4 &packet,
|
||||
const RayInterval4 &interval, Intersection4 &its) const;
|
||||
#endif
|
||||
|
||||
MTS_DECLARE_CLASS()
|
||||
protected:
|
||||
/**
|
||||
|
@ -178,6 +207,26 @@ protected:
|
|||
return ENo;
|
||||
}
|
||||
|
||||
#if defined(MTS_HAS_COHERENT_RT)
|
||||
/// Ray traversal stack entry for uncoherent ray tracing
|
||||
struct CoherentKDStackEntry {
|
||||
/* Current ray interval */
|
||||
RayInterval4 MM_ALIGN16 interval;
|
||||
/* Pointer to the far child */
|
||||
const KDNode * __restrict node;
|
||||
};
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Fallback for incoherent rays
|
||||
*/
|
||||
inline void rayIntersectPacketIncoherent(const Ray *rays, Intersection *its) const {
|
||||
for (int i=0; i<4; i++) {
|
||||
if (!rayIntersect(rays[i], its[i]))
|
||||
its[i].t = std::numeric_limits<float>::infinity();
|
||||
}
|
||||
}
|
||||
|
||||
/// Virtual destructor
|
||||
virtual ~KDTree();
|
||||
private:
|
||||
|
|
|
@ -204,7 +204,7 @@ void VPLShaderManager::setVPL(const VPL &vpl) {
|
|||
|
||||
m_minDist = nearClip + (farClip - nearClip) * m_clamping;
|
||||
|
||||
nearClip = std::min(nearClip, (Float) 0.05f);
|
||||
nearClip = std::min(nearClip, (Float) 0.001f);
|
||||
farClip = std::min(farClip * 1.5f, m_maxClipDist);
|
||||
|
||||
if (farClip < 0 || nearClip >= farClip) {
|
||||
|
|
|
@ -17,6 +17,7 @@
|
|||
*/
|
||||
|
||||
#include <mitsuba/render/kdtree.h>
|
||||
#include <mitsuba/core/statistics.h>
|
||||
|
||||
MTS_NAMESPACE_BEGIN
|
||||
|
||||
|
@ -34,6 +35,9 @@ KDTree::~KDTree() {
|
|||
#endif
|
||||
}
|
||||
|
||||
static StatsCounter raysTraced("General", "Normal rays traced");
|
||||
static StatsCounter shadowRaysTraced("General", "Shadow rays traced");
|
||||
|
||||
void KDTree::addShape(const Shape *shape) {
|
||||
Assert(!isBuilt());
|
||||
if (shape->isCompound())
|
||||
|
@ -99,6 +103,7 @@ bool KDTree::rayIntersect(const Ray &ray, Intersection &its) const {
|
|||
its.t = std::numeric_limits<Float>::infinity();
|
||||
Float mint, maxt;
|
||||
|
||||
++raysTraced;
|
||||
if (m_aabb.rayIntersect(ray, mint, maxt)) {
|
||||
/* Use an adaptive ray epsilon */
|
||||
Float rayMinT = ray.mint;
|
||||
|
@ -150,6 +155,7 @@ bool KDTree::rayIntersect(const Ray &ray) const {
|
|||
uint8_t temp[MTS_KD_INTERSECTION_TEMP];
|
||||
Float mint, maxt, t = std::numeric_limits<Float>::infinity();
|
||||
|
||||
++shadowRaysTraced;
|
||||
if (m_aabb.rayIntersect(ray, mint, maxt)) {
|
||||
/* Use an adaptive ray epsilon */
|
||||
Float rayMinT = ray.mint;
|
||||
|
@ -167,5 +173,203 @@ bool KDTree::rayIntersect(const Ray &ray) const {
|
|||
return false;
|
||||
}
|
||||
|
||||
|
||||
#if defined(MTS_HAS_COHERENT_RT)
|
||||
static StatsCounter coherentPackets("General", "Coherent ray packets");
|
||||
static StatsCounter incoherentPackets("General", "Incoherent ray packets");
|
||||
|
||||
void KDTree::rayIntersectPacket(const RayPacket4 &packet,
|
||||
const RayInterval4 &rayInterval, Intersection4 &its) const {
|
||||
CoherentKDStackEntry MM_ALIGN16 stack[MTS_KD_MAXDEPTH];
|
||||
RayInterval4 MM_ALIGN16 interval;
|
||||
|
||||
const KDNode * __restrict currNode = m_nodes;
|
||||
int stackIndex = 0;
|
||||
|
||||
++coherentPackets;
|
||||
|
||||
/* First, intersect with the kd-tree AABB to determine
|
||||
the intersection search intervals */
|
||||
if (!m_aabb.rayIntersectPacket(packet, interval))
|
||||
return;
|
||||
|
||||
interval.mint.ps = _mm_max_ps(interval.mint.ps, rayInterval.mint.ps);
|
||||
interval.maxt.ps = _mm_min_ps(interval.maxt.ps, rayInterval.maxt.ps);
|
||||
|
||||
__m128 itsFound = _mm_cmpgt_ps(interval.mint.ps, interval.maxt.ps),
|
||||
masked = itsFound;
|
||||
if (_mm_movemask_ps(itsFound) == 0xF)
|
||||
return;
|
||||
|
||||
while (currNode != NULL) {
|
||||
while (EXPECT_TAKEN(!currNode->isLeaf())) {
|
||||
const uint8_t axis = currNode->getAxis();
|
||||
|
||||
/* Calculate the plane intersection */
|
||||
const __m128
|
||||
splitVal = _mm_set1_ps(currNode->getSplit()),
|
||||
t = _mm_mul_ps(_mm_sub_ps(splitVal, packet.o[axis].ps),
|
||||
packet.dRcp[axis].ps);
|
||||
|
||||
const __m128
|
||||
startsAfterSplit = _mm_or_ps(masked,
|
||||
_mm_cmplt_ps(t, interval.mint.ps)),
|
||||
endsBeforeSplit = _mm_or_ps(masked,
|
||||
_mm_cmpgt_ps(t, interval.maxt.ps));
|
||||
|
||||
currNode = currNode->getLeft() + packet.signs[axis][0];
|
||||
|
||||
/* The interval completely completely lies on one side
|
||||
of the split plane */
|
||||
if (EXPECT_TAKEN(_mm_movemask_ps(startsAfterSplit) == 15)) {
|
||||
currNode = currNode->getSibling();
|
||||
continue;
|
||||
}
|
||||
|
||||
if (EXPECT_TAKEN(_mm_movemask_ps(endsBeforeSplit) == 15))
|
||||
continue;
|
||||
|
||||
stack[stackIndex].node = currNode->getSibling();
|
||||
stack[stackIndex].interval.maxt = interval.maxt;
|
||||
stack[stackIndex].interval.mint.ps = _mm_max_ps(t, interval.mint.ps);
|
||||
interval.maxt.ps = _mm_min_ps(t, interval.maxt.ps);
|
||||
masked = _mm_or_ps(masked,
|
||||
_mm_cmpgt_ps(interval.mint.ps, interval.maxt.ps));
|
||||
stackIndex++;
|
||||
}
|
||||
|
||||
/* Arrived at a leaf node - intersect against primitives */
|
||||
const index_type primStart = currNode->getPrimStart();
|
||||
const index_type primEnd = currNode->getPrimEnd();
|
||||
|
||||
if (EXPECT_NOT_TAKEN(primStart != primEnd)) {
|
||||
#ifdef MTS_USE_TRIACCEL4
|
||||
const int count = m_packedTriangles[primStart].indirectionCount;
|
||||
primStart = m_packedTriangles[primStart].indirectionIndex;
|
||||
primEnd = primStart + count;
|
||||
#endif
|
||||
__m128
|
||||
searchStart = _mm_max_ps(rayInterval.mint.ps,
|
||||
_mm_mul_ps(interval.mint.ps, SSEConstants::om_eps.ps)),
|
||||
searchEnd = _mm_min_ps(rayInterval.maxt.ps,
|
||||
_mm_mul_ps(interval.maxt.ps, SSEConstants::op_eps.ps));
|
||||
|
||||
for (index_type entry=primStart; entry != primEnd; entry++) {
|
||||
const TriAccel &kdTri = m_triAccel[m_indices[entry]];
|
||||
if (EXPECT_TAKEN(kdTri.k != KNoTriangleFlag)) {
|
||||
itsFound = _mm_or_ps(itsFound,
|
||||
kdTri.rayIntersectPacket(packet, searchStart, searchEnd, masked, its));
|
||||
} else {
|
||||
/* Not a triangle - invoke the shape's intersection routine */
|
||||
__m128 hasIts = m_shapes[kdTri.shapeIndex]->rayIntersectPacket(packet,
|
||||
searchStart, searchEnd, masked, its);
|
||||
itsFound = _mm_or_ps(itsFound, hasIts);
|
||||
its.primIndex.pi = mux_epi32(pstoepi32(hasIts),
|
||||
load1_epi32(kdTri.index), its.primIndex.pi);
|
||||
its.shapeIndex.pi = mux_epi32(pstoepi32(hasIts),
|
||||
load1_epi32(kdTri.shapeIndex), its.shapeIndex.pi);
|
||||
}
|
||||
searchEnd = _mm_min_ps(searchEnd, its.t.ps);
|
||||
}
|
||||
}
|
||||
|
||||
/* Abort if the tree has been traversed or if
|
||||
intersections have been found for all four rays */
|
||||
if (_mm_movemask_ps(itsFound) == 0xF || --stackIndex < 0)
|
||||
break;
|
||||
|
||||
/* Pop from the stack */
|
||||
currNode = stack[stackIndex].node;
|
||||
interval = stack[stackIndex].interval;
|
||||
masked = _mm_or_ps(itsFound,
|
||||
_mm_cmpgt_ps(interval.mint.ps, interval.maxt.ps));
|
||||
}
|
||||
}
|
||||
|
||||
void KDTree::rayIntersectPacket(const Ray *rays, Intersection *its) const {
|
||||
RayPacket4 MM_ALIGN16 packet;
|
||||
RayInterval4 MM_ALIGN16 interval(rays);
|
||||
Intersection4 MM_ALIGN16 its4;
|
||||
|
||||
if (packet.load(rays)) {
|
||||
rayIntersectPacket(packet, interval, its4);
|
||||
|
||||
for (int i=0; i<4; i++) {
|
||||
Intersection &it = its[i];
|
||||
|
||||
it.t = its4.t.f[i];
|
||||
|
||||
if (it.t != std::numeric_limits<float>::infinity()) {
|
||||
const uint32_t shapeIndex = its4.shapeIndex.i[i];
|
||||
const uint32_t primIndex = its4.primIndex.i[i];
|
||||
const Shape *shape = m_shapes[shapeIndex];
|
||||
|
||||
if (EXPECT_TAKEN(primIndex != KNoTriangleFlag)) {
|
||||
const TriMesh *triMesh = static_cast<const TriMesh *>(m_shapes[shapeIndex]);
|
||||
const Triangle &t = triMesh->getTriangles()[primIndex];
|
||||
const Vertex &v0 = triMesh->getVertexBuffer()[t.idx[0]];
|
||||
const Vertex &v1 = triMesh->getVertexBuffer()[t.idx[1]];
|
||||
const Vertex &v2 = triMesh->getVertexBuffer()[t.idx[2]];
|
||||
const Vector b(1 - its4.u.f[i] - its4.v.f[i],
|
||||
its4.u.f[i], its4.v.f[i]);
|
||||
const Vector &rayD = rays[i].d;
|
||||
it.p = rays[i].o + it.t * rayD;
|
||||
|
||||
Normal faceNormal(normalize(cross(v1.p-v0.p, v2.p-v0.p)));
|
||||
it.geoFrame.n = faceNormal;
|
||||
coordinateSystem(it.geoFrame.n, it.geoFrame.s, it.geoFrame.t);
|
||||
|
||||
it.uv = v0.uv * b.x + v1.uv * b.y + v2.uv * b.z;
|
||||
it.dpdu = v0.dpdu * b.x + v1.dpdu * b.y + v2.dpdu * b.z;
|
||||
it.dpdv = v0.dpdv * b.x + v1.dpdv * b.y + v2.dpdv * b.z;
|
||||
it.shFrame.n = normalize(v0.n * b.x + v1.n * b.y + v2.n * b.z);
|
||||
|
||||
it.shFrame.s = normalize(it.dpdu - it.shFrame.n
|
||||
* dot(it.shFrame.n, it.dpdu));
|
||||
it.geoFrame.t = cross(it.shFrame.n, it.shFrame.s);
|
||||
it.wi = it.toLocal(-rayD);
|
||||
it.hasUVPartials = false;
|
||||
it.shape = shape;
|
||||
} else {
|
||||
/* Non-triangle shape: intersect again to fill in details */
|
||||
shape->rayIntersect(rays[i], it);
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
rayIntersectPacketIncoherent(rays, its);
|
||||
}
|
||||
}
|
||||
|
||||
void KDTree::rayIntersectPacketIncoherent(const RayPacket4 &packet,
|
||||
const RayInterval4 &rayInterval, Intersection4 &its4) const {
|
||||
uint8_t temp[MTS_KD_INTERSECTION_TEMP];
|
||||
|
||||
++incoherentPackets;
|
||||
for (int i=0; i<4; i++) {
|
||||
Ray ray;
|
||||
for (int axis=0; axis<3; axis++) {
|
||||
ray.o[axis] = packet.o[axis].f[i];
|
||||
ray.d[axis] = packet.d[axis].f[i];
|
||||
ray.dRcp[axis] = packet.dRcp[axis].f[i];
|
||||
}
|
||||
ray.mint = rayInterval.mint.f[i];
|
||||
ray.maxt = rayInterval.maxt.f[i];
|
||||
if (ray.mint < ray.maxt && rayIntersectHavran<false>(ray, ray.mint, ray.maxt, its4.t.f[i], temp)) {
|
||||
const IntersectionCache *cache = reinterpret_cast<const IntersectionCache *>(temp);
|
||||
its4.u.f[i] = cache->u;
|
||||
its4.v.f[i] = cache->u;
|
||||
its4.shapeIndex.i[i] = cache->shapeIndex;
|
||||
its4.primIndex.i[i] = cache->index;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#else // !MTS_HAS_COHERENT_RT
|
||||
void KDTree::rayIntersectPacket(const Ray *rays, Intersection *its) const {
|
||||
rayIntersectPacketIncoherent(rays, its);
|
||||
}
|
||||
#endif
|
||||
|
||||
MTS_IMPLEMENT_CLASS(KDTree, false, GenericKDTree)
|
||||
MTS_NAMESPACE_END
|
||||
|
|
Loading…
Reference in New Issue