518 lines
16 KiB
C++
518 lines
16 KiB
C++
/*
|
|
This file is part of Mitsuba, a physically based rendering system.
|
|
|
|
Copyright (c) 2007-2010 by Wenzel Jakob and others.
|
|
|
|
Mitsuba is free software; you can redistribute it and/or modify
|
|
it under the terms of the GNU General Public License Version 3
|
|
as published by the Free Software Foundation.
|
|
|
|
Mitsuba is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
GNU General Public License for more details.
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
*/
|
|
|
|
#if !defined(__KD_TREE_H)
|
|
#define __KD_TREE_H
|
|
|
|
#include <mitsuba/render/shape.h>
|
|
|
|
/**
|
|
* First, some default configuration definitions:
|
|
*
|
|
* If SSE is available, use the quad-packed TriAccel4 intersection method.
|
|
* Otherwise, fall back to the scalar TriAccel implementation. To use the
|
|
* scalar Moeller-Trumbore intersection algorithm (slower), remove this
|
|
* whole following block:
|
|
*/
|
|
/* The scalar TriAccel path is selected regardless of SSE availability;
   the quad-packed TriAccel4 path is enabled in addition when SSE exists.
   Remove these four lines to fall back to Moeller-Trumbore. */
#define MTS_USE_TRIACCEL 1
#if defined(MTS_SSE)
#define MTS_USE_TRIACCEL4 1
#endif

#if defined(MTS_HAS_COHERENT_RT)
#if !defined(MTS_SSE)
#error Coherent ray tracing requires SSE
#endif
/* Coherent ray tracing needs the plain TriAccel data structure.
   Note that both TriAccel and TriAccel4 can be pre-computed
   at the same time in order to have a performance gain with
   both coherent and non-coherent methods (at the cost of higher
   memory usage) */
#define MTS_USE_TRIACCEL 1
#endif

/* Switch to Moeller-Trumbore if neither TriAccel4 nor TriAccel is selected */
#if !defined(MTS_USE_TRIACCEL4) && !defined(MTS_USE_TRIACCEL)
#define MTS_USE_MT 1
#endif
|
|
|
|
#include <mitsuba/render/triaccel.h>
|
|
|
|
/**
|
|
* Pre-defined max. stack size for the ray traversal algorithm
|
|
*/
|
|
#define MTS_KD_MAXDEPTH 35
|
|
|
|
MTS_NAMESPACE_BEGIN
|
|
|
|
/**
|
|
* SAH KD-tree acceleration data structure for fast ray-triangle intersections.
|
|
* Implements the construction algorithm for 'perfect split' trees as outlined
|
|
* in the paper "On building fast kd-Trees for Ray Tracing, and on doing that in
|
|
* O(N log N)" by Ingo Wald and Vlastimil Havran. Non-triangle shapes are also
|
|
* supported, but most optimizations here target large triangle meshes.
|
|
*
|
|
* This class offers a choice of several different triangle intersection algorithms:
|
|
* By default, intersections are computed using the "TriAccel" projection with
|
|
* pre-computation method from Ingo Wald's PhD thesis "Realtime Ray Tracing
|
|
* and Interactive Global Illumination". When SSE is available on the target system,
|
|
* leaf triangles are stored in an explicit, packed format so that up to four
|
|
* intersections can be performed simultaneously. While about 15% faster, this also
|
|
* tends to duplicate a lot of geometry, which results in high memory usage.
|
|
* Therefore, this behavior can optionally be turned off with a #define. The third
|
|
* choice is the Moeller-Trumbore intersection test, which requires the least amount
|
|
* of memory, but is also the slowest.
|
|
*
|
|
* When SSE is enabled, packets of 4 rays can efficiently be traced to make
|
|
* use of any ray coherence. This requires "TriAccel" or "TriAccel4" to be
|
|
* enabled.
|
|
*
|
|
* During the kd-tree construction, this class uses a technique named
|
|
* "primitive clipping" to significantly improve the quality of the resulting
|
|
* trees. However, the involved Sutherland-Hodgman iterations are expensive
|
|
* and can lead to long construction times. The setClip method can be used
|
|
* to deactivate primitive clipping at the cost of slower intersections.
|
|
*
|
|
* Finally, this class also uses an optimized ray traversal algorithm
|
|
* (TA^B_{rec}), which is explained in Vlastimil Havran's PhD thesis
|
|
* "Heuristic Ray Shooting Algorithms".
|
|
*
|
|
* @author Wenzel Jakob
|
|
*/
|
|
class MTS_EXPORT_RENDER KDTree : public Object {
public:
    /// Construct a new kd-tree in an unbuilt state
    KDTree();

    /// Add geometry to the kd-tree
    void addShape(const Shape *shape);

    /// Return the list of stored shapes
    inline const std::vector<const Shape *> &getShapes() const { return m_shapes; }

    /// Set the relative cost of an intersection operation
    inline void setIntersectionCost(Float pCost) { m_intersectionCost = pCost; }

    /// Return the relative cost of an intersection operation
    inline Float getIntersectionCost() const { return m_intersectionCost; }

    /// Set the relative cost of a traversal operation
    inline void setTraversalCost(Float pCost) { m_traversalCost = pCost; }

    /// Return the relative cost of a traversal operation
    inline Float getTraversalCost() const { return m_traversalCost; }

    /// Set a bonus factor for cutting away empty space
    inline void setEmptyBonus(Float pCost) { m_emptyBonus = pCost; }

    /// Return the bonus factor for cutting away empty space
    inline Float getEmptyBonus() const { return m_emptyBonus; }

    /// Set the min. number of primitives, which will never be split.
    inline void setStopPrims(int pCost) { m_stopPrims = pCost; }

    /// Return the min. number of primitives, which will never be split.
    inline int getStopPrims() const { return m_stopPrims; }

    /// Enable or disable primitive clipping
    inline void setClip(bool pClip) { m_clip = pClip; }

    /// Return whether primitive clipping is enabled
    inline bool getClip() const { return m_clip; }

    /// Has the kd-tree been built?
    inline bool isBuilt() const { return m_built; }

    /// Return the axis-aligned bounding box containing all primitives
    inline const AABB &getAABB() const { return m_rootBounds; }

    /// Return the bounding sphere containing all primitives
    inline const BSphere &getBSphere() const { return m_bsphere; }

    /// Build the kd-tree
    void build();

    /**
     * Intersect a ray with the stored triangle meshes and only
     * check for intersections. This is the fastest intersection test.
     */
    bool rayIntersect(const Ray &ray) const;

    /**
     * Intersect a ray with the stored triangle meshes and return
     * a detailed intersection information record
     */
    bool rayIntersect(const Ray &ray, Intersection &its) const;

    /**
     * Intersect four rays with the stored triangle meshes while making
     * use of ray coherence to do this very efficiently. If the coherent
     * ray tracing #define is missing, this function simply does four
     * separate mono-ray traversals.
     */
    void rayIntersectPacket(const Ray *rays, Intersection *its) const;

#if defined(MTS_HAS_COHERENT_RT)
    /**
     * Intersect four rays with the stored triangle meshes while making
     * use of ray coherence to do this very efficiently. Requires SSE.
     */
    void rayIntersectPacket(const RayPacket4 &packet,
        const RayInterval4 &interval, Intersection4 &its) const;

    /**
     * Fallback for incoherent rays
     */
    void rayIntersectPacketIncoherent(const RayPacket4 &packet,
        const RayInterval4 &interval, Intersection4 &its) const;
#endif

    MTS_DECLARE_CLASS()
protected:
    /// Virtual destructor
    virtual ~KDTree();

    /// \cond

    /// KD-tree node in 8 bytes
    struct KDNode {
        union {
            /* Inner node */
            struct {
                /* Bit layout (see setInner/getLeft/getAxis):
                   31   : False (inner node)
                   30-2 : Offset to the left child, counted in nodes
                          relative to this node. The right child is
                          stored directly after the left one.
                   1-0  : Split axis
                */
                uint32_t combined;

                /// Split plane coordinate
                float split;
            } inner;

            /* Leaf node */
            struct {
                /* Bit layout:
                   31   : True (leaf node)
                   30-0 : Offset to the node's triangle list
                */
                uint32_t combined;

                /// End offset of the triangle list (start offset + primitive count)
                uint32_t end;
            } leaf;
        };

        enum EMask {
            ETypeMask = 1 << 31,
            ELeafOffsetMask = ~ETypeMask,
            EInnerAxisMask = 3,
            EInnerOffsetMask = ~EInnerAxisMask
        };

        /// Turn this node into a leaf referencing the range [offset, offset+numPrims)
        inline void setLeaf(unsigned int offset, unsigned int numPrims) {
            leaf.combined = ETypeMask | offset;
            leaf.end = offset + numPrims;
        }

        /**
         * Turn this node into an inner node.
         *
         * The offset is shifted into bits 30-2 without any range check,
         * so the caller must ensure that it fits into 29 bits -- larger
         * values would spill into the leaf flag (bit 31).
         */
        inline void setInner(int axis, unsigned int offset, Float split) {
            inner.combined = axis | (offset << 2);
            inner.split = static_cast<float>(split);
        }

        /// Is this a leaf node (bit 31 set)?
        FINLINE bool isLeaf() const {
            return (leaf.combined & ETypeMask) != 0;
        }

        /// Return the start offset of the leaf's triangle list
        FINLINE uint32_t getPrimStart() const {
            return leaf.combined & ELeafOffsetMask;
        }

        /// Return the end offset of the leaf's triangle list
        FINLINE uint32_t getPrimEnd() const {
            return leaf.end;
        }

        /// Return the left child (stored at a relative node offset)
        FINLINE const KDNode * __restrict getLeft() const {
            return this +
                ((inner.combined & EInnerOffsetMask) >> 2);
        }

        /**
         * Return the sibling of this node by toggling the address bit
         * that corresponds to the node size.
         *
         * NOTE(review): this presumably relies on both children being
         * stored as an adjacent pair aligned to twice the node size
         * (m_nodes uses an aligned allocator) -- confirm against the
         * construction code.
         */
        FINLINE const KDNode * __restrict getOtherChild() const {
            return (const KDNode *) ((ptrdiff_t) this ^ (ptrdiff_t) sizeof(KDNode));
        }

        /// Return the right child, which directly follows the left child
        FINLINE const KDNode * __restrict getRight() const {
            return getLeft() + 1;
        }

        /// Return the split plane coordinate of an inner node
        inline float getSplit() const {
            return inner.split;
        }

        /// Return the split axis of an inner node (lowest two bits)
        inline int getAxis() const {
            return inner.combined & EInnerAxisMask;
        }
    };

    /// Primitive classification during tree-construction
    enum EClassificationResult {
        EBothSides = 1,
        ELeftSide = 2,
        ERightSide = 3,
        EBothSidesProcessed = 4
    };

#if defined(MTS_USE_TRIACCEL)
    typedef TriAccel KDTriangle;
#else
    struct KDTriangle {
        uint32_t k;
        uint32_t index;
        uint32_t shapeIndex;
    };
#endif

    /// AABB edge event data structure
    struct EdgeEvent {
        /// Possible event types
        enum EEventType {
            EEdgeEnd = 0,
            EEdgePlanar = 1,
            EEdgeStart = 2
        };

        /// Dummy constructor (intentionally leaves all fields uninitialized)
        inline EdgeEvent() { }

        /// Create a new edge event
        inline EdgeEvent(uint8_t type, Float t, int index)
            : t(t), index(index), type(type) {
        }

        /* Plane position */
        Float t;
        /* Triangle index */
        int index;
        /* Event type: end/planar/start */
        uint8_t type;
    };

    typedef std::vector<EdgeEvent> EdgeEventVec;
    typedef EdgeEventVec EdgeEventVec3[3];

    /**
     * Edge event comparison functor: orders events by plane position
     * and breaks ties using the event type (end < planar < start).
     *
     * The nested typedefs replace the former std::binary_function base
     * class, which was removed from the standard library in C++17.
     */
    struct EdgeEventSorter {
        typedef EdgeEvent first_argument_type;
        typedef EdgeEvent second_argument_type;
        typedef bool result_type;

        inline bool operator()(const EdgeEvent &a, const EdgeEvent &b) const {
            if (a.t != b.t)
                return a.t < b.t;
            return a.type < b.type;
        }
    };

    /// Score for a split candidate
    struct Score {
        enum EPlanarSide {
            EPlanarLeft = 0,
            EPlanarRight = 1
        };

        /**
         * Create an upper bound score. The remaining fields are
         * zero-initialized so that toString() is always well-defined.
         */
        inline Score() : score(std::numeric_limits<Float>::infinity()),
            t(0), nLeft(0), nRight(0), axis(0), planarSide(EPlanarLeft) {
        }

        /// Create a new score record
        inline Score(Float score, Float t, int nLeft, int nRight,
            uint8_t axis, uint8_t planarSide) : score(score), t(t),
            nLeft(nLeft), nRight(nRight), axis(axis), planarSide(planarSide) {
        }

        /// Return a string representation
        std::string toString() const {
            std::ostringstream oss;
            oss << "Score[value=" << score << ", t=" << t
                << ", axis=" << (int) axis << ", planarSide="
                << (planarSide == EPlanarLeft ? "left" : "right")
                << ", nLeft=" << nLeft << ", nRight=" << nRight
                << "]";
            return oss.str();
        }

        /// Numerical score value
        Float score;
        /// Split position
        Float t;
        /// Primitive counts on the left and right side
        int nLeft, nRight;
        /// Split axis
        uint8_t axis;
        /// Should planar prims be placed left or right?
        uint8_t planarSide;

        /// Score comparison operator
        inline bool operator<(const Score &b) const {
            return score < b.score;
        }
    };

    /**
     * Surface area heuristic (SAH) cost function
     *
     * Computes traversalCost + intersectionCost * (prLeft * numLeft
     * + prRight * numRight) and scales the result by the empty space
     * bonus when one of the two sides contains no primitives.
     */
    inline Float SAH(Float prLeft, Float prRight, int numLeft, int numRight) const {
        Float cost = m_traversalCost + m_intersectionCost
            * (prLeft * numLeft + prRight * numRight);

        /* Favor splits, which cut off empty regions of space */
        if (numLeft == 0 || numRight == 0)
            cost *= m_emptyBonus;
        return cost;
    }

    /**
     * Surface area heuristic: evaluate a split candidate
     *
     * \param axis      Split axis
     * \param invArea   Factor applied to the child surface areas to
     *                  obtain the intersection probabilities
     * \param aabb      Bounding box of the node that is being split
     * \param t         Position of the split plane along \c axis
     * \param numLeft   Number of primitives strictly on the left side
     * \param numRight  Number of primitives strictly on the right side
     * \param numPlanar Number of primitives lying in the split plane
     * \return The cheaper of the two possible planar primitive
     *         assignments, or an infinite score when the split plane
     *         is within Epsilon of either node boundary
     */
    inline Score SAH(int axis, Float invArea, const AABB &aabb, Float t,
            int numLeft, int numRight, int numPlanar) const {
        /* Do not allow tiny splits -- checked first so that no child
           bounding boxes are built for rejected candidates */
        if (std::abs(aabb.min[axis]-t) < Epsilon ||
            std::abs(aabb.max[axis]-t) < Epsilon) {
            return Score(std::numeric_limits<Float>::infinity(),
                t, numLeft, numRight+numPlanar, axis, Score::EPlanarRight);
        }

        /* Generate the left+right node bounding boxes */
        AABB leftBounds = aabb, rightBounds = aabb;
        leftBounds.max[axis] = t; rightBounds.min[axis] = t;

        /* Determine approximate intersection probabilities
           for uniformly distributed rays */
        Float prLeft = leftBounds.getSurfaceArea() * invArea,
              prRight = rightBounds.getSurfaceArea() * invArea;

        /* Two costs are calculated depending on whether planar
           primitives are put on the left or right side */
        Float costPlanarLeft = SAH(prLeft, prRight,
            numLeft + numPlanar, numRight);
        Float costPlanarRight = SAH(prLeft, prRight,
            numLeft, numRight + numPlanar);

        if (costPlanarLeft < costPlanarRight)
            return Score(costPlanarLeft, t, numLeft+numPlanar, numRight,
                axis, Score::EPlanarLeft);
        else
            return Score(costPlanarRight, t, numLeft, numRight+numPlanar,
                axis, Score::EPlanarRight);
    }

    /// Ray traversal stack entry for incoherent ray tracing
    struct KDStackEntry {
        /* Pointer to the far child */
        const KDNode * __restrict node;
        /* Distance traveled along the ray (entry or exit) */
        Float t;
        /* Previous stack item */
        uint32_t prev;
        /* Associated point */
        Point pb;
    };

#if defined(MTS_HAS_COHERENT_RT)
    /// Ray traversal stack entry for coherent ray tracing
    struct CoherentKDStackEntry {
        /* Current ray interval */
        RayInterval4 MM_ALIGN16 interval;
        /* Pointer to the far child */
        const KDNode * __restrict node;
    };
#endif
    /// \endcond

    /**
     * Intersection method from Vlastimil Havran's
     * PhD thesis (algorithm TA^B_{rec})
     */
    bool rayIntersect(const Ray &ray, Intersection &its, Float mint, Float maxt,
        bool shadowRay, unsigned int &shapeIndex, unsigned int &primIndex) const;

    /// Recursive tree-building algorithm
    void buildTree(int nodeIndex, int depth, int badRefines,
        int numPrims, const AABB &aabb, EdgeEventVec3 &allEvents);

    /// Create a leaf kd-tree node
    void createLeaf(int nodeIndex, int depth, int numPrims,
        EdgeEventVec3 &allEvents);

    /**
     * Fallback for incoherent rays: traces the four rays one by one
     * and sets the intersection distance to infinity for every ray
     * that did not hit anything.
     */
    inline void rayIntersectPacketIncoherent(const Ray *rays, Intersection *its) const {
        for (int i=0; i<4; i++) {
            if (!rayIntersect(rays[i], its[i]))
                its[i].t = std::numeric_limits<float>::infinity();
        }
    }
protected:
    /* Has the kd-tree been built yet? */
    bool m_built;
    /* Axis-aligned bounding box of the root node */
    AABB m_rootBounds;
    /* Bounding sphere of the root node */
    BSphere m_bsphere;
    /* Pointers to all contained shapes */
    std::vector<const Shape *> m_shapes;
    /// Storage for kd-tree nodes
    std::vector<KDNode, aligned_allocator<KDNode> > m_nodes;
#if defined(MTS_USE_TRIACCEL4)
    /// Explicitly store per-leaf quad-packed triangles
    TriAccel4 *m_packedTriangles;
    std::vector<TriAccel4> m_tempPackedTriangles;
    unsigned int m_packedTriangleCount;
#endif
#if defined(MTS_USE_TRIACCEL) || defined(MTS_USE_MT)
    /// Storage for triangle redirection indices
    std::vector<unsigned int> m_indices;
#endif
    /// Total number of triangles/non-triangles in the tree
    unsigned int m_triangleCount, m_nonTriangleCount, m_primitiveCount;
    /// Geometry storage
    KDTriangle* m_triangles;
    /// Cost values for the surface area heuristic
    Float m_traversalCost, m_intersectionCost, m_emptyBonus;
    /// Ad-hoc depth cutoff value when building the tree
    int m_maxDepth;
    /// Allowed number of bad refines before creating a leaf
    int m_maxBadRefines;
    /// Minimal number of primitives per node
    int m_stopPrims;
    /// Use primitive clipping?
    bool m_clip;
    /// Storage for edge events
    EdgeEventVec3 m_events, *m_rightEvents;
    /// Short-term storage for edge events completely located left or right
    EdgeEventVec m_eventsL, m_eventsR;
    /// Short-term storage for edge events generated by prims overlapping the split
    EdgeEventVec m_sEventsL, m_sEventsR;
    /// Statistics
    int m_leafNodes, m_innerNodes;
    int m_minLeafPrims, m_maxLeafPrims;
    int m_totalLeafDepth, m_actualMaxDepth;
    int m_leafPrims, m_clippedAway;
    int m_badRefines, m_bucketCount;
    int m_failures, *m_triBuckets;
};
|
|
|
|
MTS_NAMESPACE_END
|
|
|
|
#endif /* __KD_TREE_H */
|