collect some more statistics, code for fitting SAH cost constants to empirical measurements
parent
efb75bbf7b
commit
5feb7753d8
|
@ -33,6 +33,9 @@ public:
|
||||||
/// Initialize with the identity matrix
|
/// Initialize with the identity matrix
|
||||||
Matrix4x4();
|
Matrix4x4();
|
||||||
|
|
||||||
|
/// Initialize the matrix with constant entries
|
||||||
|
Matrix4x4(Float value);
|
||||||
|
|
||||||
/// Unserialize a matrix from a stream
|
/// Unserialize a matrix from a stream
|
||||||
Matrix4x4(Stream *stream);
|
Matrix4x4(Stream *stream);
|
||||||
|
|
||||||
|
@ -229,6 +232,31 @@ public:
|
||||||
+ m_invTransform->m[2][2] * v.z;
|
+ m_invTransform->m[2][2] * v.z;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// 4D matrix-vector multiplication
|
||||||
|
inline Vector4 operator()(const Vector4 &v) const {
|
||||||
|
Float x = m_transform->m[0][0] * v.x + m_transform->m[0][1] * v.y
|
||||||
|
+ m_transform->m[0][2] * v.z + m_transform->m[0][3] * v.w;
|
||||||
|
Float y = m_transform->m[1][0] * v.x + m_transform->m[1][1] * v.y
|
||||||
|
+ m_transform->m[1][2] * v.z + m_transform->m[1][3] * v.w;
|
||||||
|
Float z = m_transform->m[2][0] * v.x + m_transform->m[2][1] * v.y
|
||||||
|
+ m_transform->m[2][2] * v.z + m_transform->m[2][3] * v.w;
|
||||||
|
Float w = m_transform->m[3][0] * v.x + m_transform->m[3][1] * v.y
|
||||||
|
+ m_transform->m[3][2] * v.z + m_transform->m[3][3] * v.w;
|
||||||
|
return Vector4(x,y,z,w);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// 4D matrix-vector multiplication
|
||||||
|
inline void operator()(const Vector4 &v, Vector4 &dest) const {
|
||||||
|
dest.x = m_transform->m[0][0] * v.x + m_transform->m[0][1] * v.y
|
||||||
|
+ m_transform->m[0][2] * v.z + m_transform->m[0][3] * v.w;
|
||||||
|
dest.y = m_transform->m[1][0] * v.x + m_transform->m[1][1] * v.y
|
||||||
|
+ m_transform->m[1][2] * v.z + m_transform->m[1][3] * v.w;
|
||||||
|
dest.z = m_transform->m[2][0] * v.x + m_transform->m[2][1] * v.y
|
||||||
|
+ m_transform->m[2][2] * v.z + m_transform->m[2][3] * v.w;
|
||||||
|
dest.w = m_transform->m[3][0] * v.x + m_transform->m[3][1] * v.y
|
||||||
|
+ m_transform->m[3][2] * v.z + m_transform->m[3][3] * v.w;
|
||||||
|
}
|
||||||
|
|
||||||
/// Transform a ray. Assumes that there is no scaling
|
/// Transform a ray. Assumes that there is no scaling
|
||||||
inline void operator()(const Ray &a, Ray &b) const {
|
inline void operator()(const Ray &a, Ray &b) const {
|
||||||
b.mint = a.mint;
|
b.mint = a.mint;
|
||||||
|
|
|
@ -24,7 +24,7 @@
|
||||||
#include <boost/tuple/tuple.hpp>
|
#include <boost/tuple/tuple.hpp>
|
||||||
|
|
||||||
/// Activate lots of extra checks
|
/// Activate lots of extra checks
|
||||||
#define MTS_KD_DEBUG 1
|
//#define MTS_KD_DEBUG 1
|
||||||
|
|
||||||
/** Compile-time KD-tree depth limit. Allows to put certain
|
/** Compile-time KD-tree depth limit. Allows to put certain
|
||||||
data structures on the stack */
|
data structures on the stack */
|
||||||
|
@ -689,9 +689,29 @@ public:
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* \brief Build a KD-tree over supplied geometry
|
* \brief Intersect a ray against all primitives stored in the kd-tree
|
||||||
*/
|
*/
|
||||||
void build() {
|
bool rayIntersect(const Ray &ray, Intersection &its) const;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* \brief Test a ray for intersection against all primitives stored in the kd-tree
|
||||||
|
*/
|
||||||
|
bool rayIntersect(const Ray &ray) const;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* \brief Empirically find the best traversal and intersection cost values
|
||||||
|
*
|
||||||
|
* This is done by running the traversal code on random rays and fitting
|
||||||
|
* the SAH cost model to the collected statistics.
|
||||||
|
*/
|
||||||
|
void findCosts(Float &traversalCost, Float &intersectionCost);
|
||||||
|
protected:
|
||||||
|
/**
|
||||||
|
* \brief Build a KD-tree over the supplied geometry
|
||||||
|
*
|
||||||
|
* To be called by the subclass.
|
||||||
|
*/
|
||||||
|
void buildInternal() {
|
||||||
if (isBuilt())
|
if (isBuilt())
|
||||||
Log(EError, "The kd-tree has already been built!");
|
Log(EError, "The kd-tree has already been built!");
|
||||||
|
|
||||||
|
@ -806,6 +826,10 @@ public:
|
||||||
Float expPrimitivesIntersected = 0;
|
Float expPrimitivesIntersected = 0;
|
||||||
Float sahCost = 0;
|
Float sahCost = 0;
|
||||||
size_type nodePtr = 0, indexPtr = 0;
|
size_type nodePtr = 0, indexPtr = 0;
|
||||||
|
size_type maxPrimsInLeaf = 0;
|
||||||
|
const size_type primBucketCount = 16;
|
||||||
|
size_type primBuckets[primBucketCount];
|
||||||
|
memset(primBuckets, 0, sizeof(size_type)*primBucketCount);
|
||||||
|
|
||||||
m_nodes = static_cast<KDNode *> (allocAligned(
|
m_nodes = static_cast<KDNode *> (allocAligned(
|
||||||
sizeof(KDNode) * (ctx.innerNodeCount + ctx.leafNodeCount)));
|
sizeof(KDNode) * (ctx.innerNodeCount + ctx.leafNodeCount)));
|
||||||
|
@ -829,6 +853,10 @@ public:
|
||||||
expLeavesVisited += aabb.getSurfaceArea();
|
expLeavesVisited += aabb.getSurfaceArea();
|
||||||
expPrimitivesIntersected += weightedSA;
|
expPrimitivesIntersected += weightedSA;
|
||||||
sahCost += weightedSA * m_intersectionCost;
|
sahCost += weightedSA * m_intersectionCost;
|
||||||
|
if (primCount < primBucketCount)
|
||||||
|
primBuckets[primCount]++;
|
||||||
|
if (primCount > maxPrimsInLeaf)
|
||||||
|
maxPrimsInLeaf = primCount;
|
||||||
|
|
||||||
const BlockedVector<index_type, MTS_KD_BLOCKSIZE_IDX> &indices
|
const BlockedVector<index_type, MTS_KD_BLOCKSIZE_IDX> &indices
|
||||||
= context->indices;
|
= context->indices;
|
||||||
|
@ -906,18 +934,33 @@ public:
|
||||||
m_aabb.max += (m_aabb.max-m_aabb.min) * Epsilon
|
m_aabb.max += (m_aabb.max-m_aabb.min) * Epsilon
|
||||||
+ Vector(Epsilon, Epsilon, Epsilon);
|
+ Vector(Epsilon, Epsilon, Epsilon);
|
||||||
|
|
||||||
Log(EDebug, "Detailed kd-tree statistics:");
|
Log(EDebug, "Structural kd-tree statistics:");
|
||||||
Log(EDebug, " Inner nodes : %i", ctx.innerNodeCount);
|
Log(EDebug, " Parallel work units : " SIZE_T_FMT,
|
||||||
Log(EDebug, " Leaf nodes : %i", ctx.leafNodeCount);
|
m_interface.threadMap.size());
|
||||||
Log(EDebug, " Nonempty leaf nodes : %i", ctx.nonemptyLeafNodeCount);
|
|
||||||
Log(EDebug, " Node storage cost : %s",
|
Log(EDebug, " Node storage cost : %s",
|
||||||
memString(nodePtr * sizeof(KDNode)).c_str());
|
memString(nodePtr * sizeof(KDNode)).c_str());
|
||||||
Log(EDebug, " Index storage cost : %s",
|
Log(EDebug, " Index storage cost : %s",
|
||||||
memString(indexPtr * sizeof(index_type)).c_str());
|
memString(indexPtr * sizeof(index_type)).c_str());
|
||||||
Log(EDebug, " Parallel work units : " SIZE_T_FMT,
|
Log(EDebug, " Inner nodes : %i", ctx.innerNodeCount);
|
||||||
m_interface.threadMap.size());
|
Log(EDebug, " Leaf nodes : %i", ctx.leafNodeCount);
|
||||||
|
Log(EDebug, " Nonempty leaf nodes : %i", ctx.nonemptyLeafNodeCount);
|
||||||
|
std::ostringstream oss;
|
||||||
|
oss << " Leaf node histogram : ";
|
||||||
|
for (int i=0; i<primBucketCount; i++) {
|
||||||
|
oss << i << "(" << primBuckets[i] << ") ";
|
||||||
|
if ((i+1)%4==0 && i+1<primBucketCount) {
|
||||||
|
Log(EDebug, "%s", oss.str().c_str());
|
||||||
|
oss.str("");
|
||||||
|
oss << " ";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Log(EDebug, "%s", oss.str().c_str());
|
||||||
|
Log(EDebug, "");
|
||||||
|
Log(EDebug, "Qualitative kd-tree statistics:");
|
||||||
Log(EDebug, " Retracted splits : %i", ctx.retractedSplits);
|
Log(EDebug, " Retracted splits : %i", ctx.retractedSplits);
|
||||||
Log(EDebug, " Pruned primitives : %i", ctx.pruned);
|
Log(EDebug, " Pruned primitives : %i", ctx.pruned);
|
||||||
|
Log(EDebug, " Largest leaf node : %i primitives",
|
||||||
|
maxPrimsInLeaf);
|
||||||
Log(EDebug, " Avg. prims/nonempty leaf : %.2f",
|
Log(EDebug, " Avg. prims/nonempty leaf : %.2f",
|
||||||
ctx.primIndexCount / (Float) ctx.nonemptyLeafNodeCount);
|
ctx.primIndexCount / (Float) ctx.nonemptyLeafNodeCount);
|
||||||
Log(EDebug, " Expected traversals/ray : %.2f", expTraversalSteps);
|
Log(EDebug, " Expected traversals/ray : %.2f", expTraversalSteps);
|
||||||
|
@ -927,16 +970,6 @@ public:
|
||||||
Log(EDebug, "");
|
Log(EDebug, "");
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* \brief Intersect a ray against all primitives stored in the kd-tree
|
|
||||||
*/
|
|
||||||
bool rayIntersect(const Ray &ray, Intersection &its) const;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* \brief Test a ray for intersection against all primitives stored in the kd-tree
|
|
||||||
*/
|
|
||||||
bool rayIntersect(const Ray &ray) const;
|
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
/// Primitive classification during tree-construction
|
/// Primitive classification during tree-construction
|
||||||
enum EClassificationResult {
|
enum EClassificationResult {
|
||||||
|
@ -2449,8 +2482,6 @@ protected:
|
||||||
template<bool shadowRay> FINLINE bool rayIntersectHavran(const Ray &ray,
|
template<bool shadowRay> FINLINE bool rayIntersectHavran(const Ray &ray,
|
||||||
Float mint, Float maxt, Float &t, void *temp) const {
|
Float mint, Float maxt, Float &t, void *temp) const {
|
||||||
KDStackEntryHavran stack[MTS_KD_MAXDEPTH];
|
KDStackEntryHavran stack[MTS_KD_MAXDEPTH];
|
||||||
const KDNode * __restrict farChild,
|
|
||||||
* __restrict currNode = m_nodes;
|
|
||||||
#if 0
|
#if 0
|
||||||
static const int prevAxisTable[] = { 2, 0, 1 };
|
static const int prevAxisTable[] = { 2, 0, 1 };
|
||||||
static const int nextAxisTable[] = { 1, 2, 0 };
|
static const int nextAxisTable[] = { 1, 2, 0 };
|
||||||
|
@ -2471,10 +2502,12 @@ protected:
|
||||||
stack[exPt].p = ray(maxt);
|
stack[exPt].p = ray(maxt);
|
||||||
stack[exPt].node = NULL;
|
stack[exPt].node = NULL;
|
||||||
|
|
||||||
|
const KDNode * __restrict currNode = m_nodes;
|
||||||
while (currNode != NULL) {
|
while (currNode != NULL) {
|
||||||
while (EXPECT_TAKEN(!currNode->isLeaf())) {
|
while (EXPECT_TAKEN(!currNode->isLeaf())) {
|
||||||
const Float splitVal = (Float) currNode->getSplit();
|
const Float splitVal = (Float) currNode->getSplit();
|
||||||
const int axis = currNode->getAxis();
|
const int axis = currNode->getAxis();
|
||||||
|
const KDNode * __restrict farChild;
|
||||||
|
|
||||||
if (stack[enPt].p[axis] <= splitVal) {
|
if (stack[enPt].p[axis] <= splitVal) {
|
||||||
if (stack[exPt].p[axis] <= splitVal) {
|
if (stack[exPt].p[axis] <= splitVal) {
|
||||||
|
@ -2584,6 +2617,131 @@ protected:
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* \brief Internal kd-tree traversal implementation (Havran variant)
|
||||||
|
*
|
||||||
|
* This method is almost identical to \ref rayIntersectHavran, except
|
||||||
|
* that it additionally returns statistics on the number of traversed
|
||||||
|
* nodes, intersected shapes, as well as the time taken to do this
|
||||||
|
* (measured using rtdsc).
|
||||||
|
*/
|
||||||
|
FINLINE boost::tuple<bool, uint32_t, uint32_t, uint64_t>
|
||||||
|
rayIntersectHavranCollectStatistics(const Ray &ray,
|
||||||
|
Float mint, Float maxt, Float &t, void *temp) const {
|
||||||
|
KDStackEntryHavran stack[MTS_KD_MAXDEPTH];
|
||||||
|
|
||||||
|
/* Set up the entry point */
|
||||||
|
uint32_t enPt = 0;
|
||||||
|
stack[enPt].t = mint;
|
||||||
|
stack[enPt].p = ray(mint);
|
||||||
|
|
||||||
|
/* Set up the exit point */
|
||||||
|
uint32_t exPt = 1;
|
||||||
|
stack[exPt].t = maxt;
|
||||||
|
stack[exPt].p = ray(maxt);
|
||||||
|
stack[exPt].node = NULL;
|
||||||
|
|
||||||
|
uint32_t numTraversals = 0;
|
||||||
|
uint32_t numIntersections = 0;
|
||||||
|
uint64_t timer = rdtsc();
|
||||||
|
|
||||||
|
const KDNode * __restrict currNode = m_nodes;
|
||||||
|
while (currNode != NULL) {
|
||||||
|
while (EXPECT_TAKEN(!currNode->isLeaf())) {
|
||||||
|
const Float splitVal = (Float) currNode->getSplit();
|
||||||
|
const int axis = currNode->getAxis();
|
||||||
|
const KDNode * __restrict farChild;
|
||||||
|
|
||||||
|
++numTraversals;
|
||||||
|
if (stack[enPt].p[axis] <= splitVal) {
|
||||||
|
if (stack[exPt].p[axis] <= splitVal) {
|
||||||
|
/* Cases N1, N2, N3, P5, Z2 and Z3 (see thesis) */
|
||||||
|
currNode = currNode->getLeft();
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Typo in Havran's thesis:
|
||||||
|
(it specifies "stack[exPt].p == splitVal", which
|
||||||
|
is clearly incorrect) */
|
||||||
|
if (stack[enPt].p[axis] == splitVal) {
|
||||||
|
/* Case Z1 */
|
||||||
|
currNode = currNode->getRight();
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Case N4 */
|
||||||
|
currNode = currNode->getLeft();
|
||||||
|
farChild = currNode + 1; // getRight()
|
||||||
|
} else { /* stack[enPt].p[axis] > splitVal */
|
||||||
|
if (splitVal < stack[exPt].p[axis]) {
|
||||||
|
/* Cases P1, P2, P3 and N5 */
|
||||||
|
currNode = currNode->getRight();
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
/* Case P4 */
|
||||||
|
farChild = currNode->getLeft();
|
||||||
|
currNode = farChild + 1; // getRight()
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Cases P4 and N4 -- calculate the distance to the split plane */
|
||||||
|
t = (splitVal - ray.o[axis]) * ray.dRcp[axis];
|
||||||
|
|
||||||
|
/* Set up a new exit point */
|
||||||
|
const uint32_t tmp = exPt++;
|
||||||
|
if (exPt == enPt) /* Do not overwrite the entry point */
|
||||||
|
++exPt;
|
||||||
|
|
||||||
|
KDAssert(exPt < MTS_KD_MAX_DEPTH);
|
||||||
|
stack[exPt].prev = tmp;
|
||||||
|
stack[exPt].t = t;
|
||||||
|
stack[exPt].node = farChild;
|
||||||
|
stack[exPt].p = ray(t);
|
||||||
|
stack[exPt].p[axis] = splitVal;
|
||||||
|
}
|
||||||
|
|
||||||
|
#if defined(SINGLE_PRECISION)
|
||||||
|
const Float eps = 1e-3;
|
||||||
|
#else
|
||||||
|
const Float eps = 1e-5;
|
||||||
|
#endif
|
||||||
|
const Float m_eps = 1-eps, p_eps = 1+eps;
|
||||||
|
|
||||||
|
/* Floating-point arithmetic.. - use both absolute and relative
|
||||||
|
epsilons when looking for intersections in the subinterval */
|
||||||
|
const Float searchStart = std::max(mint, stack[enPt].t * m_eps - eps);
|
||||||
|
Float searchEnd = std::min(maxt, stack[exPt].t * p_eps + eps);
|
||||||
|
|
||||||
|
/* Reached a leaf node */
|
||||||
|
bool foundIntersection = false;
|
||||||
|
for (unsigned int entry=currNode->getPrimStart(),
|
||||||
|
last = currNode->getPrimEnd(); entry != last; entry++) {
|
||||||
|
const index_type primIdx = m_indices[entry];
|
||||||
|
|
||||||
|
++numIntersections;
|
||||||
|
EIntersectionResult result = cast()->intersect(ray,
|
||||||
|
primIdx, searchStart, searchEnd, t, temp);
|
||||||
|
|
||||||
|
if (result == EYes) {
|
||||||
|
searchEnd = t;
|
||||||
|
foundIntersection = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (foundIntersection)
|
||||||
|
return boost::make_tuple(true, numTraversals,
|
||||||
|
numIntersections, rdtsc() - timer);
|
||||||
|
|
||||||
|
|
||||||
|
/* Pop from the stack and advance to the next node on the interval */
|
||||||
|
enPt = exPt;
|
||||||
|
currNode = stack[exPt].node;
|
||||||
|
exPt = stack[enPt].prev;
|
||||||
|
}
|
||||||
|
|
||||||
|
return boost::make_tuple(false, numTraversals,
|
||||||
|
numIntersections, rdtsc() - timer);
|
||||||
|
}
|
||||||
|
|
||||||
struct KDStackEntry {
|
struct KDStackEntry {
|
||||||
const KDNode * __restrict node;
|
const KDNode * __restrict node;
|
||||||
Float mint, maxt;
|
Float mint, maxt;
|
||||||
|
@ -2736,6 +2894,94 @@ template <typename Derived> bool GenericKDTree<Derived>::rayIntersect(
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <typename Derived> void GenericKDTree<Derived>::findCosts(
|
||||||
|
Float &traversalCost, Float &intersectionCost) {
|
||||||
|
ref<Random> random = new Random();
|
||||||
|
uint32_t temp[MTS_KD_INTERSECTION_TEMP];
|
||||||
|
BSphere bsphere = m_aabb.getBSphere();
|
||||||
|
const size_t nRays = 10000000, warmup = nRays/4;
|
||||||
|
Vector *A = new Vector[nRays-warmup];
|
||||||
|
Float *b = new Float[nRays-warmup];
|
||||||
|
size_t nIntersections = 0, idx = 0;
|
||||||
|
|
||||||
|
for (size_t i=0; i<nRays; ++i) {
|
||||||
|
Point2 sample1(random->nextFloat(), random->nextFloat()),
|
||||||
|
sample2(random->nextFloat(), random->nextFloat());
|
||||||
|
Point p1 = bsphere.center + squareToSphere(sample1) * bsphere.radius;
|
||||||
|
Point p2 = bsphere.center + squareToSphere(sample2) * bsphere.radius;
|
||||||
|
Ray ray(p1, normalize(p2-p1));
|
||||||
|
Float mint, maxt, t;
|
||||||
|
if (ray.mint > mint) mint = ray.mint;
|
||||||
|
if (ray.maxt < maxt) maxt = ray.maxt;
|
||||||
|
if (m_aabb.rayIntersect(ray, mint, maxt)) {
|
||||||
|
if (EXPECT_TAKEN(maxt > mint)) {
|
||||||
|
boost::tuple<bool, uint32_t, uint32_t, uint64_t> statistics =
|
||||||
|
rayIntersectHavranCollectStatistics(ray, mint, maxt, t, temp);
|
||||||
|
if (boost::get<0>(statistics))
|
||||||
|
nIntersections++;
|
||||||
|
if (i > warmup) {
|
||||||
|
A[idx].x = 1;
|
||||||
|
A[idx].y = boost::get<1>(statistics);
|
||||||
|
A[idx].z = boost::get<2>(statistics);
|
||||||
|
b[idx] = boost::get<3>(statistics);
|
||||||
|
idx++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Log(EDebug, "Fitting to " SIZE_T_FMT " samples (" SIZE_T_FMT
|
||||||
|
" intersections)", idx, nIntersections);
|
||||||
|
|
||||||
|
/* Solve using normal equations */
|
||||||
|
ref<Matrix4x4> M = new Matrix4x4(0.0f);
|
||||||
|
Vector4 rhs(0.0f), x;
|
||||||
|
|
||||||
|
for (size_t i=0; i<3; ++i) {
|
||||||
|
for (size_t j=0; j<3; ++j)
|
||||||
|
for (size_t k=0; k<idx; ++k)
|
||||||
|
M->m[i][j] += A[k][i]*A[k][j];
|
||||||
|
for (size_t k=0; k<idx; ++k)
|
||||||
|
rhs[i] += A[k][i]*b[k];
|
||||||
|
}
|
||||||
|
M->m[3][3] = 1.0f;
|
||||||
|
|
||||||
|
Transform(M->inverse())(rhs, x);
|
||||||
|
|
||||||
|
Float avgRdtsc = 0, avgResidual = 0;
|
||||||
|
for (size_t i=0; i<idx; ++i) {
|
||||||
|
avgRdtsc += b[i];
|
||||||
|
Float model = x[0] * A[i][0]
|
||||||
|
+ x[1] * A[i][1]
|
||||||
|
+ x[2] * A[i][2];
|
||||||
|
//cout << b[i] << " vs " << (int) model << endl;
|
||||||
|
avgResidual += std::abs(b[i] - model);
|
||||||
|
}
|
||||||
|
avgRdtsc /= idx;
|
||||||
|
avgResidual /= idx;
|
||||||
|
|
||||||
|
for (size_t k=0; k<idx; ++k)
|
||||||
|
avgRdtsc += b[k];
|
||||||
|
avgRdtsc /= idx;
|
||||||
|
|
||||||
|
Log(EDebug, "Least squares fit:");
|
||||||
|
Log(EDebug, " Constant overhead = %.2f", x[0]);
|
||||||
|
Log(EDebug, " Traversal cost = %.2f", x[1]);
|
||||||
|
Log(EDebug, " Intersection cost = %.2f", x[2]);
|
||||||
|
Log(EDebug, " Average rdtsc value = %.2f", avgRdtsc);
|
||||||
|
Log(EDebug, " Avg. residual = %.2f", avgResidual);
|
||||||
|
x *= 10/x[1];
|
||||||
|
Log(EDebug, "Re-scaled:");
|
||||||
|
Log(EDebug, " Constant overhead = %.2f", x[0]);
|
||||||
|
Log(EDebug, " Traversal cost = %.2f", x[1]);
|
||||||
|
Log(EDebug, " Intersection cost = %.2f", x[2]);
|
||||||
|
|
||||||
|
delete[] A;
|
||||||
|
delete[] b;
|
||||||
|
traversalCost = x[1];
|
||||||
|
intersectionCost = x[2];
|
||||||
|
}
|
||||||
|
|
||||||
MTS_NAMESPACE_END
|
MTS_NAMESPACE_END
|
||||||
|
|
||||||
#endif /* __KDTREE_GENERIC_H */
|
#endif /* __KDTREE_GENERIC_H */
|
||||||
|
|
|
@ -30,10 +30,10 @@
|
||||||
* whole following block:
|
* whole following block:
|
||||||
*/
|
*/
|
||||||
#ifdef MTS_SSE
|
#ifdef MTS_SSE
|
||||||
//#define MTS_USE_TRIACCEL4 1
|
#define MTS_USE_TRIACCEL4 1
|
||||||
//#define MTS_USE_TRIACCEL 1
|
#define MTS_USE_TRIACCEL 1
|
||||||
#else
|
#else
|
||||||
//#define MTS_USE_TRIACCEL 1
|
#define MTS_USE_TRIACCEL 1
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(MTS_HAS_COHERENT_RT)
|
#if defined(MTS_HAS_COHERENT_RT)
|
||||||
|
|
|
@ -95,15 +95,46 @@ inline int TriAccel::load(const Point &A, const Point &B, const Point &C) {
|
||||||
|
|
||||||
FINLINE bool TriAccel::rayIntersect(const Ray &ray, Float mint, Float maxt,
|
FINLINE bool TriAccel::rayIntersect(const Ray &ray, Float mint, Float maxt,
|
||||||
Float &u, Float &v, Float &t) const {
|
Float &u, Float &v, Float &t) const {
|
||||||
static const int waldModulo[4] = { 1, 2, 0, 1 };
|
|
||||||
const int ku = waldModulo[k], kv = waldModulo[k+1];
|
|
||||||
|
|
||||||
|
#if 0
|
||||||
|
static const MM_ALIGN16 int waldModulo[4] = { 1, 2, 0, 1 };
|
||||||
|
const int ku = waldModulo[k], kv = waldModulo[k+1];
|
||||||
/* Get the u and v components */
|
/* Get the u and v components */
|
||||||
const Float o_u = ray.o[ku], o_v = ray.o[kv], o_k = ray.o[k],
|
const Float o_u = ray.o[ku], o_v = ray.o[kv], o_k = ray.o[k],
|
||||||
d_u = ray.d[ku], d_v = ray.d[kv], d_k = ray.d[k];
|
d_u = ray.d[ku], d_v = ray.d[kv], d_k = ray.d[k];
|
||||||
|
#else
|
||||||
|
Float o_u, o_v, o_k, d_u, d_v, d_k;
|
||||||
|
switch (k) {
|
||||||
|
case 0:
|
||||||
|
o_u = ray.o[1];
|
||||||
|
o_v = ray.o[2];
|
||||||
|
o_k = ray.o[0];
|
||||||
|
d_u = ray.d[1];
|
||||||
|
d_v = ray.d[2];
|
||||||
|
d_k = ray.d[0];
|
||||||
|
break;
|
||||||
|
case 1:
|
||||||
|
o_u = ray.o[2];
|
||||||
|
o_v = ray.o[0];
|
||||||
|
o_k = ray.o[1];
|
||||||
|
d_u = ray.d[2];
|
||||||
|
d_v = ray.d[0];
|
||||||
|
d_k = ray.d[1];
|
||||||
|
break;
|
||||||
|
case 2:
|
||||||
|
o_u = ray.o[0];
|
||||||
|
o_v = ray.o[1];
|
||||||
|
o_k = ray.o[2];
|
||||||
|
d_u = ray.d[0];
|
||||||
|
d_v = ray.d[1];
|
||||||
|
d_k = ray.d[2];
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
/* Calculate the plane intersection (Typo in the thesis?) */
|
/* Calculate the plane intersection (Typo in the thesis?) */
|
||||||
t = (n_d - o_u*n_u - o_v*n_v - o_k) / (d_u * n_u + d_v * n_v + d_k);
|
Float recip = 1.0f / (d_u * n_u + d_v * n_v + d_k);
|
||||||
|
t = (n_d - o_u*n_u - o_v*n_v - o_k) * recip;
|
||||||
|
|
||||||
if (t < mint || t > maxt)
|
if (t < mint || t > maxt)
|
||||||
return false;
|
return false;
|
||||||
|
@ -115,8 +146,7 @@ FINLINE bool TriAccel::rayIntersect(const Ray &ray, Float mint, Float maxt,
|
||||||
/* In barycentric coordinates */
|
/* In barycentric coordinates */
|
||||||
u = hv * b_nu + hu * b_nv;
|
u = hv * b_nu + hu * b_nv;
|
||||||
v = hu * c_nu + hv * c_nv;
|
v = hu * c_nu + hv * c_nv;
|
||||||
|
return u >= 0 && v >= 0 && u+v <= 1.0f;
|
||||||
return (u >= 0 && v >= 0 && u+v <= 1.0f);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
MTS_NAMESPACE_END
|
MTS_NAMESPACE_END
|
||||||
|
|
|
@ -57,7 +57,7 @@ struct TriAccel4 {
|
||||||
|
|
||||||
FINLINE __m128 TriAccel::rayIntersectPacket(const RayPacket4 &packet,
|
FINLINE __m128 TriAccel::rayIntersectPacket(const RayPacket4 &packet,
|
||||||
__m128 mint, __m128 maxt, __m128 inactive, Intersection4 &its) const {
|
__m128 mint, __m128 maxt, __m128 inactive, Intersection4 &its) const {
|
||||||
static const int waldModulo[4] = { 1, 2, 0, 1 };
|
static const MM_ALIGN16 int waldModulo[4] = { 1, 2, 0, 1 };
|
||||||
const int ku = waldModulo[k], kv = waldModulo[k+1];
|
const int ku = waldModulo[k], kv = waldModulo[k+1];
|
||||||
|
|
||||||
/* Get the u and v components */
|
/* Get the u and v components */
|
||||||
|
|
|
@ -232,6 +232,12 @@ Matrix4x4::Matrix4x4() {
|
||||||
m[i][j] = (i == j) ? 1.0f : 0.0f;
|
m[i][j] = (i == j) ? 1.0f : 0.0f;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Matrix4x4::Matrix4x4(Float value) {
|
||||||
|
for (int i=0; i<4; i++)
|
||||||
|
for (int j=0; j<4; j++)
|
||||||
|
m[i][j] = value;
|
||||||
|
}
|
||||||
|
|
||||||
Matrix4x4::Matrix4x4(Float _m[4][4]) {
|
Matrix4x4::Matrix4x4(Float _m[4][4]) {
|
||||||
memcpy(m, _m, sizeof(Float) * 16);
|
memcpy(m, _m, sizeof(Float) * 16);
|
||||||
}
|
}
|
||||||
|
|
|
@ -22,6 +22,103 @@
|
||||||
|
|
||||||
MTS_NAMESPACE_BEGIN
|
MTS_NAMESPACE_BEGIN
|
||||||
|
|
||||||
|
class TriKDTree : public GenericKDTree<TriKDTree> {
|
||||||
|
public:
|
||||||
|
TriKDTree(const Triangle *triangles,
|
||||||
|
const Vertex *vertexBuffer,
|
||||||
|
size_type triangleCount)
|
||||||
|
: m_triangles(triangles),
|
||||||
|
m_vertexBuffer(vertexBuffer),
|
||||||
|
m_triangleCount(triangleCount) {
|
||||||
|
}
|
||||||
|
|
||||||
|
FINLINE AABB getAABB(index_type idx) const {
|
||||||
|
return m_triangles[idx].getAABB(m_vertexBuffer);
|
||||||
|
}
|
||||||
|
|
||||||
|
FINLINE AABB getClippedAABB(index_type idx, const AABB &aabb) const {
|
||||||
|
return m_triangles[idx].getClippedAABB(m_vertexBuffer, aabb);
|
||||||
|
}
|
||||||
|
|
||||||
|
FINLINE size_type getPrimitiveCount() const {
|
||||||
|
return m_triangleCount;
|
||||||
|
}
|
||||||
|
|
||||||
|
FINLINE EIntersectionResult intersect(const Ray &ray, index_type idx,
|
||||||
|
Float mint, Float maxt, Float &t, void *tmp) const {
|
||||||
|
Float tempT, tempU, tempV;
|
||||||
|
#if 0
|
||||||
|
if (m_triangles[idx].rayIntersect(m_vertexBuffer, ray, tempU, tempV, tempT)) {
|
||||||
|
if (tempT < mint && tempT > maxt)
|
||||||
|
return ENo;
|
||||||
|
#else
|
||||||
|
if (m_triAccel[idx].rayIntersect(ray, mint, maxt, tempU, tempV, tempT)) {
|
||||||
|
#endif
|
||||||
|
index_type *indexPtr = reinterpret_cast<index_type *>(tmp);
|
||||||
|
Float *floatPtr = reinterpret_cast<Float *>(indexPtr + 1);
|
||||||
|
|
||||||
|
t = tempT;
|
||||||
|
*indexPtr = idx;
|
||||||
|
*floatPtr++ = tempU;
|
||||||
|
*floatPtr++ = tempV;
|
||||||
|
|
||||||
|
return EYes;
|
||||||
|
}
|
||||||
|
return ENo;
|
||||||
|
}
|
||||||
|
|
||||||
|
void build() {
|
||||||
|
buildInternal();
|
||||||
|
Log(EInfo, "Precomputing triangle intersection information (%s)",
|
||||||
|
memString(sizeof(TriAccel)*m_triangleCount).c_str());
|
||||||
|
m_triAccel = static_cast<TriAccel *>(allocAligned(m_triangleCount * sizeof(TriAccel)));
|
||||||
|
for (size_t i=0; i<m_triangleCount; ++i) {
|
||||||
|
const Triangle &tri = m_triangles[i];
|
||||||
|
const Vertex &v0 = m_vertexBuffer[tri.idx[0]];
|
||||||
|
const Vertex &v1 = m_vertexBuffer[tri.idx[1]];
|
||||||
|
const Vertex &v2 = m_vertexBuffer[tri.idx[2]];
|
||||||
|
|
||||||
|
m_triAccel[i].load(v0.p, v1.p, v2.p);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
FINLINE void fillIntersectionDetails(const Ray &ray,
|
||||||
|
Float t, const void *tmp, Intersection &its) const {
|
||||||
|
its.p = ray(t);
|
||||||
|
|
||||||
|
//const index_type *indexPtr = reinterpret_cast<const index_type *>(tmp);
|
||||||
|
//const Float *floatPtr = reinterpret_cast<const Float *>(indexPtr + 1);
|
||||||
|
|
||||||
|
//const Triangle &tri = m_triangles[*indexPtr];
|
||||||
|
//const Vertex &v0 = m_vertexBuffer[tri.idx[0]];
|
||||||
|
//const Vertex &v1 = m_vertexBuffer[tri.idx[1]];
|
||||||
|
//const Vertex &v2 = m_vertexBuffer[tri.idx[2]];
|
||||||
|
|
||||||
|
//const Float u = *floatPtr++, v = *floatPtr++;
|
||||||
|
//const Vector b(1 - u - v, u, v);
|
||||||
|
/*
|
||||||
|
its.uv = v0.uv * b.x + v1.uv * b.y + v2.uv * b.z;
|
||||||
|
its.dpdu = v0.dpdu * b.x + v1.dpdu * b.y + v2.dpdu * b.z;
|
||||||
|
its.dpdv = v0.dpdv * b.x + v1.dpdv * b.y + v2.dpdv * b.z;
|
||||||
|
|
||||||
|
its.geoFrame = Frame(normalize(cross(v1.p-v0.p, v2.p-v0.p)));
|
||||||
|
its.shFrame.n = normalize(v0.n * b.x + v1.n * b.y + v2.n * b.z);
|
||||||
|
its.shFrame.s = normalize(its.dpdu - its.shFrame.n
|
||||||
|
* dot(its.shFrame.n, its.dpdu));
|
||||||
|
its.shFrame.t = cross(its.shFrame.n, its.shFrame.s);
|
||||||
|
its.wi = its.toLocal(-ray.d);
|
||||||
|
*/
|
||||||
|
its.hasUVPartials = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
const Triangle *m_triangles;
|
||||||
|
const Vertex *m_vertexBuffer;
|
||||||
|
TriAccel *m_triAccel;
|
||||||
|
size_type m_triangleCount;
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
class TestKDTree : public TestCase {
|
class TestKDTree : public TestCase {
|
||||||
public:
|
public:
|
||||||
MTS_BEGIN_TESTCASE()
|
MTS_BEGIN_TESTCASE()
|
||||||
|
@ -81,85 +178,6 @@ public:
|
||||||
assertEquals(Point(1, 1, 0), clippedAABB.max);
|
assertEquals(Point(1, 1, 0), clippedAABB.max);
|
||||||
}
|
}
|
||||||
|
|
||||||
class TriKDTree : public GenericKDTree<TriKDTree> {
|
|
||||||
public:
|
|
||||||
TriKDTree(const Triangle *triangles,
|
|
||||||
const Vertex *vertexBuffer,
|
|
||||||
size_type triangleCount)
|
|
||||||
: m_triangles(triangles),
|
|
||||||
m_vertexBuffer(vertexBuffer),
|
|
||||||
m_triangleCount(triangleCount) {
|
|
||||||
}
|
|
||||||
|
|
||||||
FINLINE AABB getAABB(index_type idx) const {
|
|
||||||
return m_triangles[idx].getAABB(m_vertexBuffer);
|
|
||||||
}
|
|
||||||
|
|
||||||
FINLINE AABB getClippedAABB(index_type idx, const AABB &aabb) const {
|
|
||||||
return m_triangles[idx].getClippedAABB(m_vertexBuffer, aabb);
|
|
||||||
}
|
|
||||||
|
|
||||||
FINLINE size_type getPrimitiveCount() const {
|
|
||||||
return m_triangleCount;
|
|
||||||
}
|
|
||||||
|
|
||||||
FINLINE EIntersectionResult intersect(const Ray &ray, index_type idx,
|
|
||||||
Float mint, Float maxt, Float &t, void *tmp) const {
|
|
||||||
Float tempT, tempU, tempV;
|
|
||||||
if (m_triangles[idx].rayIntersect(m_vertexBuffer, ray, tempU, tempV, tempT)) {
|
|
||||||
if (tempT >= mint && tempT <= maxt) {
|
|
||||||
index_type *indexPtr = reinterpret_cast<index_type *>(tmp);
|
|
||||||
Float *floatPtr = reinterpret_cast<Float *>(indexPtr + 1);
|
|
||||||
|
|
||||||
t = tempT;
|
|
||||||
*indexPtr = idx;
|
|
||||||
*floatPtr++ = tempU;
|
|
||||||
*floatPtr++ = tempV;
|
|
||||||
|
|
||||||
return EYes;
|
|
||||||
}
|
|
||||||
return ENo;
|
|
||||||
} else {
|
|
||||||
return ENever;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
FINLINE void fillIntersectionDetails(const Ray &ray,
|
|
||||||
Float t, const void *tmp, Intersection &its) const {
|
|
||||||
its.p = ray(t);
|
|
||||||
|
|
||||||
//const index_type *indexPtr = reinterpret_cast<const index_type *>(tmp);
|
|
||||||
//const Float *floatPtr = reinterpret_cast<const Float *>(indexPtr + 1);
|
|
||||||
|
|
||||||
//const Triangle &tri = m_triangles[*indexPtr];
|
|
||||||
//const Vertex &v0 = m_vertexBuffer[tri.idx[0]];
|
|
||||||
//const Vertex &v1 = m_vertexBuffer[tri.idx[1]];
|
|
||||||
//const Vertex &v2 = m_vertexBuffer[tri.idx[2]];
|
|
||||||
|
|
||||||
//const Float u = *floatPtr++, v = *floatPtr++;
|
|
||||||
//const Vector b(1 - u - v, u, v);
|
|
||||||
/*
|
|
||||||
its.uv = v0.uv * b.x + v1.uv * b.y + v2.uv * b.z;
|
|
||||||
its.dpdu = v0.dpdu * b.x + v1.dpdu * b.y + v2.dpdu * b.z;
|
|
||||||
its.dpdv = v0.dpdv * b.x + v1.dpdv * b.y + v2.dpdv * b.z;
|
|
||||||
|
|
||||||
its.geoFrame = Frame(normalize(cross(v1.p-v0.p, v2.p-v0.p)));
|
|
||||||
its.shFrame.n = normalize(v0.n * b.x + v1.n * b.y + v2.n * b.z);
|
|
||||||
its.shFrame.s = normalize(its.dpdu - its.shFrame.n
|
|
||||||
* dot(its.shFrame.n, its.dpdu));
|
|
||||||
its.shFrame.t = cross(its.shFrame.n, its.shFrame.s);
|
|
||||||
its.wi = its.toLocal(-ray.d);
|
|
||||||
*/
|
|
||||||
its.hasUVPartials = false;
|
|
||||||
}
|
|
||||||
|
|
||||||
private:
|
|
||||||
const Triangle *m_triangles;
|
|
||||||
const Vertex *m_vertexBuffer;
|
|
||||||
size_type m_triangleCount;
|
|
||||||
};
|
|
||||||
|
|
||||||
|
|
||||||
void test02_buildSimple() {
|
void test02_buildSimple() {
|
||||||
Properties bunnyProps("ply");
|
Properties bunnyProps("ply");
|
||||||
bunnyProps.setString("filename", "tools/tests/bunny.ply");
|
bunnyProps.setString("filename", "tools/tests/bunny.ply");
|
||||||
|
@ -169,13 +187,17 @@ public:
|
||||||
mesh->configure();
|
mesh->configure();
|
||||||
TriKDTree tree(mesh->getTriangles(),
|
TriKDTree tree(mesh->getTriangles(),
|
||||||
mesh->getVertexBuffer(), mesh->getTriangleCount());
|
mesh->getVertexBuffer(), mesh->getTriangleCount());
|
||||||
|
tree.setTraversalCost(10);
|
||||||
|
tree.setIntersectionCost(17);
|
||||||
tree.build();
|
tree.build();
|
||||||
BSphere bsphere(mesh->getBSphere());
|
BSphere bsphere(mesh->getBSphere());
|
||||||
|
|
||||||
/*
|
Float intersectionCost, traversalCost;
|
||||||
|
tree.findCosts(intersectionCost, traversalCost);
|
||||||
|
|
||||||
ref<KDTree> oldTree = new KDTree();
|
ref<KDTree> oldTree = new KDTree();
|
||||||
oldTree->addShape(mesh);
|
oldTree->addShape(mesh);
|
||||||
oldTree->build(); */
|
oldTree->build();
|
||||||
|
|
||||||
for (int j=0; j<3; ++j) {
|
for (int j=0; j<3; ++j) {
|
||||||
ref<Random> random = new Random();
|
ref<Random> random = new Random();
|
||||||
|
@ -203,7 +225,7 @@ public:
|
||||||
nIntersections, timer->getMilliseconds());
|
nIntersections, timer->getMilliseconds());
|
||||||
Log(EInfo, "New: %.3f MRays/s",
|
Log(EInfo, "New: %.3f MRays/s",
|
||||||
nRays / (timer->getMilliseconds() * (Float) 1000));
|
nRays / (timer->getMilliseconds() * (Float) 1000));
|
||||||
/*
|
|
||||||
random = new Random();
|
random = new Random();
|
||||||
timer->reset();
|
timer->reset();
|
||||||
nIntersections=0;
|
nIntersections=0;
|
||||||
|
@ -224,7 +246,6 @@ public:
|
||||||
nIntersections, timer->getMilliseconds());
|
nIntersections, timer->getMilliseconds());
|
||||||
Log(EInfo, "Old: %.3f MRays/s",
|
Log(EInfo, "Old: %.3f MRays/s",
|
||||||
nRays / (timer->getMilliseconds() * (Float) 1000));
|
nRays / (timer->getMilliseconds() * (Float) 1000));
|
||||||
*/
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
Loading…
Reference in New Issue