diff --git a/include/mitsuba/render/gkdtree.h b/include/mitsuba/render/gkdtree.h index d42e96fc..7934b375 100644 --- a/include/mitsuba/render/gkdtree.h +++ b/include/mitsuba/render/gkdtree.h @@ -2506,12 +2506,8 @@ protected: /* Set up the entry point */ uint32_t enPt = 0; stack[enPt].t = mint; - //stack[enPt].p = ray(mint); - if (mint >= 0.0f) - stack[enPt].p = ray(mint); - else - stack[enPt].p = ray.o; - + stack[enPt].p = ray(mint); + /* Set up the exit point */ uint32_t exPt = 1; stack[exPt].t = maxt; @@ -2556,9 +2552,7 @@ protected: } /* Cases P4 and N4 -- calculate the distance to the split plane */ - //XXX -// Float distToSplit = (splitVal - ray.o[axis]) * ray.dRcp[axis]; - Float distToSplit = (splitVal - ray.o[axis]) / ray.d[axis]; + Float distToSplit = (splitVal - ray.o[axis]) * ray.dRcp[axis]; /* Set up a new exit point */ const uint32_t tmp = exPt++; @@ -2588,15 +2582,15 @@ protected: /* Floating-point arithmetic.. - use both absolute and relative epsilons when looking for intersections in the subinterval */ - #if defined(SINGLE_PRECISION) - const Float eps = 1e-3; - #else - const Float eps = 1e-5; - #endif +#if defined(SINGLE_PRECISION) + const Float eps = 1e-3; +#else + const Float eps = 1e-5; +#endif const Float m_eps = 1-eps, p_eps = 1+eps; const Float searchStart = std::max(mint, stack[enPt].t * m_eps - eps); - Float searchEnd = std::min(maxt, stack[exPt].t * p_eps + eps); + Float searchEnd = std::min(maxt, stack[exPt].t * p_eps + eps); bool foundIntersection = false; @@ -2610,7 +2604,7 @@ protected: #endif EIntersectionResult result = cast()->intersect(ray, - primIdx, stack[enPt].t-Epsilon, stack[exPt].t+Epsilon, t, temp); + primIdx, searchStart, searchEnd, t, temp); if (result == EYes) { if (shadowRay) diff --git a/include/mitsuba/render/kdtree.h b/include/mitsuba/render/kdtree.h index 160c5ebd..271c5436 100644 --- a/include/mitsuba/render/kdtree.h +++ b/include/mitsuba/render/kdtree.h @@ -151,6 +151,7 @@ protected: return EYes; } } else { + cout << "Encountered a non-triangle shape!" << endl; //int shape = m_triAccel[idx].shapeIndex; } #else @@ -170,6 +171,7 @@ protected: return EYes; } } else { + cout << "Encountered a non-triangle shape!" << endl; } #endif return ENo; diff --git a/include/mitsuba/render/triaccel_sse.h b/include/mitsuba/render/triaccel_sse.h index 268456a8..d98a3625 100644 --- a/include/mitsuba/render/triaccel_sse.h +++ b/include/mitsuba/render/triaccel_sse.h @@ -23,38 +23,6 @@ MTS_NAMESPACE_BEGIN -/** - * Pre-computed triangle representation using Ingo Wald's TriAccel layout. - * This is a special version, which can store and simultaneously intersect - * against four triangles using SSE instructions. - */ -struct TriAccel4 { - uint8_t k; - uint8_t nonTriFlag; // Flags any non-triangle shapes that may be referenced - uint16_t shapeIndex[4]; - uint16_t indirectionCount; - uint32_t indirectionIndex; - float nu[4]; - float nv[4]; - float nd[4]; - float au[4]; - float av[4]; - float bnu[4]; - float bnv[4]; - float cnu[4]; - float cnv[4]; - uint32_t index[4]; - - /// Create from vertex data. Returns the number of failures - inline int load(const Point *A, const Point *B, const Point *C, - const uint32_t *shapeIndex, const uint32_t *index); - - /// Fast ray-triangle intersection test - inline bool rayIntersect(const __m128 o, const __m128 d, float _mint, - float _maxt, float *_t, float *_u, float *_v, unsigned int &_shapeIndex, - unsigned int &_index) const; -}; - FINLINE __m128 TriAccel::rayIntersectPacket(const RayPacket4 &packet, __m128 mint, __m128 maxt, __m128 inactive, Intersection4 &its) const { static const MM_ALIGN16 int waldModulo[4] = { 1, 2, 0, 1 }; @@ -141,804 +109,6 @@ FINLINE __m128 TriAccel::rayIntersectPacket(const RayPacket4 &packet, return hasIts; } -inline int TriAccel4::load(const Point *A, const Point *B, const Point *C, - const uint32_t *_shapeIndex, const uint32_t *_index) { - static const int waldModulo[4] = { 1, 2, 0, 1 }; - int factor = 1, failures = 0; - k = 0; - - for (int i=0; i<4; i++) { - Vector b = C[i]-A[i], c = B[i]-A[i], N = cross(c, b); - - /* Determine the closest projection axis */ - int kk = 0; - for (int j=0; j<3; j++) { - if (std::abs(N[j]) > std::abs(N[kk])) - kk = j; - } - - int u = waldModulo[kk], - v = waldModulo[kk+1]; - const Float n_k = N[kk], - denom = b[u]*c[v] - b[v]*c[u]; - - if (denom == 0) { - if ((_shapeIndex[i] != 0 || _index[i] != 0) && _index[i] != KNoTriangleFlag) - failures++; - } - - /* Pre-compute intersection calculation - constants */ - nu[i] = N[u] / n_k; - nv[i] = N[v] / n_k; - nd[i] = dot(Vector(A[i]), N) / n_k; - bnu[i] = b[u] / denom; - bnv[i] = -b[v] / denom; - au[i] = A[i][u]; - av[i] = A[i][v]; - cnu[i] = c[v] / denom; - cnv[i] = -c[u] / denom; - shapeIndex[i] = _shapeIndex[i]; - index[i] = _index[i]; - k += factor * kk; - factor *= 3; - } - return failures; -} - -inline bool TriAccel4::rayIntersect(const __m128 o, const __m128 d, float _mint, - float _maxt, float *_t, float *_u, float *_v, unsigned int &_shapeIndex, - unsigned int &_index) const { - __m128 o_k, o_u, o_v, d_k, d_u, d_v; - /* Arrange the ray according to the projection axes of the - four packed triangles. This requires a *good* compiler - (read: GCC 4.2 or ICC) - */ - -#ifdef MTS_DEBUG_FP - /* If debugging, turn off FP exceptions while testing for intersections - kd-tree (the code makes use of NaNs) */ - disable_fpexcept(); -#endif - - switch (k) { - case 0: - o_k = _mm_shuffle_ps(o, o, _MM_SHUFFLE(0,0,0,0)); - o_u = _mm_shuffle_ps(o, o, _MM_SHUFFLE(1,1,1,1)); - o_v = _mm_shuffle_ps(o, o, _MM_SHUFFLE(2,2,2,2)); - d_k = _mm_shuffle_ps(d, d, _MM_SHUFFLE(0,0,0,0)); - d_u = _mm_shuffle_ps(d, d, _MM_SHUFFLE(1,1,1,1)); - d_v = _mm_shuffle_ps(d, d, _MM_SHUFFLE(2,2,2,2)); - break; - case 1: - o_k = _mm_shuffle_ps(o, o, _MM_SHUFFLE(0,0,0,1)); - o_u = _mm_shuffle_ps(o, o, _MM_SHUFFLE(1,1,1,2)); - o_v = _mm_shuffle_ps(o, o, _MM_SHUFFLE(2,2,2,0)); - d_k = _mm_shuffle_ps(d, d, _MM_SHUFFLE(0,0,0,1)); - d_u = _mm_shuffle_ps(d, d, _MM_SHUFFLE(1,1,1,2)); - d_v = _mm_shuffle_ps(d, d, _MM_SHUFFLE(2,2,2,0)); - break; - case 2: - o_k = _mm_shuffle_ps(o, o, _MM_SHUFFLE(0,0,0,2)); - o_u = _mm_shuffle_ps(o, o, _MM_SHUFFLE(1,1,1,0)); - o_v = _mm_shuffle_ps(o, o, _MM_SHUFFLE(2,2,2,1)); - d_k = _mm_shuffle_ps(d, d, _MM_SHUFFLE(0,0,0,2)); - d_u = _mm_shuffle_ps(d, d, _MM_SHUFFLE(1,1,1,0)); - d_v = _mm_shuffle_ps(d, d, _MM_SHUFFLE(2,2,2,1)); - break; - case 3: - o_k = _mm_shuffle_ps(o, o, _MM_SHUFFLE(0,0,1,0)); - o_u = _mm_shuffle_ps(o, o, _MM_SHUFFLE(1,1,2,1)); - o_v = _mm_shuffle_ps(o, o, _MM_SHUFFLE(2,2,0,2)); - d_k = _mm_shuffle_ps(d, d, _MM_SHUFFLE(0,0,1,0)); - d_u = _mm_shuffle_ps(d, d, _MM_SHUFFLE(1,1,2,1)); - d_v = _mm_shuffle_ps(d, d, _MM_SHUFFLE(2,2,0,2)); - break; - case 4: - o_k = _mm_shuffle_ps(o, o, _MM_SHUFFLE(0,0,1,1)); - o_u = _mm_shuffle_ps(o, o, _MM_SHUFFLE(1,1,2,2)); - o_v = _mm_shuffle_ps(o, o, _MM_SHUFFLE(2,2,0,0)); - d_k = _mm_shuffle_ps(d, d, _MM_SHUFFLE(0,0,1,1)); - d_u = _mm_shuffle_ps(d, d, _MM_SHUFFLE(1,1,2,2)); - d_v = _mm_shuffle_ps(d, d, _MM_SHUFFLE(2,2,0,0)); - break; - case 5: - o_k = _mm_shuffle_ps(o, o, _MM_SHUFFLE(0,0,1,2)); - o_u = _mm_shuffle_ps(o, o, _MM_SHUFFLE(1,1,2,0)); - o_v = _mm_shuffle_ps(o, o, _MM_SHUFFLE(2,2,0,1)); - d_k = _mm_shuffle_ps(d, d, _MM_SHUFFLE(0,0,1,2)); - d_u = _mm_shuffle_ps(d, d, _MM_SHUFFLE(1,1,2,0)); - d_v = _mm_shuffle_ps(d, d, _MM_SHUFFLE(2,2,0,1)); - break; - case 6: - o_k = _mm_shuffle_ps(o, o, _MM_SHUFFLE(0,0,2,0)); - o_u = _mm_shuffle_ps(o, o, _MM_SHUFFLE(1,1,0,1)); - o_v = _mm_shuffle_ps(o, o, _MM_SHUFFLE(2,2,1,2)); - d_k = _mm_shuffle_ps(d, d, _MM_SHUFFLE(0,0,2,0)); - d_u = _mm_shuffle_ps(d, d, _MM_SHUFFLE(1,1,0,1)); - d_v = _mm_shuffle_ps(d, d, _MM_SHUFFLE(2,2,1,2)); - break; - case 7: - o_k = _mm_shuffle_ps(o, o, _MM_SHUFFLE(0,0,2,1)); - o_u = _mm_shuffle_ps(o, o, _MM_SHUFFLE(1,1,0,2)); - o_v = _mm_shuffle_ps(o, o, _MM_SHUFFLE(2,2,1,0)); - d_k = _mm_shuffle_ps(d, d, _MM_SHUFFLE(0,0,2,1)); - d_u = _mm_shuffle_ps(d, d, _MM_SHUFFLE(1,1,0,2)); - d_v = _mm_shuffle_ps(d, d, _MM_SHUFFLE(2,2,1,0)); - break; - case 8: - o_k = _mm_shuffle_ps(o, o, _MM_SHUFFLE(0,0,2,2)); - o_u = _mm_shuffle_ps(o, o, _MM_SHUFFLE(1,1,0,0)); - o_v = _mm_shuffle_ps(o, o, _MM_SHUFFLE(2,2,1,1)); - d_k = _mm_shuffle_ps(d, d, _MM_SHUFFLE(0,0,2,2)); - d_u = _mm_shuffle_ps(d, d, _MM_SHUFFLE(1,1,0,0)); - d_v = _mm_shuffle_ps(d, d, _MM_SHUFFLE(2,2,1,1)); - break; - case 9: - o_k = _mm_shuffle_ps(o, o, _MM_SHUFFLE(0,1,0,0)); - o_u = _mm_shuffle_ps(o, o, _MM_SHUFFLE(1,2,1,1)); - o_v = _mm_shuffle_ps(o, o, _MM_SHUFFLE(2,0,2,2)); - d_k = _mm_shuffle_ps(d, d, _MM_SHUFFLE(0,1,0,0)); - d_u = _mm_shuffle_ps(d, d, _MM_SHUFFLE(1,2,1,1)); - d_v = _mm_shuffle_ps(d, d, _MM_SHUFFLE(2,0,2,2)); - break; - case 10: - o_k = _mm_shuffle_ps(o, o, _MM_SHUFFLE(0,1,0,1)); - o_u = _mm_shuffle_ps(o, o, _MM_SHUFFLE(1,2,1,2)); - o_v = _mm_shuffle_ps(o, o, _MM_SHUFFLE(2,0,2,0)); - d_k = _mm_shuffle_ps(d, d, _MM_SHUFFLE(0,1,0,1)); - d_u = _mm_shuffle_ps(d, d, _MM_SHUFFLE(1,2,1,2)); - d_v = _mm_shuffle_ps(d, d, _MM_SHUFFLE(2,0,2,0)); - break; - case 11: - o_k = _mm_shuffle_ps(o, o, _MM_SHUFFLE(0,1,0,2)); - o_u = _mm_shuffle_ps(o, o, _MM_SHUFFLE(1,2,1,0)); - o_v = _mm_shuffle_ps(o, o, _MM_SHUFFLE(2,0,2,1)); - d_k = _mm_shuffle_ps(d, d, _MM_SHUFFLE(0,1,0,2)); - d_u = _mm_shuffle_ps(d, d, _MM_SHUFFLE(1,2,1,0)); - d_v = _mm_shuffle_ps(d, d, _MM_SHUFFLE(2,0,2,1)); - break; - case 12: - o_k = _mm_shuffle_ps(o, o, _MM_SHUFFLE(0,1,1,0)); - o_u = _mm_shuffle_ps(o, o, _MM_SHUFFLE(1,2,2,1)); - o_v = _mm_shuffle_ps(o, o, _MM_SHUFFLE(2,0,0,2)); - d_k = _mm_shuffle_ps(d, d, _MM_SHUFFLE(0,1,1,0)); - d_u = _mm_shuffle_ps(d, d, _MM_SHUFFLE(1,2,2,1)); - d_v = _mm_shuffle_ps(d, d, _MM_SHUFFLE(2,0,0,2)); - break; - case 13: - o_k = _mm_shuffle_ps(o, o, _MM_SHUFFLE(0,1,1,1)); - o_u = _mm_shuffle_ps(o, o, _MM_SHUFFLE(1,2,2,2)); - o_v = _mm_shuffle_ps(o, o, _MM_SHUFFLE(2,0,0,0)); - d_k = _mm_shuffle_ps(d, d, _MM_SHUFFLE(0,1,1,1)); - d_u = _mm_shuffle_ps(d, d, _MM_SHUFFLE(1,2,2,2)); - d_v = _mm_shuffle_ps(d, d, _MM_SHUFFLE(2,0,0,0)); - break; - case 14: - o_k = _mm_shuffle_ps(o, o, _MM_SHUFFLE(0,1,1,2)); - o_u = _mm_shuffle_ps(o, o, _MM_SHUFFLE(1,2,2,0)); - o_v = _mm_shuffle_ps(o, o, _MM_SHUFFLE(2,0,0,1)); - d_k = _mm_shuffle_ps(d, d, _MM_SHUFFLE(0,1,1,2)); - d_u = _mm_shuffle_ps(d, d, _MM_SHUFFLE(1,2,2,0)); - d_v = _mm_shuffle_ps(d, d, _MM_SHUFFLE(2,0,0,1)); - break; - case 15: - o_k = _mm_shuffle_ps(o, o, _MM_SHUFFLE(0,1,2,0)); - o_u = _mm_shuffle_ps(o, o, _MM_SHUFFLE(1,2,0,1)); - o_v = _mm_shuffle_ps(o, o, _MM_SHUFFLE(2,0,1,2)); - d_k = _mm_shuffle_ps(d, d, _MM_SHUFFLE(0,1,2,0)); - d_u = _mm_shuffle_ps(d, d, _MM_SHUFFLE(1,2,0,1)); - d_v = _mm_shuffle_ps(d, d, _MM_SHUFFLE(2,0,1,2)); - break; - case 16: - o_k = _mm_shuffle_ps(o, o, _MM_SHUFFLE(0,1,2,1)); - o_u = _mm_shuffle_ps(o, o, _MM_SHUFFLE(1,2,0,2)); - o_v = _mm_shuffle_ps(o, o, _MM_SHUFFLE(2,0,1,0)); - d_k = _mm_shuffle_ps(d, d, _MM_SHUFFLE(0,1,2,1)); - d_u = _mm_shuffle_ps(d, d, _MM_SHUFFLE(1,2,0,2)); - d_v = _mm_shuffle_ps(d, d, _MM_SHUFFLE(2,0,1,0)); - break; - case 17: - o_k = _mm_shuffle_ps(o, o, _MM_SHUFFLE(0,1,2,2)); - o_u = _mm_shuffle_ps(o, o, _MM_SHUFFLE(1,2,0,0)); - o_v = _mm_shuffle_ps(o, o, _MM_SHUFFLE(2,0,1,1)); - d_k = _mm_shuffle_ps(d, d, _MM_SHUFFLE(0,1,2,2)); - d_u = _mm_shuffle_ps(d, d, _MM_SHUFFLE(1,2,0,0)); - d_v = _mm_shuffle_ps(d, d, _MM_SHUFFLE(2,0,1,1)); - break; - case 18: - o_k = _mm_shuffle_ps(o, o, _MM_SHUFFLE(0,2,0,0)); - o_u = _mm_shuffle_ps(o, o, _MM_SHUFFLE(1,0,1,1)); - o_v = _mm_shuffle_ps(o, o, _MM_SHUFFLE(2,1,2,2)); - d_k = _mm_shuffle_ps(d, d, _MM_SHUFFLE(0,2,0,0)); - d_u = _mm_shuffle_ps(d, d, _MM_SHUFFLE(1,0,1,1)); - d_v = _mm_shuffle_ps(d, d, _MM_SHUFFLE(2,1,2,2)); - break; - case 19: - o_k = _mm_shuffle_ps(o, o, _MM_SHUFFLE(0,2,0,1)); - o_u = _mm_shuffle_ps(o, o, _MM_SHUFFLE(1,0,1,2)); - o_v = _mm_shuffle_ps(o, o, _MM_SHUFFLE(2,1,2,0)); - d_k = _mm_shuffle_ps(d, d, _MM_SHUFFLE(0,2,0,1)); - d_u = _mm_shuffle_ps(d, d, _MM_SHUFFLE(1,0,1,2)); - d_v = _mm_shuffle_ps(d, d, _MM_SHUFFLE(2,1,2,0)); - break; - case 20: - o_k = _mm_shuffle_ps(o, o, _MM_SHUFFLE(0,2,0,2)); - o_u = _mm_shuffle_ps(o, o, _MM_SHUFFLE(1,0,1,0)); - o_v = _mm_shuffle_ps(o, o, _MM_SHUFFLE(2,1,2,1)); - d_k = _mm_shuffle_ps(d, d, _MM_SHUFFLE(0,2,0,2)); - d_u = _mm_shuffle_ps(d, d, _MM_SHUFFLE(1,0,1,0)); - d_v = _mm_shuffle_ps(d, d, _MM_SHUFFLE(2,1,2,1)); - break; - case 21: - o_k = _mm_shuffle_ps(o, o, _MM_SHUFFLE(0,2,1,0)); - o_u = _mm_shuffle_ps(o, o, _MM_SHUFFLE(1,0,2,1)); - o_v = _mm_shuffle_ps(o, o, _MM_SHUFFLE(2,1,0,2)); - d_k = _mm_shuffle_ps(d, d, _MM_SHUFFLE(0,2,1,0)); - d_u = _mm_shuffle_ps(d, d, _MM_SHUFFLE(1,0,2,1)); - d_v = _mm_shuffle_ps(d, d, _MM_SHUFFLE(2,1,0,2)); - break; - case 22: - o_k = _mm_shuffle_ps(o, o, _MM_SHUFFLE(0,2,1,1)); - o_u = _mm_shuffle_ps(o, o, _MM_SHUFFLE(1,0,2,2)); - o_v = _mm_shuffle_ps(o, o, _MM_SHUFFLE(2,1,0,0)); - d_k = _mm_shuffle_ps(d, d, _MM_SHUFFLE(0,2,1,1)); - d_u = _mm_shuffle_ps(d, d, _MM_SHUFFLE(1,0,2,2)); - d_v = _mm_shuffle_ps(d, d, _MM_SHUFFLE(2,1,0,0)); - break; - case 23: - o_k = _mm_shuffle_ps(o, o, _MM_SHUFFLE(0,2,1,2)); - o_u = _mm_shuffle_ps(o, o, _MM_SHUFFLE(1,0,2,0)); - o_v = _mm_shuffle_ps(o, o, _MM_SHUFFLE(2,1,0,1)); - d_k = _mm_shuffle_ps(d, d, _MM_SHUFFLE(0,2,1,2)); - d_u = _mm_shuffle_ps(d, d, _MM_SHUFFLE(1,0,2,0)); - d_v = _mm_shuffle_ps(d, d, _MM_SHUFFLE(2,1,0,1)); - break; - case 24: - o_k = _mm_shuffle_ps(o, o, _MM_SHUFFLE(0,2,2,0)); - o_u = _mm_shuffle_ps(o, o, _MM_SHUFFLE(1,0,0,1)); - o_v = _mm_shuffle_ps(o, o, _MM_SHUFFLE(2,1,1,2)); - d_k = _mm_shuffle_ps(d, d, _MM_SHUFFLE(0,2,2,0)); - d_u = _mm_shuffle_ps(d, d, _MM_SHUFFLE(1,0,0,1)); - d_v = _mm_shuffle_ps(d, d, _MM_SHUFFLE(2,1,1,2)); - break; - case 25: - o_k = _mm_shuffle_ps(o, o, _MM_SHUFFLE(0,2,2,1)); - o_u = _mm_shuffle_ps(o, o, _MM_SHUFFLE(1,0,0,2)); - o_v = _mm_shuffle_ps(o, o, _MM_SHUFFLE(2,1,1,0)); - d_k = _mm_shuffle_ps(d, d, _MM_SHUFFLE(0,2,2,1)); - d_u = _mm_shuffle_ps(d, d, _MM_SHUFFLE(1,0,0,2)); - d_v = _mm_shuffle_ps(d, d, _MM_SHUFFLE(2,1,1,0)); - break; - case 26: - o_k = _mm_shuffle_ps(o, o, _MM_SHUFFLE(0,2,2,2)); - o_u = _mm_shuffle_ps(o, o, _MM_SHUFFLE(1,0,0,0)); - o_v = _mm_shuffle_ps(o, o, _MM_SHUFFLE(2,1,1,1)); - d_k = _mm_shuffle_ps(d, d, _MM_SHUFFLE(0,2,2,2)); - d_u = _mm_shuffle_ps(d, d, _MM_SHUFFLE(1,0,0,0)); - d_v = _mm_shuffle_ps(d, d, _MM_SHUFFLE(2,1,1,1)); - break; - case 27: - o_k = _mm_shuffle_ps(o, o, _MM_SHUFFLE(1,0,0,0)); - o_u = _mm_shuffle_ps(o, o, _MM_SHUFFLE(2,1,1,1)); - o_v = _mm_shuffle_ps(o, o, _MM_SHUFFLE(0,2,2,2)); - d_k = _mm_shuffle_ps(d, d, _MM_SHUFFLE(1,0,0,0)); - d_u = _mm_shuffle_ps(d, d, _MM_SHUFFLE(2,1,1,1)); - d_v = _mm_shuffle_ps(d, d, _MM_SHUFFLE(0,2,2,2)); - break; - case 28: - o_k = _mm_shuffle_ps(o, o, _MM_SHUFFLE(1,0,0,1)); - o_u = _mm_shuffle_ps(o, o, _MM_SHUFFLE(2,1,1,2)); - o_v = _mm_shuffle_ps(o, o, _MM_SHUFFLE(0,2,2,0)); - d_k = _mm_shuffle_ps(d, d, _MM_SHUFFLE(1,0,0,1)); - d_u = _mm_shuffle_ps(d, d, _MM_SHUFFLE(2,1,1,2)); - d_v = _mm_shuffle_ps(d, d, _MM_SHUFFLE(0,2,2,0)); - break; - case 29: - o_k = _mm_shuffle_ps(o, o, _MM_SHUFFLE(1,0,0,2)); - o_u = _mm_shuffle_ps(o, o, _MM_SHUFFLE(2,1,1,0)); - o_v = _mm_shuffle_ps(o, o, _MM_SHUFFLE(0,2,2,1)); - d_k = _mm_shuffle_ps(d, d, _MM_SHUFFLE(1,0,0,2)); - d_u = _mm_shuffle_ps(d, d, _MM_SHUFFLE(2,1,1,0)); - d_v = _mm_shuffle_ps(d, d, _MM_SHUFFLE(0,2,2,1)); - break; - case 30: - o_k = _mm_shuffle_ps(o, o, _MM_SHUFFLE(1,0,1,0)); - o_u = _mm_shuffle_ps(o, o, _MM_SHUFFLE(2,1,2,1)); - o_v = _mm_shuffle_ps(o, o, _MM_SHUFFLE(0,2,0,2)); - d_k = _mm_shuffle_ps(d, d, _MM_SHUFFLE(1,0,1,0)); - d_u = _mm_shuffle_ps(d, d, _MM_SHUFFLE(2,1,2,1)); - d_v = _mm_shuffle_ps(d, d, _MM_SHUFFLE(0,2,0,2)); - break; - case 31: - o_k = _mm_shuffle_ps(o, o, _MM_SHUFFLE(1,0,1,1)); - o_u = _mm_shuffle_ps(o, o, _MM_SHUFFLE(2,1,2,2)); - o_v = _mm_shuffle_ps(o, o, _MM_SHUFFLE(0,2,0,0)); - d_k = _mm_shuffle_ps(d, d, _MM_SHUFFLE(1,0,1,1)); - d_u = _mm_shuffle_ps(d, d, _MM_SHUFFLE(2,1,2,2)); - d_v = _mm_shuffle_ps(d, d, _MM_SHUFFLE(0,2,0,0)); - break; - case 32: - o_k = _mm_shuffle_ps(o, o, _MM_SHUFFLE(1,0,1,2)); - o_u = _mm_shuffle_ps(o, o, _MM_SHUFFLE(2,1,2,0)); - o_v = _mm_shuffle_ps(o, o, _MM_SHUFFLE(0,2,0,1)); - d_k = _mm_shuffle_ps(d, d, _MM_SHUFFLE(1,0,1,2)); - d_u = _mm_shuffle_ps(d, d, _MM_SHUFFLE(2,1,2,0)); - d_v = _mm_shuffle_ps(d, d, _MM_SHUFFLE(0,2,0,1)); - break; - case 33: - o_k = _mm_shuffle_ps(o, o, _MM_SHUFFLE(1,0,2,0)); - o_u = _mm_shuffle_ps(o, o, _MM_SHUFFLE(2,1,0,1)); - o_v = _mm_shuffle_ps(o, o, _MM_SHUFFLE(0,2,1,2)); - d_k = _mm_shuffle_ps(d, d, _MM_SHUFFLE(1,0,2,0)); - d_u = _mm_shuffle_ps(d, d, _MM_SHUFFLE(2,1,0,1)); - d_v = _mm_shuffle_ps(d, d, _MM_SHUFFLE(0,2,1,2)); - break; - case 34: - o_k = _mm_shuffle_ps(o, o, _MM_SHUFFLE(1,0,2,1)); - o_u = _mm_shuffle_ps(o, o, _MM_SHUFFLE(2,1,0,2)); - o_v = _mm_shuffle_ps(o, o, _MM_SHUFFLE(0,2,1,0)); - d_k = _mm_shuffle_ps(d, d, _MM_SHUFFLE(1,0,2,1)); - d_u = _mm_shuffle_ps(d, d, _MM_SHUFFLE(2,1,0,2)); - d_v = _mm_shuffle_ps(d, d, _MM_SHUFFLE(0,2,1,0)); - break; - case 35: - o_k = _mm_shuffle_ps(o, o, _MM_SHUFFLE(1,0,2,2)); - o_u = _mm_shuffle_ps(o, o, _MM_SHUFFLE(2,1,0,0)); - o_v = _mm_shuffle_ps(o, o, _MM_SHUFFLE(0,2,1,1)); - d_k = _mm_shuffle_ps(d, d, _MM_SHUFFLE(1,0,2,2)); - d_u = _mm_shuffle_ps(d, d, _MM_SHUFFLE(2,1,0,0)); - d_v = _mm_shuffle_ps(d, d, _MM_SHUFFLE(0,2,1,1)); - break; - case 36: - o_k = _mm_shuffle_ps(o, o, _MM_SHUFFLE(1,1,0,0)); - o_u = _mm_shuffle_ps(o, o, _MM_SHUFFLE(2,2,1,1)); - o_v = _mm_shuffle_ps(o, o, _MM_SHUFFLE(0,0,2,2)); - d_k = _mm_shuffle_ps(d, d, _MM_SHUFFLE(1,1,0,0)); - d_u = _mm_shuffle_ps(d, d, _MM_SHUFFLE(2,2,1,1)); - d_v = _mm_shuffle_ps(d, d, _MM_SHUFFLE(0,0,2,2)); - break; - case 37: - o_k = _mm_shuffle_ps(o, o, _MM_SHUFFLE(1,1,0,1)); - o_u = _mm_shuffle_ps(o, o, _MM_SHUFFLE(2,2,1,2)); - o_v = _mm_shuffle_ps(o, o, _MM_SHUFFLE(0,0,2,0)); - d_k = _mm_shuffle_ps(d, d, _MM_SHUFFLE(1,1,0,1)); - d_u = _mm_shuffle_ps(d, d, _MM_SHUFFLE(2,2,1,2)); - d_v = _mm_shuffle_ps(d, d, _MM_SHUFFLE(0,0,2,0)); - break; - case 38: - o_k = _mm_shuffle_ps(o, o, _MM_SHUFFLE(1,1,0,2)); - o_u = _mm_shuffle_ps(o, o, _MM_SHUFFLE(2,2,1,0)); - o_v = _mm_shuffle_ps(o, o, _MM_SHUFFLE(0,0,2,1)); - d_k = _mm_shuffle_ps(d, d, _MM_SHUFFLE(1,1,0,2)); - d_u = _mm_shuffle_ps(d, d, _MM_SHUFFLE(2,2,1,0)); - d_v = _mm_shuffle_ps(d, d, _MM_SHUFFLE(0,0,2,1)); - break; - case 39: - o_k = _mm_shuffle_ps(o, o, _MM_SHUFFLE(1,1,1,0)); - o_u = _mm_shuffle_ps(o, o, _MM_SHUFFLE(2,2,2,1)); - o_v = _mm_shuffle_ps(o, o, _MM_SHUFFLE(0,0,0,2)); - d_k = _mm_shuffle_ps(d, d, _MM_SHUFFLE(1,1,1,0)); - d_u = _mm_shuffle_ps(d, d, _MM_SHUFFLE(2,2,2,1)); - d_v = _mm_shuffle_ps(d, d, _MM_SHUFFLE(0,0,0,2)); - break; - case 40: - o_k = _mm_shuffle_ps(o, o, _MM_SHUFFLE(1,1,1,1)); - o_u = _mm_shuffle_ps(o, o, _MM_SHUFFLE(2,2,2,2)); - o_v = _mm_shuffle_ps(o, o, _MM_SHUFFLE(0,0,0,0)); - d_k = _mm_shuffle_ps(d, d, _MM_SHUFFLE(1,1,1,1)); - d_u = _mm_shuffle_ps(d, d, _MM_SHUFFLE(2,2,2,2)); - d_v = _mm_shuffle_ps(d, d, _MM_SHUFFLE(0,0,0,0)); - break; - case 41: - o_k = _mm_shuffle_ps(o, o, _MM_SHUFFLE(1,1,1,2)); - o_u = _mm_shuffle_ps(o, o, _MM_SHUFFLE(2,2,2,0)); - o_v = _mm_shuffle_ps(o, o, _MM_SHUFFLE(0,0,0,1)); - d_k = _mm_shuffle_ps(d, d, _MM_SHUFFLE(1,1,1,2)); - d_u = _mm_shuffle_ps(d, d, _MM_SHUFFLE(2,2,2,0)); - d_v = _mm_shuffle_ps(d, d, _MM_SHUFFLE(0,0,0,1)); - break; - case 42: - o_k = _mm_shuffle_ps(o, o, _MM_SHUFFLE(1,1,2,0)); - o_u = _mm_shuffle_ps(o, o, _MM_SHUFFLE(2,2,0,1)); - o_v = _mm_shuffle_ps(o, o, _MM_SHUFFLE(0,0,1,2)); - d_k = _mm_shuffle_ps(d, d, _MM_SHUFFLE(1,1,2,0)); - d_u = _mm_shuffle_ps(d, d, _MM_SHUFFLE(2,2,0,1)); - d_v = _mm_shuffle_ps(d, d, _MM_SHUFFLE(0,0,1,2)); - break; - case 43: - o_k = _mm_shuffle_ps(o, o, _MM_SHUFFLE(1,1,2,1)); - o_u = _mm_shuffle_ps(o, o, _MM_SHUFFLE(2,2,0,2)); - o_v = _mm_shuffle_ps(o, o, _MM_SHUFFLE(0,0,1,0)); - d_k = _mm_shuffle_ps(d, d, _MM_SHUFFLE(1,1,2,1)); - d_u = _mm_shuffle_ps(d, d, _MM_SHUFFLE(2,2,0,2)); - d_v = _mm_shuffle_ps(d, d, _MM_SHUFFLE(0,0,1,0)); - break; - case 44: - o_k = _mm_shuffle_ps(o, o, _MM_SHUFFLE(1,1,2,2)); - o_u = _mm_shuffle_ps(o, o, _MM_SHUFFLE(2,2,0,0)); - o_v = _mm_shuffle_ps(o, o, _MM_SHUFFLE(0,0,1,1)); - d_k = _mm_shuffle_ps(d, d, _MM_SHUFFLE(1,1,2,2)); - d_u = _mm_shuffle_ps(d, d, _MM_SHUFFLE(2,2,0,0)); - d_v = _mm_shuffle_ps(d, d, _MM_SHUFFLE(0,0,1,1)); - break; - case 45: - o_k = _mm_shuffle_ps(o, o, _MM_SHUFFLE(1,2,0,0)); - o_u = _mm_shuffle_ps(o, o, _MM_SHUFFLE(2,0,1,1)); - o_v = _mm_shuffle_ps(o, o, _MM_SHUFFLE(0,1,2,2)); - d_k = _mm_shuffle_ps(d, d, _MM_SHUFFLE(1,2,0,0)); - d_u = _mm_shuffle_ps(d, d, _MM_SHUFFLE(2,0,1,1)); - d_v = _mm_shuffle_ps(d, d, _MM_SHUFFLE(0,1,2,2)); - break; - case 46: - o_k = _mm_shuffle_ps(o, o, _MM_SHUFFLE(1,2,0,1)); - o_u = _mm_shuffle_ps(o, o, _MM_SHUFFLE(2,0,1,2)); - o_v = _mm_shuffle_ps(o, o, _MM_SHUFFLE(0,1,2,0)); - d_k = _mm_shuffle_ps(d, d, _MM_SHUFFLE(1,2,0,1)); - d_u = _mm_shuffle_ps(d, d, _MM_SHUFFLE(2,0,1,2)); - d_v = _mm_shuffle_ps(d, d, _MM_SHUFFLE(0,1,2,0)); - break; - case 47: - o_k = _mm_shuffle_ps(o, o, _MM_SHUFFLE(1,2,0,2)); - o_u = _mm_shuffle_ps(o, o, _MM_SHUFFLE(2,0,1,0)); - o_v = _mm_shuffle_ps(o, o, _MM_SHUFFLE(0,1,2,1)); - d_k = _mm_shuffle_ps(d, d, _MM_SHUFFLE(1,2,0,2)); - d_u = _mm_shuffle_ps(d, d, _MM_SHUFFLE(2,0,1,0)); - d_v = _mm_shuffle_ps(d, d, _MM_SHUFFLE(0,1,2,1)); - break; - case 48: - o_k = _mm_shuffle_ps(o, o, _MM_SHUFFLE(1,2,1,0)); - o_u = _mm_shuffle_ps(o, o, _MM_SHUFFLE(2,0,2,1)); - o_v = _mm_shuffle_ps(o, o, _MM_SHUFFLE(0,1,0,2)); - d_k = _mm_shuffle_ps(d, d, _MM_SHUFFLE(1,2,1,0)); - d_u = _mm_shuffle_ps(d, d, _MM_SHUFFLE(2,0,2,1)); - d_v = _mm_shuffle_ps(d, d, _MM_SHUFFLE(0,1,0,2)); - break; - case 49: - o_k = _mm_shuffle_ps(o, o, _MM_SHUFFLE(1,2,1,1)); - o_u = _mm_shuffle_ps(o, o, _MM_SHUFFLE(2,0,2,2)); - o_v = _mm_shuffle_ps(o, o, _MM_SHUFFLE(0,1,0,0)); - d_k = _mm_shuffle_ps(d, d, _MM_SHUFFLE(1,2,1,1)); - d_u = _mm_shuffle_ps(d, d, _MM_SHUFFLE(2,0,2,2)); - d_v = _mm_shuffle_ps(d, d, _MM_SHUFFLE(0,1,0,0)); - break; - case 50: - o_k = _mm_shuffle_ps(o, o, _MM_SHUFFLE(1,2,1,2)); - o_u = _mm_shuffle_ps(o, o, _MM_SHUFFLE(2,0,2,0)); - o_v = _mm_shuffle_ps(o, o, _MM_SHUFFLE(0,1,0,1)); - d_k = _mm_shuffle_ps(d, d, _MM_SHUFFLE(1,2,1,2)); - d_u = _mm_shuffle_ps(d, d, _MM_SHUFFLE(2,0,2,0)); - d_v = _mm_shuffle_ps(d, d, _MM_SHUFFLE(0,1,0,1)); - break; - case 51: - o_k = _mm_shuffle_ps(o, o, _MM_SHUFFLE(1,2,2,0)); - o_u = _mm_shuffle_ps(o, o, _MM_SHUFFLE(2,0,0,1)); - o_v = _mm_shuffle_ps(o, o, _MM_SHUFFLE(0,1,1,2)); - d_k = _mm_shuffle_ps(d, d, _MM_SHUFFLE(1,2,2,0)); - d_u = _mm_shuffle_ps(d, d, _MM_SHUFFLE(2,0,0,1)); - d_v = _mm_shuffle_ps(d, d, _MM_SHUFFLE(0,1,1,2)); - break; - case 52: - o_k = _mm_shuffle_ps(o, o, _MM_SHUFFLE(1,2,2,1)); - o_u = _mm_shuffle_ps(o, o, _MM_SHUFFLE(2,0,0,2)); - o_v = _mm_shuffle_ps(o, o, _MM_SHUFFLE(0,1,1,0)); - d_k = _mm_shuffle_ps(d, d, _MM_SHUFFLE(1,2,2,1)); - d_u = _mm_shuffle_ps(d, d, _MM_SHUFFLE(2,0,0,2)); - d_v = _mm_shuffle_ps(d, d, _MM_SHUFFLE(0,1,1,0)); - break; - case 53: - o_k = _mm_shuffle_ps(o, o, _MM_SHUFFLE(1,2,2,2)); - o_u = _mm_shuffle_ps(o, o, _MM_SHUFFLE(2,0,0,0)); - o_v = _mm_shuffle_ps(o, o, _MM_SHUFFLE(0,1,1,1)); - d_k = _mm_shuffle_ps(d, d, _MM_SHUFFLE(1,2,2,2)); - d_u = _mm_shuffle_ps(d, d, _MM_SHUFFLE(2,0,0,0)); - d_v = _mm_shuffle_ps(d, d, _MM_SHUFFLE(0,1,1,1)); - break; - case 54: - o_k = _mm_shuffle_ps(o, o, _MM_SHUFFLE(2,0,0,0)); - o_u = _mm_shuffle_ps(o, o, _MM_SHUFFLE(0,1,1,1)); - o_v = _mm_shuffle_ps(o, o, _MM_SHUFFLE(1,2,2,2)); - d_k = _mm_shuffle_ps(d, d, _MM_SHUFFLE(2,0,0,0)); - d_u = _mm_shuffle_ps(d, d, _MM_SHUFFLE(0,1,1,1)); - d_v = _mm_shuffle_ps(d, d, _MM_SHUFFLE(1,2,2,2)); - break; - case 55: - o_k = _mm_shuffle_ps(o, o, _MM_SHUFFLE(2,0,0,1)); - o_u = _mm_shuffle_ps(o, o, _MM_SHUFFLE(0,1,1,2)); - o_v = _mm_shuffle_ps(o, o, _MM_SHUFFLE(1,2,2,0)); - d_k = _mm_shuffle_ps(d, d, _MM_SHUFFLE(2,0,0,1)); - d_u = _mm_shuffle_ps(d, d, _MM_SHUFFLE(0,1,1,2)); - d_v = _mm_shuffle_ps(d, d, _MM_SHUFFLE(1,2,2,0)); - break; - case 56: - o_k = _mm_shuffle_ps(o, o, _MM_SHUFFLE(2,0,0,2)); - o_u = _mm_shuffle_ps(o, o, _MM_SHUFFLE(0,1,1,0)); - o_v = _mm_shuffle_ps(o, o, _MM_SHUFFLE(1,2,2,1)); - d_k = _mm_shuffle_ps(d, d, _MM_SHUFFLE(2,0,0,2)); - d_u = _mm_shuffle_ps(d, d, _MM_SHUFFLE(0,1,1,0)); - d_v = _mm_shuffle_ps(d, d, _MM_SHUFFLE(1,2,2,1)); - break; - case 57: - o_k = _mm_shuffle_ps(o, o, _MM_SHUFFLE(2,0,1,0)); - o_u = _mm_shuffle_ps(o, o, _MM_SHUFFLE(0,1,2,1)); - o_v = _mm_shuffle_ps(o, o, _MM_SHUFFLE(1,2,0,2)); - d_k = _mm_shuffle_ps(d, d, _MM_SHUFFLE(2,0,1,0)); - d_u = _mm_shuffle_ps(d, d, _MM_SHUFFLE(0,1,2,1)); - d_v = _mm_shuffle_ps(d, d, _MM_SHUFFLE(1,2,0,2)); - break; - case 58: - o_k = _mm_shuffle_ps(o, o, _MM_SHUFFLE(2,0,1,1)); - o_u = _mm_shuffle_ps(o, o, _MM_SHUFFLE(0,1,2,2)); - o_v = _mm_shuffle_ps(o, o, _MM_SHUFFLE(1,2,0,0)); - d_k = _mm_shuffle_ps(d, d, _MM_SHUFFLE(2,0,1,1)); - d_u = _mm_shuffle_ps(d, d, _MM_SHUFFLE(0,1,2,2)); - d_v = _mm_shuffle_ps(d, d, _MM_SHUFFLE(1,2,0,0)); - break; - case 59: - o_k = _mm_shuffle_ps(o, o, _MM_SHUFFLE(2,0,1,2)); - o_u = _mm_shuffle_ps(o, o, _MM_SHUFFLE(0,1,2,0)); - o_v = _mm_shuffle_ps(o, o, _MM_SHUFFLE(1,2,0,1)); - d_k = _mm_shuffle_ps(d, d, _MM_SHUFFLE(2,0,1,2)); - d_u = _mm_shuffle_ps(d, d, _MM_SHUFFLE(0,1,2,0)); - d_v = _mm_shuffle_ps(d, d, _MM_SHUFFLE(1,2,0,1)); - break; - case 60: - o_k = _mm_shuffle_ps(o, o, _MM_SHUFFLE(2,0,2,0)); - o_u = _mm_shuffle_ps(o, o, _MM_SHUFFLE(0,1,0,1)); - o_v = _mm_shuffle_ps(o, o, _MM_SHUFFLE(1,2,1,2)); - d_k = _mm_shuffle_ps(d, d, _MM_SHUFFLE(2,0,2,0)); - d_u = _mm_shuffle_ps(d, d, _MM_SHUFFLE(0,1,0,1)); - d_v = _mm_shuffle_ps(d, d, _MM_SHUFFLE(1,2,1,2)); - break; - case 61: - o_k = _mm_shuffle_ps(o, o, _MM_SHUFFLE(2,0,2,1)); - o_u = _mm_shuffle_ps(o, o, _MM_SHUFFLE(0,1,0,2)); - o_v = _mm_shuffle_ps(o, o, _MM_SHUFFLE(1,2,1,0)); - d_k = _mm_shuffle_ps(d, d, _MM_SHUFFLE(2,0,2,1)); - d_u = _mm_shuffle_ps(d, d, _MM_SHUFFLE(0,1,0,2)); - d_v = _mm_shuffle_ps(d, d, _MM_SHUFFLE(1,2,1,0)); - break; - case 62: - o_k = _mm_shuffle_ps(o, o, _MM_SHUFFLE(2,0,2,2)); - o_u = _mm_shuffle_ps(o, o, _MM_SHUFFLE(0,1,0,0)); - o_v = _mm_shuffle_ps(o, o, _MM_SHUFFLE(1,2,1,1)); - d_k = _mm_shuffle_ps(d, d, _MM_SHUFFLE(2,0,2,2)); - d_u = _mm_shuffle_ps(d, d, _MM_SHUFFLE(0,1,0,0)); - d_v = _mm_shuffle_ps(d, d, _MM_SHUFFLE(1,2,1,1)); - break; - case 63: - o_k = _mm_shuffle_ps(o, o, _MM_SHUFFLE(2,1,0,0)); - o_u = _mm_shuffle_ps(o, o, _MM_SHUFFLE(0,2,1,1)); - o_v = _mm_shuffle_ps(o, o, _MM_SHUFFLE(1,0,2,2)); - d_k = _mm_shuffle_ps(d, d, _MM_SHUFFLE(2,1,0,0)); - d_u = _mm_shuffle_ps(d, d, _MM_SHUFFLE(0,2,1,1)); - d_v = _mm_shuffle_ps(d, d, _MM_SHUFFLE(1,0,2,2)); - break; - case 64: - o_k = _mm_shuffle_ps(o, o, _MM_SHUFFLE(2,1,0,1)); - o_u = _mm_shuffle_ps(o, o, _MM_SHUFFLE(0,2,1,2)); - o_v = _mm_shuffle_ps(o, o, _MM_SHUFFLE(1,0,2,0)); - d_k = _mm_shuffle_ps(d, d, _MM_SHUFFLE(2,1,0,1)); - d_u = _mm_shuffle_ps(d, d, _MM_SHUFFLE(0,2,1,2)); - d_v = _mm_shuffle_ps(d, d, _MM_SHUFFLE(1,0,2,0)); - break; - case 65: - o_k = _mm_shuffle_ps(o, o, _MM_SHUFFLE(2,1,0,2)); - o_u = _mm_shuffle_ps(o, o, _MM_SHUFFLE(0,2,1,0)); - o_v = _mm_shuffle_ps(o, o, _MM_SHUFFLE(1,0,2,1)); - d_k = _mm_shuffle_ps(d, d, _MM_SHUFFLE(2,1,0,2)); - d_u = _mm_shuffle_ps(d, d, _MM_SHUFFLE(0,2,1,0)); - d_v = _mm_shuffle_ps(d, d, _MM_SHUFFLE(1,0,2,1)); - break; - case 66: - o_k = _mm_shuffle_ps(o, o, _MM_SHUFFLE(2,1,1,0)); - o_u = _mm_shuffle_ps(o, o, _MM_SHUFFLE(0,2,2,1)); - o_v = _mm_shuffle_ps(o, o, _MM_SHUFFLE(1,0,0,2)); - d_k = _mm_shuffle_ps(d, d, _MM_SHUFFLE(2,1,1,0)); - d_u = _mm_shuffle_ps(d, d, _MM_SHUFFLE(0,2,2,1)); - d_v = _mm_shuffle_ps(d, d, _MM_SHUFFLE(1,0,0,2)); - break; - case 67: - o_k = _mm_shuffle_ps(o, o, _MM_SHUFFLE(2,1,1,1)); - o_u = _mm_shuffle_ps(o, o, _MM_SHUFFLE(0,2,2,2)); - o_v = _mm_shuffle_ps(o, o, _MM_SHUFFLE(1,0,0,0)); - d_k = _mm_shuffle_ps(d, d, _MM_SHUFFLE(2,1,1,1)); - d_u = _mm_shuffle_ps(d, d, _MM_SHUFFLE(0,2,2,2)); - d_v = _mm_shuffle_ps(d, d, _MM_SHUFFLE(1,0,0,0)); - break; - case 68: - o_k = _mm_shuffle_ps(o, o, _MM_SHUFFLE(2,1,1,2)); - o_u = _mm_shuffle_ps(o, o, _MM_SHUFFLE(0,2,2,0)); - o_v = _mm_shuffle_ps(o, o, _MM_SHUFFLE(1,0,0,1)); - d_k = _mm_shuffle_ps(d, d, _MM_SHUFFLE(2,1,1,2)); - d_u = _mm_shuffle_ps(d, d, _MM_SHUFFLE(0,2,2,0)); - d_v = _mm_shuffle_ps(d, d, _MM_SHUFFLE(1,0,0,1)); - break; - case 69: - o_k = _mm_shuffle_ps(o, o, _MM_SHUFFLE(2,1,2,0)); - o_u = _mm_shuffle_ps(o, o, _MM_SHUFFLE(0,2,0,1)); - o_v = _mm_shuffle_ps(o, o, _MM_SHUFFLE(1,0,1,2)); - d_k = _mm_shuffle_ps(d, d, _MM_SHUFFLE(2,1,2,0)); - d_u = _mm_shuffle_ps(d, d, _MM_SHUFFLE(0,2,0,1)); - d_v = _mm_shuffle_ps(d, d, _MM_SHUFFLE(1,0,1,2)); - break; - case 70: - o_k = _mm_shuffle_ps(o, o, _MM_SHUFFLE(2,1,2,1)); - o_u = _mm_shuffle_ps(o, o, _MM_SHUFFLE(0,2,0,2)); - o_v = _mm_shuffle_ps(o, o, _MM_SHUFFLE(1,0,1,0)); - d_k = _mm_shuffle_ps(d, d, _MM_SHUFFLE(2,1,2,1)); - d_u = _mm_shuffle_ps(d, d, _MM_SHUFFLE(0,2,0,2)); - d_v = _mm_shuffle_ps(d, d, _MM_SHUFFLE(1,0,1,0)); - break; - case 71: - o_k = _mm_shuffle_ps(o, o, _MM_SHUFFLE(2,1,2,2)); - o_u = _mm_shuffle_ps(o, o, _MM_SHUFFLE(0,2,0,0)); - o_v = _mm_shuffle_ps(o, o, _MM_SHUFFLE(1,0,1,1)); - d_k = _mm_shuffle_ps(d, d, _MM_SHUFFLE(2,1,2,2)); - d_u = _mm_shuffle_ps(d, d, _MM_SHUFFLE(0,2,0,0)); - d_v = _mm_shuffle_ps(d, d, _MM_SHUFFLE(1,0,1,1)); - break; - case 72: - o_k = _mm_shuffle_ps(o, o, _MM_SHUFFLE(2,2,0,0)); - o_u = _mm_shuffle_ps(o, o, _MM_SHUFFLE(0,0,1,1)); - o_v = _mm_shuffle_ps(o, o, _MM_SHUFFLE(1,1,2,2)); - d_k = _mm_shuffle_ps(d, d, _MM_SHUFFLE(2,2,0,0)); - d_u = _mm_shuffle_ps(d, d, _MM_SHUFFLE(0,0,1,1)); - d_v = _mm_shuffle_ps(d, d, _MM_SHUFFLE(1,1,2,2)); - break; - case 73: - o_k = _mm_shuffle_ps(o, o, _MM_SHUFFLE(2,2,0,1)); - o_u = _mm_shuffle_ps(o, o, _MM_SHUFFLE(0,0,1,2)); - o_v = _mm_shuffle_ps(o, o, _MM_SHUFFLE(1,1,2,0)); - d_k = _mm_shuffle_ps(d, d, _MM_SHUFFLE(2,2,0,1)); - d_u = _mm_shuffle_ps(d, d, _MM_SHUFFLE(0,0,1,2)); - d_v = _mm_shuffle_ps(d, d, _MM_SHUFFLE(1,1,2,0)); - break; - case 74: - o_k = _mm_shuffle_ps(o, o, _MM_SHUFFLE(2,2,0,2)); - o_u = _mm_shuffle_ps(o, o, _MM_SHUFFLE(0,0,1,0)); - o_v = _mm_shuffle_ps(o, o, _MM_SHUFFLE(1,1,2,1)); - d_k = _mm_shuffle_ps(d, d, _MM_SHUFFLE(2,2,0,2)); - d_u = _mm_shuffle_ps(d, d, _MM_SHUFFLE(0,0,1,0)); - d_v = _mm_shuffle_ps(d, d, _MM_SHUFFLE(1,1,2,1)); - break; - case 75: - o_k = _mm_shuffle_ps(o, o, _MM_SHUFFLE(2,2,1,0)); - o_u = _mm_shuffle_ps(o, o, _MM_SHUFFLE(0,0,2,1)); - o_v = _mm_shuffle_ps(o, o, _MM_SHUFFLE(1,1,0,2)); - d_k = _mm_shuffle_ps(d, d, _MM_SHUFFLE(2,2,1,0)); - d_u = _mm_shuffle_ps(d, d, _MM_SHUFFLE(0,0,2,1)); - d_v = _mm_shuffle_ps(d, d, _MM_SHUFFLE(1,1,0,2)); - break; - case 76: - o_k = _mm_shuffle_ps(o, o, _MM_SHUFFLE(2,2,1,1)); - o_u = _mm_shuffle_ps(o, o, _MM_SHUFFLE(0,0,2,2)); - o_v = _mm_shuffle_ps(o, o, _MM_SHUFFLE(1,1,0,0)); - d_k = _mm_shuffle_ps(d, d, _MM_SHUFFLE(2,2,1,1)); - d_u = _mm_shuffle_ps(d, d, _MM_SHUFFLE(0,0,2,2)); - d_v = _mm_shuffle_ps(d, d, _MM_SHUFFLE(1,1,0,0)); - break; - case 77: - o_k = _mm_shuffle_ps(o, o, _MM_SHUFFLE(2,2,1,2)); - o_u = _mm_shuffle_ps(o, o, _MM_SHUFFLE(0,0,2,0)); - o_v = _mm_shuffle_ps(o, o, _MM_SHUFFLE(1,1,0,1)); - d_k = _mm_shuffle_ps(d, d, _MM_SHUFFLE(2,2,1,2)); - d_u = _mm_shuffle_ps(d, d, _MM_SHUFFLE(0,0,2,0)); - d_v = _mm_shuffle_ps(d, d, _MM_SHUFFLE(1,1,0,1)); - break; - case 78: - o_k = _mm_shuffle_ps(o, o, _MM_SHUFFLE(2,2,2,0)); - o_u = _mm_shuffle_ps(o, o, _MM_SHUFFLE(0,0,0,1)); - o_v = _mm_shuffle_ps(o, o, _MM_SHUFFLE(1,1,1,2)); - d_k = _mm_shuffle_ps(d, d, _MM_SHUFFLE(2,2,2,0)); - d_u = _mm_shuffle_ps(d, d, _MM_SHUFFLE(0,0,0,1)); - d_v = _mm_shuffle_ps(d, d, _MM_SHUFFLE(1,1,1,2)); - break; - case 79: - o_k = _mm_shuffle_ps(o, o, _MM_SHUFFLE(2,2,2,1)); - o_u = _mm_shuffle_ps(o, o, _MM_SHUFFLE(0,0,0,2)); - o_v = _mm_shuffle_ps(o, o, _MM_SHUFFLE(1,1,1,0)); - d_k = _mm_shuffle_ps(d, d, _MM_SHUFFLE(2,2,2,1)); - d_u = _mm_shuffle_ps(d, d, _MM_SHUFFLE(0,0,0,2)); - d_v = _mm_shuffle_ps(d, d, _MM_SHUFFLE(1,1,1,0)); - break; - case 80: - o_k = _mm_shuffle_ps(o, o, _MM_SHUFFLE(2,2,2,2)); - o_u = _mm_shuffle_ps(o, o, _MM_SHUFFLE(0,0,0,0)); - o_v = _mm_shuffle_ps(o, o, _MM_SHUFFLE(1,1,1,1)); - d_k = _mm_shuffle_ps(d, d, _MM_SHUFFLE(2,2,2,2)); - d_u = _mm_shuffle_ps(d, d, _MM_SHUFFLE(0,0,0,0)); - d_v = _mm_shuffle_ps(d, d, _MM_SHUFFLE(1,1,1,1)); - break; - default: - return false; - } - - const __m128 - n_d = _mm_load_ps(nd), - n_u = _mm_load_ps(nu), - n_v = _mm_load_ps(nv); - - const __m128 - ounu = _mm_mul_ps(o_u, n_u), - ovnv = _mm_mul_ps(o_v, n_v), - dunu = _mm_mul_ps(d_u, n_u), - dvnv = _mm_mul_ps(d_v, n_v); - - const __m128 - num = _mm_sub_ps(_mm_sub_ps(_mm_sub_ps(n_d, ounu), ovnv), o_k), - denom = _mm_add_ps(_mm_add_ps(dunu, dvnv), d_k); - - const __m128 - t = _mm_div_ps(num, denom), - mint = _mm_load1_ps(&_mint), - maxt = _mm_load1_ps(&_maxt); - - __m128 hasIts = - _mm_and_ps(_mm_cmpgt_ps(maxt, t), _mm_cmpgt_ps(t, mint)); - - if (_mm_movemask_ps(hasIts) == 0) { -#ifdef MTS_DEBUG_FP - enable_fpexcept(); -#endif - return false; - } - - const __m128 - a_u = _mm_load_ps(au), - a_v = _mm_load_ps(av); - - const __m128 - hu = _mm_add_ps(o_u, _mm_sub_ps(_mm_mul_ps(t, d_u), a_u)), - hv = _mm_add_ps(o_v, _mm_sub_ps(_mm_mul_ps(t, d_v), a_v)); - - const __m128 - b_nu = _mm_load_ps(bnu), - b_nv = _mm_load_ps(bnv), - c_nu = _mm_load_ps(cnu), - c_nv = _mm_load_ps(cnv); - - const __m128 - u = _mm_add_ps(_mm_mul_ps(hv, b_nu), _mm_mul_ps(hu, b_nv)), - v = _mm_add_ps(_mm_mul_ps(hu, c_nu), _mm_mul_ps(hv, c_nv)); - - const __m128 - zero = _mm_setzero_ps(), - one = SSEConstants::one.ps; - - hasIts = _mm_and_ps(hasIts, - _mm_and_ps(_mm_and_ps(_mm_cmpge_ps(u, zero), _mm_cmpge_ps(v, zero)), - _mm_cmpge_ps(one, _mm_add_ps(u, v)))); - - if (_mm_movemask_ps(hasIts) != 0) { - _mm_store_ps(_t, _mm_and_ps(t, hasIts)); - _mm_store_ps(_u, u); - _mm_store_ps(_v, v); - - int closest = 0; - float closestValue = std::numeric_limits::max(); - for (int i=0; i<4; i++) { - if (_t[i] != 0 && _t[i] <= closestValue) { - closest = i; - closestValue = _t[i]; - } - } - _t[0] = closestValue; - _u[0] = _u[closest]; - _v[0] = _v[closest]; - _shapeIndex = shapeIndex[closest]; - _index = index[closest]; -#ifdef MTS_DEBUG_FP - enable_fpexcept(); -#endif - return true; - } -#ifdef MTS_DEBUG_FP - enable_fpexcept(); -#endif - return false; -} - MTS_NAMESPACE_END #endif /* __TRIACCEL_SSE_H */ diff --git a/src/librender/scene.cpp b/src/librender/scene.cpp index 573374c1..183d0365 100644 --- a/src/librender/scene.cpp +++ b/src/librender/scene.cpp @@ -82,6 +82,10 @@ Scene::Scene(const Properties &props) /* kd-tree construction: specify whether or not bad splits can be "retracted". */ if (props.hasProperty("kdRetract")) m_kdtree->setRetract(props.getBoolean("kdRetract")); + /* kd-tree construction: Set the number of bad refines allowed to happen + in succession before a leaf node will be created.*/ + if (props.hasProperty("kdMaxBadRefines")) + m_kdtree->setMaxBadRefines(props.getInteger("kdMaxBadRefines")); } Scene::Scene(Scene *scene) : NetworkedObject(Properties()) { @@ -134,6 +138,7 @@ Scene::Scene(Stream *stream, InstanceManager *manager) m_kdtree->setExactPrimitiveThreshold(stream->readUInt()); m_kdtree->setParallelBuild(stream->readBool()); m_kdtree->setRetract(stream->readBool()); + m_kdtree->setMaxBadRefines(stream->readUInt()); m_importanceSampleLuminaires = stream->readBool(); m_testType = (ETestType) stream->readInt(); m_testThresh = stream->readFloat(); @@ -612,6 +617,7 @@ void Scene::serialize(Stream *stream, InstanceManager *manager) const { stream->writeUInt(m_kdtree->getExactPrimitiveThreshold()); stream->writeBool(m_kdtree->getParallelBuild()); stream->writeBool(m_kdtree->getRetract()); + stream->writeUInt(m_kdtree->getMaxBadRefines()); stream->writeBool(m_importanceSampleLuminaires); stream->writeInt(m_testType); stream->writeFloat(m_testThresh);