metadata
Wenzel Jakob 2010-10-13 20:56:13 +02:00
parent 7f4d55bdbd
commit 6a173cb9fd
4 changed files with 18 additions and 846 deletions

View File

@ -2506,12 +2506,8 @@ protected:
/* Set up the entry point */
uint32_t enPt = 0;
stack[enPt].t = mint;
//stack[enPt].p = ray(mint);
if (mint >= 0.0f)
stack[enPt].p = ray(mint);
else
stack[enPt].p = ray.o;
stack[enPt].p = ray(mint);
/* Set up the exit point */
uint32_t exPt = 1;
stack[exPt].t = maxt;
@ -2556,9 +2552,7 @@ protected:
}
/* Cases P4 and N4 -- calculate the distance to the split plane */
//XXX
// Float distToSplit = (splitVal - ray.o[axis]) * ray.dRcp[axis];
Float distToSplit = (splitVal - ray.o[axis]) / ray.d[axis];
Float distToSplit = (splitVal - ray.o[axis]) * ray.dRcp[axis];
/* Set up a new exit point */
const uint32_t tmp = exPt++;
@ -2588,15 +2582,15 @@ protected:
/* Floating-point arithmetic.. - use both absolute and relative
epsilons when looking for intersections in the subinterval */
#if defined(SINGLE_PRECISION)
const Float eps = 1e-3;
#else
const Float eps = 1e-5;
#endif
#if defined(SINGLE_PRECISION)
const Float eps = 1e-3;
#else
const Float eps = 1e-5;
#endif
const Float m_eps = 1-eps, p_eps = 1+eps;
const Float searchStart = std::max(mint, stack[enPt].t * m_eps - eps);
Float searchEnd = std::min(maxt, stack[exPt].t * p_eps + eps);
Float searchEnd = std::min(maxt, stack[exPt].t * p_eps + eps);
bool foundIntersection = false;
@ -2610,7 +2604,7 @@ protected:
#endif
EIntersectionResult result = cast()->intersect(ray,
primIdx, stack[enPt].t-Epsilon, stack[exPt].t+Epsilon, t, temp);
primIdx, searchStart, searchEnd, t, temp);
if (result == EYes) {
if (shadowRay)

View File

@ -151,6 +151,7 @@ protected:
return EYes;
}
} else {
cout << "Encountered a non-triangle shape!" << endl;
//int shape = m_triAccel[idx].shapeIndex;
}
#else
@ -170,6 +171,7 @@ protected:
return EYes;
}
} else {
cout << "Encountered a non-triangle shape!" << endl;
}
#endif
return ENo;

View File

@ -23,38 +23,6 @@
MTS_NAMESPACE_BEGIN
/**
* Pre-computed triangle representation using Ingo Wald's TriAccel layout.
* This is a special version, which can store and simultaneously intersect
* against four triangles using SSE instructions.
*/
struct TriAccel4 {
uint8_t k;
uint8_t nonTriFlag; // Flags any non-triangle shapes that may be referenced
uint16_t shapeIndex[4];
uint16_t indirectionCount;
uint32_t indirectionIndex;
float nu[4];
float nv[4];
float nd[4];
float au[4];
float av[4];
float bnu[4];
float bnv[4];
float cnu[4];
float cnv[4];
uint32_t index[4];
/// Create from vertex data. Returns the number of failures
inline int load(const Point *A, const Point *B, const Point *C,
const uint32_t *shapeIndex, const uint32_t *index);
/// Fast ray-triangle intersection test
inline bool rayIntersect(const __m128 o, const __m128 d, float _mint,
float _maxt, float *_t, float *_u, float *_v, unsigned int &_shapeIndex,
unsigned int &_index) const;
};
FINLINE __m128 TriAccel::rayIntersectPacket(const RayPacket4 &packet,
__m128 mint, __m128 maxt, __m128 inactive, Intersection4 &its) const {
static const MM_ALIGN16 int waldModulo[4] = { 1, 2, 0, 1 };
@ -141,804 +109,6 @@ FINLINE __m128 TriAccel::rayIntersectPacket(const RayPacket4 &packet,
return hasIts;
}
inline int TriAccel4::load(const Point *A, const Point *B, const Point *C,
const uint32_t *_shapeIndex, const uint32_t *_index) {
static const int waldModulo[4] = { 1, 2, 0, 1 };
int factor = 1, failures = 0;
k = 0;
for (int i=0; i<4; i++) {
Vector b = C[i]-A[i], c = B[i]-A[i], N = cross(c, b);
/* Determine the closest projection axis */
int kk = 0;
for (int j=0; j<3; j++) {
if (std::abs(N[j]) > std::abs(N[kk]))
kk = j;
}
int u = waldModulo[kk],
v = waldModulo[kk+1];
const Float n_k = N[kk],
denom = b[u]*c[v] - b[v]*c[u];
if (denom == 0) {
if ((_shapeIndex[i] != 0 || _index[i] != 0) && _index[i] != KNoTriangleFlag)
failures++;
}
/* Pre-compute intersection calculation
constants */
nu[i] = N[u] / n_k;
nv[i] = N[v] / n_k;
nd[i] = dot(Vector(A[i]), N) / n_k;
bnu[i] = b[u] / denom;
bnv[i] = -b[v] / denom;
au[i] = A[i][u];
av[i] = A[i][v];
cnu[i] = c[v] / denom;
cnv[i] = -c[u] / denom;
shapeIndex[i] = _shapeIndex[i];
index[i] = _index[i];
k += factor * kk;
factor *= 3;
}
return failures;
}
inline bool TriAccel4::rayIntersect(const __m128 o, const __m128 d, float _mint,
float _maxt, float *_t, float *_u, float *_v, unsigned int &_shapeIndex,
unsigned int &_index) const {
__m128 o_k, o_u, o_v, d_k, d_u, d_v;
/* Arrange the ray according to the projection axes of the
four packed triangles. This requires a *good* compiler
(read: GCC 4.2 or ICC)
*/
#ifdef MTS_DEBUG_FP
/* If debugging, turn off FP exceptions while testing for intersections
kd-tree (the code makes use of NaNs) */
disable_fpexcept();
#endif
switch (k) {
case 0:
o_k = _mm_shuffle_ps(o, o, _MM_SHUFFLE(0,0,0,0));
o_u = _mm_shuffle_ps(o, o, _MM_SHUFFLE(1,1,1,1));
o_v = _mm_shuffle_ps(o, o, _MM_SHUFFLE(2,2,2,2));
d_k = _mm_shuffle_ps(d, d, _MM_SHUFFLE(0,0,0,0));
d_u = _mm_shuffle_ps(d, d, _MM_SHUFFLE(1,1,1,1));
d_v = _mm_shuffle_ps(d, d, _MM_SHUFFLE(2,2,2,2));
break;
case 1:
o_k = _mm_shuffle_ps(o, o, _MM_SHUFFLE(0,0,0,1));
o_u = _mm_shuffle_ps(o, o, _MM_SHUFFLE(1,1,1,2));
o_v = _mm_shuffle_ps(o, o, _MM_SHUFFLE(2,2,2,0));
d_k = _mm_shuffle_ps(d, d, _MM_SHUFFLE(0,0,0,1));
d_u = _mm_shuffle_ps(d, d, _MM_SHUFFLE(1,1,1,2));
d_v = _mm_shuffle_ps(d, d, _MM_SHUFFLE(2,2,2,0));
break;
case 2:
o_k = _mm_shuffle_ps(o, o, _MM_SHUFFLE(0,0,0,2));
o_u = _mm_shuffle_ps(o, o, _MM_SHUFFLE(1,1,1,0));
o_v = _mm_shuffle_ps(o, o, _MM_SHUFFLE(2,2,2,1));
d_k = _mm_shuffle_ps(d, d, _MM_SHUFFLE(0,0,0,2));
d_u = _mm_shuffle_ps(d, d, _MM_SHUFFLE(1,1,1,0));
d_v = _mm_shuffle_ps(d, d, _MM_SHUFFLE(2,2,2,1));
break;
case 3:
o_k = _mm_shuffle_ps(o, o, _MM_SHUFFLE(0,0,1,0));
o_u = _mm_shuffle_ps(o, o, _MM_SHUFFLE(1,1,2,1));
o_v = _mm_shuffle_ps(o, o, _MM_SHUFFLE(2,2,0,2));
d_k = _mm_shuffle_ps(d, d, _MM_SHUFFLE(0,0,1,0));
d_u = _mm_shuffle_ps(d, d, _MM_SHUFFLE(1,1,2,1));
d_v = _mm_shuffle_ps(d, d, _MM_SHUFFLE(2,2,0,2));
break;
case 4:
o_k = _mm_shuffle_ps(o, o, _MM_SHUFFLE(0,0,1,1));
o_u = _mm_shuffle_ps(o, o, _MM_SHUFFLE(1,1,2,2));
o_v = _mm_shuffle_ps(o, o, _MM_SHUFFLE(2,2,0,0));
d_k = _mm_shuffle_ps(d, d, _MM_SHUFFLE(0,0,1,1));
d_u = _mm_shuffle_ps(d, d, _MM_SHUFFLE(1,1,2,2));
d_v = _mm_shuffle_ps(d, d, _MM_SHUFFLE(2,2,0,0));
break;
case 5:
o_k = _mm_shuffle_ps(o, o, _MM_SHUFFLE(0,0,1,2));
o_u = _mm_shuffle_ps(o, o, _MM_SHUFFLE(1,1,2,0));
o_v = _mm_shuffle_ps(o, o, _MM_SHUFFLE(2,2,0,1));
d_k = _mm_shuffle_ps(d, d, _MM_SHUFFLE(0,0,1,2));
d_u = _mm_shuffle_ps(d, d, _MM_SHUFFLE(1,1,2,0));
d_v = _mm_shuffle_ps(d, d, _MM_SHUFFLE(2,2,0,1));
break;
case 6:
o_k = _mm_shuffle_ps(o, o, _MM_SHUFFLE(0,0,2,0));
o_u = _mm_shuffle_ps(o, o, _MM_SHUFFLE(1,1,0,1));
o_v = _mm_shuffle_ps(o, o, _MM_SHUFFLE(2,2,1,2));
d_k = _mm_shuffle_ps(d, d, _MM_SHUFFLE(0,0,2,0));
d_u = _mm_shuffle_ps(d, d, _MM_SHUFFLE(1,1,0,1));
d_v = _mm_shuffle_ps(d, d, _MM_SHUFFLE(2,2,1,2));
break;
case 7:
o_k = _mm_shuffle_ps(o, o, _MM_SHUFFLE(0,0,2,1));
o_u = _mm_shuffle_ps(o, o, _MM_SHUFFLE(1,1,0,2));
o_v = _mm_shuffle_ps(o, o, _MM_SHUFFLE(2,2,1,0));
d_k = _mm_shuffle_ps(d, d, _MM_SHUFFLE(0,0,2,1));
d_u = _mm_shuffle_ps(d, d, _MM_SHUFFLE(1,1,0,2));
d_v = _mm_shuffle_ps(d, d, _MM_SHUFFLE(2,2,1,0));
break;
case 8:
o_k = _mm_shuffle_ps(o, o, _MM_SHUFFLE(0,0,2,2));
o_u = _mm_shuffle_ps(o, o, _MM_SHUFFLE(1,1,0,0));
o_v = _mm_shuffle_ps(o, o, _MM_SHUFFLE(2,2,1,1));
d_k = _mm_shuffle_ps(d, d, _MM_SHUFFLE(0,0,2,2));
d_u = _mm_shuffle_ps(d, d, _MM_SHUFFLE(1,1,0,0));
d_v = _mm_shuffle_ps(d, d, _MM_SHUFFLE(2,2,1,1));
break;
case 9:
o_k = _mm_shuffle_ps(o, o, _MM_SHUFFLE(0,1,0,0));
o_u = _mm_shuffle_ps(o, o, _MM_SHUFFLE(1,2,1,1));
o_v = _mm_shuffle_ps(o, o, _MM_SHUFFLE(2,0,2,2));
d_k = _mm_shuffle_ps(d, d, _MM_SHUFFLE(0,1,0,0));
d_u = _mm_shuffle_ps(d, d, _MM_SHUFFLE(1,2,1,1));
d_v = _mm_shuffle_ps(d, d, _MM_SHUFFLE(2,0,2,2));
break;
case 10:
o_k = _mm_shuffle_ps(o, o, _MM_SHUFFLE(0,1,0,1));
o_u = _mm_shuffle_ps(o, o, _MM_SHUFFLE(1,2,1,2));
o_v = _mm_shuffle_ps(o, o, _MM_SHUFFLE(2,0,2,0));
d_k = _mm_shuffle_ps(d, d, _MM_SHUFFLE(0,1,0,1));
d_u = _mm_shuffle_ps(d, d, _MM_SHUFFLE(1,2,1,2));
d_v = _mm_shuffle_ps(d, d, _MM_SHUFFLE(2,0,2,0));
break;
case 11:
o_k = _mm_shuffle_ps(o, o, _MM_SHUFFLE(0,1,0,2));
o_u = _mm_shuffle_ps(o, o, _MM_SHUFFLE(1,2,1,0));
o_v = _mm_shuffle_ps(o, o, _MM_SHUFFLE(2,0,2,1));
d_k = _mm_shuffle_ps(d, d, _MM_SHUFFLE(0,1,0,2));
d_u = _mm_shuffle_ps(d, d, _MM_SHUFFLE(1,2,1,0));
d_v = _mm_shuffle_ps(d, d, _MM_SHUFFLE(2,0,2,1));
break;
case 12:
o_k = _mm_shuffle_ps(o, o, _MM_SHUFFLE(0,1,1,0));
o_u = _mm_shuffle_ps(o, o, _MM_SHUFFLE(1,2,2,1));
o_v = _mm_shuffle_ps(o, o, _MM_SHUFFLE(2,0,0,2));
d_k = _mm_shuffle_ps(d, d, _MM_SHUFFLE(0,1,1,0));
d_u = _mm_shuffle_ps(d, d, _MM_SHUFFLE(1,2,2,1));
d_v = _mm_shuffle_ps(d, d, _MM_SHUFFLE(2,0,0,2));
break;
case 13:
o_k = _mm_shuffle_ps(o, o, _MM_SHUFFLE(0,1,1,1));
o_u = _mm_shuffle_ps(o, o, _MM_SHUFFLE(1,2,2,2));
o_v = _mm_shuffle_ps(o, o, _MM_SHUFFLE(2,0,0,0));
d_k = _mm_shuffle_ps(d, d, _MM_SHUFFLE(0,1,1,1));
d_u = _mm_shuffle_ps(d, d, _MM_SHUFFLE(1,2,2,2));
d_v = _mm_shuffle_ps(d, d, _MM_SHUFFLE(2,0,0,0));
break;
case 14:
o_k = _mm_shuffle_ps(o, o, _MM_SHUFFLE(0,1,1,2));
o_u = _mm_shuffle_ps(o, o, _MM_SHUFFLE(1,2,2,0));
o_v = _mm_shuffle_ps(o, o, _MM_SHUFFLE(2,0,0,1));
d_k = _mm_shuffle_ps(d, d, _MM_SHUFFLE(0,1,1,2));
d_u = _mm_shuffle_ps(d, d, _MM_SHUFFLE(1,2,2,0));
d_v = _mm_shuffle_ps(d, d, _MM_SHUFFLE(2,0,0,1));
break;
case 15:
o_k = _mm_shuffle_ps(o, o, _MM_SHUFFLE(0,1,2,0));
o_u = _mm_shuffle_ps(o, o, _MM_SHUFFLE(1,2,0,1));
o_v = _mm_shuffle_ps(o, o, _MM_SHUFFLE(2,0,1,2));
d_k = _mm_shuffle_ps(d, d, _MM_SHUFFLE(0,1,2,0));
d_u = _mm_shuffle_ps(d, d, _MM_SHUFFLE(1,2,0,1));
d_v = _mm_shuffle_ps(d, d, _MM_SHUFFLE(2,0,1,2));
break;
case 16:
o_k = _mm_shuffle_ps(o, o, _MM_SHUFFLE(0,1,2,1));
o_u = _mm_shuffle_ps(o, o, _MM_SHUFFLE(1,2,0,2));
o_v = _mm_shuffle_ps(o, o, _MM_SHUFFLE(2,0,1,0));
d_k = _mm_shuffle_ps(d, d, _MM_SHUFFLE(0,1,2,1));
d_u = _mm_shuffle_ps(d, d, _MM_SHUFFLE(1,2,0,2));
d_v = _mm_shuffle_ps(d, d, _MM_SHUFFLE(2,0,1,0));
break;
case 17:
o_k = _mm_shuffle_ps(o, o, _MM_SHUFFLE(0,1,2,2));
o_u = _mm_shuffle_ps(o, o, _MM_SHUFFLE(1,2,0,0));
o_v = _mm_shuffle_ps(o, o, _MM_SHUFFLE(2,0,1,1));
d_k = _mm_shuffle_ps(d, d, _MM_SHUFFLE(0,1,2,2));
d_u = _mm_shuffle_ps(d, d, _MM_SHUFFLE(1,2,0,0));
d_v = _mm_shuffle_ps(d, d, _MM_SHUFFLE(2,0,1,1));
break;
case 18:
o_k = _mm_shuffle_ps(o, o, _MM_SHUFFLE(0,2,0,0));
o_u = _mm_shuffle_ps(o, o, _MM_SHUFFLE(1,0,1,1));
o_v = _mm_shuffle_ps(o, o, _MM_SHUFFLE(2,1,2,2));
d_k = _mm_shuffle_ps(d, d, _MM_SHUFFLE(0,2,0,0));
d_u = _mm_shuffle_ps(d, d, _MM_SHUFFLE(1,0,1,1));
d_v = _mm_shuffle_ps(d, d, _MM_SHUFFLE(2,1,2,2));
break;
case 19:
o_k = _mm_shuffle_ps(o, o, _MM_SHUFFLE(0,2,0,1));
o_u = _mm_shuffle_ps(o, o, _MM_SHUFFLE(1,0,1,2));
o_v = _mm_shuffle_ps(o, o, _MM_SHUFFLE(2,1,2,0));
d_k = _mm_shuffle_ps(d, d, _MM_SHUFFLE(0,2,0,1));
d_u = _mm_shuffle_ps(d, d, _MM_SHUFFLE(1,0,1,2));
d_v = _mm_shuffle_ps(d, d, _MM_SHUFFLE(2,1,2,0));
break;
case 20:
o_k = _mm_shuffle_ps(o, o, _MM_SHUFFLE(0,2,0,2));
o_u = _mm_shuffle_ps(o, o, _MM_SHUFFLE(1,0,1,0));
o_v = _mm_shuffle_ps(o, o, _MM_SHUFFLE(2,1,2,1));
d_k = _mm_shuffle_ps(d, d, _MM_SHUFFLE(0,2,0,2));
d_u = _mm_shuffle_ps(d, d, _MM_SHUFFLE(1,0,1,0));
d_v = _mm_shuffle_ps(d, d, _MM_SHUFFLE(2,1,2,1));
break;
case 21:
o_k = _mm_shuffle_ps(o, o, _MM_SHUFFLE(0,2,1,0));
o_u = _mm_shuffle_ps(o, o, _MM_SHUFFLE(1,0,2,1));
o_v = _mm_shuffle_ps(o, o, _MM_SHUFFLE(2,1,0,2));
d_k = _mm_shuffle_ps(d, d, _MM_SHUFFLE(0,2,1,0));
d_u = _mm_shuffle_ps(d, d, _MM_SHUFFLE(1,0,2,1));
d_v = _mm_shuffle_ps(d, d, _MM_SHUFFLE(2,1,0,2));
break;
case 22:
o_k = _mm_shuffle_ps(o, o, _MM_SHUFFLE(0,2,1,1));
o_u = _mm_shuffle_ps(o, o, _MM_SHUFFLE(1,0,2,2));
o_v = _mm_shuffle_ps(o, o, _MM_SHUFFLE(2,1,0,0));
d_k = _mm_shuffle_ps(d, d, _MM_SHUFFLE(0,2,1,1));
d_u = _mm_shuffle_ps(d, d, _MM_SHUFFLE(1,0,2,2));
d_v = _mm_shuffle_ps(d, d, _MM_SHUFFLE(2,1,0,0));
break;
case 23:
o_k = _mm_shuffle_ps(o, o, _MM_SHUFFLE(0,2,1,2));
o_u = _mm_shuffle_ps(o, o, _MM_SHUFFLE(1,0,2,0));
o_v = _mm_shuffle_ps(o, o, _MM_SHUFFLE(2,1,0,1));
d_k = _mm_shuffle_ps(d, d, _MM_SHUFFLE(0,2,1,2));
d_u = _mm_shuffle_ps(d, d, _MM_SHUFFLE(1,0,2,0));
d_v = _mm_shuffle_ps(d, d, _MM_SHUFFLE(2,1,0,1));
break;
case 24:
o_k = _mm_shuffle_ps(o, o, _MM_SHUFFLE(0,2,2,0));
o_u = _mm_shuffle_ps(o, o, _MM_SHUFFLE(1,0,0,1));
o_v = _mm_shuffle_ps(o, o, _MM_SHUFFLE(2,1,1,2));
d_k = _mm_shuffle_ps(d, d, _MM_SHUFFLE(0,2,2,0));
d_u = _mm_shuffle_ps(d, d, _MM_SHUFFLE(1,0,0,1));
d_v = _mm_shuffle_ps(d, d, _MM_SHUFFLE(2,1,1,2));
break;
case 25:
o_k = _mm_shuffle_ps(o, o, _MM_SHUFFLE(0,2,2,1));
o_u = _mm_shuffle_ps(o, o, _MM_SHUFFLE(1,0,0,2));
o_v = _mm_shuffle_ps(o, o, _MM_SHUFFLE(2,1,1,0));
d_k = _mm_shuffle_ps(d, d, _MM_SHUFFLE(0,2,2,1));
d_u = _mm_shuffle_ps(d, d, _MM_SHUFFLE(1,0,0,2));
d_v = _mm_shuffle_ps(d, d, _MM_SHUFFLE(2,1,1,0));
break;
case 26:
o_k = _mm_shuffle_ps(o, o, _MM_SHUFFLE(0,2,2,2));
o_u = _mm_shuffle_ps(o, o, _MM_SHUFFLE(1,0,0,0));
o_v = _mm_shuffle_ps(o, o, _MM_SHUFFLE(2,1,1,1));
d_k = _mm_shuffle_ps(d, d, _MM_SHUFFLE(0,2,2,2));
d_u = _mm_shuffle_ps(d, d, _MM_SHUFFLE(1,0,0,0));
d_v = _mm_shuffle_ps(d, d, _MM_SHUFFLE(2,1,1,1));
break;
case 27:
o_k = _mm_shuffle_ps(o, o, _MM_SHUFFLE(1,0,0,0));
o_u = _mm_shuffle_ps(o, o, _MM_SHUFFLE(2,1,1,1));
o_v = _mm_shuffle_ps(o, o, _MM_SHUFFLE(0,2,2,2));
d_k = _mm_shuffle_ps(d, d, _MM_SHUFFLE(1,0,0,0));
d_u = _mm_shuffle_ps(d, d, _MM_SHUFFLE(2,1,1,1));
d_v = _mm_shuffle_ps(d, d, _MM_SHUFFLE(0,2,2,2));
break;
case 28:
o_k = _mm_shuffle_ps(o, o, _MM_SHUFFLE(1,0,0,1));
o_u = _mm_shuffle_ps(o, o, _MM_SHUFFLE(2,1,1,2));
o_v = _mm_shuffle_ps(o, o, _MM_SHUFFLE(0,2,2,0));
d_k = _mm_shuffle_ps(d, d, _MM_SHUFFLE(1,0,0,1));
d_u = _mm_shuffle_ps(d, d, _MM_SHUFFLE(2,1,1,2));
d_v = _mm_shuffle_ps(d, d, _MM_SHUFFLE(0,2,2,0));
break;
case 29:
o_k = _mm_shuffle_ps(o, o, _MM_SHUFFLE(1,0,0,2));
o_u = _mm_shuffle_ps(o, o, _MM_SHUFFLE(2,1,1,0));
o_v = _mm_shuffle_ps(o, o, _MM_SHUFFLE(0,2,2,1));
d_k = _mm_shuffle_ps(d, d, _MM_SHUFFLE(1,0,0,2));
d_u = _mm_shuffle_ps(d, d, _MM_SHUFFLE(2,1,1,0));
d_v = _mm_shuffle_ps(d, d, _MM_SHUFFLE(0,2,2,1));
break;
case 30:
o_k = _mm_shuffle_ps(o, o, _MM_SHUFFLE(1,0,1,0));
o_u = _mm_shuffle_ps(o, o, _MM_SHUFFLE(2,1,2,1));
o_v = _mm_shuffle_ps(o, o, _MM_SHUFFLE(0,2,0,2));
d_k = _mm_shuffle_ps(d, d, _MM_SHUFFLE(1,0,1,0));
d_u = _mm_shuffle_ps(d, d, _MM_SHUFFLE(2,1,2,1));
d_v = _mm_shuffle_ps(d, d, _MM_SHUFFLE(0,2,0,2));
break;
case 31:
o_k = _mm_shuffle_ps(o, o, _MM_SHUFFLE(1,0,1,1));
o_u = _mm_shuffle_ps(o, o, _MM_SHUFFLE(2,1,2,2));
o_v = _mm_shuffle_ps(o, o, _MM_SHUFFLE(0,2,0,0));
d_k = _mm_shuffle_ps(d, d, _MM_SHUFFLE(1,0,1,1));
d_u = _mm_shuffle_ps(d, d, _MM_SHUFFLE(2,1,2,2));
d_v = _mm_shuffle_ps(d, d, _MM_SHUFFLE(0,2,0,0));
break;
case 32:
o_k = _mm_shuffle_ps(o, o, _MM_SHUFFLE(1,0,1,2));
o_u = _mm_shuffle_ps(o, o, _MM_SHUFFLE(2,1,2,0));
o_v = _mm_shuffle_ps(o, o, _MM_SHUFFLE(0,2,0,1));
d_k = _mm_shuffle_ps(d, d, _MM_SHUFFLE(1,0,1,2));
d_u = _mm_shuffle_ps(d, d, _MM_SHUFFLE(2,1,2,0));
d_v = _mm_shuffle_ps(d, d, _MM_SHUFFLE(0,2,0,1));
break;
case 33:
o_k = _mm_shuffle_ps(o, o, _MM_SHUFFLE(1,0,2,0));
o_u = _mm_shuffle_ps(o, o, _MM_SHUFFLE(2,1,0,1));
o_v = _mm_shuffle_ps(o, o, _MM_SHUFFLE(0,2,1,2));
d_k = _mm_shuffle_ps(d, d, _MM_SHUFFLE(1,0,2,0));
d_u = _mm_shuffle_ps(d, d, _MM_SHUFFLE(2,1,0,1));
d_v = _mm_shuffle_ps(d, d, _MM_SHUFFLE(0,2,1,2));
break;
case 34:
o_k = _mm_shuffle_ps(o, o, _MM_SHUFFLE(1,0,2,1));
o_u = _mm_shuffle_ps(o, o, _MM_SHUFFLE(2,1,0,2));
o_v = _mm_shuffle_ps(o, o, _MM_SHUFFLE(0,2,1,0));
d_k = _mm_shuffle_ps(d, d, _MM_SHUFFLE(1,0,2,1));
d_u = _mm_shuffle_ps(d, d, _MM_SHUFFLE(2,1,0,2));
d_v = _mm_shuffle_ps(d, d, _MM_SHUFFLE(0,2,1,0));
break;
case 35:
o_k = _mm_shuffle_ps(o, o, _MM_SHUFFLE(1,0,2,2));
o_u = _mm_shuffle_ps(o, o, _MM_SHUFFLE(2,1,0,0));
o_v = _mm_shuffle_ps(o, o, _MM_SHUFFLE(0,2,1,1));
d_k = _mm_shuffle_ps(d, d, _MM_SHUFFLE(1,0,2,2));
d_u = _mm_shuffle_ps(d, d, _MM_SHUFFLE(2,1,0,0));
d_v = _mm_shuffle_ps(d, d, _MM_SHUFFLE(0,2,1,1));
break;
case 36:
o_k = _mm_shuffle_ps(o, o, _MM_SHUFFLE(1,1,0,0));
o_u = _mm_shuffle_ps(o, o, _MM_SHUFFLE(2,2,1,1));
o_v = _mm_shuffle_ps(o, o, _MM_SHUFFLE(0,0,2,2));
d_k = _mm_shuffle_ps(d, d, _MM_SHUFFLE(1,1,0,0));
d_u = _mm_shuffle_ps(d, d, _MM_SHUFFLE(2,2,1,1));
d_v = _mm_shuffle_ps(d, d, _MM_SHUFFLE(0,0,2,2));
break;
case 37:
o_k = _mm_shuffle_ps(o, o, _MM_SHUFFLE(1,1,0,1));
o_u = _mm_shuffle_ps(o, o, _MM_SHUFFLE(2,2,1,2));
o_v = _mm_shuffle_ps(o, o, _MM_SHUFFLE(0,0,2,0));
d_k = _mm_shuffle_ps(d, d, _MM_SHUFFLE(1,1,0,1));
d_u = _mm_shuffle_ps(d, d, _MM_SHUFFLE(2,2,1,2));
d_v = _mm_shuffle_ps(d, d, _MM_SHUFFLE(0,0,2,0));
break;
case 38:
o_k = _mm_shuffle_ps(o, o, _MM_SHUFFLE(1,1,0,2));
o_u = _mm_shuffle_ps(o, o, _MM_SHUFFLE(2,2,1,0));
o_v = _mm_shuffle_ps(o, o, _MM_SHUFFLE(0,0,2,1));
d_k = _mm_shuffle_ps(d, d, _MM_SHUFFLE(1,1,0,2));
d_u = _mm_shuffle_ps(d, d, _MM_SHUFFLE(2,2,1,0));
d_v = _mm_shuffle_ps(d, d, _MM_SHUFFLE(0,0,2,1));
break;
case 39:
o_k = _mm_shuffle_ps(o, o, _MM_SHUFFLE(1,1,1,0));
o_u = _mm_shuffle_ps(o, o, _MM_SHUFFLE(2,2,2,1));
o_v = _mm_shuffle_ps(o, o, _MM_SHUFFLE(0,0,0,2));
d_k = _mm_shuffle_ps(d, d, _MM_SHUFFLE(1,1,1,0));
d_u = _mm_shuffle_ps(d, d, _MM_SHUFFLE(2,2,2,1));
d_v = _mm_shuffle_ps(d, d, _MM_SHUFFLE(0,0,0,2));
break;
case 40:
o_k = _mm_shuffle_ps(o, o, _MM_SHUFFLE(1,1,1,1));
o_u = _mm_shuffle_ps(o, o, _MM_SHUFFLE(2,2,2,2));
o_v = _mm_shuffle_ps(o, o, _MM_SHUFFLE(0,0,0,0));
d_k = _mm_shuffle_ps(d, d, _MM_SHUFFLE(1,1,1,1));
d_u = _mm_shuffle_ps(d, d, _MM_SHUFFLE(2,2,2,2));
d_v = _mm_shuffle_ps(d, d, _MM_SHUFFLE(0,0,0,0));
break;
case 41:
o_k = _mm_shuffle_ps(o, o, _MM_SHUFFLE(1,1,1,2));
o_u = _mm_shuffle_ps(o, o, _MM_SHUFFLE(2,2,2,0));
o_v = _mm_shuffle_ps(o, o, _MM_SHUFFLE(0,0,0,1));
d_k = _mm_shuffle_ps(d, d, _MM_SHUFFLE(1,1,1,2));
d_u = _mm_shuffle_ps(d, d, _MM_SHUFFLE(2,2,2,0));
d_v = _mm_shuffle_ps(d, d, _MM_SHUFFLE(0,0,0,1));
break;
case 42:
o_k = _mm_shuffle_ps(o, o, _MM_SHUFFLE(1,1,2,0));
o_u = _mm_shuffle_ps(o, o, _MM_SHUFFLE(2,2,0,1));
o_v = _mm_shuffle_ps(o, o, _MM_SHUFFLE(0,0,1,2));
d_k = _mm_shuffle_ps(d, d, _MM_SHUFFLE(1,1,2,0));
d_u = _mm_shuffle_ps(d, d, _MM_SHUFFLE(2,2,0,1));
d_v = _mm_shuffle_ps(d, d, _MM_SHUFFLE(0,0,1,2));
break;
case 43:
o_k = _mm_shuffle_ps(o, o, _MM_SHUFFLE(1,1,2,1));
o_u = _mm_shuffle_ps(o, o, _MM_SHUFFLE(2,2,0,2));
o_v = _mm_shuffle_ps(o, o, _MM_SHUFFLE(0,0,1,0));
d_k = _mm_shuffle_ps(d, d, _MM_SHUFFLE(1,1,2,1));
d_u = _mm_shuffle_ps(d, d, _MM_SHUFFLE(2,2,0,2));
d_v = _mm_shuffle_ps(d, d, _MM_SHUFFLE(0,0,1,0));
break;
case 44:
o_k = _mm_shuffle_ps(o, o, _MM_SHUFFLE(1,1,2,2));
o_u = _mm_shuffle_ps(o, o, _MM_SHUFFLE(2,2,0,0));
o_v = _mm_shuffle_ps(o, o, _MM_SHUFFLE(0,0,1,1));
d_k = _mm_shuffle_ps(d, d, _MM_SHUFFLE(1,1,2,2));
d_u = _mm_shuffle_ps(d, d, _MM_SHUFFLE(2,2,0,0));
d_v = _mm_shuffle_ps(d, d, _MM_SHUFFLE(0,0,1,1));
break;
case 45:
o_k = _mm_shuffle_ps(o, o, _MM_SHUFFLE(1,2,0,0));
o_u = _mm_shuffle_ps(o, o, _MM_SHUFFLE(2,0,1,1));
o_v = _mm_shuffle_ps(o, o, _MM_SHUFFLE(0,1,2,2));
d_k = _mm_shuffle_ps(d, d, _MM_SHUFFLE(1,2,0,0));
d_u = _mm_shuffle_ps(d, d, _MM_SHUFFLE(2,0,1,1));
d_v = _mm_shuffle_ps(d, d, _MM_SHUFFLE(0,1,2,2));
break;
case 46:
o_k = _mm_shuffle_ps(o, o, _MM_SHUFFLE(1,2,0,1));
o_u = _mm_shuffle_ps(o, o, _MM_SHUFFLE(2,0,1,2));
o_v = _mm_shuffle_ps(o, o, _MM_SHUFFLE(0,1,2,0));
d_k = _mm_shuffle_ps(d, d, _MM_SHUFFLE(1,2,0,1));
d_u = _mm_shuffle_ps(d, d, _MM_SHUFFLE(2,0,1,2));
d_v = _mm_shuffle_ps(d, d, _MM_SHUFFLE(0,1,2,0));
break;
case 47:
o_k = _mm_shuffle_ps(o, o, _MM_SHUFFLE(1,2,0,2));
o_u = _mm_shuffle_ps(o, o, _MM_SHUFFLE(2,0,1,0));
o_v = _mm_shuffle_ps(o, o, _MM_SHUFFLE(0,1,2,1));
d_k = _mm_shuffle_ps(d, d, _MM_SHUFFLE(1,2,0,2));
d_u = _mm_shuffle_ps(d, d, _MM_SHUFFLE(2,0,1,0));
d_v = _mm_shuffle_ps(d, d, _MM_SHUFFLE(0,1,2,1));
break;
case 48:
o_k = _mm_shuffle_ps(o, o, _MM_SHUFFLE(1,2,1,0));
o_u = _mm_shuffle_ps(o, o, _MM_SHUFFLE(2,0,2,1));
o_v = _mm_shuffle_ps(o, o, _MM_SHUFFLE(0,1,0,2));
d_k = _mm_shuffle_ps(d, d, _MM_SHUFFLE(1,2,1,0));
d_u = _mm_shuffle_ps(d, d, _MM_SHUFFLE(2,0,2,1));
d_v = _mm_shuffle_ps(d, d, _MM_SHUFFLE(0,1,0,2));
break;
case 49:
o_k = _mm_shuffle_ps(o, o, _MM_SHUFFLE(1,2,1,1));
o_u = _mm_shuffle_ps(o, o, _MM_SHUFFLE(2,0,2,2));
o_v = _mm_shuffle_ps(o, o, _MM_SHUFFLE(0,1,0,0));
d_k = _mm_shuffle_ps(d, d, _MM_SHUFFLE(1,2,1,1));
d_u = _mm_shuffle_ps(d, d, _MM_SHUFFLE(2,0,2,2));
d_v = _mm_shuffle_ps(d, d, _MM_SHUFFLE(0,1,0,0));
break;
case 50:
o_k = _mm_shuffle_ps(o, o, _MM_SHUFFLE(1,2,1,2));
o_u = _mm_shuffle_ps(o, o, _MM_SHUFFLE(2,0,2,0));
o_v = _mm_shuffle_ps(o, o, _MM_SHUFFLE(0,1,0,1));
d_k = _mm_shuffle_ps(d, d, _MM_SHUFFLE(1,2,1,2));
d_u = _mm_shuffle_ps(d, d, _MM_SHUFFLE(2,0,2,0));
d_v = _mm_shuffle_ps(d, d, _MM_SHUFFLE(0,1,0,1));
break;
case 51:
o_k = _mm_shuffle_ps(o, o, _MM_SHUFFLE(1,2,2,0));
o_u = _mm_shuffle_ps(o, o, _MM_SHUFFLE(2,0,0,1));
o_v = _mm_shuffle_ps(o, o, _MM_SHUFFLE(0,1,1,2));
d_k = _mm_shuffle_ps(d, d, _MM_SHUFFLE(1,2,2,0));
d_u = _mm_shuffle_ps(d, d, _MM_SHUFFLE(2,0,0,1));
d_v = _mm_shuffle_ps(d, d, _MM_SHUFFLE(0,1,1,2));
break;
case 52:
o_k = _mm_shuffle_ps(o, o, _MM_SHUFFLE(1,2,2,1));
o_u = _mm_shuffle_ps(o, o, _MM_SHUFFLE(2,0,0,2));
o_v = _mm_shuffle_ps(o, o, _MM_SHUFFLE(0,1,1,0));
d_k = _mm_shuffle_ps(d, d, _MM_SHUFFLE(1,2,2,1));
d_u = _mm_shuffle_ps(d, d, _MM_SHUFFLE(2,0,0,2));
d_v = _mm_shuffle_ps(d, d, _MM_SHUFFLE(0,1,1,0));
break;
case 53:
o_k = _mm_shuffle_ps(o, o, _MM_SHUFFLE(1,2,2,2));
o_u = _mm_shuffle_ps(o, o, _MM_SHUFFLE(2,0,0,0));
o_v = _mm_shuffle_ps(o, o, _MM_SHUFFLE(0,1,1,1));
d_k = _mm_shuffle_ps(d, d, _MM_SHUFFLE(1,2,2,2));
d_u = _mm_shuffle_ps(d, d, _MM_SHUFFLE(2,0,0,0));
d_v = _mm_shuffle_ps(d, d, _MM_SHUFFLE(0,1,1,1));
break;
case 54:
o_k = _mm_shuffle_ps(o, o, _MM_SHUFFLE(2,0,0,0));
o_u = _mm_shuffle_ps(o, o, _MM_SHUFFLE(0,1,1,1));
o_v = _mm_shuffle_ps(o, o, _MM_SHUFFLE(1,2,2,2));
d_k = _mm_shuffle_ps(d, d, _MM_SHUFFLE(2,0,0,0));
d_u = _mm_shuffle_ps(d, d, _MM_SHUFFLE(0,1,1,1));
d_v = _mm_shuffle_ps(d, d, _MM_SHUFFLE(1,2,2,2));
break;
case 55:
o_k = _mm_shuffle_ps(o, o, _MM_SHUFFLE(2,0,0,1));
o_u = _mm_shuffle_ps(o, o, _MM_SHUFFLE(0,1,1,2));
o_v = _mm_shuffle_ps(o, o, _MM_SHUFFLE(1,2,2,0));
d_k = _mm_shuffle_ps(d, d, _MM_SHUFFLE(2,0,0,1));
d_u = _mm_shuffle_ps(d, d, _MM_SHUFFLE(0,1,1,2));
d_v = _mm_shuffle_ps(d, d, _MM_SHUFFLE(1,2,2,0));
break;
case 56:
o_k = _mm_shuffle_ps(o, o, _MM_SHUFFLE(2,0,0,2));
o_u = _mm_shuffle_ps(o, o, _MM_SHUFFLE(0,1,1,0));
o_v = _mm_shuffle_ps(o, o, _MM_SHUFFLE(1,2,2,1));
d_k = _mm_shuffle_ps(d, d, _MM_SHUFFLE(2,0,0,2));
d_u = _mm_shuffle_ps(d, d, _MM_SHUFFLE(0,1,1,0));
d_v = _mm_shuffle_ps(d, d, _MM_SHUFFLE(1,2,2,1));
break;
case 57:
o_k = _mm_shuffle_ps(o, o, _MM_SHUFFLE(2,0,1,0));
o_u = _mm_shuffle_ps(o, o, _MM_SHUFFLE(0,1,2,1));
o_v = _mm_shuffle_ps(o, o, _MM_SHUFFLE(1,2,0,2));
d_k = _mm_shuffle_ps(d, d, _MM_SHUFFLE(2,0,1,0));
d_u = _mm_shuffle_ps(d, d, _MM_SHUFFLE(0,1,2,1));
d_v = _mm_shuffle_ps(d, d, _MM_SHUFFLE(1,2,0,2));
break;
case 58:
o_k = _mm_shuffle_ps(o, o, _MM_SHUFFLE(2,0,1,1));
o_u = _mm_shuffle_ps(o, o, _MM_SHUFFLE(0,1,2,2));
o_v = _mm_shuffle_ps(o, o, _MM_SHUFFLE(1,2,0,0));
d_k = _mm_shuffle_ps(d, d, _MM_SHUFFLE(2,0,1,1));
d_u = _mm_shuffle_ps(d, d, _MM_SHUFFLE(0,1,2,2));
d_v = _mm_shuffle_ps(d, d, _MM_SHUFFLE(1,2,0,0));
break;
case 59:
o_k = _mm_shuffle_ps(o, o, _MM_SHUFFLE(2,0,1,2));
o_u = _mm_shuffle_ps(o, o, _MM_SHUFFLE(0,1,2,0));
o_v = _mm_shuffle_ps(o, o, _MM_SHUFFLE(1,2,0,1));
d_k = _mm_shuffle_ps(d, d, _MM_SHUFFLE(2,0,1,2));
d_u = _mm_shuffle_ps(d, d, _MM_SHUFFLE(0,1,2,0));
d_v = _mm_shuffle_ps(d, d, _MM_SHUFFLE(1,2,0,1));
break;
case 60:
o_k = _mm_shuffle_ps(o, o, _MM_SHUFFLE(2,0,2,0));
o_u = _mm_shuffle_ps(o, o, _MM_SHUFFLE(0,1,0,1));
o_v = _mm_shuffle_ps(o, o, _MM_SHUFFLE(1,2,1,2));
d_k = _mm_shuffle_ps(d, d, _MM_SHUFFLE(2,0,2,0));
d_u = _mm_shuffle_ps(d, d, _MM_SHUFFLE(0,1,0,1));
d_v = _mm_shuffle_ps(d, d, _MM_SHUFFLE(1,2,1,2));
break;
case 61:
o_k = _mm_shuffle_ps(o, o, _MM_SHUFFLE(2,0,2,1));
o_u = _mm_shuffle_ps(o, o, _MM_SHUFFLE(0,1,0,2));
o_v = _mm_shuffle_ps(o, o, _MM_SHUFFLE(1,2,1,0));
d_k = _mm_shuffle_ps(d, d, _MM_SHUFFLE(2,0,2,1));
d_u = _mm_shuffle_ps(d, d, _MM_SHUFFLE(0,1,0,2));
d_v = _mm_shuffle_ps(d, d, _MM_SHUFFLE(1,2,1,0));
break;
case 62:
o_k = _mm_shuffle_ps(o, o, _MM_SHUFFLE(2,0,2,2));
o_u = _mm_shuffle_ps(o, o, _MM_SHUFFLE(0,1,0,0));
o_v = _mm_shuffle_ps(o, o, _MM_SHUFFLE(1,2,1,1));
d_k = _mm_shuffle_ps(d, d, _MM_SHUFFLE(2,0,2,2));
d_u = _mm_shuffle_ps(d, d, _MM_SHUFFLE(0,1,0,0));
d_v = _mm_shuffle_ps(d, d, _MM_SHUFFLE(1,2,1,1));
break;
case 63:
o_k = _mm_shuffle_ps(o, o, _MM_SHUFFLE(2,1,0,0));
o_u = _mm_shuffle_ps(o, o, _MM_SHUFFLE(0,2,1,1));
o_v = _mm_shuffle_ps(o, o, _MM_SHUFFLE(1,0,2,2));
d_k = _mm_shuffle_ps(d, d, _MM_SHUFFLE(2,1,0,0));
d_u = _mm_shuffle_ps(d, d, _MM_SHUFFLE(0,2,1,1));
d_v = _mm_shuffle_ps(d, d, _MM_SHUFFLE(1,0,2,2));
break;
case 64:
o_k = _mm_shuffle_ps(o, o, _MM_SHUFFLE(2,1,0,1));
o_u = _mm_shuffle_ps(o, o, _MM_SHUFFLE(0,2,1,2));
o_v = _mm_shuffle_ps(o, o, _MM_SHUFFLE(1,0,2,0));
d_k = _mm_shuffle_ps(d, d, _MM_SHUFFLE(2,1,0,1));
d_u = _mm_shuffle_ps(d, d, _MM_SHUFFLE(0,2,1,2));
d_v = _mm_shuffle_ps(d, d, _MM_SHUFFLE(1,0,2,0));
break;
case 65:
o_k = _mm_shuffle_ps(o, o, _MM_SHUFFLE(2,1,0,2));
o_u = _mm_shuffle_ps(o, o, _MM_SHUFFLE(0,2,1,0));
o_v = _mm_shuffle_ps(o, o, _MM_SHUFFLE(1,0,2,1));
d_k = _mm_shuffle_ps(d, d, _MM_SHUFFLE(2,1,0,2));
d_u = _mm_shuffle_ps(d, d, _MM_SHUFFLE(0,2,1,0));
d_v = _mm_shuffle_ps(d, d, _MM_SHUFFLE(1,0,2,1));
break;
case 66:
o_k = _mm_shuffle_ps(o, o, _MM_SHUFFLE(2,1,1,0));
o_u = _mm_shuffle_ps(o, o, _MM_SHUFFLE(0,2,2,1));
o_v = _mm_shuffle_ps(o, o, _MM_SHUFFLE(1,0,0,2));
d_k = _mm_shuffle_ps(d, d, _MM_SHUFFLE(2,1,1,0));
d_u = _mm_shuffle_ps(d, d, _MM_SHUFFLE(0,2,2,1));
d_v = _mm_shuffle_ps(d, d, _MM_SHUFFLE(1,0,0,2));
break;
case 67:
o_k = _mm_shuffle_ps(o, o, _MM_SHUFFLE(2,1,1,1));
o_u = _mm_shuffle_ps(o, o, _MM_SHUFFLE(0,2,2,2));
o_v = _mm_shuffle_ps(o, o, _MM_SHUFFLE(1,0,0,0));
d_k = _mm_shuffle_ps(d, d, _MM_SHUFFLE(2,1,1,1));
d_u = _mm_shuffle_ps(d, d, _MM_SHUFFLE(0,2,2,2));
d_v = _mm_shuffle_ps(d, d, _MM_SHUFFLE(1,0,0,0));
break;
case 68:
o_k = _mm_shuffle_ps(o, o, _MM_SHUFFLE(2,1,1,2));
o_u = _mm_shuffle_ps(o, o, _MM_SHUFFLE(0,2,2,0));
o_v = _mm_shuffle_ps(o, o, _MM_SHUFFLE(1,0,0,1));
d_k = _mm_shuffle_ps(d, d, _MM_SHUFFLE(2,1,1,2));
d_u = _mm_shuffle_ps(d, d, _MM_SHUFFLE(0,2,2,0));
d_v = _mm_shuffle_ps(d, d, _MM_SHUFFLE(1,0,0,1));
break;
case 69:
o_k = _mm_shuffle_ps(o, o, _MM_SHUFFLE(2,1,2,0));
o_u = _mm_shuffle_ps(o, o, _MM_SHUFFLE(0,2,0,1));
o_v = _mm_shuffle_ps(o, o, _MM_SHUFFLE(1,0,1,2));
d_k = _mm_shuffle_ps(d, d, _MM_SHUFFLE(2,1,2,0));
d_u = _mm_shuffle_ps(d, d, _MM_SHUFFLE(0,2,0,1));
d_v = _mm_shuffle_ps(d, d, _MM_SHUFFLE(1,0,1,2));
break;
case 70:
o_k = _mm_shuffle_ps(o, o, _MM_SHUFFLE(2,1,2,1));
o_u = _mm_shuffle_ps(o, o, _MM_SHUFFLE(0,2,0,2));
o_v = _mm_shuffle_ps(o, o, _MM_SHUFFLE(1,0,1,0));
d_k = _mm_shuffle_ps(d, d, _MM_SHUFFLE(2,1,2,1));
d_u = _mm_shuffle_ps(d, d, _MM_SHUFFLE(0,2,0,2));
d_v = _mm_shuffle_ps(d, d, _MM_SHUFFLE(1,0,1,0));
break;
case 71:
o_k = _mm_shuffle_ps(o, o, _MM_SHUFFLE(2,1,2,2));
o_u = _mm_shuffle_ps(o, o, _MM_SHUFFLE(0,2,0,0));
o_v = _mm_shuffle_ps(o, o, _MM_SHUFFLE(1,0,1,1));
d_k = _mm_shuffle_ps(d, d, _MM_SHUFFLE(2,1,2,2));
d_u = _mm_shuffle_ps(d, d, _MM_SHUFFLE(0,2,0,0));
d_v = _mm_shuffle_ps(d, d, _MM_SHUFFLE(1,0,1,1));
break;
case 72:
o_k = _mm_shuffle_ps(o, o, _MM_SHUFFLE(2,2,0,0));
o_u = _mm_shuffle_ps(o, o, _MM_SHUFFLE(0,0,1,1));
o_v = _mm_shuffle_ps(o, o, _MM_SHUFFLE(1,1,2,2));
d_k = _mm_shuffle_ps(d, d, _MM_SHUFFLE(2,2,0,0));
d_u = _mm_shuffle_ps(d, d, _MM_SHUFFLE(0,0,1,1));
d_v = _mm_shuffle_ps(d, d, _MM_SHUFFLE(1,1,2,2));
break;
case 73:
o_k = _mm_shuffle_ps(o, o, _MM_SHUFFLE(2,2,0,1));
o_u = _mm_shuffle_ps(o, o, _MM_SHUFFLE(0,0,1,2));
o_v = _mm_shuffle_ps(o, o, _MM_SHUFFLE(1,1,2,0));
d_k = _mm_shuffle_ps(d, d, _MM_SHUFFLE(2,2,0,1));
d_u = _mm_shuffle_ps(d, d, _MM_SHUFFLE(0,0,1,2));
d_v = _mm_shuffle_ps(d, d, _MM_SHUFFLE(1,1,2,0));
break;
case 74:
o_k = _mm_shuffle_ps(o, o, _MM_SHUFFLE(2,2,0,2));
o_u = _mm_shuffle_ps(o, o, _MM_SHUFFLE(0,0,1,0));
o_v = _mm_shuffle_ps(o, o, _MM_SHUFFLE(1,1,2,1));
d_k = _mm_shuffle_ps(d, d, _MM_SHUFFLE(2,2,0,2));
d_u = _mm_shuffle_ps(d, d, _MM_SHUFFLE(0,0,1,0));
d_v = _mm_shuffle_ps(d, d, _MM_SHUFFLE(1,1,2,1));
break;
case 75:
o_k = _mm_shuffle_ps(o, o, _MM_SHUFFLE(2,2,1,0));
o_u = _mm_shuffle_ps(o, o, _MM_SHUFFLE(0,0,2,1));
o_v = _mm_shuffle_ps(o, o, _MM_SHUFFLE(1,1,0,2));
d_k = _mm_shuffle_ps(d, d, _MM_SHUFFLE(2,2,1,0));
d_u = _mm_shuffle_ps(d, d, _MM_SHUFFLE(0,0,2,1));
d_v = _mm_shuffle_ps(d, d, _MM_SHUFFLE(1,1,0,2));
break;
case 76:
o_k = _mm_shuffle_ps(o, o, _MM_SHUFFLE(2,2,1,1));
o_u = _mm_shuffle_ps(o, o, _MM_SHUFFLE(0,0,2,2));
o_v = _mm_shuffle_ps(o, o, _MM_SHUFFLE(1,1,0,0));
d_k = _mm_shuffle_ps(d, d, _MM_SHUFFLE(2,2,1,1));
d_u = _mm_shuffle_ps(d, d, _MM_SHUFFLE(0,0,2,2));
d_v = _mm_shuffle_ps(d, d, _MM_SHUFFLE(1,1,0,0));
break;
case 77:
o_k = _mm_shuffle_ps(o, o, _MM_SHUFFLE(2,2,1,2));
o_u = _mm_shuffle_ps(o, o, _MM_SHUFFLE(0,0,2,0));
o_v = _mm_shuffle_ps(o, o, _MM_SHUFFLE(1,1,0,1));
d_k = _mm_shuffle_ps(d, d, _MM_SHUFFLE(2,2,1,2));
d_u = _mm_shuffle_ps(d, d, _MM_SHUFFLE(0,0,2,0));
d_v = _mm_shuffle_ps(d, d, _MM_SHUFFLE(1,1,0,1));
break;
case 78:
o_k = _mm_shuffle_ps(o, o, _MM_SHUFFLE(2,2,2,0));
o_u = _mm_shuffle_ps(o, o, _MM_SHUFFLE(0,0,0,1));
o_v = _mm_shuffle_ps(o, o, _MM_SHUFFLE(1,1,1,2));
d_k = _mm_shuffle_ps(d, d, _MM_SHUFFLE(2,2,2,0));
d_u = _mm_shuffle_ps(d, d, _MM_SHUFFLE(0,0,0,1));
d_v = _mm_shuffle_ps(d, d, _MM_SHUFFLE(1,1,1,2));
break;
case 79:
o_k = _mm_shuffle_ps(o, o, _MM_SHUFFLE(2,2,2,1));
o_u = _mm_shuffle_ps(o, o, _MM_SHUFFLE(0,0,0,2));
o_v = _mm_shuffle_ps(o, o, _MM_SHUFFLE(1,1,1,0));
d_k = _mm_shuffle_ps(d, d, _MM_SHUFFLE(2,2,2,1));
d_u = _mm_shuffle_ps(d, d, _MM_SHUFFLE(0,0,0,2));
d_v = _mm_shuffle_ps(d, d, _MM_SHUFFLE(1,1,1,0));
break;
case 80:
o_k = _mm_shuffle_ps(o, o, _MM_SHUFFLE(2,2,2,2));
o_u = _mm_shuffle_ps(o, o, _MM_SHUFFLE(0,0,0,0));
o_v = _mm_shuffle_ps(o, o, _MM_SHUFFLE(1,1,1,1));
d_k = _mm_shuffle_ps(d, d, _MM_SHUFFLE(2,2,2,2));
d_u = _mm_shuffle_ps(d, d, _MM_SHUFFLE(0,0,0,0));
d_v = _mm_shuffle_ps(d, d, _MM_SHUFFLE(1,1,1,1));
break;
default:
return false;
}
const __m128
n_d = _mm_load_ps(nd),
n_u = _mm_load_ps(nu),
n_v = _mm_load_ps(nv);
const __m128
ounu = _mm_mul_ps(o_u, n_u),
ovnv = _mm_mul_ps(o_v, n_v),
dunu = _mm_mul_ps(d_u, n_u),
dvnv = _mm_mul_ps(d_v, n_v);
const __m128
num = _mm_sub_ps(_mm_sub_ps(_mm_sub_ps(n_d, ounu), ovnv), o_k),
denom = _mm_add_ps(_mm_add_ps(dunu, dvnv), d_k);
const __m128
t = _mm_div_ps(num, denom),
mint = _mm_load1_ps(&_mint),
maxt = _mm_load1_ps(&_maxt);
__m128 hasIts =
_mm_and_ps(_mm_cmpgt_ps(maxt, t), _mm_cmpgt_ps(t, mint));
if (_mm_movemask_ps(hasIts) == 0) {
#ifdef MTS_DEBUG_FP
enable_fpexcept();
#endif
return false;
}
const __m128
a_u = _mm_load_ps(au),
a_v = _mm_load_ps(av);
const __m128
hu = _mm_add_ps(o_u, _mm_sub_ps(_mm_mul_ps(t, d_u), a_u)),
hv = _mm_add_ps(o_v, _mm_sub_ps(_mm_mul_ps(t, d_v), a_v));
const __m128
b_nu = _mm_load_ps(bnu),
b_nv = _mm_load_ps(bnv),
c_nu = _mm_load_ps(cnu),
c_nv = _mm_load_ps(cnv);
const __m128
u = _mm_add_ps(_mm_mul_ps(hv, b_nu), _mm_mul_ps(hu, b_nv)),
v = _mm_add_ps(_mm_mul_ps(hu, c_nu), _mm_mul_ps(hv, c_nv));
const __m128
zero = _mm_setzero_ps(),
one = SSEConstants::one.ps;
hasIts = _mm_and_ps(hasIts,
_mm_and_ps(_mm_and_ps(_mm_cmpge_ps(u, zero), _mm_cmpge_ps(v, zero)),
_mm_cmpge_ps(one, _mm_add_ps(u, v))));
if (_mm_movemask_ps(hasIts) != 0) {
_mm_store_ps(_t, _mm_and_ps(t, hasIts));
_mm_store_ps(_u, u);
_mm_store_ps(_v, v);
int closest = 0;
float closestValue = std::numeric_limits<float>::max();
for (int i=0; i<4; i++) {
if (_t[i] != 0 && _t[i] <= closestValue) {
closest = i;
closestValue = _t[i];
}
}
_t[0] = closestValue;
_u[0] = _u[closest];
_v[0] = _v[closest];
_shapeIndex = shapeIndex[closest];
_index = index[closest];
#ifdef MTS_DEBUG_FP
enable_fpexcept();
#endif
return true;
}
#ifdef MTS_DEBUG_FP
enable_fpexcept();
#endif
return false;
}
MTS_NAMESPACE_END
#endif /* __TRIACCEL_SSE_H */

View File

@ -82,6 +82,10 @@ Scene::Scene(const Properties &props)
/* kd-tree construction: specify whether or not bad splits can be "retracted". */
if (props.hasProperty("kdRetract"))
m_kdtree->setRetract(props.getBoolean("kdRetract"));
/* kd-tree construction: Set the number of bad refines allowed to happen
in succession before a leaf node will be created.*/
if (props.hasProperty("kdMaxBadRefines"))
m_kdtree->setMaxBadRefines(props.getInteger("kdMaxBadRefines"));
}
Scene::Scene(Scene *scene) : NetworkedObject(Properties()) {
@ -134,6 +138,7 @@ Scene::Scene(Stream *stream, InstanceManager *manager)
m_kdtree->setExactPrimitiveThreshold(stream->readUInt());
m_kdtree->setParallelBuild(stream->readBool());
m_kdtree->setRetract(stream->readBool());
m_kdtree->setMaxBadRefines(stream->readUInt());
m_importanceSampleLuminaires = stream->readBool();
m_testType = (ETestType) stream->readInt();
m_testThresh = stream->readFloat();
@ -612,6 +617,7 @@ void Scene::serialize(Stream *stream, InstanceManager *manager) const {
stream->writeUInt(m_kdtree->getExactPrimitiveThreshold());
stream->writeBool(m_kdtree->getParallelBuild());
stream->writeBool(m_kdtree->getRetract());
stream->writeUInt(m_kdtree->getMaxBadRefines());
stream->writeBool(m_importanceSampleLuminaires);
stream->writeInt(m_testType);
stream->writeFloat(m_testThresh);