forked from juj/MathGeoLib
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathTriangleMesh_IntersectRay_SSE.inl
270 lines (216 loc) · 8.77 KB
/
TriangleMesh_IntersectRay_SSE.inl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
/* Copyright Jukka Jylänki
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
/** @file TriangleMesh_IntersectRay_SSE.inl
@author Jukka Jylänki
@brief SSE implementation of ray-mesh intersection routines. */
MATH_BEGIN_NAMESPACE
#if defined(MATH_GEN_SSE2) && !defined(MATH_GEN_TRIANGLEINDEX)
float TriangleMesh::IntersectRay_SSE2(const Ray &ray) const
#elif defined(MATH_GEN_SSE2) && defined(MATH_GEN_TRIANGLEINDEX) && !defined(MATH_GEN_UV)
float TriangleMesh::IntersectRay_TriangleIndex_SSE2(const Ray &ray, int &outTriangleIndex) const
#elif defined(MATH_GEN_SSE2) && defined(MATH_GEN_TRIANGLEINDEX) && defined(MATH_GEN_UV)
float TriangleMesh::IntersectRay_TriangleIndex_UV_SSE2(const Ray &ray, int &outTriangleIndex, float &outU, float &outV) const
#elif defined(MATH_GEN_SSE41) && !defined(MATH_GEN_TRIANGLEINDEX)
float TriangleMesh::IntersectRay_SSE41(const Ray &ray) const
#elif defined(MATH_GEN_SSE41) && defined(MATH_GEN_TRIANGLEINDEX) && !defined(MATH_GEN_UV)
float TriangleMesh::IntersectRay_TriangleIndex_SSE41(const Ray &ray, int &outTriangleIndex) const
#elif defined(MATH_GEN_SSE41) && defined(MATH_GEN_TRIANGLEINDEX) && defined(MATH_GEN_UV)
float TriangleMesh::IntersectRay_TriangleIndex_UV_SSE41(const Ray &ray, int &outTriangleIndex, float &outU, float &outV) const
#endif
{
// std::cout << numTris << " tris: ";
// TRACESTART(RayTriMeshIntersectSSE);
assert(sizeof(float3) == 3*sizeof(float));
assert(sizeof(Triangle) == 3*sizeof(float3));
#ifdef _DEBUG
assert(vertexDataLayout == 1); // Must be SoA4 structured!
#endif
__m128 nearestD = _mm_set1_ps(inf);
#ifdef MATH_GEN_UV
__m128 nearestU = _mm_set1_ps(inf);
__m128 nearestV = _mm_set1_ps(inf);
#endif
#ifdef MATH_GEN_TRIANGLEINDEX
__m128i nearestIndex = _mm_set1_epi32(-1);
#endif
const __m128 lX = _mm_load1_ps(&ray.pos.x);
const __m128 lY = _mm_load1_ps(&ray.pos.y);
const __m128 lZ = _mm_load1_ps(&ray.pos.z);
const __m128 dX = _mm_load1_ps(&ray.dir.x);
const __m128 dY = _mm_load1_ps(&ray.dir.y);
const __m128 dZ = _mm_load1_ps(&ray.dir.z);
const __m128 epsilon = _mm_set1_ps(1e-4f);
const __m128 zero = _mm_setzero_ps();
const __m128 one = _mm_set1_ps(1.f);
const __m128 sign_mask = _mm_set1_ps(-0.f); // -0.f = 1 << 31
assert(((uintptr_t)data & 0xF) == 0);
const float *tris = reinterpret_cast<const float*>(data);
for(int i = 0; i+4 <= numTriangles; i += 4)
{
__m128 v0x = _mm_load_ps(tris);
__m128 v0y = _mm_load_ps(tris+4);
__m128 v0z = _mm_load_ps(tris+8);
#ifdef SOA_HAS_EDGES
__m128 e1x = _mm_load_ps(tris+12);
__m128 e1y = _mm_load_ps(tris+16);
__m128 e1z = _mm_load_ps(tris+20);
__m128 e2x = _mm_load_ps(tris+24);
__m128 e2y = _mm_load_ps(tris+28);
__m128 e2z = _mm_load_ps(tris+32);
#else
__m128 v1x = _mm_load_ps(tris+12);
__m128 v1y = _mm_load_ps(tris+16);
__m128 v1z = _mm_load_ps(tris+20);
__m128 v2x = _mm_load_ps(tris+24);
__m128 v2y = _mm_load_ps(tris+28);
__m128 v2z = _mm_load_ps(tris+32);
// Edge vectors
__m128 e1x = _mm_sub_ps(v1x, v0x);
__m128 e1y = _mm_sub_ps(v1y, v0y);
__m128 e1z = _mm_sub_ps(v1z, v0z);
__m128 e2x = _mm_sub_ps(v2x, v0x);
__m128 e2y = _mm_sub_ps(v2y, v0y);
__m128 e2z = _mm_sub_ps(v2z, v0z);
#endif
// _mm_prefetch((const char *)(tris+36), _MM_HINT_T0);
// begin calculating determinant - also used to calculate U parameter
__m128 px = _mm_sub_ps(_mm_mul_ps(dY, e2z), _mm_mul_ps(dZ, e2y));
__m128 py = _mm_sub_ps(_mm_mul_ps(dZ, e2x), _mm_mul_ps(dX, e2z));
__m128 pz = _mm_sub_ps(_mm_mul_ps(dX, e2y), _mm_mul_ps(dY, e2x));
// If det < 0, intersecting backfacing tri, > 0, intersecting frontfacing tri, 0, parallel to plane.
__m128 det = _mm_add_ps(_mm_add_ps(_mm_mul_ps(e1x, px), _mm_mul_ps(e1y, py)), _mm_mul_ps(e1z, pz));
// If determinant is near zero, ray lies in plane of triangle.
// if (fabs(det) <= epsilon)
// return FLOAT_INF;
__m128 recipDet = _mm_rcp_ps(det);
__m128 absdet = _mm_andnot_ps(sign_mask, det);
__m128 out = _mm_cmple_ps(absdet, epsilon);
// Calculate distance from v0 to ray origin
__m128 tx = _mm_sub_ps(lX, v0x);
__m128 ty = _mm_sub_ps(lY, v0y);
__m128 tz = _mm_sub_ps(lZ, v0z);
// Output barycentric u
__m128 u = _mm_mul_ps(_mm_add_ps(_mm_add_ps(_mm_mul_ps(tx, px), _mm_mul_ps(ty, py)), _mm_mul_ps(tz, pz)), recipDet);
// if (u < 0.f || u > 1.f)
// return FLOAT_INF; // Barycentric U is outside the triangle - early out.
__m128 out2 = _mm_cmplt_ps(u, zero);
out = _mm_or_ps(out, out2);
out2 = _mm_cmpgt_ps(u, one);
out = _mm_or_ps(out, out2);
// Prepare to test V parameter
__m128 qx = _mm_sub_ps(_mm_mul_ps(ty, e1z), _mm_mul_ps(tz, e1y));
__m128 qy = _mm_sub_ps(_mm_mul_ps(tz, e1x), _mm_mul_ps(tx, e1z));
__m128 qz = _mm_sub_ps(_mm_mul_ps(tx, e1y), _mm_mul_ps(ty, e1x));
// Output barycentric v
__m128 v = _mm_mul_ps(_mm_add_ps(_mm_add_ps(_mm_mul_ps(dX, qx), _mm_mul_ps(dY, qy)), _mm_mul_ps(dZ, qz)), recipDet);
// if (v < 0.f || u + v > 1.f) // Barycentric V or the combination of U and V are outside the triangle - no intersection.
// return FLOAT_INF;
out2 = _mm_cmplt_ps(v, zero);
out = _mm_or_ps(out, out2);
__m128 uv = _mm_add_ps(u, v);
out2 = _mm_cmpgt_ps(uv, one);
out = _mm_or_ps(out, out2);
// Output signed distance from ray to triangle.
__m128 t = _mm_mul_ps(_mm_add_ps(_mm_add_ps(_mm_mul_ps(e2x, qx), _mm_mul_ps(e2y, qy)), _mm_mul_ps(e2z, qz)), recipDet);
// t < 0?
out2 = _mm_cmplt_ps(t, zero);
out = _mm_or_ps(out, out2);
// Worse than previous result?
out2 = _mm_cmpge_ps(t, nearestD);
out = _mm_or_ps(out, out2);
// The mask 'out' now contains 0xFF in all indices which are worse than previous, and
// 0x00 in indices which are better.
#ifdef MATH_GEN_SSE41
nearestD = _mm_blendv_ps(t, nearestD, out);
#else
// If SSE 4.1 is not available:
nearestD = _mm_and_ps(out, nearestD);
t = _mm_andnot_ps(out, t);
nearestD = _mm_or_ps(t, nearestD);
#endif
#ifdef MATH_GEN_UV
#ifdef MATH_GEN_SSE41
nearestU = _mm_blendv_ps(u, nearestU, out); // 'blend' requires SSE4.1!
nearestV = _mm_blendv_ps(v, nearestV, out); // 'blend' requires SSE4.1!
#else
// If SSE 4.1 is not available:
nearestU = _mm_and_ps(out, nearestU);
nearestV = _mm_and_ps(out, nearestV);
u = _mm_andnot_ps(out, u);
v = _mm_andnot_ps(out, v);
nearestU = _mm_or_ps(u, nearestU);
nearestV = _mm_or_ps(v, nearestV);
#endif
#endif
#ifdef MATH_GEN_TRIANGLEINDEX
__m128i hitIndex = _mm_set1_epi32(i);
#ifdef MATH_GEN_SSE41
nearestIndex = _mm_castps_si128(_mm_blendv_ps(_mm_castsi128_ps(hitIndex), _mm_castsi128_ps(nearestIndex), out)); // 'blend' requires SSE4.1!
#else
// If SSE 4.1 is not available:
// Store the index of the triangle that was hit.
nearestIndex = _mm_and_si128(_mm_castps_si128(out), nearestIndex);
hitIndex = _mm_andnot_si128(_mm_castps_si128(out), hitIndex);
nearestIndex = _mm_or_si128(hitIndex, nearestIndex);
#endif
#endif
tris += 36;
}
float4 d = nearestD;
#ifdef MATH_GEN_UV
float4 u = nearestU;
float4 v = nearestV;
#endif
#ifdef MATH_GEN_TRIANGLEINDEX
u32 idx[4];
_mm_store_si128((__m128i*)idx, nearestIndex);
#endif
float smallestT = FLOAT_INF;
for(int i = 0; i < 4; ++i)
if (d[i] < smallestT)
{
smallestT = d[i];
#ifdef MATH_GEN_TRIANGLEINDEX
outTriangleIndex = idx[i]+i;
#endif
#ifdef MATH_GEN_UV
outU = u[i];
outV = v[i];
#endif
}
// TRACEEND(RayTriMeshIntersectSSE);
// static double avgtimes = 0.f;
// static double nAvgTimes = 0;
// static double processedBytes;
// processedBytes += numTris * 3 * 4;
// avgtimes += Clock::TicksToMillisecondsD(time_RayTriMeshIntersectSSE);
// ++nAvgTimes;
// std::cout << "Total avg (SSE): " << (avgtimes / nAvgTimes) << std::endl;
// std::cout << "Hit distance (SSE): " << smallestT << ", index: " << hitTriangleIndex << ", UV: (" << u << ", " << v << ")" << std::endl;
// std::cout << "(SSE) " << processedBytes / avgtimes * 1000.0 / 1024.0 / 1024.0 / 1024.0 << "GB/sec." << std::endl;
return smallestT;
}
//float TriangleMesh::IntersectRay_AVX(const Ray &ray) const
//float TriangleMesh::IntersectRay_TriangleIndex_AVX(const Ray &ray, int &outIndex) const
//float TriangleMesh::IntersectRay_TriangleIndex_UV_AVX(const Ray &ray, int &outIndex, float &outU, float &outV) const
#ifdef MATH_GEN_SSE2
#undef MATH_GEN_SSE2
#endif
#ifdef MATH_GEN_SSE41
#undef MATH_GEN_SSE41
#endif
#ifdef MATH_GEN_TRIANGLEINDEX
#undef MATH_GEN_TRIANGLEINDEX
#endif
#ifdef MATH_GEN_UV
#undef MATH_GEN_UV
#endif
MATH_END_NAMESPACE