1 //
2 // Copyright (C) 2009-2021 Intel Corporation
3 //
4 // SPDX-License-Identifier: MIT
5 //
6 //
7
8 #pragma once
9
10 #include "shared.h"
11 #include "intrinsics.h"
12 #ifndef __OPENCL_VERSION__
13 #include "stdio.h"
14 #endif
15
16 GRL_NAMESPACE_BEGIN(GRL)
17 GRL_NAMESPACE_BEGIN(RTAS)
/* ====== QUAD ENCODING config ====== */

// Bit layout of the metadata packed into the .w lanes of a PrimRef
// (see the PRIMREF_* accessors and setters in this file):
//   quad primref,  lower.w : geomID in the low QUAD_GEOMID_BITS bits,
//                            (primID1 - primID0) in the remaining QUAD_PRIMID_DIFF_BITS high bits
//   quad/procedural primref, upper.w : primID (primID0) in the low QUAD_PRIMID_BITS bits,
//                            geomFlags in the bits above
#define QUAD_GEOMID_BITS 27 // dxr limit is 2^24 geos... we have headroom
#define QUAD_PRIMID_DIFF_BITS (32 - QUAD_GEOMID_BITS)
#define QUAD_GEOMID_MASK ((1<<QUAD_GEOMID_BITS)-1)

#define QUAD_PRIMID_BITS 29 // dxr limit is 2^29 prims total within one blas
#define QUAD_PRIMID_MASK ((1<<QUAD_PRIMID_BITS)-1)

// Instance primref, lower.w : instance index in the low INSTANCE_ID_BITS bits,
//                             instance mask in the bits above
#define INSTANCE_ID_BITS 24
#define INSTANCE_ID_MASK ((1<<INSTANCE_ID_BITS)-1)
29
// JDB TODO: Make this a separate, dedicated structure.. Aliasing a float4 AABB as a primref is needlessly obfuscated

// A PrimRef is stored as an AABB: lower.xyz / upper.xyz hold the bounds, while
// lower.w / upper.w are reinterpreted as uint bit fields by the PRIMREF_* helpers.
typedef struct AABB PrimRef;
33
AABB_init(struct AABB * aabb)34 GRL_INLINE void AABB_init(struct AABB *aabb)
35 {
36 aabb->lower = (float4)(INFINITY, INFINITY, INFINITY, 0);
37 aabb->upper = -(float4)(INFINITY, INFINITY, INFINITY, 0);
38 }
39
PRIMREF_geomID(PrimRef * aabb)40 GRL_INLINE uint PRIMREF_geomID( PrimRef* aabb)
41 {
42 const uint v = as_uint(aabb->lower.w);
43 return v & QUAD_GEOMID_MASK;
44 }
45
PRIMREF_primID0(PrimRef * aabb)46 GRL_INLINE uint PRIMREF_primID0( PrimRef* aabb)
47 {
48 return as_uint( aabb->upper.w ) & QUAD_PRIMID_MASK;
49 }
50
PRIMREF_primID1(PrimRef * aabb)51 GRL_INLINE uint PRIMREF_primID1( PrimRef* aabb)
52 {
53 const uint v = as_uint(aabb->lower.w);
54 const uint primID0 = as_uint(aabb->upper.w) & QUAD_PRIMID_MASK;
55 const uint deltaID = v >> QUAD_GEOMID_BITS;
56 const uint primID1 = primID0 + deltaID;
57 return primID1;
58 }
59
PRIMREF_geomFlags(PrimRef * aabb)60 GRL_INLINE uint PRIMREF_geomFlags( PrimRef* aabb )
61 {
62 const uint v = as_uint( aabb->upper.w );
63 return (v >> QUAD_PRIMID_BITS) ;
64 }
65
PRIMREF_instanceIndex(PrimRef * aabb)66 GRL_INLINE uint PRIMREF_instanceIndex( PrimRef* aabb )
67 {
68 return as_uint(aabb->lower.w) & INSTANCE_ID_MASK;
69 }
70
PRIMREF_instanceMask(PrimRef * aabb)71 GRL_INLINE uchar PRIMREF_instanceMask( PrimRef* aabb )
72 {
73 return as_uint(aabb->lower.w) >> INSTANCE_ID_BITS;
74 }
75
PRIMREF_setProceduralMetaData(PrimRef * primref,uint geomID,uint primID,uint geomFlags)76 GRL_INLINE void PRIMREF_setProceduralMetaData( PrimRef* primref, uint geomID, uint primID, uint geomFlags )
77 {
78 /* encode geomID, primID */
79 uint flags = (geomFlags << QUAD_PRIMID_BITS);
80 primref->lower.w = as_float( geomID );
81 primref->upper.w = as_float( primID | flags );
82 }
83
PRIMREF_setQuadMetaData(PrimRef * primref,uint primID0,uint primID1,uint geomID,uint geomFlags)84 GRL_INLINE void PRIMREF_setQuadMetaData( PrimRef* primref, uint primID0, uint primID1, uint geomID, uint geomFlags )
85 {
86 const uint primID_diff = primID1 - primID0;
87 uint flags = geomFlags << QUAD_PRIMID_BITS;
88
89 primref->lower.w = as_float( geomID | (primID_diff << QUAD_GEOMID_BITS) );
90 primref->upper.w = as_float( (primID0 | flags) );
91 }
92
PRIMREF_setAABB(PrimRef * primref,float3 lower,float3 upper)93 GRL_INLINE void PRIMREF_setAABB( PrimRef* primref, float3 lower, float3 upper )
94 {
95 primref->lower.xyz = lower.xyz;
96 primref->upper.xyz = upper.xyz;
97 }
98
PRIMREF_set_instance(float3 lower,float3 upper,uint instanceIndex,uint instanceMask,uint rootOffset,bool is_procedural)99 GRL_INLINE PrimRef PRIMREF_set_instance( float3 lower, float3 upper, uint instanceIndex, uint instanceMask, uint rootOffset, bool is_procedural )
100 {
101 PrimRef new_ref;
102 new_ref.lower.xyz = lower;
103 new_ref.lower.w = as_float(instanceIndex | (instanceMask << 24));
104 new_ref.upper.xyz = upper;
105 new_ref.upper.w = as_float(rootOffset + (is_procedural? 0x80000000 : 0));
106 return new_ref;
107 }
108
PRIMREF_isProceduralInstance(PrimRef * primref)109 GRL_INLINE bool PRIMREF_isProceduralInstance( PrimRef* primref )
110 {
111 return (as_uint(primref->upper.w) & 0x80000000) != 0;
112 }
113
PRIMREF_instanceRootNodeOffset(PrimRef * primref)114 GRL_INLINE uint PRIMREF_instanceRootNodeOffset(PrimRef* primref)
115 {
116 return (as_uint(primref->upper.w) & 0x7fffffff);
117 }
118
PRIMREF_lower(PrimRef * primref)119 GRL_INLINE float3 PRIMREF_lower( PrimRef* primref )
120 {
121 return primref->lower.xyz;
122 }
PRIMREF_upper(PrimRef * primref)123 GRL_INLINE float3 PRIMREF_upper( PrimRef* primref )
124 {
125 return primref->upper.xyz;
126 }
127
AABB_extend(struct AABB * aabb,struct AABB * v)128 GRL_INLINE void AABB_extend(struct AABB *aabb, struct AABB *v)
129 {
130 aabb->lower = min(aabb->lower, v->lower);
131 aabb->upper = max(aabb->upper, v->upper);
132 }
133
AABB_extend_point(struct AABB * aabb,const float4 p)134 GRL_INLINE void AABB_extend_point(struct AABB *aabb, const float4 p)
135 {
136 aabb->lower = min(aabb->lower, p);
137 aabb->upper = max(aabb->upper, p);
138 }
139
AABB_extendlu(struct AABB * aabb,const float4 lower,const float4 upper)140 GRL_INLINE void AABB_extendlu(struct AABB *aabb, const float4 lower, const float4 upper)
141 {
142 aabb->lower = min(aabb->lower, lower);
143 aabb->upper = max(aabb->upper, upper);
144 }
145
AABB_enlarge(struct AABB * aabb,const float v)146 GRL_INLINE struct AABB AABB_enlarge(struct AABB *aabb, const float v)
147 {
148 struct AABB box;
149 box.lower = aabb->lower - (float4)v;
150 box.upper = aabb->upper + (float4)v;
151 return box;
152 }
153
AABB_intersect(struct AABB * aabb,struct AABB * v)154 GRL_INLINE void AABB_intersect(struct AABB *aabb, struct AABB *v)
155 {
156 aabb->lower = max(aabb->lower, v->lower);
157 aabb->upper = min(aabb->upper, v->upper);
158 }
159
AABB_size(struct AABB * aabb)160 GRL_INLINE float4 AABB_size(struct AABB *aabb)
161 {
162 return aabb->upper - aabb->lower;
163 }
164
AABB_centroid2(struct AABB * aabb)165 GRL_INLINE float4 AABB_centroid2(struct AABB *aabb)
166 {
167 return aabb->lower + aabb->upper;
168 }
169
AABB_halfArea(struct AABB * aabb)170 GRL_INLINE float AABB_halfArea(struct AABB *aabb)
171 {
172 const float4 d = AABB_size(aabb);
173 return halfarea(d.xyz);
174 }
175
AABB_intersecion_size(struct AABB * aabb,struct AABB * v)176 GRL_INLINE float AABB_intersecion_size(struct AABB* aabb, struct AABB* v)
177 {
178 struct AABB temp = *aabb;
179 AABB_intersect(&temp, v);
180 float4 len = AABB_size(&temp);
181 float ret = 0.0f;
182 if (len.x >= 0.0f && len.y >= 0.0f && len.z >= 0.0f) {
183 float3 v = { len.x, len.y, len.z };
184 ret = halfarea(v);
185 }
186 return ret;
187 }
188
AABB_subset(struct AABB * small,struct AABB * big)189 GRL_INLINE bool AABB_subset(struct AABB* small, struct AABB* big)
190 {
191 const int4 b0 = small->lower >= big->lower;
192 const int4 b1 = small->upper <= big->upper;
193 const int4 b = b0 & b1;
194 return b.x & b.y & b.z;
195 }
196
AABBfromAABB3f(const struct AABB3f box)197 GRL_INLINE struct AABB AABBfromAABB3f(const struct AABB3f box)
198 {
199 struct AABB box4d = {
200 {box.lower[0], box.lower[1], box.lower[2], 0.0f},
201 {box.upper[0], box.upper[1], box.upper[2], 0.0f}
202 };
203 return box4d;
204 }
205
AABB3fFromAABB(const struct AABB box)206 GRL_INLINE struct AABB3f AABB3fFromAABB(const struct AABB box)
207 {
208 struct AABB3f box3d = {
209 {box.lower[0], box.lower[1], box.lower[2]},
210 {box.upper[0], box.upper[1], box.upper[2]}
211 };
212 return box3d;
213 }
214
AABB_verify(struct AABB * aabb)215 GRL_INLINE bool AABB_verify(struct AABB* aabb)
216 {
217 bool error = false;
218 if (aabb->lower.x > aabb->upper.x)
219 error = true;
220 if (aabb->lower.y > aabb->upper.y)
221 error = true;
222 if (aabb->lower.z > aabb->upper.z)
223 error = true;
224 if (!isfinite(aabb->lower.x))
225 error = true;
226 if (!isfinite(aabb->lower.y))
227 error = true;
228 if (!isfinite(aabb->lower.z))
229 error = true;
230 if (!isfinite(aabb->upper.x))
231 error = true;
232 if (!isfinite(aabb->upper.y))
233 error = true;
234 if (!isfinite(aabb->upper.z))
235 error = true;
236 return error;
237 }
238
AABB_print(struct AABB * aabb)239 GRL_INLINE void AABB_print(struct AABB* aabb)
240 {
241 printf("AABB {\n area = %f\n lower = %f\n upper = %f\n geomID = %i primID0 = %i primID1 = %i\n aabb->lower.w = %x aabb->upper.w = %x }\n",
242 AABB_halfArea(aabb),
243 aabb->lower.xyz,
244 aabb->upper.xyz,
245 PRIMREF_geomID(aabb),
246 PRIMREF_primID0(aabb),
247 PRIMREF_primID1(aabb),
248 as_uint(aabb->lower.w),
249 as_uint(aabb->upper.w));
250 }
251
252 #ifdef __OPENCL_VERSION__
253
PrimRef_sub_group_shuffle(PrimRef * primRef,const uint slotID)254 GRL_INLINE PrimRef PrimRef_sub_group_shuffle(PrimRef* primRef, const uint slotID)
255 {
256 PrimRef shuffledPrimref;
257 shuffledPrimref.lower.x = intel_sub_group_shuffle(primRef->lower.x, slotID);
258 shuffledPrimref.lower.y = intel_sub_group_shuffle(primRef->lower.y, slotID);
259 shuffledPrimref.lower.z = intel_sub_group_shuffle(primRef->lower.z, slotID);
260 shuffledPrimref.lower.w = intel_sub_group_shuffle(primRef->lower.w, slotID);
261 shuffledPrimref.upper.x = intel_sub_group_shuffle(primRef->upper.x, slotID);
262 shuffledPrimref.upper.y = intel_sub_group_shuffle(primRef->upper.y, slotID);
263 shuffledPrimref.upper.z = intel_sub_group_shuffle(primRef->upper.z, slotID);
264 shuffledPrimref.upper.w = intel_sub_group_shuffle(primRef->upper.w, slotID);
265 return shuffledPrimref;
266 }
267
AABB_sub_group_broadcast(struct AABB * aabb,const uint slotID)268 GRL_INLINE struct AABB AABB_sub_group_broadcast(struct AABB *aabb, const uint slotID)
269 {
270 struct AABB bounds;
271 bounds.lower.x = sub_group_broadcast(aabb->lower.x, slotID);
272 bounds.lower.y = sub_group_broadcast(aabb->lower.y, slotID);
273 bounds.lower.z = sub_group_broadcast(aabb->lower.z, slotID);
274 bounds.lower.w = 0;
275 bounds.upper.x = sub_group_broadcast(aabb->upper.x, slotID);
276 bounds.upper.y = sub_group_broadcast(aabb->upper.y, slotID);
277 bounds.upper.z = sub_group_broadcast(aabb->upper.z, slotID);
278 bounds.upper.w = 0;
279 return bounds;
280 }
AABB_sub_group_shuffle(struct AABB * aabb,const uint slotID)281 GRL_INLINE struct AABB AABB_sub_group_shuffle(struct AABB* aabb, const uint slotID)
282 {
283 struct AABB bounds;
284 bounds.lower.x = intel_sub_group_shuffle(aabb->lower.x, slotID);
285 bounds.lower.y = intel_sub_group_shuffle(aabb->lower.y, slotID);
286 bounds.lower.z = intel_sub_group_shuffle(aabb->lower.z, slotID);
287 bounds.lower.w = 0;
288 bounds.upper.x = intel_sub_group_shuffle(aabb->upper.x, slotID);
289 bounds.upper.y = intel_sub_group_shuffle(aabb->upper.y, slotID);
290 bounds.upper.z = intel_sub_group_shuffle(aabb->upper.z, slotID);
291 bounds.upper.w = 0;
292 return bounds;
293 }
294
AABB_sub_group_shuffle_coordPerLane(struct AABB * aabb,const uint slotID)295 GRL_INLINE uint AABB_sub_group_shuffle_coordPerLane(struct AABB* aabb, const uint slotID)
296 {
297 float coordData[8] = {
298 sub_group_broadcast(aabb->lower.x, slotID),
299 sub_group_broadcast(aabb->lower.y, slotID),
300 sub_group_broadcast(aabb->lower.z, slotID),
301 sub_group_broadcast(aabb->lower.w, slotID),
302 sub_group_broadcast(aabb->upper.x, slotID),
303 sub_group_broadcast(aabb->upper.y, slotID),
304 sub_group_broadcast(aabb->upper.z, slotID),
305 sub_group_broadcast(aabb->upper.w, slotID) };
306
307 uint coordDataFiltered;
308 const uint lane = get_sub_group_local_id();
309 if (lane < 8) coordDataFiltered = as_uint(coordData[lane]);
310 return coordDataFiltered;
311 }
312
AABB_sub_group_reduce(struct AABB * aabb)313 GRL_INLINE struct AABB AABB_sub_group_reduce(struct AABB *aabb)
314 {
315 struct AABB bounds;
316 bounds.lower.x = sub_group_reduce_min(aabb->lower.x);
317 bounds.lower.y = sub_group_reduce_min(aabb->lower.y);
318 bounds.lower.z = sub_group_reduce_min(aabb->lower.z);
319 bounds.lower.w = 0;
320 bounds.upper.x = sub_group_reduce_max(aabb->upper.x);
321 bounds.upper.y = sub_group_reduce_max(aabb->upper.y);
322 bounds.upper.z = sub_group_reduce_max(aabb->upper.z);
323 bounds.upper.w = 0;
324 return bounds;
325 }
326
327
AABB_sub_group_reduce_N6(struct AABB * aabb)328 GRL_INLINE struct AABB AABB_sub_group_reduce_N6( struct AABB* aabb )
329 {
330 float3 l = aabb->lower.xyz;
331 float3 u = aabb->upper.xyz;
332 l = min( l, intel_sub_group_shuffle_down( l, l, 4 ) );
333 l = min( l, intel_sub_group_shuffle_down( l, l, 2 ) );
334 l = min( l, intel_sub_group_shuffle_down( l, l, 1 ) );
335 u = max( u, intel_sub_group_shuffle_down( u, u, 4 ) );
336 u = max( u, intel_sub_group_shuffle_down( u, u, 2 ) );
337 u = max( u, intel_sub_group_shuffle_down( u, u, 1 ) );
338
339 struct AABB bounds;
340 bounds.lower.x = l.x;
341 bounds.lower.y = l.y;
342 bounds.lower.z = l.z;
343 bounds.lower.w = 0;
344 bounds.upper.x = u.x;
345 bounds.upper.y = u.y;
346 bounds.upper.z = u.z;
347 bounds.upper.w = 0;
348 return bounds;
349 }
350
351
AABB_work_group_reduce(struct AABB * aabb)352 GRL_INLINE struct AABB AABB_work_group_reduce(struct AABB *aabb)
353 {
354 struct AABB bounds;
355 bounds.lower.x = work_group_reduce_min(aabb->lower.x);
356 bounds.lower.y = work_group_reduce_min(aabb->lower.y);
357 bounds.lower.z = work_group_reduce_min(aabb->lower.z);
358 bounds.upper.x = work_group_reduce_max(aabb->upper.x);
359 bounds.upper.y = work_group_reduce_max(aabb->upper.y);
360 bounds.upper.z = work_group_reduce_max(aabb->upper.z);
361 return bounds;
362 }
363
AABB_sub_group_scan_exclusive_min_max(struct AABB * aabb)364 GRL_INLINE struct AABB AABB_sub_group_scan_exclusive_min_max(struct AABB *aabb)
365 {
366 struct AABB bounds;
367 bounds.lower.x = sub_group_scan_exclusive_min(aabb->lower.x);
368 bounds.lower.y = sub_group_scan_exclusive_min(aabb->lower.y);
369 bounds.lower.z = sub_group_scan_exclusive_min(aabb->lower.z);
370 bounds.lower.w = 0;
371 bounds.upper.x = sub_group_scan_exclusive_max(aabb->upper.x);
372 bounds.upper.y = sub_group_scan_exclusive_max(aabb->upper.y);
373 bounds.upper.z = sub_group_scan_exclusive_max(aabb->upper.z);
374 bounds.upper.w = 0;
375 return bounds;
376 }
377
AABB_sub_group_scan_inclusive_min_max(struct AABB * aabb)378 GRL_INLINE struct AABB AABB_sub_group_scan_inclusive_min_max(struct AABB *aabb)
379 {
380 struct AABB bounds;
381 bounds.lower.x = sub_group_scan_inclusive_min(aabb->lower.x);
382 bounds.lower.y = sub_group_scan_inclusive_min(aabb->lower.y);
383 bounds.lower.z = sub_group_scan_inclusive_min(aabb->lower.z);
384 bounds.lower.w = 0;
385 bounds.upper.x = sub_group_scan_inclusive_max(aabb->upper.x);
386 bounds.upper.y = sub_group_scan_inclusive_max(aabb->upper.y);
387 bounds.upper.z = sub_group_scan_inclusive_max(aabb->upper.z);
388 bounds.upper.w = 0;
389 return bounds;
390 }
391
AABB_global_atomic_merge(global struct AABB * global_aabb,struct AABB * aabb)392 GRL_INLINE void AABB_global_atomic_merge(global struct AABB *global_aabb, struct AABB *aabb)
393 {
394 atomic_min((volatile __global float *)&global_aabb->lower + 0, aabb->lower.x);
395 atomic_min((volatile __global float *)&global_aabb->lower + 1, aabb->lower.y);
396 atomic_min((volatile __global float *)&global_aabb->lower + 2, aabb->lower.z);
397 atomic_max((volatile __global float *)&global_aabb->upper + 0, aabb->upper.x);
398 atomic_max((volatile __global float *)&global_aabb->upper + 1, aabb->upper.y);
399 atomic_max((volatile __global float *)&global_aabb->upper + 2, aabb->upper.z);
400 }
401
AABB_global_atomic_merge_lu(global struct AABB * global_aabb,float3 lower,float3 upper)402 GRL_INLINE void AABB_global_atomic_merge_lu(global struct AABB* global_aabb, float3 lower, float3 upper )
403 {
404 atomic_min((volatile __global float*) & global_aabb->lower + 0, lower.x);
405 atomic_min((volatile __global float*) & global_aabb->lower + 1, lower.y);
406 atomic_min((volatile __global float*) & global_aabb->lower + 2, lower.z);
407 atomic_max((volatile __global float*) & global_aabb->upper + 0, upper.x);
408 atomic_max((volatile __global float*) & global_aabb->upper + 1, upper.y);
409 atomic_max((volatile __global float*) & global_aabb->upper + 2, upper.z);
410 }
411
AABB_global_atomic_merge_sub_group_lu(uniform global struct AABB * aabb,float3 lower,float3 upper)412 GRL_INLINE void AABB_global_atomic_merge_sub_group_lu(uniform global struct AABB* aabb, float3 lower, float3 upper)
413 {
414 uint lane = get_sub_group_local_id();
415 float l[3];
416 l[0] = sub_group_reduce_min(lower.x);
417 l[1] = sub_group_reduce_min(lower.y);
418 l[2] = sub_group_reduce_min(lower.z);
419 float u[3];
420 u[0] = sub_group_reduce_max(upper.x);
421 u[1] = sub_group_reduce_max(upper.y);
422 u[2] = sub_group_reduce_max(upper.z);
423
424 if (lane < 3)
425 {
426 atomic_min((global float*)&aabb->lower + lane, l[lane]);
427 atomic_max((global float*)&aabb->upper + lane, u[lane]);
428 }
429 }
430
431
AABB_local_atomic_merge(local struct AABB * aabb,const float4 lower,const float4 upper)432 GRL_INLINE void AABB_local_atomic_merge(local struct AABB *aabb, const float4 lower, const float4 upper)
433 {
434 if (lower.x < aabb->lower.x)
435 atomic_min((local float *)&aabb->lower + 0, lower.x);
436 if (lower.y < aabb->lower.y)
437 atomic_min((local float *)&aabb->lower + 1, lower.y);
438 if (lower.z < aabb->lower.z)
439 atomic_min((local float *)&aabb->lower + 2, lower.z);
440 if (upper.x > aabb->upper.x)
441 atomic_max((local float *)&aabb->upper + 0, upper.x);
442 if (upper.y > aabb->upper.y)
443 atomic_max((local float *)&aabb->upper + 1, upper.y);
444 if (upper.z > aabb->upper.z)
445 atomic_max((local float *)&aabb->upper + 2, upper.z);
446 }
447 #endif
448
449 GRL_NAMESPACE_END(RTAS)
450 GRL_NAMESPACE_END(GRL)