//
// Copyright (C) 2009-2021 Intel Corporation
//
// SPDX-License-Identifier: MIT
//
//

#pragma once

#include "shared.h"
#include "intrinsics.h"
#ifndef __OPENCL_VERSION__
#include "stdio.h"
#endif

GRL_NAMESPACE_BEGIN(GRL)
GRL_NAMESPACE_BEGIN(RTAS)
/* ====== QUAD ENCODING config ====== */

#define QUAD_GEOMID_BITS 27 // dxr limit is 2^24 geos... we have headroom
#define QUAD_PRIMID_DIFF_BITS (32 - QUAD_GEOMID_BITS)
#define QUAD_GEOMID_MASK      ((1<<QUAD_GEOMID_BITS)-1)

#define QUAD_PRIMID_BITS 29 // dxr limit is 2^29 prims total within one blas
#define QUAD_PRIMID_MASK  ((1<<QUAD_PRIMID_BITS)-1)

#define INSTANCE_ID_BITS 24
#define INSTANCE_ID_MASK ((1<<INSTANCE_ID_BITS)-1)
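
/*
 * PrimRef metadata layout (a summary derived from the PRIMREF_set* and
 * PRIMREF_* accessors below; those accessors remain the source of truth):
 *
 *   Quad primref:
 *     lower.w  bits [0..QUAD_GEOMID_BITS)   : geomID
 *              bits [QUAD_GEOMID_BITS..32)  : primID1 - primID0
 *     upper.w  bits [0..QUAD_PRIMID_BITS)   : primID0
 *              bits [QUAD_PRIMID_BITS..32)  : geometry flags
 *
 *   Procedural primref:
 *     lower.w  : geomID
 *     upper.w  : primID | (geomFlags << QUAD_PRIMID_BITS)
 *
 *   Instance primref:
 *     lower.w  bits [0..INSTANCE_ID_BITS)   : instance index
 *              bits [INSTANCE_ID_BITS..32)  : instance mask
 *     upper.w  bits [0..31)                 : root node offset
 *              bit  31                      : "is procedural" flag
 */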

// JDB TODO:  Make this a separate, dedicated structure..  Aliasing a float4 AABB as a primref is needlessly obfuscated

typedef struct AABB PrimRef;

GRL_INLINE void AABB_init(struct AABB *aabb)
{
    // Start from an inverted (empty) box (+INF lower, -INF upper) so that
    // min/max-based extension yields correct bounds for the first point added.
    aabb->lower = (float4)(INFINITY, INFINITY, INFINITY, 0);
    aabb->upper = -(float4)(INFINITY, INFINITY, INFINITY, 0);
}

GRL_INLINE uint PRIMREF_geomID( PrimRef* aabb)
{
    const uint v = as_uint(aabb->lower.w);
    return v & QUAD_GEOMID_MASK;
}

GRL_INLINE uint PRIMREF_primID0( PrimRef* aabb)
{
    return as_uint( aabb->upper.w ) & QUAD_PRIMID_MASK;
}

GRL_INLINE uint PRIMREF_primID1( PrimRef* aabb)
{
    const uint v = as_uint(aabb->lower.w);
    const uint primID0 = as_uint(aabb->upper.w) & QUAD_PRIMID_MASK;
    const uint deltaID = v >> QUAD_GEOMID_BITS;
    const uint primID1 = primID0 + deltaID;
    return primID1;
}

GRL_INLINE uint PRIMREF_geomFlags( PrimRef* aabb )
{
    const uint v = as_uint( aabb->upper.w );
    return (v >> QUAD_PRIMID_BITS);
}

GRL_INLINE uint PRIMREF_instanceIndex( PrimRef* aabb )
{
    return as_uint(aabb->lower.w) & INSTANCE_ID_MASK;
}

GRL_INLINE uchar PRIMREF_instanceMask( PrimRef* aabb )
{
    return as_uint(aabb->lower.w) >> INSTANCE_ID_BITS;
}

GRL_INLINE void PRIMREF_setProceduralMetaData( PrimRef* primref, uint geomID, uint primID, uint geomFlags )
{
    /* encode geomID, primID */
    uint flags = (geomFlags << QUAD_PRIMID_BITS);
    primref->lower.w = as_float( geomID );
    primref->upper.w = as_float( primID | flags );
}

GRL_INLINE void PRIMREF_setQuadMetaData( PrimRef* primref, uint primID0, uint primID1, uint geomID, uint geomFlags )
{
    const uint primID_diff = primID1 - primID0;
    uint flags = geomFlags << QUAD_PRIMID_BITS;

    primref->lower.w = as_float( geomID | (primID_diff << QUAD_GEOMID_BITS) );
    primref->upper.w = as_float( (primID0 | flags) );
}
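
/*
 * Illustrative sketch only (the actual leaf builders live elsewhere): a quad
 * PrimRef is an AABB over the quad's vertices whose .w lanes carry the packed
 * IDs, set after the bounds, e.g.
 *
 *     PrimRef ref;
 *     AABB_init( &ref );
 *     AABB_extend_point( &ref, v0 );    // v0..v3: the quad's corners (float4)
 *     AABB_extend_point( &ref, v1 );
 *     AABB_extend_point( &ref, v2 );
 *     AABB_extend_point( &ref, v3 );
 *     PRIMREF_setQuadMetaData( &ref, primID0, primID1, geomID, geomFlags );
 *
 * AABB_extend_point() is defined further down in this file.
 */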

GRL_INLINE void PRIMREF_setAABB( PrimRef* primref, float3 lower, float3 upper )
{
    primref->lower.xyz = lower.xyz;
    primref->upper.xyz = upper.xyz;
}

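/*
 * Builds an instance PrimRef from the caller-provided bounds: lower.w packs
 * the instance index (low INSTANCE_ID_BITS bits) with the instance mask above
 * it, and upper.w packs the BLAS root node offset with bit 31 flagging a
 * procedural instance.
 */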
GRL_INLINE PrimRef PRIMREF_set_instance( float3 lower, float3 upper, uint instanceIndex, uint instanceMask, uint rootOffset, bool is_procedural )
{
    PrimRef new_ref;
    new_ref.lower.xyz = lower;
    new_ref.lower.w = as_float(instanceIndex | (instanceMask << 24));
    new_ref.upper.xyz = upper;
    new_ref.upper.w = as_float(rootOffset + (is_procedural? 0x80000000 : 0));
    return new_ref;
}

GRL_INLINE bool PRIMREF_isProceduralInstance( PrimRef* primref )
{
    return (as_uint(primref->upper.w) & 0x80000000) != 0;
}

GRL_INLINE uint PRIMREF_instanceRootNodeOffset(PrimRef* primref)
{
    return (as_uint(primref->upper.w) & 0x7fffffff);
}

GRL_INLINE float3 PRIMREF_lower( PrimRef* primref )
{
    return primref->lower.xyz;
}
GRL_INLINE float3 PRIMREF_upper( PrimRef* primref )
{
    return primref->upper.xyz;
}

GRL_INLINE void AABB_extend(struct AABB *aabb, struct AABB *v)
{
    aabb->lower = min(aabb->lower, v->lower);
    aabb->upper = max(aabb->upper, v->upper);
}

GRL_INLINE void AABB_extend_point(struct AABB *aabb, const float4 p)
{
    aabb->lower = min(aabb->lower, p);
    aabb->upper = max(aabb->upper, p);
}

GRL_INLINE void AABB_extendlu(struct AABB *aabb, const float4 lower, const float4 upper)
{
    aabb->lower = min(aabb->lower, lower);
    aabb->upper = max(aabb->upper, upper);
}

GRL_INLINE struct AABB AABB_enlarge(struct AABB *aabb, const float v)
{
    struct AABB box;
    box.lower = aabb->lower - (float4)v;
    box.upper = aabb->upper + (float4)v;
    return box;
}

GRL_INLINE void AABB_intersect(struct AABB *aabb, struct AABB *v)
{
    aabb->lower = max(aabb->lower, v->lower);
    aabb->upper = min(aabb->upper, v->upper);
}

GRL_INLINE float4 AABB_size(struct AABB *aabb)
{
    return aabb->upper - aabb->lower;
}

// Returns lower + upper, i.e. twice the centroid.
GRL_INLINE float4 AABB_centroid2(struct AABB *aabb)
{
    return aabb->lower + aabb->upper;
}

GRL_INLINE float AABB_halfArea(struct AABB *aabb)
{
    const float4 d = AABB_size(aabb);
    return halfarea(d.xyz);
}
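
/*
 * AABB_halfArea() is the usual surface-area-heuristic metric: with
 * d = upper - lower, halfarea(d) is expected to evaluate to
 * d.x*d.y + d.y*d.z + d.z*d.x, i.e. half the box's surface area. The
 * halfarea() helper itself is defined elsewhere (see the includes at the
 * top of this file), not here.
 */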

// Half-area of the intersection of two AABBs, or 0 if they do not overlap.
GRL_INLINE float AABB_intersecion_size(struct AABB* aabb, struct AABB* v)
{
    struct AABB temp = *aabb;
    AABB_intersect(&temp, v);
    float4 len = AABB_size(&temp);
    float ret = 0.0f;
    if (len.x >= 0.0f && len.y >= 0.0f && len.z >= 0.0f) {
        float3 d = { len.x, len.y, len.z };
        ret = halfarea(d);
    }
    return ret;
}

// Returns true if 'small' is fully contained in 'big'. The component-wise
// vector comparisons yield -1 (all bits set) per true lane, so ANDing the
// lanes tests all three axes at once.
GRL_INLINE bool AABB_subset(struct AABB* small, struct AABB* big)
{
    const int4 b0 = small->lower >= big->lower;
    const int4 b1 = small->upper <= big->upper;
    const int4 b = b0 & b1;
    return b.x & b.y & b.z;
}

GRL_INLINE struct AABB AABBfromAABB3f(const struct AABB3f box)
{
    struct AABB box4d = {
        {box.lower[0], box.lower[1], box.lower[2], 0.0f},
        {box.upper[0], box.upper[1], box.upper[2], 0.0f}
    };
    return box4d;
}

GRL_INLINE struct AABB3f AABB3fFromAABB(const struct AABB box)
{
    struct AABB3f box3d = {
        {box.lower[0], box.lower[1], box.lower[2]},
        {box.upper[0], box.upper[1], box.upper[2]}
    };
    return box3d;
}

// Note: returns true if the AABB is INVALID (inverted or non-finite bounds),
// false if it passes verification.
GRL_INLINE bool AABB_verify(struct AABB* aabb)
{
    bool error = false;
    if (aabb->lower.x > aabb->upper.x)
        error = true;
    if (aabb->lower.y > aabb->upper.y)
        error = true;
    if (aabb->lower.z > aabb->upper.z)
        error = true;
    if (!isfinite(aabb->lower.x))
        error = true;
    if (!isfinite(aabb->lower.y))
        error = true;
    if (!isfinite(aabb->lower.z))
        error = true;
    if (!isfinite(aabb->upper.x))
        error = true;
    if (!isfinite(aabb->upper.y))
        error = true;
    if (!isfinite(aabb->upper.z))
        error = true;
    return error;
}

GRL_INLINE void AABB_print(struct AABB* aabb)
{
    printf("AABB {\n  area = %f\n  lower = %f\n  upper = %f\n  geomID = %i  primID0 = %i  primID1 = %i\n  aabb->lower.w = %x  aabb->upper.w = %x }\n",
        AABB_halfArea(aabb),
        aabb->lower.xyz,
        aabb->upper.xyz,
        PRIMREF_geomID(aabb),
        PRIMREF_primID0(aabb),
        PRIMREF_primID1(aabb),
        as_uint(aabb->lower.w),
        as_uint(aabb->upper.w));
}

#ifdef __OPENCL_VERSION__

GRL_INLINE PrimRef PrimRef_sub_group_shuffle(PrimRef* primRef, const uint slotID)
{
    PrimRef shuffledPrimref;
    shuffledPrimref.lower.x = intel_sub_group_shuffle(primRef->lower.x, slotID);
    shuffledPrimref.lower.y = intel_sub_group_shuffle(primRef->lower.y, slotID);
    shuffledPrimref.lower.z = intel_sub_group_shuffle(primRef->lower.z, slotID);
    shuffledPrimref.lower.w = intel_sub_group_shuffle(primRef->lower.w, slotID);
    shuffledPrimref.upper.x = intel_sub_group_shuffle(primRef->upper.x, slotID);
    shuffledPrimref.upper.y = intel_sub_group_shuffle(primRef->upper.y, slotID);
    shuffledPrimref.upper.z = intel_sub_group_shuffle(primRef->upper.z, slotID);
    shuffledPrimref.upper.w = intel_sub_group_shuffle(primRef->upper.w, slotID);
    return shuffledPrimref;
}

GRL_INLINE struct AABB AABB_sub_group_broadcast(struct AABB *aabb, const uint slotID)
{
    struct AABB bounds;
    bounds.lower.x = sub_group_broadcast(aabb->lower.x, slotID);
    bounds.lower.y = sub_group_broadcast(aabb->lower.y, slotID);
    bounds.lower.z = sub_group_broadcast(aabb->lower.z, slotID);
    bounds.lower.w = 0;
    bounds.upper.x = sub_group_broadcast(aabb->upper.x, slotID);
    bounds.upper.y = sub_group_broadcast(aabb->upper.y, slotID);
    bounds.upper.z = sub_group_broadcast(aabb->upper.z, slotID);
    bounds.upper.w = 0;
    return bounds;
}
GRL_INLINE struct AABB AABB_sub_group_shuffle(struct AABB* aabb, const uint slotID)
{
    struct AABB bounds;
    bounds.lower.x = intel_sub_group_shuffle(aabb->lower.x, slotID);
    bounds.lower.y = intel_sub_group_shuffle(aabb->lower.y, slotID);
    bounds.lower.z = intel_sub_group_shuffle(aabb->lower.z, slotID);
    bounds.lower.w = 0;
    bounds.upper.x = intel_sub_group_shuffle(aabb->upper.x, slotID);
    bounds.upper.y = intel_sub_group_shuffle(aabb->upper.y, slotID);
    bounds.upper.z = intel_sub_group_shuffle(aabb->upper.z, slotID);
    bounds.upper.w = 0;
    return bounds;
}

// Broadcasts the AABB at 'slotID' and hands one of its 8 float components to
// each of the first 8 lanes; lanes 8 and above return an undefined value.
GRL_INLINE uint AABB_sub_group_shuffle_coordPerLane(struct AABB* aabb, const uint slotID)
{
    float coordData[8] = {
        sub_group_broadcast(aabb->lower.x, slotID),
        sub_group_broadcast(aabb->lower.y, slotID),
        sub_group_broadcast(aabb->lower.z, slotID),
        sub_group_broadcast(aabb->lower.w, slotID),
        sub_group_broadcast(aabb->upper.x, slotID),
        sub_group_broadcast(aabb->upper.y, slotID),
        sub_group_broadcast(aabb->upper.z, slotID),
        sub_group_broadcast(aabb->upper.w, slotID) };

    uint coordDataFiltered;
    const uint lane = get_sub_group_local_id();
    if (lane < 8) coordDataFiltered = as_uint(coordData[lane]);
    return coordDataFiltered;
}

GRL_INLINE struct AABB AABB_sub_group_reduce(struct AABB *aabb)
{
    struct AABB bounds;
    bounds.lower.x = sub_group_reduce_min(aabb->lower.x);
    bounds.lower.y = sub_group_reduce_min(aabb->lower.y);
    bounds.lower.z = sub_group_reduce_min(aabb->lower.z);
    bounds.lower.w = 0;
    bounds.upper.x = sub_group_reduce_max(aabb->upper.x);
    bounds.upper.y = sub_group_reduce_max(aabb->upper.y);
    bounds.upper.z = sub_group_reduce_max(aabb->upper.z);
    bounds.upper.w = 0;
    return bounds;
}

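/*
 * Reduces the boxes held by the first 8 sub-group lanes into lane 0 using a
 * shuffle-down tree (offsets 4, 2, 1); only lane 0 is guaranteed to hold the
 * full min/max. The "_N6" suffix suggests it is used when at most the first 6
 * lanes carry valid boxes, the remaining lanes presumably holding empty
 * (AABB_init) boxes.
 */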
GRL_INLINE struct AABB AABB_sub_group_reduce_N6( struct AABB* aabb )
{
    float3 l = aabb->lower.xyz;
    float3 u = aabb->upper.xyz;
    l = min( l, intel_sub_group_shuffle_down( l, l, 4 ) );
    l = min( l, intel_sub_group_shuffle_down( l, l, 2 ) );
    l = min( l, intel_sub_group_shuffle_down( l, l, 1 ) );
    u = max( u, intel_sub_group_shuffle_down( u, u, 4 ) );
    u = max( u, intel_sub_group_shuffle_down( u, u, 2 ) );
    u = max( u, intel_sub_group_shuffle_down( u, u, 1 ) );

    struct AABB bounds;
    bounds.lower.x = l.x;
    bounds.lower.y = l.y;
    bounds.lower.z = l.z;
    bounds.lower.w = 0;
    bounds.upper.x = u.x;
    bounds.upper.y = u.y;
    bounds.upper.z = u.z;
    bounds.upper.w = 0;
    return bounds;
}

GRL_INLINE struct AABB AABB_work_group_reduce(struct AABB *aabb)
{
    // Note: unlike the sub-group variants, the .w components are left uninitialized.
    struct AABB bounds;
    bounds.lower.x = work_group_reduce_min(aabb->lower.x);
    bounds.lower.y = work_group_reduce_min(aabb->lower.y);
    bounds.lower.z = work_group_reduce_min(aabb->lower.z);
    bounds.upper.x = work_group_reduce_max(aabb->upper.x);
    bounds.upper.y = work_group_reduce_max(aabb->upper.y);
    bounds.upper.z = work_group_reduce_max(aabb->upper.z);
    return bounds;
}

GRL_INLINE struct AABB AABB_sub_group_scan_exclusive_min_max(struct AABB *aabb)
{
    struct AABB bounds;
    bounds.lower.x = sub_group_scan_exclusive_min(aabb->lower.x);
    bounds.lower.y = sub_group_scan_exclusive_min(aabb->lower.y);
    bounds.lower.z = sub_group_scan_exclusive_min(aabb->lower.z);
    bounds.lower.w = 0;
    bounds.upper.x = sub_group_scan_exclusive_max(aabb->upper.x);
    bounds.upper.y = sub_group_scan_exclusive_max(aabb->upper.y);
    bounds.upper.z = sub_group_scan_exclusive_max(aabb->upper.z);
    bounds.upper.w = 0;
    return bounds;
}

GRL_INLINE struct AABB AABB_sub_group_scan_inclusive_min_max(struct AABB *aabb)
{
    struct AABB bounds;
    bounds.lower.x = sub_group_scan_inclusive_min(aabb->lower.x);
    bounds.lower.y = sub_group_scan_inclusive_min(aabb->lower.y);
    bounds.lower.z = sub_group_scan_inclusive_min(aabb->lower.z);
    bounds.lower.w = 0;
    bounds.upper.x = sub_group_scan_inclusive_max(aabb->upper.x);
    bounds.upper.y = sub_group_scan_inclusive_max(aabb->upper.y);
    bounds.upper.z = sub_group_scan_inclusive_max(aabb->upper.z);
    bounds.upper.w = 0;
    return bounds;
}

GRL_INLINE void AABB_global_atomic_merge(global struct AABB *global_aabb, struct AABB *aabb)
{
    atomic_min((volatile __global float *)&global_aabb->lower + 0, aabb->lower.x);
    atomic_min((volatile __global float *)&global_aabb->lower + 1, aabb->lower.y);
    atomic_min((volatile __global float *)&global_aabb->lower + 2, aabb->lower.z);
    atomic_max((volatile __global float *)&global_aabb->upper + 0, aabb->upper.x);
    atomic_max((volatile __global float *)&global_aabb->upper + 1, aabb->upper.y);
    atomic_max((volatile __global float *)&global_aabb->upper + 2, aabb->upper.z);
}

GRL_INLINE void AABB_global_atomic_merge_lu(global struct AABB* global_aabb, float3 lower, float3 upper )
{
    atomic_min((volatile __global float*)&global_aabb->lower + 0, lower.x);
    atomic_min((volatile __global float*)&global_aabb->lower + 1, lower.y);
    atomic_min((volatile __global float*)&global_aabb->lower + 2, lower.z);
    atomic_max((volatile __global float*)&global_aabb->upper + 0, upper.x);
    atomic_max((volatile __global float*)&global_aabb->upper + 1, upper.y);
    atomic_max((volatile __global float*)&global_aabb->upper + 2, upper.z);
}

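/*
 * Sub-group cooperative merge: the per-lane bounds are first reduced across
 * the sub-group, then lanes 0..2 each issue one atomic_min / atomic_max pair
 * (one axis per lane), so the whole sub-group contributes only six global
 * atomics.
 */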
GRL_INLINE void AABB_global_atomic_merge_sub_group_lu(uniform global struct AABB* aabb, float3 lower, float3 upper)
{
    uint lane = get_sub_group_local_id();
    float l[3];
    l[0] = sub_group_reduce_min(lower.x);
    l[1] = sub_group_reduce_min(lower.y);
    l[2] = sub_group_reduce_min(lower.z);
    float u[3];
    u[0] = sub_group_reduce_max(upper.x);
    u[1] = sub_group_reduce_max(upper.y);
    u[2] = sub_group_reduce_max(upper.z);

    if (lane < 3)
    {
        atomic_min((global float*)&aabb->lower + lane, l[lane]);
        atomic_max((global float*)&aabb->upper + lane, u[lane]);
    }
}

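/*
 * Each component is tested against the current SLM bounds before issuing the
 * atomic; the plain read is racy, but an atomic is only skipped when it could
 * not have tightened the box further, so the filter is safe and reduces local
 * atomic traffic.
 */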
GRL_INLINE void AABB_local_atomic_merge(local struct AABB *aabb, const float4 lower, const float4 upper)
{
    if (lower.x < aabb->lower.x)
        atomic_min((local float *)&aabb->lower + 0, lower.x);
    if (lower.y < aabb->lower.y)
        atomic_min((local float *)&aabb->lower + 1, lower.y);
    if (lower.z < aabb->lower.z)
        atomic_min((local float *)&aabb->lower + 2, lower.z);
    if (upper.x > aabb->upper.x)
        atomic_max((local float *)&aabb->upper + 0, upper.x);
    if (upper.y > aabb->upper.y)
        atomic_max((local float *)&aabb->upper + 1, upper.y);
    if (upper.z > aabb->upper.z)
        atomic_max((local float *)&aabb->upper + 2, upper.z);
}
#endif

GRL_NAMESPACE_END(RTAS)
GRL_NAMESPACE_END(GRL)