1 //
2 // Copyright (C) 2009-2021 Intel Corporation
3 //
4 // SPDX-License-Identifier: MIT
5 //
6 //
7
8 #pragma once
9
10 #include "shared.h"
11 #include "intrinsics.h"
12 #include "AABB.h"
13 #include "AABB3f.h"
14 #include "qbvh6.h"
15
16 /* ====== BVH_BUILDER config ====== */
17
/* Build-heuristic tuning knobs and structural limits for the BVH builder.
 * (Names suggest an SAH-style cost model — intersection vs traversal cost —
 * NOTE(review): confirm against the kernels that consume these.) */
__constant const float cfg_intCost = 4.0f;   // relative cost of intersecting one primitive
__constant const float cfg_travCost = 1.0f;  // relative cost of one node traversal step
__constant const uint cfg_minLeafSize = BVH_LEAF_N_MIN; // minimum primitives per leaf
__constant const uint cfg_maxLeafSize = BVH_LEAF_N_MAX; // maximum primitives per leaf
__constant const uint cfg_maxDepth = BUILDRECORD_STACK_SIZE; // build recursion depth limit
23
24 #define ENABLE_CONVERSION_CHECKS 0
25
26 #ifdef ENABLE_BIG_REG_ANNOTATION
27 #define GRL_ANNOTATE_BIG_REG_REQ __attribute__((annotate("num-thread-per-eu 4")))
28 #else
29 #define GRL_ANNOTATE_BIG_REG_REQ
30 #endif
31
32 #ifdef ENABLE_IGC_DO_NOT_SPILL
33 #define GRL_ANNOTATE_IGC_DO_NOT_SPILL __attribute__((annotate("igc-do-not-spill")))
34 #else
35 #define GRL_ANNOTATE_IGC_DO_NOT_SPILL
36 #endif
37
38 #define ERROR()
39
40 /* =================================================================================================================================================== */
41 /* =================================================================================================================================================== */
42 /* =================================================================================================================================================== */
43 /* =================================================================================================================================================== */
44
getNumLeafPrims(unsigned int offset)45 GRL_INLINE unsigned int getNumLeafPrims(unsigned int offset)
46 {
47 return (offset & 0x7) - 3;
48 }
49
getLeafOffset(unsigned int offset)50 GRL_INLINE unsigned int getLeafOffset(unsigned int offset)
51 {
52 return offset & (~0x7);
53 }
54
triangleNormal(const float4 v0,const float4 v1,const float4 v2)55 GRL_INLINE float4 triangleNormal(const float4 v0, const float4 v1, const float4 v2)
56 {
57 const float4 a = v1 - v0;
58 const float4 b = v2 - v0;
59 return cross(a, b);
60 }
61
areaTriangle(const float4 v0,const float4 v1,const float4 v2)62 GRL_INLINE float areaTriangle(const float4 v0, const float4 v1, const float4 v2)
63 {
64 const float4 normal = triangleNormal(v0, v1, v2);
65 return length((float3)(normal.x, normal.y, normal.z)) * 0.5f;
66 }
67
det2(const float2 a,const float2 b)68 GRL_INLINE float det2(const float2 a, const float2 b)
69 {
70 return a.x * b.y - a.y * b.x;
71 }
72
areaProjectedTriangle(const float4 v0,const float4 v1,const float4 v2)73 GRL_INLINE float areaProjectedTriangle(const float4 v0, const float4 v1, const float4 v2)
74 {
75 const float xy = 0.5f * fabs(det2(v1.xy - v0.xy, v2.xy - v0.xy));
76 const float yz = 0.5f * fabs(det2(v1.yz - v0.yz, v2.yz - v0.yz));
77 const float zx = 0.5f * fabs(det2(v1.zx - v0.zx, v2.zx - v0.zx));
78 return xy + yz + zx;
79 }
80
/* A 64-byte, 64-byte-aligned memory block — the granule in which BVH node
 * memory is handed out (node allocation below returns offsets in multiples
 * of 64B). */
typedef struct Block64B {
    char data[64];
} Block64B __attribute__((aligned(64)));

/* A single byte carrying 64-byte alignment. */
typedef char byte_align64B __attribute__((aligned(64)));
86
87 /* ====================================================================== */
88 /* ============================== GLOBALS =============================== */
89 /* ====================================================================== */
90
/* Cross-work-group termination check: the first work-item of each group bumps
 * the global sync counter once; returns true only for the first work-item of
 * the LAST group to arrive (every other invocation returns false). The counter
 * is reset to 0 so the field can be reused by a subsequent dispatch. */
GRL_INLINE bool Globals_OnFinish(global struct Globals *globals)
{
    /* last active HW thread ? */
    if (get_local_id(0) == 0)
    {
        const uint sync = atomic_add(&globals->sync, 1);
        if (sync + 1 == get_num_groups(0))
        {
            /* NOTE(review): plain (non-atomic) store — safe only because this
             * is the last group to increment; confirm no concurrent readers. */
            globals->sync = 0;
            return true;
        }
    }
    return false;
}
105
BlockAllocator_BytesUsed(struct BlockAllocator * p)106 GRL_INLINE uint BlockAllocator_BytesUsed(struct BlockAllocator *p)
107 {
108 return p->cur - p->start;
109 };
110
BlockAllocator_Alloc(__global struct BlockAllocator * p,const uint size)111 GRL_INLINE uint BlockAllocator_Alloc(__global struct BlockAllocator *p, const uint size)
112 {
113 return atomic_add(&p->cur, size);
114 }
115
/* Sub-group-cooperative bump allocation: only lane 0 performs the atomic add,
 * and the resulting offset is broadcast so every lane observes the same base.
 * NOTE(review): assumes sub-group-uniform control flow at the call site so the
 * broadcast is well-defined — confirm with callers. */
GRL_INLINE uint BlockAllocator_Alloc_Single(__global struct BlockAllocator *p, const uint size)
{
    uint offset = 0;
    if (get_sub_group_local_id() == 0)
        offset = atomic_add(&p->cur, size);
    return sub_group_broadcast(offset, 0);
}
123
124 // node allocation returns an offset from beginning of BVH to allocated node
125 // in multiples of 64B
allocate_inner_nodes(global struct BVHBase * base,uint num_nodes)126 GRL_INLINE uint allocate_inner_nodes(global struct BVHBase* base, uint num_nodes )
127 {
128 return atomic_add_global( &base->nodeDataCur, num_nodes );
129 }
allocate_procedural_leaves(global struct BVHBase * base,uint num_nodes)130 GRL_INLINE uint allocate_procedural_leaves(global struct BVHBase* base, uint num_nodes)
131 {
132 return atomic_add_global(&base->proceduralDataCur, num_nodes);
133 }
134
allocate_quad_leaves(global struct BVHBase * base,uint num_nodes)135 GRL_INLINE uint allocate_quad_leaves(global struct BVHBase* base, uint num_nodes)
136 {
137 return atomic_add_global(&base->quadLeafCur, num_nodes);
138 }
139
140 #if 0
141 GRL_INLINE uint alloc_node_mem(global struct Globals *globals, const uint size)
142 {
143 const uint aligned_size = ((size + 63) / 64) * 64; /* allocate in 64 bytes blocks */
144 return BlockAllocator_Alloc(&globals->node_mem_allocator, aligned_size);
145 }
146
147 GRL_INLINE uint alloc_single_node_mem(global struct Globals *globals, const uint size)
148 {
149 const uint aligned_size = ((size + 63) / 64) * 64; /* allocate in 64 bytes blocks */
150 return BlockAllocator_Alloc_Single(&globals->node_mem_allocator, aligned_size);
151 }
152
153 GRL_INLINE uint alloc_quad_leaf_mem(global struct Globals *globals, const uint size)
154 {
155 const uint aligned_size = ((size + 63) / 64) * 64; /* allocate in 64 bytes blocks */
156 return BlockAllocator_Alloc(&globals->quad_mem_allocator, aligned_size);
157 }
158
159 GRL_INLINE uint alloc_procedural_leaf_mem(global struct Globals *globals, const uint size)
160 {
161 const uint aligned_size = ((size + 63) / 64) * 64; /* allocate in 64 bytes blocks */
162 return BlockAllocator_Alloc(&globals->procedural_mem_allocator, aligned_size);
163 }
164 #endif
165
/* Returns the build-record array, which lives inside the BVH memory buffer at
 * the byte offset recorded in globals->build_record_start. */
GRL_INLINE global struct BuildRecord *getBuildRecords(char *bvh_mem, struct Globals *globals)
{
    return (global struct BuildRecord *)(bvh_mem + globals->build_record_start);
}
170
171 /* ======================================================================= */
172 /* ============================== TRIANGLE =============================== */
173 /* ======================================================================= */
174
175 /*GRL_INLINE void printTriangle(struct Triangle *t)
176 {
177 printf("vtx[0] %d vtx[1] %d vtx[2] %d primID %d geomID %d \n",t->vtx[0],t->vtx[1],t->vtx[2],t->primID,t->geomID);
178 }*/
179
180 /* ==================================================================== */
181 /* ============================== SPLIT =============================== */
182 /* ==================================================================== */
183
/* Debug helper: dump a Split's SAH cost, split dimension, and split position. */
GRL_INLINE void printSplit(struct Split *split)
{
    printf("split sah %f dim %d pos %d \n", split->sah, split->dim, split->pos);
}
188
189 /* ========================================================================== */
190 /* ============================== BUILDRECORD =============================== */
191 /* ========================================================================== */
192
initBuildRecord(struct BuildRecord * buildRecord,uint start,uint end)193 GRL_INLINE void initBuildRecord(struct BuildRecord *buildRecord, uint start, uint end)
194 {
195 AABB_init(&buildRecord->centroidBounds);
196 buildRecord->start = start;
197 buildRecord->end = end;
198 }
199
/* Grow the record's centroid bounds to include this primref's centroid.
 * NOTE(review): AABB_centroid2 presumably returns the doubled centroid
 * (min+max, without the 0.5 factor) — consistent as long as every consumer of
 * centroidBounds uses the same convention; confirm. */
GRL_INLINE void extendBuildRecord(struct BuildRecord *buildRecord, struct AABB *primref)
{
    AABB_extend_point(&buildRecord->centroidBounds, AABB_centroid2(primref));
}
204
/* The recursion depth is stashed bit-for-bit in the float 'w' component of the
 * centroid-bounds upper corner (see setBuildRecursionDepth); recover it with a
 * bit-cast, not a numeric conversion. */
GRL_INLINE uint getBuildRecursionDepth(struct BuildRecord *buildRecord)
{
    return as_uint(buildRecord->centroidBounds.upper.w);
}
209
/* Stash the recursion depth bit-for-bit in the unused float 'w' component of
 * the centroid-bounds upper corner (paired with getBuildRecursionDepth). */
GRL_INLINE void setBuildRecursionDepth(struct BuildRecord *buildRecord, uint depth)
{
    buildRecord->centroidBounds.upper.w = as_float(depth);
}
214
getNumPrimsBuildRecord(struct BuildRecord * buildRecord)215 GRL_INLINE uint getNumPrimsBuildRecord(struct BuildRecord *buildRecord)
216 {
217 return buildRecord->end - buildRecord->start;
218 }
219
220 /* ========================================================================== */
221 /* =================== BinaryMortonCodeHierarchy ============================= */
222 /* ========================================================================== */
223
/* Initialize a binary morton hierarchy node covering [start, end] with no
 * children yet (-1 sentinels). */
GRL_INLINE void BinaryMortonCodeHierarchy_init(struct BinaryMortonCodeHierarchy *record, uint start, uint end)
{
    record->range.start = start;
    record->range.end = end;
    record->leftChild = -1;  // sentinel: child not yet assigned
    record->rightChild = -1; // sentinel: child not yet assigned
    // record->flag = 0;
}
232
BinaryMortonCodeHierarchy_getNumPrimitives(global struct BinaryMortonCodeHierarchy * nodes,uint nodeID)233 GRL_INLINE uint BinaryMortonCodeHierarchy_getNumPrimitives(global struct BinaryMortonCodeHierarchy *nodes, uint nodeID)
234 {
235 /* leaf case */
236 if (nodeID & (uint)(1 << 31))
237 return 1;
238
239 /* inner node case*/
240 else
241 return nodes[nodeID].range.end - nodes[nodeID].range.start + 1;
242 }
243
BinaryMortonCodeHierarchy_getEntry(global struct BinaryMortonCodeHierarchy * nodes,uint nodeID)244 GRL_INLINE struct BinaryMortonCodeHierarchy BinaryMortonCodeHierarchy_getEntry(global struct BinaryMortonCodeHierarchy* nodes, uint nodeID)
245 {
246 struct BinaryMortonCodeHierarchy entry;
247
248 if (nodeID & (uint)(1 << 31)) {
249 /* leaf case */
250 uint rangeStart = nodeID ^ (uint)(1 << 31);
251 BinaryMortonCodeHierarchy_init(&entry, rangeStart, rangeStart);
252 }
253 else {
254 /* inner node case*/
255 entry = nodes[nodeID];
256 }
257
258 return entry;
259 }
260
BinaryMortonCodeHierarchy_getRangeStart(global struct BinaryMortonCodeHierarchy * nodes,uint nodeID)261 GRL_INLINE uint BinaryMortonCodeHierarchy_getRangeStart(global struct BinaryMortonCodeHierarchy *nodes, uint nodeID)
262 {
263 /* leaf case */
264 if (nodeID & (uint)(1 << 31))
265 return nodeID ^ (uint)(1 << 31);
266
267 /* inner node case*/
268 else
269 return nodes[nodeID].range.start;
270 }
271
272 /* ==================================================================== */
273 /* ============================== RANGE =============================== */
274 /* ==================================================================== */
275
/* Debug helper: dump a Range's start and end indices. */
GRL_INLINE void printRange(struct Range *range)
{
    printf("start %d end %d \n", range->start, range->end);
}
280
equalRange(struct Range * range0,struct Range * range1)281 GRL_INLINE bool equalRange(struct Range *range0, struct Range *range1)
282 {
283 if (range0->start == range1->start &&
284 range0->end == range1->end)
285 return true;
286 return false;
287 }
288
getSizeRange(struct Range * range)289 GRL_INLINE uint getSizeRange(struct Range *range)
290 {
291 return range->end - range->start;
292 }
293
294 /* ==================================================================== */
295 /* ========================= ProceduralLeaf =========================== */
296 /* ==================================================================== */
297
298 #if 0
299 struct ProceduralLeaf
300 {
301 uint shaderIndex_geomMask;
302 uint geomIndex_flags;
303 uint N_last;
304 uint primIndex[13];
305 };
306 #endif
307
/* Geometry index occupies the low 29 bits of geomIndex_flags; the remaining
 * high bits hold flags (per the field's name). */
GRL_INLINE uint ProceduralLeaf_geomIndex(global struct ProceduralLeaf *This)
{
    return This->leafDesc.geomIndex_flags & 0x1FFFFFFF;
}
312
/* i-th primitive index stored in this procedural leaf (no bounds checking;
 * caller must keep i within the leaf's primitive count). */
GRL_INLINE uint ProceduralLeaf_primIndex(global struct ProceduralLeaf *This, uint i)
{
    //assert(i < N);
    return This->_primIndex[i];
}
318
319 /* ==================================================================== */
320 /* =========================== TrianglePair =========================== */
321 /* ==================================================================== */
322
/* Two triangles merged into one quad-style vertex set (built by
 * TrianglePair_Constructor below). */
struct TrianglePair
{
    uint4 a; // indices of the 4 verts to store in the quad
    uint3 lb; // index of the second triangle's verts in 'a'
};
328
/* Pack two (possibly vertex-sharing) triangles into a quad vertex set.
 * tri0 occupies a.xyz; each vertex of tri1 is remapped through lb: values
 * 0..2 mean "same as a.x / a.y / a.z", 3 means "the extra vertex in a.w".
 * If primID0 == primID1 the pair degenerates to a single triangle (lb = 0). */
GRL_INLINE struct TrianglePair TrianglePair_Constructor(uint3 tri0, uint primID0, uint3 tri1, uint primID1)
{
    struct TrianglePair q;
    q.a.x = tri0.x;
    q.a.y = tri0.y;
    q.a.z = tri0.z;
    q.a.w = tri0.z; // placeholder; overwritten below if tri1 contributes a new vertex

    uint3 b;
    b.x = tri1.x;
    b.y = tri1.y;
    b.z = tri1.z;

    // start by assuming every tri1 vertex is new (slot 3 == a.w)
    q.lb = (uint3)(3);

    // match each tri1 vertex against a.x ...
    q.lb.x = (b.x == q.a.x) ? 0 : q.lb.x;
    q.lb.y = (b.y == q.a.x) ? 0 : q.lb.y;
    q.lb.z = (b.z == q.a.x) ? 0 : q.lb.z;

    // ... then a.y (later matches take precedence) ...
    q.lb.x = (b.x == q.a.y) ? 1 : q.lb.x;
    q.lb.y = (b.y == q.a.y) ? 1 : q.lb.y;
    q.lb.z = (b.z == q.a.y) ? 1 : q.lb.z;

    // ... then a.z
    q.lb.x = (b.x == q.a.z) ? 2 : q.lb.x;
    q.lb.y = (b.y == q.a.z) ? 2 : q.lb.y;
    q.lb.z = (b.z == q.a.z) ? 2 : q.lb.z;

    // same primitive twice: collapse the second triangle's mapping to 0
    q.lb.x = (primID0 != primID1) ? q.lb.x : 0;
    q.lb.y = (primID0 != primID1) ? q.lb.y : 0;
    q.lb.z = (primID0 != primID1) ? q.lb.z : 0;

    // any unmatched tri1 vertex (still slot 3) becomes the fourth quad vertex
    q.a.w = (q.lb.x == 3) ? b.x : q.a.w;
    q.a.w = (q.lb.y == 3) ? b.y : q.a.w;
    q.a.w = (q.lb.z == 3) ? b.z : q.a.w;

    return q;
}
366
InstanceDesc_get_transform(const InstanceDesc * d,const uint32_t row,const uint32_t column)367 GRL_INLINE float InstanceDesc_get_transform(const InstanceDesc *d, const uint32_t row, const uint32_t column)
368 {
369 return d->Transform[row][column];
370 }
371
InstanceDesc_get_instanceID(const InstanceDesc * d)372 GRL_INLINE uint32_t InstanceDesc_get_instanceID(const InstanceDesc *d)
373 {
374 return d->InstanceIDAndMask & (0x00FFFFFF);
375 }
376
InstanceDesc_get_InstanceMask(const InstanceDesc * d)377 GRL_INLINE uint32_t InstanceDesc_get_InstanceMask(const InstanceDesc *d)
378 {
379 return d->InstanceIDAndMask >> 24;
380 }
381
InstanceDesc_get_InstanceContributionToHitGroupIndex(const InstanceDesc * d)382 GRL_INLINE uint32_t InstanceDesc_get_InstanceContributionToHitGroupIndex(const InstanceDesc *d)
383 {
384 return d->InstanceContributionToHitGroupIndexAndFlags & ((1 << 24) - 1);
385 }
386
InstanceDesc_get_InstanceFlags(const InstanceDesc * d)387 GRL_INLINE uint32_t InstanceDesc_get_InstanceFlags(const InstanceDesc *d)
388 {
389 return d->InstanceContributionToHitGroupIndexAndFlags >> 24;
390 }
391
InstanceDesc_get_AccelerationStructure(const InstanceDesc * d)392 GRL_INLINE gpuva_t InstanceDesc_get_AccelerationStructure(const InstanceDesc *d)
393 {
394 return d->AccelerationStructureGPUVA;
395 }
396
InstanceDesc_set_transform(InstanceDesc * d,const uint32_t row,const uint32_t column,float value)397 GRL_INLINE void InstanceDesc_set_transform(InstanceDesc *d, const uint32_t row, const uint32_t column, float value)
398 {
399 d->Transform[row][column] = value;
400 }
401
InstanceDesc_set_instanceID(InstanceDesc * d,const uint32_t id)402 GRL_INLINE void InstanceDesc_set_instanceID(InstanceDesc *d, const uint32_t id)
403 {
404 d->InstanceIDAndMask &= 255 << 24;
405 d->InstanceIDAndMask |= id & ((1 << 24) - 1);
406 }
407
InstanceDesc_set_InstanceMask(InstanceDesc * d,const uint32_t mask)408 GRL_INLINE void InstanceDesc_set_InstanceMask(InstanceDesc *d, const uint32_t mask)
409 {
410 d->InstanceIDAndMask &= ((1 << 24) - 1);
411 d->InstanceIDAndMask |= mask << 24;
412 }
413
InstanceDesc_set_InstanceContributionToHitGroupIndex(InstanceDesc * d,const uint32_t contribution)414 GRL_INLINE void InstanceDesc_set_InstanceContributionToHitGroupIndex(InstanceDesc *d, const uint32_t contribution)
415 {
416 d->InstanceContributionToHitGroupIndexAndFlags &= 255 << 24;
417 d->InstanceContributionToHitGroupIndexAndFlags |= contribution & ((1 << 24) - 1);
418 }
419
InstanceDesc_set_InstanceFlags(InstanceDesc * d,const uint32_t flags)420 GRL_INLINE void InstanceDesc_set_InstanceFlags(InstanceDesc *d, const uint32_t flags)
421 {
422 d->InstanceContributionToHitGroupIndexAndFlags &= ((1 << 24) - 1);
423 d->InstanceContributionToHitGroupIndexAndFlags |= flags << 24;
424 }
425
InstanceDesc_set_AccelerationStructure(InstanceDesc * d,gpuva_t address)426 GRL_INLINE void InstanceDesc_set_AccelerationStructure(InstanceDesc *d, gpuva_t address)
427 {
428 d->AccelerationStructureGPUVA = address;
429 }
430