1 //
2 // Copyright (C) 2009-2021 Intel Corporation
3 //
4 // SPDX-License-Identifier: MIT
5 //
6 //
7
8 //
9 // This file is to contain structure definitions related to the Gen12 QBVH6 acceleration structures
10 //
11 //
12
13 //********************************************************************************************
14 // WARNING!!!!!
15 // This file is shared by OpenCL and C++ source code and must be compatible.
16 // There should only be C structure definitions and trivial GRL_INLINE functions here
17 //
18 //********************************************************************************************
19
20 #pragma once
21
22 #include "GRLRTASCommon.h"
23 #include "GRLUtilities.h"
24
// Open the nested GRL / RTAS / GEN12 scopes. On the C++ side the contents are
// reachable as GRL::RTAS::GEN12::<name> (see BVH_ROOT_NODE_OFFSET below).
GRL_NAMESPACE_BEGIN(GRL)
GRL_NAMESPACE_BEGIN(RTAS)
GRL_NAMESPACE_BEGIN(GEN12)
28
// Node type codes (8-bit enum). Note that NODE_TYPE_MIXED and NODE_TYPE_INTERNAL
// deliberately share the encoding 0x0.
enum_uint8(NodeType)
{
    NODE_TYPE_MIXED = 0x0, // identifies a mixed internal node where each child can have a different type
    NODE_TYPE_INTERNAL = 0x0, // internal BVH node with 6 children
    NODE_TYPE_INSTANCE = 0x1, // instance leaf
    NODE_TYPE_PROCEDURAL = 0x3, // procedural leaf
    NODE_TYPE_QUAD = 0x4, // quad leaf
    NODE_TYPE_INVALID = 0x7 // indicates invalid node
};
38
39
// Values of the 1-bit 'type' field of PrimLeafDesc (see PrimLeaf_GetType).
// The same bit is interpreted differently depending on the node type, which is
// why several enumerators share the value 0.
typedef enum PrimLeafType
{
    TYPE_NONE = 0,

    TYPE_QUAD = 0,

    /* For a node type of NODE_TYPE_PROCEDURAL we support enabling
     * and disabling the opaque/non_opaque culling. */

    TYPE_OPACITY_CULLING_ENABLED = 0,
    TYPE_OPACITY_CULLING_DISABLED = 1
} PrimLeafType;
52
// Format/version tag for the BVH layout.
// If serialization-breaking or algorithm-breaking changes are made, increment the digits at the end.
#define BVH_MAGIC_MACRO "GEN12_RTAS_005"
// The tag as a fixed 16-byte array (14 characters plus zero padding).
static const char BVH_MAGIC[16] = BVH_MAGIC_MACRO;
55
// Header located at the start of a BVH. Node data begins BVH_ROOT_NODE_OFFSET
// (= sizeof(BVHBase)) bytes after the start of this header.
//
// The *Start / *Cur / *End members that the BVHBase_Get* accessors in this file
// consume are offsets from the start of this header in units of 64 bytes (the
// accessors scale them by 64). The static asserts after this struct require
// sizeof(BVHBase) to be a multiple of 64.
typedef struct BVHBase
{
    // TODO: Implement the "copy-first-node" trick... duplicate root node here

    uint64_t rootNodeOffset; // NOTE(review): units are not evident from this file - confirm against the writer

    uint32_t reserved;

    uint32_t nodeDataCur; // end of internal node data; nodeDataStart is sizeof(BVHBase) / 64 = BVH_ROOT_NODE_OFFSET / 64
    uint32_t quadLeafStart;       // first quad leaf
    uint32_t quadLeafCur;         // end of quad leaves
    uint32_t proceduralDataStart; // first procedural leaf
    uint32_t proceduralDataCur;   // end of procedural leaves
    uint32_t instanceLeafStart;   // first HW instance leaf
    uint32_t instanceLeafEnd;     // end of HW instance leaves (each leaf occupies two 64B blocks)
    uint32_t backPointerDataStart;     // start of the backpointer array (refit structs)
    uint32_t refitTreeletsDataStart;   // start of the RefitTreelet descriptors (refit structs)
    uint32_t refitStartPointDataStart; // start of the StartPoint array (refit structs)
    uint32_t BVHDataEnd;               // end of all BVH data; BVHDataEnd * 64 is the total size in bytes

    // number of bottom treelets
    // if 1, then the bottom treelet is also tip treelet
    uint32_t refitTreeletCnt;
    uint32_t refitTreeletCnt2; // always 0, used for atomic updates
    // data layout:
    // @backPointerDataStart
    //   'backpointer' - a dword per inner node.
    //   The bits are used as follows:
    //      2:0 --> Used as a refit counter during BVH refitting.  MBZ
    //      5:3 --> Number of children
    //     31:6 --> Index of the parent node in the internal node array
    //   The root node has a parent index of all ones
    // @refitTreeletsDataStart
    //   RefitTreelet[], the last treelet is for top treelet all previous are for bottom
    // @refitStartPointDataStart
    //   for each treelet T there is [T.startpoint_offset, T.numStartpoints) interval of startpoints here in that space
    //   NOTE(review): the interval above is quoted as originally written; it presumably means
    //   numStartpoints entries beginning at startpoint_offset - confirm.
    // @backPointerDataEnd

    uint32_t fatLeafCount; // number of internal nodes which are "fat-leaves"
    uint32_t innerCount; // number of internal nodes which are true inner nodes (all internalNode children)
    uint32_t fatLeafTableStart; // start of the LeafTableEntry table (64B units)
    uint32_t innerTableStart;   // start of the InnerNodeTableEntry table (64B units)

    uint32_t quadLeftoversCountNewAtomicUpdate; // number of quad leftovers for new atomic update
    uint32_t quadTableSizeNewAtomicUpdate; // size of quad Table including leftovers, padded to 256
    uint32_t quadIndicesDataStart; // start of the QuadDataIndices table (64B units)

    uint32_t _pad[9]; // explicit padding; the struct size must stay a multiple of 64 bytes

    struct RTASMetaData Meta;

} BVHBase;
108
BVHBase_GetGeoMetaData(BVHBase * base)109 GRL_INLINE struct GeoMetaData* BVHBase_GetGeoMetaData(BVHBase* base)
110 {
111 return (struct GeoMetaData*)(((char*)base) + base->Meta.geoDescsStart);
112 }
113
// Byte offset from the start of the BVH to the root node: the node data follows
// the header directly. The C++ build spells the type with its full namespace
// path so the macro can be used outside GRL::RTAS::GEN12.
#ifdef __OPENCL_VERSION__
#define BVH_ROOT_NODE_OFFSET sizeof(BVHBase)
#else
#define BVH_ROOT_NODE_OFFSET sizeof(GRL::RTAS::GEN12::BVHBase)
#endif

// The header must end on a 64-byte boundary because the accessors in this file
// express data offsets in 64-byte units.
GRL_STATIC_ASSERT( sizeof(BVHBase) == BVH_ROOT_NODE_OFFSET, "Wrong size!");
GRL_STATIC_ASSERT( (sizeof(BVHBase) % 64) == 0 , "Misaligned size!");
122
// Opaque tag type for the backpointer array. It has no members; the data is
// accessed as an array of dwords through InnerNode_GetBackPointer.
typedef struct BackPointers {
} BackPointers;
125
// Threshold for the size of bottom treelets: no bottom treelet has more paths
// than this number. In practice treelets are usually 2-3x smaller.
#define TREELET_NUM_STARTPOINTS 1536

// Threshold under which only one treelet will be created.
#define SINGLE_TREELET_THRESHOLD 3072
132
// Element type of the table returned by BVHBase_GetFatLeafTable.
// NOTE(review): field semantics are inferred from names only - confirm with the builder.
typedef struct LeafTableEntry {

    uint backpointer;       // presumably the backpointer dword of the fat-leaf node
    uint inner_node_index;  // presumably the index of the fat-leaf node in the internal node array
    uint leaf_index;        // presumably the index of the node's first leaf
} LeafTableEntry;
139
// Element type of the table returned by BVHBase_GetInnerNodeTable.
typedef struct InnerNodeTableEntry {

    uint node_index_and_numchildren; // numchildren in 3 lsbs
    uint first_child;                // NOTE(review): presumably the index of the first child node - confirm

} InnerNodeTableEntry;
146
// Element type of the table returned by BVHBase_GetQuadDataIndicesTable
// (eight dwords, 32 bytes).
// NOTE(review): the meaning of header_data is not visible in this file.
typedef struct QuadDataIndices
{
    uint header_data[4];
    uint vert_idx[4]; // presumably the vertex indices of the quad's four corners
} QuadDataIndices;
152
// Descriptor of one refit treelet; an array of these is returned by
// BVHBase_GetRefitTreeletDescs. When numStartpoints == 1 the same storage is
// reinterpreted as RefitTreeletTrivial.
typedef struct RefitTreelet {
    uint32_t startpoint_offset;         // where this treelet's startpoints begin in the startpoint array
    uint32_t numStartpoints;            // number of startpoints belonging to this treelet
    uint32_t numNonTrivialStartpoints;
    uint8_t maxDepth;
    uint8_t depthLess64; // depth from bottom at which there are less 64 paths
    uint8_t depthLess128;// depth from bottom at which there are less 128 paths
    uint8_t depthLess256;// depth from bottom at which there are less 256 paths
} RefitTreelet;
162
// If a RefitTreelet has number of startpoints == 1
// it should be reinterpreted as this struct. The first three members overlay
// the three dwords of RefitTreelet, and maxDepth sits at the same position.
typedef struct RefitTreeletTrivial {
    uint32_t theOnlyNodeIndex;
    uint32_t numStartpoints; // have to be 1 or 0
    int32_t childrenOffsetOfTheNode; // 0th node based
    uint8_t maxDepth;
    uint8_t numChildrenOfTheNode;
} RefitTreeletTrivial;
172
// Packed refit startpoint, decoded by StartPoint_GetDepth / StartPoint_GetNodeIdx:
//   5:0  - depth after you die
//   31:6 - Index of the inner node
typedef uint32_t StartPoint;
176
// Forward declarations so the BVHBase accessors below can return pointers to
// node and leaf types that are defined further down in this file.
struct HwInstanceLeaf;
struct QuadLeaf;
struct ProceduralLeaf;
struct InternalNode;

// Let plain C / OpenCL C code use the type names without the 'struct' keyword.
typedef struct HwInstanceLeaf HwInstanceLeaf;
typedef struct InternalNode InternalNode;
typedef struct QuadLeaf QuadLeaf;
typedef struct ProceduralLeaf ProceduralLeaf;
186
// Extracts the parent node index stored in bits 31:6 of a backpointer dword.
GRL_INLINE uint32_t BackPointer_GetParentIndex( uint32_t bp )
{
    const uint32_t parentIndex = bp >> 6;
    return parentIndex;
}
// Extracts the child count stored in bits 5:3 of a backpointer dword.
GRL_INLINE uint32_t BackPointer_GetNumChildren( uint32_t bp )
{
    const uint32_t childCountField = bp & 0x38u; // isolate bits 5:3
    return childCountField >> 3;
}
// Extracts the refit counter stored in bits 2:0 of a backpointer dword.
GRL_INLINE uint32_t BackPointer_GetRefitCount( uint32_t bp )
{
    const uint32_t refitCountMask = 0x7u;
    return bp & refitCountMask;
}
// True when the backpointer belongs to the root node, i.e. when the parent
// index (bits 31:6) is all ones. The low six bits are ignored.
GRL_INLINE bool BackPointer_IsRoot( uint32_t bp )
{
    const uint32_t parentBits = 0xFFFFFFC0u;
    return (bp & parentBits) == parentBits;
}
203
BVHBase_GetRootNode(const BVHBase * p)204 GRL_INLINE InternalNode* BVHBase_GetRootNode( const BVHBase* p )
205 {
206 return (InternalNode*)( ((char*)p) + BVH_ROOT_NODE_OFFSET);
207 }
208
BVHBase_GetRootAABB(const BVHBase * p)209 GRL_INLINE AABB3f BVHBase_GetRootAABB(const BVHBase* p)
210 {
211 return p->Meta.bounds;
212 }
213
BVHBase_GetInternalNodes(const BVHBase * p)214 GRL_INLINE InternalNode* BVHBase_GetInternalNodes(const BVHBase* p)
215 {
216 return (InternalNode*)(((char*)p) + BVH_ROOT_NODE_OFFSET);
217 }
BVHBase_GetInternalNodesEnd(const BVHBase * p)218 GRL_INLINE InternalNode* BVHBase_GetInternalNodesEnd(const BVHBase* p)
219 {
220 return (InternalNode*)(((char*)p) + (size_t)(64u * p->nodeDataCur));
221 }
BVHBase_GetNumInternalNodes(const BVHBase * p)222 GRL_INLINE uint32_t BVHBase_GetNumInternalNodes(const BVHBase* p)
223 {
224 return p->nodeDataCur - BVH_ROOT_NODE_OFFSET / 64;
225 }
226
227
BVHBase_GetQuadLeaves(const BVHBase * p)228 GRL_INLINE QuadLeaf* BVHBase_GetQuadLeaves(const BVHBase* p)
229 {
230 return (QuadLeaf*)(((char*)p) + (size_t)(64u * p->quadLeafStart));
231 }
BVHBase_GetQuadLeaves_End(const BVHBase * p)232 GRL_INLINE const QuadLeaf* BVHBase_GetQuadLeaves_End(const BVHBase* p)
233 {
234 return (QuadLeaf*)(((char*)p) + (size_t)(64u * p->quadLeafCur));
235 }
236
BVHBase_GetProceduralLeaves_End(const BVHBase * p)237 GRL_INLINE const ProceduralLeaf* BVHBase_GetProceduralLeaves_End(const BVHBase* p)
238 {
239 return (ProceduralLeaf*)(((char*)p) + (size_t)(64u * p->proceduralDataCur));
240 }
241
BVHBase_GetProceduralLeaves(const BVHBase * p)242 GRL_INLINE ProceduralLeaf* BVHBase_GetProceduralLeaves(const BVHBase* p)
243 {
244 return (ProceduralLeaf*)(((char*)p) + (size_t)(64u * p->proceduralDataStart));
245 }
246
BVHBase_GetHWInstanceLeaves(const BVHBase * p)247 GRL_INLINE HwInstanceLeaf* BVHBase_GetHWInstanceLeaves(const BVHBase* p )
248 {
249 char* pRTASBits = (char*)p;
250 return (HwInstanceLeaf*)(pRTASBits + (size_t)(64u * p->instanceLeafStart));
251 }
252
BVHBase_GetHWInstanceLeaves_End(const BVHBase * p)253 GRL_INLINE HwInstanceLeaf* BVHBase_GetHWInstanceLeaves_End(const BVHBase* p )
254 {
255 char* pRTASBits = (char*) p;
256 return (HwInstanceLeaf*)(pRTASBits + (size_t)(64u * p->instanceLeafEnd));
257 }
258
BVHBase_GetNumHWInstanceLeaves(const BVHBase * p)259 GRL_INLINE uint BVHBase_GetNumHWInstanceLeaves( const BVHBase* p )
260 {
261 return (p->instanceLeafEnd - p->instanceLeafStart) / 2;
262 }
263
BVHBase_GetRefitStartPoints(const BVHBase * p)264 GRL_INLINE uint* BVHBase_GetRefitStartPoints(const BVHBase* p)
265 {
266 return (uint32_t*)(((char*)p) + (size_t)(64u * p->refitStartPointDataStart));
267 }
268
BVHBase_GetRefitStartPointsSize(const BVHBase * p)269 GRL_INLINE uint BVHBase_GetRefitStartPointsSize(const BVHBase* p)
270 {
271 return 64u * (p->fatLeafTableStart - p->refitStartPointDataStart);
272 }
273
// Extracts the depth stored in bits 5:0 of a StartPoint.
GRL_INLINE uint StartPoint_GetDepth(StartPoint s)
{
    const uint depthMask = 0x3Fu;
    return s & depthMask;
}
278
// Extracts the inner node index stored in bits 31:6 of a StartPoint.
GRL_INLINE uint StartPoint_GetNodeIdx(StartPoint s)
{
    const uint nodeIndex = s >> 6;
    return nodeIndex;
}
283
BVHBase_GetRefitTreeletDescs(const BVHBase * p)284 GRL_INLINE RefitTreelet* BVHBase_GetRefitTreeletDescs(const BVHBase* p)
285 {
286 return (RefitTreelet*)(((char*)p) + (size_t)(64u * p->refitTreeletsDataStart));
287 }
288
289 // this is treelet count as should be executed, ie. num of bottom treelets if there are top and bottoms.
290 // to get real number of all treelets including tip, the formula is
291 // actualNumTreelets = refitTreeletCnt > 1 ? refitTreeletCnt + 1 : 1;
BVHBase_GetRefitTreeletCntPtr(BVHBase * p)292 GRL_INLINE uint32_t* BVHBase_GetRefitTreeletCntPtr(BVHBase* p)
293 {
294 return &p->refitTreeletCnt;
295 }
296
BVHBase_GetRefitTreeletCnt(const BVHBase * p)297 GRL_INLINE uint32_t BVHBase_GetRefitTreeletCnt(const BVHBase* p)
298 {
299 return p->refitTreeletCnt;
300 }
301
BVHBase_IsSingleTreelet(const BVHBase * p)302 GRL_INLINE uint32_t BVHBase_IsSingleTreelet(const BVHBase* p)
303 {
304 return p->refitTreeletCnt == 1;
305 }
306
BVHBase_GetBackPointers(const BVHBase * p)307 GRL_INLINE BackPointers* BVHBase_GetBackPointers(const BVHBase* p)
308 {
309 return (BackPointers*)(((char*)p) + (size_t)(64u * p->backPointerDataStart));
310 }
311
312
BVHBase_GetFatLeafTable(const BVHBase * p)313 GRL_INLINE LeafTableEntry* BVHBase_GetFatLeafTable(const BVHBase* p)
314 {
315 return (LeafTableEntry*)(((char*)p) + (size_t)(64u * p->fatLeafTableStart));
316 }
BVHBase_GetInnerNodeTable(const BVHBase * p)317 GRL_INLINE InnerNodeTableEntry* BVHBase_GetInnerNodeTable(const BVHBase* p)
318 {
319 return (InnerNodeTableEntry*)(((char*)p) + (size_t)(64u * p->innerTableStart));
320 }
BVHBase_GetQuadDataIndicesTable(const BVHBase * p)321 GRL_INLINE QuadDataIndices* BVHBase_GetQuadDataIndicesTable(const BVHBase* p)
322 {
323 return (QuadDataIndices*)(((char*)p) + (size_t)(64u * p->quadIndicesDataStart));
324 }
325
InnerNode_GetBackPointer(BackPointers * backpointersStruct,uint32_t inodeOffset)326 GRL_INLINE unsigned* InnerNode_GetBackPointer(
327 BackPointers* backpointersStruct,
328 uint32_t inodeOffset /*in 64B units, from the earliest Inner node*/)
329 {
330 uint* backpointersArray = (uint*)backpointersStruct;
331 // BACKPOINTER_LAYOUT
332 uint new_index = inodeOffset; //<-layout canonical
333 //uint new_index = inodeOffset*16; //<-layout scattered
334 // uint new_index = (inodeOffset & (~0xFFFF)) | (((inodeOffset & 0xFF) << 8) | ((inodeOffset & 0xFF00) >> 8)); //<-layout hashed
335
336 return backpointersArray + new_index;
337 }
338
BVHBase_GetRefitStructsDataSize(const BVHBase * p)339 GRL_INLINE uint32_t BVHBase_GetRefitStructsDataSize(const BVHBase* p)
340 {
341 return 64u * (p->BVHDataEnd - p->backPointerDataStart);
342 }
343
BVHBase_GetBackpointersDataSize(const BVHBase * p)344 GRL_INLINE uint32_t BVHBase_GetBackpointersDataSize(const BVHBase* p)
345 {
346 return 64u * (p->refitTreeletsDataStart - p->backPointerDataStart);
347 }
348
BVHBase_GetBVHDataEnd(const BVHBase * p)349 GRL_INLINE uint32_t* BVHBase_GetBVHDataEnd( const BVHBase* p )
350 {
351 return (uint32_t*)(((char*)p) + (size_t)(64u * p->BVHDataEnd));
352 }
353
BVHBase_HasBackPointers(const BVHBase * p)354 GRL_INLINE bool BVHBase_HasBackPointers( const BVHBase* p )
355 {
356 return p->refitTreeletsDataStart > p->backPointerDataStart;
357 }
358
BVHBase_GetNumQuads(const BVHBase * p)359 GRL_INLINE const size_t BVHBase_GetNumQuads(const BVHBase* p)
360 {
361 return p->quadLeafCur - p->quadLeafStart;
362 }
363
BVHBase_GetNumProcedurals(const BVHBase * p)364 GRL_INLINE const size_t BVHBase_GetNumProcedurals(const BVHBase* p)
365 {
366 return p->proceduralDataCur - p->proceduralDataStart;
367 }
368
BVHBase_GetNumInstances(const BVHBase * p)369 GRL_INLINE const size_t BVHBase_GetNumInstances(const BVHBase* p)
370 {
371 return (p->instanceLeafEnd - p->instanceLeafStart) / 2;
372 }
373
BVHBase_totalBytes(const BVHBase * p)374 GRL_INLINE const size_t BVHBase_totalBytes(const BVHBase* p)
375 {
376 return p->BVHDataEnd * 64u;
377 }
378
379
380
// Instance leaf: two 64-byte halves. Bitfields are stored as packed dwords and
// decoded by the HwInstanceLeaf_Get*/Set* helpers; the commented-out
// declarations show the intended bit layout of each dword.
struct HwInstanceLeaf
{
    /* first 64 bytes accessed during traversal */
    struct Part0
    {
        //uint32_t shaderIndex : 24;
        //uint32_t geomMask : 8;
        uint32_t DW0;

        // uint32_t instanceContributionToHitGroupIndex : 24;
        // uint32_t pad0 : 8
        //
        // NOTE: Traversal shaders are implemented by aliasing instance leaves as procedural and sending them through the procedural path
        //  For a procedural instance, bit 29 should be set to 1, to disable "opaque culling"
        //   and bits 30 and 31 must be zero.  See also the definition of the 'PrimLeafDesc' structure
        uint32_t DW1;

        // uint64_t rootNodePtr : 48;
        // uint64_t instFlags : 8;
        // uint64_t pad1 : 8;
        uint64_t DW2_DW3;

        // Vec3f world2obj_vx; // 1st row of World2Obj transform
        float world2obj_vx_x;
        float world2obj_vx_y;
        float world2obj_vx_z;

        // Vec3f world2obj_vy; // 2nd row of World2Obj transform
        float world2obj_vy_x;
        float world2obj_vy_y;
        float world2obj_vy_z;

        // Vec3f world2obj_vz; // 3rd row of World2Obj transform
        float world2obj_vz_x;
        float world2obj_vz_y;
        float world2obj_vz_z;

        // Vec3f obj2world_p; // translation of Obj2World transform (on purpose in first 64 bytes)
        float obj2world_p_x;
        float obj2world_p_y;
        float obj2world_p_z;
    } part0;

    /* second 64 bytes accessed during shading */
    // NOTE: Everything in this block is under SW control
    struct Part1
    {
        // uint64_t bvhPtr : 48;
        // uint64_t pad : 16;
        uint64_t DW0_DW1;

        uint32_t instanceID;
        uint32_t instanceIndex;

        // Vec3f obj2world_vx; // vx vector of Obj2World transform
        // (HwInstanceLeaf_GetTransform emits vx/vy/vz as the columns of its row-major 3x4 output)
        float obj2world_vx_x;
        float obj2world_vx_y;
        float obj2world_vx_z;

        // Vec3f obj2world_vy; // vy vector of Obj2World transform
        float obj2world_vy_x;
        float obj2world_vy_y;
        float obj2world_vy_z;

        // Vec3f obj2world_vz; // vz vector of Obj2World transform
        float obj2world_vz_x;
        float obj2world_vz_y;
        float obj2world_vz_z;

        // Vec3f world2obj_p; // translation of World2Obj transform (stored in the second 64 bytes)
        float world2obj_p_x;
        float world2obj_p_y;
        float world2obj_p_z;
    } part1;
};
456
// 64-bit constant one, used below to build 48-bit masks as (c_one << 48) - 1
// so the shift is performed in 64 bits.
__constant const uint64_t c_one = 1ul;
458
HwInstanceLeaf_GetInstanceMask(const HwInstanceLeaf * p)459 GRL_INLINE uint32_t HwInstanceLeaf_GetInstanceMask( const HwInstanceLeaf* p )
460 {
461 return p->part0.DW0 >> 24;
462 }
463
HwInstanceLeaf_GetInstanceContributionToHitGroupIndex(const HwInstanceLeaf * p)464 GRL_INLINE uint32_t HwInstanceLeaf_GetInstanceContributionToHitGroupIndex( const HwInstanceLeaf* p )
465 {
466 return p->part0.DW1 & 0x00ffffff;
467 }
468
HwInstanceLeaf_GetInstanceFlags(const HwInstanceLeaf * p)469 GRL_INLINE uint32_t HwInstanceLeaf_GetInstanceFlags( const HwInstanceLeaf* p )
470 {
471 return (p->part0.DW2_DW3 >> 48) & 0xff;
472 }
HwInstanceLeaf_GetInstanceID(const HwInstanceLeaf * p)473 GRL_INLINE uint32_t HwInstanceLeaf_GetInstanceID( const HwInstanceLeaf* p )
474 {
475 return p->part1.instanceID;
476 }
477
HwInstanceLeaf_GetBVH(const HwInstanceLeaf * p)478 GRL_INLINE gpuva_t HwInstanceLeaf_GetBVH( const HwInstanceLeaf* p ) { return p->part1.DW0_DW1 & ((c_one << 48) - 1); }
HwInstanceLeaf_GetStartNode(const HwInstanceLeaf * p)479 GRL_INLINE gpuva_t HwInstanceLeaf_GetStartNode( const HwInstanceLeaf* p ) { return p->part0.DW2_DW3 & ((c_one << 48) - 1); }
HwInstanceLeaf_GetInstanceIndex(const HwInstanceLeaf * p)480 GRL_INLINE uint32_t HwInstanceLeaf_GetInstanceIndex( const HwInstanceLeaf* p ) { return p->part1.instanceIndex; }
481
HwInstanceLeaf_GetTransform(struct HwInstanceLeaf * p,float * transform)482 GRL_INLINE void HwInstanceLeaf_GetTransform(struct HwInstanceLeaf* p, float* transform)
483 {
484 transform[0] = p->part1.obj2world_vx_x;
485 transform[1] = p->part1.obj2world_vy_x;
486 transform[2] = p->part1.obj2world_vz_x;
487 transform[3] = p->part0.obj2world_p_x;
488 transform[4] = p->part1.obj2world_vx_y;
489 transform[5] = p->part1.obj2world_vy_y;
490 transform[6] = p->part1.obj2world_vz_y;
491 transform[7] = p->part0.obj2world_p_y;
492 transform[8] = p->part1.obj2world_vx_z;
493 transform[9] = p->part1.obj2world_vy_z;
494 transform[10] = p->part1.obj2world_vz_z;
495 transform[11] = p->part0.obj2world_p_z;
496 }
497
HwInstanceLeaf_SetBVH(HwInstanceLeaf * p,gpuva_t b)498 GRL_INLINE void HwInstanceLeaf_SetBVH( HwInstanceLeaf* p, gpuva_t b ) {
499 uint64_t mask = ((c_one << 48) - 1);
500 uint64_t v = p->part1.DW0_DW1;
501 v = (b & mask) | (v & ~mask);
502 p->part1.DW0_DW1 = v;
503 }
HwInstanceLeaf_SetStartNode(HwInstanceLeaf * p,gpuva_t b)504 GRL_INLINE void HwInstanceLeaf_SetStartNode( HwInstanceLeaf* p, gpuva_t b ) {
505 uint64_t mask = ((c_one << 48) - 1);
506 uint64_t v = p->part0.DW2_DW3;
507 v = (b & mask) | (v & ~mask);
508 p->part0.DW2_DW3 = v;
509 }
HwInstanceLeaf_SetStartNodeAndInstanceFlags(HwInstanceLeaf * p,gpuva_t root,uint8_t flags)510 GRL_INLINE void HwInstanceLeaf_SetStartNodeAndInstanceFlags( HwInstanceLeaf* p,
511 gpuva_t root,
512 uint8_t flags ) {
513 uint64_t mask = ((1ull << 48) - 1);
514 uint64_t v = (root & mask) | ((uint64_t)(flags)<<48);
515 p->part1.DW0_DW1 = v;
516 }
517
// Internal BVH node with up to six children whose bounds are stored quantized
// to 8 bits per axis relative to a per-node grid (origin 'lower', per-axis
// exponents exp_x/y/z). InternalNode_GetChildAABB reconstructs a child's
// bounds from these fields. The declared members add up to 64 bytes.
struct InternalNode
{
    float lower[3]; // world space origin of quantization grid
    int32_t childOffset; // offset to all children in 64B multiples (signed, relative to this node)

    uint8_t nodeType; // the type of the node
    uint8_t pad; // unused byte

    int8_t exp_x; // 2^exp_x is the size of the grid in x dimension
    int8_t exp_y; // 2^exp_y is the size of the grid in y dimension
    int8_t exp_z; // 2^exp_z is the size of the grid in z dimension
    uint8_t nodeMask; // mask used for ray filtering

    // Per-child packed byte; the commented-out bitfields show the layout that
    // the InternalNode_GetChild*/SetChildType helpers decode.
    struct ChildData
    {
        //uint8_t blockIncr : 2; // size of child in 64 byte blocks. Must be ==2 for instance leaves, <=2 for quad leaves.
        //uint8_t startPrim : 4; // start primitive in fat leaf mode or child type in mixed mode
        //uint8_t pad : 2; // unused bits
        uint8_t bits;
    } childData[6];

    // An invalid child is marked by lower_x MSB = 1 and upper_x MSB = 0
    // (see InternalNode_IsChildValid).
    uint8_t lower_x[6]; // the quantized lower bounds in x-dimension
    uint8_t upper_x[6]; // the quantized upper bounds in x-dimension
    uint8_t lower_y[6]; // the quantized lower bounds in y-dimension
    uint8_t upper_y[6]; // the quantized upper bounds in y-dimension
    uint8_t lower_z[6]; // the quantized lower bounds in z-dimension
    uint8_t upper_z[6]; // the quantized upper bounds in z-dimension
};
546
InternalNode_GetChildBlockIncr(const InternalNode * p,uint idx)547 GRL_INLINE uint InternalNode_GetChildBlockIncr( const InternalNode* p, uint idx )
548 {
549 return p->childData[idx].bits & 3;
550 }
InternalNode_GetChildStartPrim(const InternalNode * p,uint idx)551 GRL_INLINE uint InternalNode_GetChildStartPrim( const InternalNode* p, uint idx )
552 {
553 return (p->childData[idx].bits>>2) & 0xf;
554 }
555
InternalNode_GetChildType(const InternalNode * p,uint idx)556 GRL_INLINE uint8_t InternalNode_GetChildType( const InternalNode* p, uint idx )
557 {
558 return (p->childData[idx].bits >> 2) & 0xF;
559 }
560
InternalNode_SetChildType(InternalNode * p,uint idx,uint type)561 GRL_INLINE void InternalNode_SetChildType( InternalNode* p, uint idx, uint type )
562 {
563 uint bits = p->childData[idx].bits;
564 const uint mask = (0xF << 2);
565 bits = ((type << 2) & mask) | (bits & ~mask);
566 p->childData[idx].bits = (uint8_t)bits;
567 }
568
InternalNode_IsChildValid(const InternalNode * p,size_t child)569 GRL_INLINE bool InternalNode_IsChildValid( const InternalNode* p, size_t child )
570 {
571 bool lower = p->lower_x[child] & 0x80; // invalid nodes are indicated by setting lower_msb = 1 and upper_msb=0
572 bool upper = p->upper_x[child] & 0x80;
573 return !lower || upper;
574 }
575
InternalNode_GetChildAABB(const InternalNode * node,size_t i)576 GRL_INLINE AABB3f InternalNode_GetChildAABB(const InternalNode* node, size_t i)
577 {
578 float4 lower, upper;
579 const float4 base = { node->lower[0], node->lower[1], node->lower[2], 0.0f };
580 const int4 lower_i = { node->lower_x[i], node->lower_y[i], node->lower_z[i], 0 };
581 const int4 upper_i = { node->upper_x[i], node->upper_y[i], node->upper_z[i], 0 };
582 const int4 exp_i = { node->exp_x, node->exp_y, node->exp_z, 0 };
583 lower = base + bitShiftLdexp4(convert_float4_rtn(lower_i), exp_i - 8);
584 upper = base + bitShiftLdexp4(convert_float4_rtp(upper_i), exp_i - 8);
585 AABB3f aabb3f = {
586 { lower.x, lower.y, lower.z },
587 { upper.x, upper.y, upper.z } };
588 return aabb3f;
589 }
590
InternalNode_GetChildren(InternalNode * node)591 GRL_INLINE void* InternalNode_GetChildren( InternalNode* node)
592 {
593 return (void*)(((char*)node) + node->childOffset * 64);
594 }
595
// Two-dword descriptor at the start of quad and procedural leaves. The
// bitfields are packed into plain dwords and decoded by the PrimLeaf_Get*
// helpers; the commented-out declarations show the bit layout.
typedef struct PrimLeafDesc
{
    //uint32_t shaderIndex : 24; // shader index used for shader record calculations
    //uint32_t geomMask : 8; // geometry mask used for ray masking
    uint32_t shaderIndex_geomMask;

    //uint32_t geomIndex : 29; // the geometry index specifies the n'th geometry of the scene
    //PrimLeafType type : 1; // see above
    //GeometryFlags geomFlags : 2; // geometry flags of this geometry
    uint32_t geomIndex_flags;
} PrimLeafDesc;
607
PrimLeaf_GetShaderIndex(const PrimLeafDesc * p)608 GRL_INLINE uint32_t PrimLeaf_GetShaderIndex( const PrimLeafDesc* p )
609 {
610 return p->shaderIndex_geomMask & ((1 << 24) - 1);
611 }
PrimLeaf_GetGeoIndex(const PrimLeafDesc * p)612 GRL_INLINE uint32_t PrimLeaf_GetGeoIndex( const PrimLeafDesc* p )
613 {
614 return p->geomIndex_flags & ((1<<29)-1);
615 }
PrimLeaf_GetGeomFlags(const PrimLeafDesc * p)616 GRL_INLINE uint32_t PrimLeaf_GetGeomFlags( const PrimLeafDesc* p )
617 {
618 return (p->geomIndex_flags >> 30);
619 }
PrimLeaf_GetType(const PrimLeafDesc * p)620 GRL_INLINE uint32_t PrimLeaf_GetType(const PrimLeafDesc* p)
621 {
622 return (p->geomIndex_flags >> 29) & 1;
623 }
624
// Quad leaf: a PrimLeafDesc, two packed dwords and four vertices. The declared
// members add up to 64 bytes. The commented-out bitfields show the layout of
// DW1, which is decoded by the QuadLeaf_Get* helpers.
struct QuadLeaf
{
    PrimLeafDesc leafDesc;

    uint32_t primIndex0; // primitive index of the first triangle

    //uint32_t primIndex1Delta : 16; // second triangle's index minus primIndex0; 0 means a single triangle
    //uint32_t j0 : 2;
    //uint32_t j1 : 2;
    //uint32_t j2 : 2;
    //uint32_t last : 1; // last quad in list
    //uint32_t pad : 9;
    uint32_t DW1;

    float v[4][3]; // four vertices, x/y/z each
};
641
QuadLeaf_GetPrimIndexDelta(const QuadLeaf * p)642 GRL_INLINE uint32_t QuadLeaf_GetPrimIndexDelta( const QuadLeaf* p )
643 {
644 return p->DW1 & 0x0000ffff;
645 }
QuadLeaf_GetPrimIndex0(const QuadLeaf * p)646 GRL_INLINE uint32_t QuadLeaf_GetPrimIndex0( const QuadLeaf* p )
647 {
648 return p->primIndex0;
649 }
QuadLeaf_GetPrimIndex1(const QuadLeaf * p)650 GRL_INLINE uint32_t QuadLeaf_GetPrimIndex1( const QuadLeaf* p )
651 {
652 return p->primIndex0 + QuadLeaf_GetPrimIndexDelta(p);
653 }
QuadLeaf_IsSingleTriangle(const QuadLeaf * p)654 GRL_INLINE bool QuadLeaf_IsSingleTriangle( const QuadLeaf* p )
655 {
656 return QuadLeaf_GetPrimIndexDelta(p) == 0;
657 }
QuadLeaf_GetSecondTriangleIndices(const QuadLeaf * p)658 GRL_INLINE uint32_t QuadLeaf_GetSecondTriangleIndices( const QuadLeaf* p )
659 {
660 return (p->DW1>>16) & 0x3f;
661 }
662
QuadLeaf_SetVertices(QuadLeaf * quad,float3 v0,float3 v1,float3 v2,float3 v3)663 GRL_INLINE void QuadLeaf_SetVertices( QuadLeaf* quad, float3 v0, float3 v1, float3 v2, float3 v3 )
664 {
665 quad->v[0][0] = v0.x;
666 quad->v[0][1] = v0.y;
667 quad->v[0][2] = v0.z;
668 quad->v[1][0] = v1.x;
669 quad->v[1][1] = v1.y;
670 quad->v[1][2] = v1.z;
671 quad->v[2][0] = v2.x;
672 quad->v[2][1] = v2.y;
673 quad->v[2][2] = v2.z;
674 quad->v[3][0] = v3.x;
675 quad->v[3][1] = v3.y;
676 quad->v[3][2] = v3.z;
677 }
678
679
// Procedural leaf: a PrimLeafDesc, one SW-defined dword and 13 primitive
// indices. The declared members add up to 64 bytes.
struct ProceduralLeaf {
    PrimLeafDesc leafDesc;

    // Number of primitives + "last" bits.
    // The meaning of this section is SW-defined and flexible
    uint32_t DW1 ;
    uint32_t _primIndex[13];
} ;
688
689 GRL_NAMESPACE_END(Gen12)
690 GRL_NAMESPACE_END(RTAS)
691 GRL_NAMESPACE_END(GRL)
692