xref: /aosp_15_r20/external/mesa3d/src/intel/vulkan/grl/include/GRLGen12.h (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 //
2 // Copyright (C) 2009-2021 Intel Corporation
3 //
4 // SPDX-License-Identifier: MIT
5 //
6 //
7 
8 //
9 // This file is to contain structure definitions related to the Gen12 QBVH6 acceleration structures
10 //
11 //
12 
13 //********************************************************************************************
14 //   WARNING!!!!!
15 // This file is shared by OpenCL and C++ source code and must be compatible.
16 //  There should only be C structure definitions and trivial GRL_INLINE functions here
17 //
18 //********************************************************************************************
19 
20 #pragma once
21 
22 #include "GRLRTASCommon.h"
23 #include "GRLUtilities.h"
24 
25 GRL_NAMESPACE_BEGIN(GRL)
GRL_NAMESPACE_BEGIN(RTAS)26 GRL_NAMESPACE_BEGIN(RTAS)
27 GRL_NAMESPACE_BEGIN(GEN12)
28 
29     enum_uint8(NodeType)
30     {
31         NODE_TYPE_MIXED = 0x0,        // identifies a mixed internal node where each child can have a different type
32         NODE_TYPE_INTERNAL = 0x0,     // internal BVH node with 6 children
33         NODE_TYPE_INSTANCE = 0x1,     // instance leaf
34         NODE_TYPE_PROCEDURAL = 0x3,   // procedural leaf
35         NODE_TYPE_QUAD = 0x4,         // quad leaf
36         NODE_TYPE_INVALID = 0x7       // indicates invalid node
37     };
38 
39 
// One-bit leaf type. Several names map to the same value on purpose.
typedef enum PrimLeafType
{
    TYPE_NONE = 0,
    TYPE_QUAD = 0,

    // For leaves of node type NODE_TYPE_PROCEDURAL the bit instead selects
    // whether opaque/non_opaque culling is enabled or disabled.
    TYPE_OPACITY_CULLING_ENABLED  = 0,
    TYPE_OPACITY_CULLING_DISABLED = 1
} PrimLeafType;
52 
// Magic string identifying the BVH format. Increment the digits at the end whenever
// a serialization-breaking or algorithm-breaking change is made.
#define BVH_MAGIC_MACRO "GEN12_RTAS_005"
// The same magic as a fixed 16-byte array; bytes past the string are zero.
static const char BVH_MAGIC[16] = BVH_MAGIC_MACRO;
55 
56     typedef struct BVHBase
57     {
58         // TODO:  Implement the "copy-first-node" trick... duplicate root node here
59 
60         uint64_t rootNodeOffset;
61 
62         uint32_t reserved;
63 
64         uint32_t nodeDataCur; // nodeDataStart is sizeof(BVHBase) / 64 = BVH_ROOT_NODE_OFFSET / 64
65         uint32_t quadLeafStart;
66         uint32_t quadLeafCur;
67         uint32_t proceduralDataStart;
68         uint32_t proceduralDataCur;
69         uint32_t instanceLeafStart;
70         uint32_t instanceLeafEnd;
71         uint32_t backPointerDataStart;     //
72         uint32_t refitTreeletsDataStart;   // refit structs
73         uint32_t refitStartPointDataStart; //
74         uint32_t BVHDataEnd;
75 
76         // number of bottom treelets
77         // if 1, then the bottom treelet is also tip treelet
78         uint32_t refitTreeletCnt;
79         uint32_t refitTreeletCnt2; // always 0, used for atomic updates
80         // data layout:
81         // @backPointerDataStart
82         //  'backpointer' - a dword per inner node.
83         //  The bits are used as follows:
84         //     2:0  --> Used as a refit counter during BVH refitting.  MBZ
85         //     5:3  --> Number of children
86         //     31:6 --> Index of the parent node in the internal node array
87         //    The root node has a parent index of all ones
88         // @refitTreeletsDataStart
89         //  RefitTreelet[], the last treelet is for top treelet all previous are for bottom
90         // @refitStartPointDataStart
91         //  for each treelet T there is [T.startpoint_offset, T.numStartpoints) interval of startpoints here in that space
92         // @backPointerDataEnd
93 
94         uint32_t fatLeafCount;  // number of internal nodes which are "fat-leaves"
95         uint32_t innerCount;    // number of internal nodes which are true inner nodes (all internalNode children)
96         uint32_t fatLeafTableStart;
97         uint32_t innerTableStart;
98 
99         uint32_t quadLeftoversCountNewAtomicUpdate; // number of quad leftovers for new atomic update
100         uint32_t quadTableSizeNewAtomicUpdate; // size of quad Table including leftovers, padded to 256
101         uint32_t quadIndicesDataStart;
102 
103         uint32_t _pad[9];
104 
105         struct RTASMetaData Meta;
106 
107     } BVHBase;
108 
BVHBase_GetGeoMetaData(BVHBase * base)109     GRL_INLINE struct GeoMetaData* BVHBase_GetGeoMetaData(BVHBase* base)
110     {
111         return (struct GeoMetaData*)(((char*)base) + base->Meta.geoDescsStart);
112     }
113 
114 #ifdef __OPENCL_VERSION__
115 #define BVH_ROOT_NODE_OFFSET sizeof(BVHBase)
116 #else
117 #define BVH_ROOT_NODE_OFFSET sizeof(GRL::RTAS::GEN12::BVHBase)
118 #endif
119 
120 GRL_STATIC_ASSERT( sizeof(BVHBase) == BVH_ROOT_NODE_OFFSET, "Wrong size!");
121 GRL_STATIC_ASSERT( (sizeof(BVHBase) % 64) == 0 , "Misaligned size!");
122 
// Opaque tag type for the backpointer region; it has no members and is only used
// through pointers (see InnerNode_GetBackPointer).
typedef struct BackPointers
{
} BackPointers;

// Size threshold for bottom treelets: no bottom treelet has more paths than this
// number. Note that treelets will usually be 2-3x smaller.
#define TREELET_NUM_STARTPOINTS 1536

// Below this threshold only a single treelet is created.
#define SINGLE_TREELET_THRESHOLD 3072
132 
133     typedef struct LeafTableEntry {
134 
135         uint backpointer;
136         uint inner_node_index;
137         uint leaf_index;
138     } LeafTableEntry;
139 
140     typedef struct InnerNodeTableEntry {
141 
142         uint node_index_and_numchildren; // numchildren in 3 lsbs
143         uint first_child;
144 
145     } InnerNodeTableEntry;
146 
147     typedef struct QuadDataIndices
148     {
149         uint header_data[4];
150         uint vert_idx[4];
151     } QuadDataIndices;
152 
// Descriptor of one refit treelet (16 bytes).
typedef struct RefitTreelet
{
    uint32_t startpoint_offset;
    uint32_t numStartpoints;
    uint32_t numNonTrivialStartpoints;
    uint8_t  maxDepth;
    // Depths, measured from the bottom, at which fewer than N paths remain.
    uint8_t  depthLess64;   // N = 64
    uint8_t  depthLess128;  // N = 128
    uint8_t  depthLess256;  // N = 256
} RefitTreelet;
162 
// Alternative view of a RefitTreelet: when a treelet's number of startpoints is 1,
// its descriptor should be reinterpreted as this structure.
typedef struct RefitTreeletTrivial
{
    uint32_t theOnlyNodeIndex;
    uint32_t numStartpoints;          // has to be 1 or 0
    int32_t  childrenOffsetOfTheNode; // 0th node based
    uint8_t  maxDepth;
    uint8_t  numChildrenOfTheNode;
} RefitTreeletTrivial;
172 
// Packed refit start point:
//   bits 5:0  - depth after you die
//   bits 31:6 - index of the inner node
typedef uint32_t StartPoint;
176 
// Forward declarations of the node/leaf structures defined further down, each
// paired with a typedef so the bare name can be used without the 'struct' keyword.
struct HwInstanceLeaf;
typedef struct HwInstanceLeaf HwInstanceLeaf;

struct InternalNode;
typedef struct InternalNode InternalNode;

struct QuadLeaf;
typedef struct QuadLeaf QuadLeaf;

struct ProceduralLeaf;
typedef struct ProceduralLeaf ProceduralLeaf;
186 
BackPointer_GetParentIndex(uint32_t bp)187     GRL_INLINE uint32_t BackPointer_GetParentIndex( uint32_t bp )
188     {
189         return bp >> 6;
190     }
BackPointer_GetNumChildren(uint32_t bp)191     GRL_INLINE uint32_t BackPointer_GetNumChildren( uint32_t bp )
192     {
193         return (bp >> 3) & (7);
194     }
BackPointer_GetRefitCount(uint32_t bp)195     GRL_INLINE uint32_t BackPointer_GetRefitCount( uint32_t bp )
196     {
197         return bp & 7;
198     }
BackPointer_IsRoot(uint32_t bp)199     GRL_INLINE bool BackPointer_IsRoot( uint32_t bp )
200     {
201         return (bp >> 6) == 0x03FFFFFF;
202     }
203 
BVHBase_GetRootNode(const BVHBase * p)204     GRL_INLINE InternalNode* BVHBase_GetRootNode( const BVHBase* p )
205     {
206         return (InternalNode*)( ((char*)p) + BVH_ROOT_NODE_OFFSET);
207     }
208 
BVHBase_GetRootAABB(const BVHBase * p)209     GRL_INLINE AABB3f BVHBase_GetRootAABB(const BVHBase* p)
210     {
211         return p->Meta.bounds;
212     }
213 
BVHBase_GetInternalNodes(const BVHBase * p)214     GRL_INLINE InternalNode* BVHBase_GetInternalNodes(const BVHBase* p)
215     {
216         return (InternalNode*)(((char*)p) + BVH_ROOT_NODE_OFFSET);
217     }
BVHBase_GetInternalNodesEnd(const BVHBase * p)218     GRL_INLINE InternalNode* BVHBase_GetInternalNodesEnd(const BVHBase* p)
219     {
220         return (InternalNode*)(((char*)p) + (size_t)(64u * p->nodeDataCur));
221     }
BVHBase_GetNumInternalNodes(const BVHBase * p)222     GRL_INLINE uint32_t BVHBase_GetNumInternalNodes(const BVHBase* p)
223     {
224         return p->nodeDataCur - BVH_ROOT_NODE_OFFSET / 64;
225     }
226 
227 
BVHBase_GetQuadLeaves(const BVHBase * p)228     GRL_INLINE QuadLeaf* BVHBase_GetQuadLeaves(const BVHBase* p)
229     {
230         return (QuadLeaf*)(((char*)p) + (size_t)(64u * p->quadLeafStart));
231     }
BVHBase_GetQuadLeaves_End(const BVHBase * p)232     GRL_INLINE const QuadLeaf* BVHBase_GetQuadLeaves_End(const BVHBase* p)
233     {
234         return (QuadLeaf*)(((char*)p) + (size_t)(64u * p->quadLeafCur));
235     }
236 
BVHBase_GetProceduralLeaves_End(const BVHBase * p)237     GRL_INLINE const ProceduralLeaf* BVHBase_GetProceduralLeaves_End(const BVHBase* p)
238     {
239         return (ProceduralLeaf*)(((char*)p) + (size_t)(64u * p->proceduralDataCur));
240     }
241 
BVHBase_GetProceduralLeaves(const BVHBase * p)242     GRL_INLINE ProceduralLeaf* BVHBase_GetProceduralLeaves(const BVHBase* p)
243     {
244         return (ProceduralLeaf*)(((char*)p) + (size_t)(64u * p->proceduralDataStart));
245     }
246 
BVHBase_GetHWInstanceLeaves(const BVHBase * p)247     GRL_INLINE HwInstanceLeaf* BVHBase_GetHWInstanceLeaves(const BVHBase* p )
248     {
249         char* pRTASBits = (char*)p;
250         return (HwInstanceLeaf*)(pRTASBits + (size_t)(64u * p->instanceLeafStart));
251     }
252 
BVHBase_GetHWInstanceLeaves_End(const BVHBase * p)253     GRL_INLINE HwInstanceLeaf* BVHBase_GetHWInstanceLeaves_End(const BVHBase* p )
254     {
255         char* pRTASBits = (char*) p;
256         return (HwInstanceLeaf*)(pRTASBits + (size_t)(64u * p->instanceLeafEnd));
257     }
258 
BVHBase_GetNumHWInstanceLeaves(const BVHBase * p)259     GRL_INLINE uint BVHBase_GetNumHWInstanceLeaves( const BVHBase* p )
260     {
261         return (p->instanceLeafEnd - p->instanceLeafStart) / 2;
262     }
263 
BVHBase_GetRefitStartPoints(const BVHBase * p)264     GRL_INLINE uint* BVHBase_GetRefitStartPoints(const BVHBase* p)
265     {
266         return (uint32_t*)(((char*)p) + (size_t)(64u * p->refitStartPointDataStart));
267     }
268 
BVHBase_GetRefitStartPointsSize(const BVHBase * p)269     GRL_INLINE uint BVHBase_GetRefitStartPointsSize(const BVHBase* p)
270     {
271         return 64u * (p->fatLeafTableStart - p->refitStartPointDataStart);
272     }
273 
StartPoint_GetDepth(StartPoint s)274     GRL_INLINE uint StartPoint_GetDepth(StartPoint s)
275     {
276         return s & ((1 << 6) - 1);
277     }
278 
StartPoint_GetNodeIdx(StartPoint s)279     GRL_INLINE uint StartPoint_GetNodeIdx(StartPoint s)
280     {
281         return s >> 6;
282     }
283 
BVHBase_GetRefitTreeletDescs(const BVHBase * p)284     GRL_INLINE RefitTreelet* BVHBase_GetRefitTreeletDescs(const BVHBase* p)
285     {
286         return (RefitTreelet*)(((char*)p) + (size_t)(64u * p->refitTreeletsDataStart));
287     }
288 
289     // this is treelet count as should be executed, ie. num of bottom treelets if there are top and bottoms.
290     // to get real number of all treelets including tip, the formula is
291     //    actualNumTreelets = refitTreeletCnt > 1 ? refitTreeletCnt + 1 : 1;
BVHBase_GetRefitTreeletCntPtr(BVHBase * p)292     GRL_INLINE uint32_t* BVHBase_GetRefitTreeletCntPtr(BVHBase* p)
293     {
294         return &p->refitTreeletCnt;
295     }
296 
BVHBase_GetRefitTreeletCnt(const BVHBase * p)297     GRL_INLINE uint32_t BVHBase_GetRefitTreeletCnt(const BVHBase* p)
298     {
299         return p->refitTreeletCnt;
300     }
301 
BVHBase_IsSingleTreelet(const BVHBase * p)302     GRL_INLINE uint32_t BVHBase_IsSingleTreelet(const BVHBase* p)
303     {
304         return p->refitTreeletCnt == 1;
305     }
306 
BVHBase_GetBackPointers(const BVHBase * p)307     GRL_INLINE BackPointers* BVHBase_GetBackPointers(const BVHBase* p)
308     {
309         return (BackPointers*)(((char*)p) + (size_t)(64u * p->backPointerDataStart));
310     }
311 
312 
BVHBase_GetFatLeafTable(const BVHBase * p)313     GRL_INLINE LeafTableEntry* BVHBase_GetFatLeafTable(const BVHBase* p)
314     {
315         return (LeafTableEntry*)(((char*)p) + (size_t)(64u * p->fatLeafTableStart));
316     }
BVHBase_GetInnerNodeTable(const BVHBase * p)317     GRL_INLINE InnerNodeTableEntry* BVHBase_GetInnerNodeTable(const BVHBase* p)
318     {
319         return (InnerNodeTableEntry*)(((char*)p) + (size_t)(64u * p->innerTableStart));
320     }
BVHBase_GetQuadDataIndicesTable(const BVHBase * p)321     GRL_INLINE QuadDataIndices* BVHBase_GetQuadDataIndicesTable(const BVHBase* p)
322     {
323         return (QuadDataIndices*)(((char*)p) + (size_t)(64u * p->quadIndicesDataStart));
324     }
325 
InnerNode_GetBackPointer(BackPointers * backpointersStruct,uint32_t inodeOffset)326     GRL_INLINE unsigned* InnerNode_GetBackPointer(
327         BackPointers* backpointersStruct,
328         uint32_t inodeOffset /*in 64B units, from the earliest Inner node*/)
329     {
330         uint* backpointersArray = (uint*)backpointersStruct;
331         // BACKPOINTER_LAYOUT
332         uint new_index = inodeOffset;                                                                              //<-layout canonical
333         //uint new_index = inodeOffset*16;                                                                           //<-layout scattered
334         // uint new_index = (inodeOffset & (~0xFFFF)) | (((inodeOffset & 0xFF) << 8) | ((inodeOffset & 0xFF00) >> 8));     //<-layout hashed
335 
336         return backpointersArray + new_index;
337     }
338 
BVHBase_GetRefitStructsDataSize(const BVHBase * p)339     GRL_INLINE uint32_t BVHBase_GetRefitStructsDataSize(const BVHBase* p)
340     {
341         return 64u * (p->BVHDataEnd - p->backPointerDataStart);
342     }
343 
BVHBase_GetBackpointersDataSize(const BVHBase * p)344     GRL_INLINE uint32_t BVHBase_GetBackpointersDataSize(const BVHBase* p)
345     {
346         return 64u * (p->refitTreeletsDataStart - p->backPointerDataStart);
347     }
348 
BVHBase_GetBVHDataEnd(const BVHBase * p)349     GRL_INLINE uint32_t* BVHBase_GetBVHDataEnd( const BVHBase* p )
350     {
351         return (uint32_t*)(((char*)p) + (size_t)(64u * p->BVHDataEnd));
352     }
353 
BVHBase_HasBackPointers(const BVHBase * p)354     GRL_INLINE bool BVHBase_HasBackPointers( const BVHBase* p )
355     {
356         return p->refitTreeletsDataStart > p->backPointerDataStart;
357     }
358 
BVHBase_GetNumQuads(const BVHBase * p)359     GRL_INLINE const size_t BVHBase_GetNumQuads(const BVHBase* p)
360     {
361         return p->quadLeafCur - p->quadLeafStart;
362     }
363 
BVHBase_GetNumProcedurals(const BVHBase * p)364     GRL_INLINE const size_t BVHBase_GetNumProcedurals(const BVHBase* p)
365     {
366         return p->proceduralDataCur - p->proceduralDataStart;
367     }
368 
BVHBase_GetNumInstances(const BVHBase * p)369     GRL_INLINE const size_t BVHBase_GetNumInstances(const BVHBase* p)
370     {
371         return (p->instanceLeafEnd - p->instanceLeafStart) / 2;
372     }
373 
BVHBase_totalBytes(const BVHBase * p)374     GRL_INLINE const size_t BVHBase_totalBytes(const BVHBase* p)
375     {
376         return p->BVHDataEnd * 64u;
377     }
378 
379 
380 
// Instance leaf: two consecutive 64-byte parts (128 bytes in total).
struct HwInstanceLeaf
{
    /* first 64 bytes accessed during traversal */
    struct Part0
    {
        //uint32_t shaderIndex : 24;
        //uint32_t geomMask : 8;
        uint32_t DW0;

        // uint32_t instanceContributionToHitGroupIndex : 24;
        // uint32_t pad0 : 8
        //
        // NOTE:  Traversal shaders are implemented by aliasing instance leaves as procedural and sending them through the procedural path
        //    For a procedural instance, bit 29 should be set to 1, to disable "opaque culling"
        //      and bits 30 and 31 must be zero.  See also the definition of the 'PrimLeafDesc' structure
        uint32_t DW1;

        //      uint64_t rootNodePtr : 48;
        //      uint64_t instFlags : 8;
        //      uint64_t pad1 : 8;
        uint64_t DW2_DW3;

        // Vec3f world2obj_vx;   // 1st row of World2Obj transform
        float    world2obj_vx_x;
        float    world2obj_vx_y;
        float    world2obj_vx_z;

        // Vec3f world2obj_vy;   // 2nd row of World2Obj transform
        float    world2obj_vy_x;
        float    world2obj_vy_y;
        float    world2obj_vy_z;

        // Vec3f world2obj_vz;   // 3rd row of World2Obj transform
        float    world2obj_vz_x;
        float    world2obj_vz_y;
        float    world2obj_vz_z;

        // Vec3f obj2world_p;    // translation of Obj2World transform (on purpose in first 64 bytes)
        float    obj2world_p_x;
        float    obj2world_p_y;
        float    obj2world_p_z;
    } part0;

    /* second 64 bytes accessed during shading */
    // NOTE: Everything in this block is under SW control
    struct Part1
    {
        //      uint64_t bvhPtr : 48;
        //      uint64_t pad : 16;
        uint64_t DW0_DW1;

        uint32_t instanceID;
        uint32_t instanceIndex;

        // Vec3f obj2world_vx;   // vx vector of Obj2World transform
        float    obj2world_vx_x;
        float    obj2world_vx_y;
        float    obj2world_vx_z;

        // Vec3f obj2world_vy;   // vy vector of Obj2World transform
        float    obj2world_vy_x;
        float    obj2world_vy_y;
        float    obj2world_vy_z;

        // Vec3f obj2world_vz;   // vz vector of Obj2World transform
        float    obj2world_vz_x;
        float    obj2world_vz_y;
        float    obj2world_vz_z;

        // Vec3f world2obj_p;    // translation of World2Obj transform
        //                       // (the Obj2World translation is stored in part0)
        float    world2obj_p_x;
        float    world2obj_p_y;
        float    world2obj_p_z;
    } part1;
};
456 
457     __constant const uint64_t c_one = 1ul;
458 
HwInstanceLeaf_GetInstanceMask(const HwInstanceLeaf * p)459     GRL_INLINE uint32_t HwInstanceLeaf_GetInstanceMask( const HwInstanceLeaf* p )
460     {
461         return p->part0.DW0 >> 24;
462     }
463 
HwInstanceLeaf_GetInstanceContributionToHitGroupIndex(const HwInstanceLeaf * p)464     GRL_INLINE uint32_t HwInstanceLeaf_GetInstanceContributionToHitGroupIndex( const HwInstanceLeaf* p )
465     {
466         return p->part0.DW1 & 0x00ffffff;
467     }
468 
HwInstanceLeaf_GetInstanceFlags(const HwInstanceLeaf * p)469     GRL_INLINE uint32_t HwInstanceLeaf_GetInstanceFlags( const HwInstanceLeaf* p )
470     {
471         return (p->part0.DW2_DW3 >> 48) & 0xff;
472     }
HwInstanceLeaf_GetInstanceID(const HwInstanceLeaf * p)473     GRL_INLINE uint32_t HwInstanceLeaf_GetInstanceID( const HwInstanceLeaf* p )
474     {
475         return p->part1.instanceID;
476     }
477 
HwInstanceLeaf_GetBVH(const HwInstanceLeaf * p)478     GRL_INLINE gpuva_t HwInstanceLeaf_GetBVH( const HwInstanceLeaf* p )           { return p->part1.DW0_DW1 & ((c_one << 48) - 1); }
HwInstanceLeaf_GetStartNode(const HwInstanceLeaf * p)479     GRL_INLINE gpuva_t HwInstanceLeaf_GetStartNode( const HwInstanceLeaf* p )     { return p->part0.DW2_DW3 & ((c_one << 48) - 1); }
HwInstanceLeaf_GetInstanceIndex(const HwInstanceLeaf * p)480     GRL_INLINE uint32_t HwInstanceLeaf_GetInstanceIndex( const HwInstanceLeaf* p ) { return p->part1.instanceIndex; }
481 
HwInstanceLeaf_GetTransform(struct HwInstanceLeaf * p,float * transform)482     GRL_INLINE void HwInstanceLeaf_GetTransform(struct HwInstanceLeaf* p, float* transform)
483     {
484         transform[0]  = p->part1.obj2world_vx_x;
485         transform[1]  = p->part1.obj2world_vy_x;
486         transform[2]  = p->part1.obj2world_vz_x;
487         transform[3]  = p->part0.obj2world_p_x;
488         transform[4]  = p->part1.obj2world_vx_y;
489         transform[5]  = p->part1.obj2world_vy_y;
490         transform[6]  = p->part1.obj2world_vz_y;
491         transform[7]  = p->part0.obj2world_p_y;
492         transform[8]  = p->part1.obj2world_vx_z;
493         transform[9]  = p->part1.obj2world_vy_z;
494         transform[10] = p->part1.obj2world_vz_z;
495         transform[11] = p->part0.obj2world_p_z;
496     }
497 
HwInstanceLeaf_SetBVH(HwInstanceLeaf * p,gpuva_t b)498     GRL_INLINE void HwInstanceLeaf_SetBVH( HwInstanceLeaf* p, gpuva_t b ) {
499         uint64_t mask = ((c_one << 48) - 1);
500         uint64_t v = p->part1.DW0_DW1;
501         v = (b & mask) | (v & ~mask);
502         p->part1.DW0_DW1 = v;
503     }
HwInstanceLeaf_SetStartNode(HwInstanceLeaf * p,gpuva_t b)504     GRL_INLINE void HwInstanceLeaf_SetStartNode( HwInstanceLeaf* p, gpuva_t b ) {
505         uint64_t mask = ((c_one << 48) - 1);
506         uint64_t v = p->part0.DW2_DW3;
507         v = (b & mask) | (v & ~mask);
508         p->part0.DW2_DW3 = v;
509     }
HwInstanceLeaf_SetStartNodeAndInstanceFlags(HwInstanceLeaf * p,gpuva_t root,uint8_t flags)510     GRL_INLINE void HwInstanceLeaf_SetStartNodeAndInstanceFlags( HwInstanceLeaf* p,
511                                                              gpuva_t root,
512                                                              uint8_t flags ) {
513         uint64_t mask = ((1ull << 48) - 1);
514         uint64_t v = (root & mask) | ((uint64_t)(flags)<<48);
515         p->part1.DW0_DW1 = v;
516     }
517 
// 64-byte internal BVH node holding up to 6 children with quantized bounds.
struct InternalNode
{
    // World space origin of the quantization grid.
    float lower[3];
    // Offset to all children, in 64B multiples.
    int32_t childOffset;

    uint8_t nodeType;     // type of the node
    uint8_t pad;          // unused byte

    // 2^exp_{x,y,z} is the size of the grid in the respective dimension.
    int8_t exp_x;
    int8_t exp_y;
    int8_t exp_z;
    uint8_t nodeMask;     // mask used for ray filtering

    struct ChildData
    {
        // Packed per-child byte:
        //   blockIncr : 2  size of child in 64 byte blocks. Must be ==2 for instance leaves, <=2 for quad leaves.
        //   startPrim : 4  start primitive in fat leaf mode or child type in mixed mode
        //   pad       : 2  unused bits
        uint8_t bits;
    } childData[6];

    // Quantized child bounds, one entry per child.
    uint8_t lower_x[6];  // lower bounds, x-dimension
    uint8_t upper_x[6];  // upper bounds, x-dimension
    uint8_t lower_y[6];  // lower bounds, y-dimension
    uint8_t upper_y[6];  // upper bounds, y-dimension
    uint8_t lower_z[6];  // lower bounds, z-dimension
    uint8_t upper_z[6];  // upper bounds, z-dimension
};
546 
InternalNode_GetChildBlockIncr(const InternalNode * p,uint idx)547     GRL_INLINE uint InternalNode_GetChildBlockIncr( const InternalNode* p, uint idx )
548     {
549         return p->childData[idx].bits & 3;
550     }
InternalNode_GetChildStartPrim(const InternalNode * p,uint idx)551     GRL_INLINE uint InternalNode_GetChildStartPrim( const InternalNode* p, uint idx )
552     {
553         return (p->childData[idx].bits>>2) & 0xf;
554     }
555 
InternalNode_GetChildType(const InternalNode * p,uint idx)556     GRL_INLINE uint8_t InternalNode_GetChildType( const InternalNode* p, uint idx )
557     {
558         return (p->childData[idx].bits >> 2) & 0xF;
559     }
560 
InternalNode_SetChildType(InternalNode * p,uint idx,uint type)561     GRL_INLINE void InternalNode_SetChildType( InternalNode* p, uint idx, uint type )
562     {
563         uint bits = p->childData[idx].bits;
564         const uint mask = (0xF << 2);
565         bits = ((type << 2) & mask) | (bits & ~mask);
566         p->childData[idx].bits = (uint8_t)bits;
567     }
568 
InternalNode_IsChildValid(const InternalNode * p,size_t child)569     GRL_INLINE bool InternalNode_IsChildValid( const InternalNode* p, size_t child )
570     {
571         bool lower = p->lower_x[child] & 0x80; // invalid nodes are indicated by setting lower_msb = 1 and upper_msb=0
572         bool upper = p->upper_x[child] & 0x80;
573         return !lower || upper;
574     }
575 
InternalNode_GetChildAABB(const InternalNode * node,size_t i)576     GRL_INLINE AABB3f InternalNode_GetChildAABB(const InternalNode* node, size_t i)
577     {
578         float4 lower, upper;
579         const float4 base = { node->lower[0], node->lower[1], node->lower[2], 0.0f };
580         const int4 lower_i = { node->lower_x[i], node->lower_y[i], node->lower_z[i], 0 };
581         const int4 upper_i = { node->upper_x[i], node->upper_y[i], node->upper_z[i], 0 };
582         const int4 exp_i = { node->exp_x, node->exp_y, node->exp_z, 0 };
583         lower = base + bitShiftLdexp4(convert_float4_rtn(lower_i), exp_i - 8);
584         upper = base + bitShiftLdexp4(convert_float4_rtp(upper_i), exp_i - 8);
585         AABB3f aabb3f = {
586             { lower.x, lower.y, lower.z },
587             { upper.x, upper.y, upper.z } };
588         return aabb3f;
589     }
590 
InternalNode_GetChildren(InternalNode * node)591     GRL_INLINE void* InternalNode_GetChildren( InternalNode* node)
592     {
593         return (void*)(((char*)node) + node->childOffset * 64);
594     }
595 
// Two-dword descriptor found at the start of primitive leaves.
typedef struct PrimLeafDesc
{
    // Packed dword:
    //   shaderIndex : 24   shader index used for shader record calculations
    //   geomMask    : 8    geometry mask used for ray masking
    uint32_t shaderIndex_geomMask;

    // Packed dword:
    //   geomIndex : 29     the geometry index specifies the n'th geometry of the scene
    //   type      : 1      PrimLeafType
    //   geomFlags : 2      geometry flags of this geometry
    uint32_t geomIndex_flags;
} PrimLeafDesc;
607 
PrimLeaf_GetShaderIndex(const PrimLeafDesc * p)608     GRL_INLINE uint32_t PrimLeaf_GetShaderIndex( const PrimLeafDesc* p )
609     {
610         return p->shaderIndex_geomMask & ((1 << 24) - 1);
611     }
PrimLeaf_GetGeoIndex(const PrimLeafDesc * p)612     GRL_INLINE uint32_t PrimLeaf_GetGeoIndex( const PrimLeafDesc* p )
613     {
614         return p->geomIndex_flags & ((1<<29)-1);
615     }
PrimLeaf_GetGeomFlags(const PrimLeafDesc * p)616     GRL_INLINE uint32_t PrimLeaf_GetGeomFlags( const PrimLeafDesc* p )
617     {
618         return (p->geomIndex_flags >> 30);
619     }
PrimLeaf_GetType(const PrimLeafDesc * p)620     GRL_INLINE uint32_t PrimLeaf_GetType(const PrimLeafDesc* p)
621     {
622         return (p->geomIndex_flags >> 29) & 1;
623     }
624 
625     struct QuadLeaf
626     {
627         PrimLeafDesc leafDesc;
628 
629         uint32_t primIndex0;
630 
631         //uint32_t primIndex1Delta : 16;
632         //uint32_t j0 : 2;
633         //uint32_t j1 : 2;
634         //uint32_t j2 : 2;
635         //uint32_t last : 1; // last quad in list
636         //uint32_t pad : 9;
637         uint32_t DW1;
638 
639         float v[4][3];
640     };
641 
QuadLeaf_GetPrimIndexDelta(const QuadLeaf * p)642     GRL_INLINE uint32_t QuadLeaf_GetPrimIndexDelta( const QuadLeaf* p )
643     {
644         return p->DW1 & 0x0000ffff;
645     }
QuadLeaf_GetPrimIndex0(const QuadLeaf * p)646     GRL_INLINE uint32_t QuadLeaf_GetPrimIndex0( const QuadLeaf* p )
647     {
648         return p->primIndex0;
649     }
QuadLeaf_GetPrimIndex1(const QuadLeaf * p)650     GRL_INLINE uint32_t QuadLeaf_GetPrimIndex1( const QuadLeaf* p )
651     {
652         return p->primIndex0 + QuadLeaf_GetPrimIndexDelta(p);
653     }
QuadLeaf_IsSingleTriangle(const QuadLeaf * p)654     GRL_INLINE bool QuadLeaf_IsSingleTriangle( const QuadLeaf* p )
655     {
656         return QuadLeaf_GetPrimIndexDelta(p) == 0;
657     }
QuadLeaf_GetSecondTriangleIndices(const QuadLeaf * p)658     GRL_INLINE uint32_t QuadLeaf_GetSecondTriangleIndices( const QuadLeaf* p )
659     {
660         return (p->DW1>>16) & 0x3f;
661     }
662 
QuadLeaf_SetVertices(QuadLeaf * quad,float3 v0,float3 v1,float3 v2,float3 v3)663     GRL_INLINE void QuadLeaf_SetVertices( QuadLeaf* quad, float3 v0, float3 v1, float3 v2, float3 v3 )
664     {
665         quad->v[0][0] = v0.x;
666         quad->v[0][1] = v0.y;
667         quad->v[0][2] = v0.z;
668         quad->v[1][0] = v1.x;
669         quad->v[1][1] = v1.y;
670         quad->v[1][2] = v1.z;
671         quad->v[2][0] = v2.x;
672         quad->v[2][1] = v2.y;
673         quad->v[2][2] = v2.z;
674         quad->v[3][0] = v3.x;
675         quad->v[3][1] = v3.y;
676         quad->v[3][2] = v3.z;
677     }
678 
679 
680     struct ProceduralLeaf {
681         PrimLeafDesc leafDesc;
682 
683         // Number of primitives + "last" bits.
684         // The meaning of this section is SW-defined and flexible
685         uint32_t DW1 ;
686         uint32_t _primIndex[13];
687     } ;
688 
689 GRL_NAMESPACE_END(Gen12)
690 GRL_NAMESPACE_END(RTAS)
691 GRL_NAMESPACE_END(GRL)
692