// Source: /aosp_15_r20/external/mesa3d/src/intel/vulkan/grl/gpu/instance.h (revision 6104692788411f58d303aa86923a9ff6ecaded22)
//
// Copyright (C) 2009-2021 Intel Corporation
//
// SPDX-License-Identifier: MIT
//
//
7 
8 #pragma once
9 
10 #include "shared.h"
11 #include "affinespace.h"
12 #include "api_interface.h"
13 #include "qbvh6.h"
14 #include "libs/lsc_intrinsics.h"
15 
HwInstanceLeafPart1_getInstanceIndex(struct HwInstanceLeaf * I)16 GRL_INLINE uint32_t HwInstanceLeafPart1_getInstanceIndex(struct HwInstanceLeaf *I)
17 {
18     return I->part1.instanceIndex;
19 }
20 
encodeDW0_HwInstanceLeafPart0(uint32_t shaderIndex,uint32_t geomMask,uint4 * dst)21 GRL_INLINE void encodeDW0_HwInstanceLeafPart0(
22     uint32_t shaderIndex,
23     uint32_t geomMask,
24     uint4 *dst)
25 {
26     (*dst).x = (shaderIndex & ((1 << 24) - 1)) |
27              (geomMask << 24);
28 }
29 
encodeDW1_HwInstanceLeafPart0(uint32_t instanceContributionToHitGroupIndex,uint32_t notProcedural,uint32_t geomFlags,uint4 * dst)30 GRL_INLINE void encodeDW1_HwInstanceLeafPart0(
31     uint32_t instanceContributionToHitGroupIndex,
32     uint32_t notProcedural,
33     uint32_t geomFlags,
34     uint4* dst)
35 {
36     (*dst).y = (instanceContributionToHitGroupIndex & ((1 << 24) - 1)) |
37         ((notProcedural & 1) << (24 + 5)) |
38         ((geomFlags & 3) << (24 + 5 + 1));
39 }
40 
encodeDW2DW3_HwInstanceLeafPart0(uint64_t rootNodePtr,uint32_t instFlags,uint4 * dst)41 GRL_INLINE void encodeDW2DW3_HwInstanceLeafPart0(
42     uint64_t rootNodePtr,
43     uint32_t instFlags,
44     uint4* dst)
45 {
46     uint64_t flags = instFlags;
47     uint DW2 = (uint)rootNodePtr;
48     uint DW3 = ((uint)(rootNodePtr >> 32ul) & 0xffff);
49     DW3 |= flags << 16ull;
50     (*dst).z = DW2;
51     (*dst).w = DW3;
52 }
53 
HwInstanceLeafPart0_setDW0(struct HwInstanceLeaf * I,uint32_t shaderIndex,uint32_t geomMask)54 GRL_INLINE void HwInstanceLeafPart0_setDW0(struct HwInstanceLeaf *I,
55                                        uint32_t shaderIndex,
56                                        uint32_t geomMask)
57 {
58     I->part0.DW0 =
59         (shaderIndex & ((1 << 24) - 1)) |
60         (geomMask << 24);
61 }
62 
HwInstanceLeafPart0_setDW1(struct HwInstanceLeaf * I,uint32_t instanceContributionToHitGroupIndex,uint32_t notProcedural,uint32_t geomFlags)63 GRL_INLINE void HwInstanceLeafPart0_setDW1(struct HwInstanceLeaf *I,
64                                        uint32_t instanceContributionToHitGroupIndex,
65                                        uint32_t notProcedural,
66                                        uint32_t geomFlags)
67 {
68     I->part0.DW1 =
69         (instanceContributionToHitGroupIndex & ((1 << 24) - 1)) |
70         ((notProcedural & 1) << (24 + 5)) |
71         ((geomFlags & 3) << (24 + 5 + 1));
72 }
73 
HwInstanceLeafPart1_setDW0DW1(struct HwInstanceLeaf * I,global char * pBvhPtr)74 GRL_INLINE void HwInstanceLeafPart1_setDW0DW1(struct HwInstanceLeaf *I,
75                                           global char *pBvhPtr)
76 {
77     I->part1.DW0_DW1 = ((uint64_t)pBvhPtr) & (((uint64_t)1 << 48) - 1);
78 }
79 
HwInstanceLeafPart0_setDW2DW3(struct HwInstanceLeaf * I,uint64_t rootNodePtr,uint32_t instFlags)80 GRL_INLINE void HwInstanceLeafPart0_setDW2DW3(struct HwInstanceLeaf *I,
81                                           uint64_t rootNodePtr,
82                                           uint32_t instFlags)
83 {
84     uint64_t flags = instFlags;
85     flags = flags << 48ull;
86     uint64_t ptr = rootNodePtr & 0x0000ffffffffffff;
87     I->part0.DW2_DW3 = ptr + flags;
88 }
89 
HwInstanceLeaf_Constructor(global struct HwInstanceLeaf * leaf,global const struct GRL_RAYTRACING_INSTANCE_DESC * instDesc,uint instanceIndex,uint rootNodeByteOffset,uint instanceMask)90 GRL_INLINE void HwInstanceLeaf_Constructor(global struct HwInstanceLeaf* leaf,
91     global const struct GRL_RAYTRACING_INSTANCE_DESC* instDesc,
92     uint instanceIndex,
93     uint rootNodeByteOffset,
94     uint instanceMask)
95 {
96     global uint4* InstanceLeaf_4DWparts = (global uint4*) (leaf);
97 
98     struct AffineSpace3f obj2world = AffineSpace3f_load_row_major(instDesc->Transform);
99 
100     qword accStructPtr = (qword)instDesc->AccelerationStructure;
101     uint4 p1_DW0_3 = (uint4)(
102         (uint)accStructPtr,
103         (uint)(accStructPtr >> (uint64_t)32),
104         GRL_get_instanceID(instDesc),
105         instanceIndex);
106 
107     struct AffineSpace3f world2obj = AffineSpace3f_invert(obj2world);
108 
109     store_uint4_L1S_L3WB(InstanceLeaf_4DWparts, 4 + 0 /*part1 + 0DW*/, p1_DW0_3);
110 
111     uint4 p1_DW4_7 = (uint4)(
112         as_uint(obj2world.l.vx.x),
113         as_uint(obj2world.l.vx.y),
114         as_uint(obj2world.l.vx.z),
115         as_uint(obj2world.l.vy.x));
116 
117     store_uint4_L1S_L3WB(InstanceLeaf_4DWparts, 4 + 1 /*part1 + 4DW*/, p1_DW4_7);
118 
119     uint4 p1_DW8_11 = (uint4)(
120         as_uint(obj2world.l.vy.y),
121         as_uint(obj2world.l.vy.z),
122         as_uint(obj2world.l.vz.x),
123         as_uint(obj2world.l.vz.y));
124 
125     store_uint4_L1S_L3WB(InstanceLeaf_4DWparts, 4 + 2 /*part1 + 8DW*/, p1_DW8_11);
126 
127 
128     uint4 p1_DW12_15 = (uint4)(
129         as_uint(obj2world.l.vz.z),
130         as_uint(world2obj.p.x),
131         as_uint(world2obj.p.y),
132         as_uint(world2obj.p.z));
133 
134     store_uint4_L1S_L3WB(InstanceLeaf_4DWparts, 4 + 3 /*part1 + 12DW*/, p1_DW12_15);
135 
136 
137     uint hit_group_index = GRL_get_InstanceContributionToHitGroupIndex(instDesc);
138     global struct BVHBase* bvh = (global struct BVHBase*)instDesc->AccelerationStructure;
139 
140     uint4 p0_DW0_3;
141 
142     encodeDW0_HwInstanceLeafPart0(
143         hit_group_index,
144         instanceMask,
145         &p0_DW0_3);
146 
147     encodeDW1_HwInstanceLeafPart0(
148         hit_group_index, // for HW instance leaf, this field is used to offset the hit-group index
149         1,  // disable opaque culling.. Necessary for SW instancing.. don't-care for HW instancing
150         0,
151         &p0_DW0_3);
152 
153     encodeDW2DW3_HwInstanceLeafPart0(
154         rootNodeByteOffset == NO_NODE_OFFSET ? 0 : ((uint64_t)bvh) + rootNodeByteOffset, // offset NO_NODE_OFFSET is for degenerated instance, put null as root pointer
155         GRL_get_InstanceFlags(instDesc),
156         &p0_DW0_3);
157 
158     store_uint4_L1S_L3WB(InstanceLeaf_4DWparts, 0 /*part0 + 0DW*/, p0_DW0_3);
159 
160     uint4 p0_DW4_7 = (uint4)(
161         as_uint(world2obj.l.vx.x),
162         as_uint(world2obj.l.vx.y),
163         as_uint(world2obj.l.vx.z),
164         as_uint(world2obj.l.vy.x));
165 
166     store_uint4_L1S_L3WB(InstanceLeaf_4DWparts, 1 /*part0 + 4DW*/, p0_DW4_7);
167 
168     uint4 p0_DW8_11 = (uint4)(
169         as_uint(world2obj.l.vy.y),
170         as_uint(world2obj.l.vy.z),
171         as_uint(world2obj.l.vz.x),
172         as_uint(world2obj.l.vz.y));
173 
174     store_uint4_L1S_L3WB(InstanceLeaf_4DWparts, 2 /*part0 + 8DW*/, p0_DW8_11);
175 
176     uint4 p0_DW12_15 = (uint4)(
177         as_uint(world2obj.l.vz.z),
178         as_uint(obj2world.p.x),
179         as_uint(obj2world.p.y),
180         as_uint(obj2world.p.z));
181 
182     store_uint4_L1S_L3WB(InstanceLeaf_4DWparts, 3 /*part0 + 12DW*/, p0_DW12_15);
183 }
184