1 //
2 // Copyright (C) 2009-2021 Intel Corporation
3 //
4 // SPDX-License-Identifier: MIT
5 //
6 //
7
8 #pragma once
9
10 #include "shared.h"
11 #include "affinespace.h"
12 #include "api_interface.h"
13 #include "qbvh6.h"
14 #include "libs/lsc_intrinsics.h"
15
// Reads the instanceIndex field out of part1 of the given instance leaf.
//   I - leaf to read from
// Returns the stored value converted to uint32_t.
GRL_INLINE uint32_t HwInstanceLeafPart1_getInstanceIndex(struct HwInstanceLeaf *I)
{
    const uint32_t storedIndex = I->part1.instanceIndex;
    return storedIndex;
}
20
// Packs DW0 of an instance leaf's part0 and writes it to the .x component of *dst.
//   bits [23:0]  - low 24 bits of shaderIndex
//   bits [31:24] - geomMask (anything shifted past bit 31 is discarded)
// The other components of *dst are left untouched.
GRL_INLINE void encodeDW0_HwInstanceLeafPart0(
    uint32_t shaderIndex,
    uint32_t geomMask,
    uint4 *dst)
{
    const uint32_t shaderBits = shaderIndex & 0x00ffffffu;
    const uint32_t maskBits = geomMask << 24;

    (*dst).x = shaderBits | maskBits;
}
29
// Packs DW1 of an instance leaf's part0 and writes it to the .y component of *dst.
//   bits [23:0]  - low 24 bits of instanceContributionToHitGroupIndex
//   bits [28:24] - always zero
//   bit  [29]    - lowest bit of notProcedural
//   bits [31:30] - lowest two bits of geomFlags
// The other components of *dst are left untouched.
GRL_INLINE void encodeDW1_HwInstanceLeafPart0(
    uint32_t instanceContributionToHitGroupIndex,
    uint32_t notProcedural,
    uint32_t geomFlags,
    uint4* dst)
{
    const uint32_t hitGroupBits = instanceContributionToHitGroupIndex & 0x00ffffffu;
    const uint32_t typeBit = (notProcedural & 1u) << 29;
    const uint32_t flagBits = (geomFlags & 3u) << 30;

    (*dst).y = hitGroupBits | typeBit | flagBits;
}
40
// Packs DW2 and DW3 of an instance leaf's part0 and writes them to the .z and .w
// components of *dst.
//   .z            - low 32 bits of rootNodePtr
//   .w bits [15:0]  - bits [47:32] of rootNodePtr
//   .w bits [31:16] - low 16 bits of instFlags (higher bits are shifted out)
// The .x and .y components of *dst are left untouched.
GRL_INLINE void encodeDW2DW3_HwInstanceLeafPart0(
    uint64_t rootNodePtr,
    uint32_t instFlags,
    uint4* dst)
{
    const uint ptrLow32 = (uint)rootNodePtr;
    const uint ptrHigh16 = (uint)(rootNodePtr >> 32) & 0xffffu;
    const uint flagBits = (uint)(instFlags << 16);

    (*dst).z = ptrLow32;
    (*dst).w = ptrHigh16 | flagBits;
}
53
// Stores the packed DW0 word directly into I->part0.DW0.
//   bits [23:0]  - low 24 bits of shaderIndex
//   bits [31:24] - geomMask (anything shifted past bit 31 is discarded)
GRL_INLINE void HwInstanceLeafPart0_setDW0(struct HwInstanceLeaf *I,
    uint32_t shaderIndex,
    uint32_t geomMask)
{
    const uint32_t shaderBits = shaderIndex & 0x00ffffffu;
    const uint32_t maskBits = geomMask << 24;

    I->part0.DW0 = shaderBits | maskBits;
}
62
// Stores the packed DW1 word directly into I->part0.DW1.
//   bits [23:0]  - low 24 bits of instanceContributionToHitGroupIndex
//   bits [28:24] - always zero
//   bit  [29]    - lowest bit of notProcedural
//   bits [31:30] - lowest two bits of geomFlags
GRL_INLINE void HwInstanceLeafPart0_setDW1(struct HwInstanceLeaf *I,
    uint32_t instanceContributionToHitGroupIndex,
    uint32_t notProcedural,
    uint32_t geomFlags)
{
    const uint32_t hitGroupBits = instanceContributionToHitGroupIndex & 0x00ffffffu;
    const uint32_t typeBit = (notProcedural & 1u) << 29;
    const uint32_t flagBits = (geomFlags & 3u) << 30;

    I->part0.DW1 = hitGroupBits | typeBit | flagBits;
}
73
// Stores the low 48 bits of the pBvhPtr address into I->part1.DW0_DW1;
// the upper 16 bits of the stored value are zero.
GRL_INLINE void HwInstanceLeafPart1_setDW0DW1(struct HwInstanceLeaf *I,
    global char *pBvhPtr)
{
    const uint64_t addressMask48 = 0x0000ffffffffffffull;
    const uint64_t bvhAddress = (uint64_t)pBvhPtr;

    I->part1.DW0_DW1 = bvhAddress & addressMask48;
}
79
// Stores the combined 64-bit DW2/DW3 value into I->part0.DW2_DW3.
//   bits [47:0]  - low 48 bits of rootNodePtr
//   bits [63:48] - low 16 bits of instFlags (higher bits are shifted out)
GRL_INLINE void HwInstanceLeafPart0_setDW2DW3(struct HwInstanceLeaf *I,
    uint64_t rootNodePtr,
    uint32_t instFlags)
{
    const uint64_t pointerBits = rootNodePtr & 0x0000ffffffffffffull;
    const uint64_t flagBits = (uint64_t)instFlags << 48;

    // The two fields occupy disjoint bit ranges, so OR-ing them is the same as adding them.
    I->part0.DW2_DW3 = pointerBits | flagBits;
}
89
// Fills in a complete HW instance leaf (part0 and part1) in global memory from an
// instance descriptor. The leaf is written as eight uint4 stores issued through
// store_uint4_L1S_L3WB: uint4 slots 4..7 (part1) first, then slots 0..3 (part0).
//
//   leaf               - destination leaf
//   instDesc           - instance descriptor supplying the transform, acceleration
//                        structure, instance ID, hit-group contribution and flags
//   instanceIndex      - written to the last dword of part1's first uint4
//   rootNodeByteOffset - byte offset added to the acceleration structure address to
//                        form the root node pointer; NO_NODE_OFFSET marks a
//                        degenerate instance and yields a null root pointer
//   instanceMask       - packed into bits [31:24] of part0 DW0
GRL_INLINE void HwInstanceLeaf_Constructor(global struct HwInstanceLeaf* leaf,
    global const struct GRL_RAYTRACING_INSTANCE_DESC* instDesc,
    uint instanceIndex,
    uint rootNodeByteOffset,
    uint instanceMask)
{
    // Address the leaf as an array of uint4 so each store covers four dwords.
    global uint4* leafSlots = (global uint4*)leaf;

    struct AffineSpace3f toWorld = AffineSpace3f_load_row_major(instDesc->Transform);

    // ---- part1, DW0..3: acceleration structure address, instance ID, instance index ----
    qword accelStructAddr = (qword)instDesc->AccelerationStructure;
    uint4 part1_dw0_3 = (uint4)(
        (uint)accelStructAddr,
        (uint)(accelStructAddr >> 32),
        GRL_get_instanceID(instDesc),
        instanceIndex);

    struct AffineSpace3f toObject = AffineSpace3f_invert(toWorld);

    store_uint4_L1S_L3WB(leafSlots, 4 /*part1 + 0DW*/, part1_dw0_3);

    // ---- part1, DW4..15: linear part of obj2world followed by the world2obj translation ----
    // NOTE(review): the translation here comes from the inverse transform, not from
    // obj2world; presumably this is what the hardware leaf layout expects - confirm.
    uint4 part1_dw4_7 = (uint4)(
        as_uint(toWorld.l.vx.x),
        as_uint(toWorld.l.vx.y),
        as_uint(toWorld.l.vx.z),
        as_uint(toWorld.l.vy.x));
    store_uint4_L1S_L3WB(leafSlots, 5 /*part1 + 4DW*/, part1_dw4_7);

    uint4 part1_dw8_11 = (uint4)(
        as_uint(toWorld.l.vy.y),
        as_uint(toWorld.l.vy.z),
        as_uint(toWorld.l.vz.x),
        as_uint(toWorld.l.vz.y));
    store_uint4_L1S_L3WB(leafSlots, 6 /*part1 + 8DW*/, part1_dw8_11);

    uint4 part1_dw12_15 = (uint4)(
        as_uint(toWorld.l.vz.z),
        as_uint(toObject.p.x),
        as_uint(toObject.p.y),
        as_uint(toObject.p.z));
    store_uint4_L1S_L3WB(leafSlots, 7 /*part1 + 12DW*/, part1_dw12_15);

    // ---- part0, DW0..3: packed header words ----
    uint hitGroupContribution = GRL_get_InstanceContributionToHitGroupIndex(instDesc);
    global struct BVHBase* instancedBvh = (global struct BVHBase*)instDesc->AccelerationStructure;

    // Each encode helper fills different components, so all four are written
    // before the vector is stored.
    uint4 part0_dw0_3;

    encodeDW0_HwInstanceLeafPart0(
        hitGroupContribution,
        instanceMask,
        &part0_dw0_3);

    encodeDW1_HwInstanceLeafPart0(
        hitGroupContribution, // for a HW instance leaf this field offsets the hit-group index
        1,                    // disables opaque culling: required for SW instancing, don't-care for HW instancing
        0,
        &part0_dw0_3);

    // A degenerate instance (NO_NODE_OFFSET) gets a null root pointer.
    uint64_t rootNodeAddr = 0;
    if (rootNodeByteOffset != NO_NODE_OFFSET)
    {
        rootNodeAddr = ((uint64_t)instancedBvh) + rootNodeByteOffset;
    }

    encodeDW2DW3_HwInstanceLeafPart0(
        rootNodeAddr,
        GRL_get_InstanceFlags(instDesc),
        &part0_dw0_3);

    store_uint4_L1S_L3WB(leafSlots, 0 /*part0 + 0DW*/, part0_dw0_3);

    // ---- part0, DW4..15: linear part of world2obj followed by the obj2world translation ----
    // NOTE(review): mirror image of part1 - inverse linear part, forward translation.
    uint4 part0_dw4_7 = (uint4)(
        as_uint(toObject.l.vx.x),
        as_uint(toObject.l.vx.y),
        as_uint(toObject.l.vx.z),
        as_uint(toObject.l.vy.x));
    store_uint4_L1S_L3WB(leafSlots, 1 /*part0 + 4DW*/, part0_dw4_7);

    uint4 part0_dw8_11 = (uint4)(
        as_uint(toObject.l.vy.y),
        as_uint(toObject.l.vy.z),
        as_uint(toObject.l.vz.x),
        as_uint(toObject.l.vz.y));
    store_uint4_L1S_L3WB(leafSlots, 2 /*part0 + 8DW*/, part0_dw8_11);

    uint4 part0_dw12_15 = (uint4)(
        as_uint(toObject.l.vz.z),
        as_uint(toWorld.p.x),
        as_uint(toWorld.p.y),
        as_uint(toWorld.p.z));
    store_uint4_L1S_L3WB(leafSlots, 3 /*part0 + 12DW*/, part0_dw12_15);
}
184