xref: /aosp_15_r20/external/mesa3d/src/intel/vulkan/grl/gpu/build_refit.grl (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1//
2// Copyright (C) 2009-2021 Intel Corporation
3//
4// SPDX-License-Identifier: MIT
5//
6//
7
8module build_refit;
9
// Kernel bindings for this file's metakernels. Each `kernel` entry maps the
// name used by `dispatch` / `dispatch_indirect` below to an entry point
// (kernelFunction) in bvh_build_refit.cl.
// NOTE(review): the module is named `morton_kernels` although it lives in
// build_refit and points at bvh_build_refit.cl - looks like a copy/paste
// leftover; confirm nothing depends on the name before changing it.
10kernel_module morton_kernels ("bvh_build_refit.cl")
11{
12    links lsc_intrinsics;
13
14    kernel update_instance_leaves    < kernelFunction="update_instance_leaves" >;
    // The entry point string is capitalised ("Refit_...") unlike the kernel
    // name; presumably it matches the symbol in the .cl file - verify.
15    kernel refit_indirect_sg         < kernelFunction="Refit_indirect_sg" >;
16    kernel update_instance_leaves_indirect    < kernelFunction="update_instance_leaves_indirect" >;
17
18
19}
20
// Divisor used by update_instance_leaves to turn an instance count into a
// group count. update_instance_leaves_indirect hard-codes the matching
// literals (15 and 4), so those must be updated together with this value.
21const INSTANCE_LEAF_GROUP_SIZE = 16;
// In the visible part of this file this is referenced only by the
// commented-out `refit` metakernel.
22const REFIT_GROUP_SIZE = 8;
23
// Direct dispatch of the update_instance_leaves kernel.
//
// The X group count is num_instances divided by INSTANCE_LEAF_GROUP_SIZE,
// rounded up. num_instances is used only for that computation; it is not
// forwarded to the kernel.
metakernel update_instance_leaves(
    qword bvh,
    qword dxrInstancesArray,
    qword dxrInstancesPtrArray,
    qword instance_leaf_aabbs,
    dword num_instances )
{
    // Adding (group size - 1) before dividing rounds the quotient up, so a
    // partially filled last group is still dispatched.
    define group_count (num_instances + INSTANCE_LEAF_GROUP_SIZE - 1) / INSTANCE_LEAF_GROUP_SIZE;

    dispatch update_instance_leaves(group_count, 1, 1) args(
        bvh,
        dxrInstancesArray,
        dxrInstancesPtrArray,
        instance_leaf_aabbs);
}
39
// Indirect variant of update_instance_leaves.
//
// The count is not passed as an argument; it is loaded as a dword from the
// address indirectBuildRangeInfo and turned into a group count with register
// arithmetic: (count + 15) >> 4. The result is written to DISPATCHDIM_X and
// the kernel is launched with dispatch_indirect.
metakernel update_instance_leaves_indirect(
    qword bvh,
    qword dxrInstancesArray,
    qword dxrInstancesPtrArray,
    qword instance_leaf_aabbs,
    qword indirectBuildRangeInfo)
{
    define group_count REG0;
    define round_up    REG1; // INSTANCE_LEAF_GROUP_SIZE - 1
    define log2_size   REG2; // log_2(INSTANCE_LEAF_GROUP_SIZE)

    // Literals mirror INSTANCE_LEAF_GROUP_SIZE (16); keep them in sync.
    round_up  = 15;
    log2_size = 4;

    // Start from primitiveCount, read from the indirect build range info.
    group_count = load_dword(indirectBuildRangeInfo);

    // Round-up division by the group size.
    group_count = group_count + round_up;
    group_count = group_count >> log2_size;

    DISPATCHDIM_X = group_count.lo;
    DISPATCHDIM_Y = 1;
    DISPATCHDIM_Z = 1;

    dispatch_indirect update_instance_leaves_indirect args(
        bvh,
        dxrInstancesArray,
        dxrInstancesPtrArray,
        instance_leaf_aabbs,
        indirectBuildRangeInfo);
}
70
71/*
72metakernel refit(
73    qword bvh,
74    qword geomDesc,
75    qword instance_aabbs,
76    dword dispatchSize )
77{
78    define num_groups (dispatchSize + REFIT_GROUP_SIZE - 1) / REFIT_GROUP_SIZE;
79
80    dispatch refit(num_groups, 1, 1) args(
81        bvh,
82        geomDesc,
83        instance_aabbs);
84}
85
86const REFIT_SIMD_SIZE = 8;
87const REFIT_SIMD_SIZE_SHIFT = 3;
88
89metakernel refit_indirect(
90    qword bvh,
91    qword bvh_inner_nodes_start_value,
92    qword bvh_inner_nodes_end,
93    qword geomDesc,
94    qword instance_aabbs )
95{
96    define cRoundingSIMD REG4;
97    define TWO REG3;
98    define ONE REG5;
99    cRoundingSIMD  = (REFIT_SIMD_SIZE - 1);
100
101    TWO = 2;
102    ONE = 1;
103
104    REG0 = bvh_inner_nodes_start_value;
105    REG1 = load_dword(bvh_inner_nodes_end);
106    REG1.hi = 0;
107    REG2 = REG1 - REG0;
108    REG2 = REG2 + cRoundingSIMD;
109    REG2 = REG2 >> TWO;             // JDB:  >>3 must be implemented as >>2 then >>1 because command streamer
110    REG2 = REG2 >> ONE;             //   only supports pow2 shifts because somebody wanted to save area.
111
112    DISPATCHDIM_X = REG2.lo;
113    DISPATCHDIM_Y = 1;
114    DISPATCHDIM_Z = 1;
115
116    dispatch_indirect refit_indirect args(
117        bvh,
118        geomDesc,
119        instance_aabbs);
120
121}
122*/
123
// Indirect dispatch of the refit_indirect_sg kernel.
//
// DISPATCHDIM_X is set to (dword loaded from bvh_inner_nodes_end) minus
// bvh_inner_nodes_start_value. Note the asymmetry: the start is passed by
// value, while the end is an address that is read at execution time.
// Unlike the commented-out refit_indirect, the difference is used as-is,
// with no rounding or shift by a SIMD size.
// Only bvh, geomDesc and instance_aabbs are forwarded to the kernel.
124metakernel refit_indirect_sg(
125    qword bvh,
126    qword bvh_inner_nodes_start_value,
127    qword bvh_inner_nodes_end,
128    qword geomDesc,
129    qword instance_aabbs )
130{
131
132    REG0 = bvh_inner_nodes_start_value;
    // Load the 32-bit end value into the low half and clear the high half so
    // the 64-bit subtraction below sees a zero-extended value.
133    REG1.lo = load_dword(bvh_inner_nodes_end);
134    REG1.hi = 0;
135    REG2 = REG1 - REG0;
136
    // Only the low dword of the difference is used as the group count.
    // NOTE(review): there is no guard for end < start; presumably callers
    // guarantee end >= start - confirm.
137    DISPATCHDIM_X = REG2.lo;
138    DISPATCHDIM_Y = 1;
139    DISPATCHDIM_Z = 1;
140
141    dispatch_indirect refit_indirect_sg args(
142        bvh,
143        geomDesc,
144        instance_aabbs);
145
146}
147/*
148////////////////////////////////////////////////////////////////
149// constructing treelets
150// phase 1: mark nodes that will be roots of bottom treelets
151// also for each node leave a number of startpoints that are under it and max depth of the path from the node
152metakernel find_refit_treelets(
153    qword bvh,
154    qword treelet_node_data,
155    qword scratch_startpoints,
156    qword startpointAlloc,
157    qword bvh_inner_nodes_start_value,
158    qword bvh_inner_nodes_end )
159{
160    define cRoundingSIMD REG4;
161    define TWO REG3;
162    define ONE REG5;
163    cRoundingSIMD  = (REFIT_SIMD_SIZE - 1);
164
165    TWO = 2;
166    ONE = 1;
167
168    REG0 = bvh_inner_nodes_start_value;
169    REG1.lo = load_dword(bvh_inner_nodes_end);
170    REG1.hi = 0;
171    REG2 = REG1 - REG0;
172    REG2 = REG2 + cRoundingSIMD;
173    REG2 = REG2 >> TWO;             // JDB:  >>3 must be implemented as >>2 then >>1 because command streamer
174    REG2 = REG2 >> ONE;             //   only supports pow2 shifts because somebody wanted to save area.
175
176    DISPATCHDIM_X = REG2.lo;
177    DISPATCHDIM_Y = 1;
178    DISPATCHDIM_Z = 1;
179
180    dispatch_indirect find_refit_treelets args(
181        bvh,
182        treelet_node_data,
183        scratch_startpoints,
184        startpointAlloc);
185}
186
187
188////////////////////////////////////////////////////////////////
189// constructing treelets
190// phase 2 totally parallel, run threads up to assign startpoints to given treelet
191//
192metakernel assign_refit_startpoints_to_treelets(
193    qword bvh,
194    qword treelet_node_data,
195    qword scratch_startpoints,
196    qword bvh_inner_nodes_start_value,
197    qword bvh_inner_nodes_end )
198{
199    define cRoundingSIMD REG4;
200    define TWO REG3;
201    define ONE REG5;
202    cRoundingSIMD  = (REFIT_SIMD_SIZE - 1);
203
204    TWO = 2;
205    ONE = 1;
206
207    REG0 = bvh_inner_nodes_start_value;
208    REG1.lo = load_dword(bvh_inner_nodes_end);
209    REG1.hi = 0;
210    REG2 = REG1 - REG0;
211    REG2 = REG2 + cRoundingSIMD;
212    REG2 = REG2 >> TWO;             // JDB:  >>3 must be implemented as >>2 then >>1 because command streamer
213    REG2 = REG2 >> ONE;             //   only supports pow2 shifts because somebody wanted to save area.
214
215    DISPATCHDIM_X = REG2.lo;
216    DISPATCHDIM_Y = 1;
217    DISPATCHDIM_Z = 1;
218
219    dispatch_indirect assign_refit_startpoints_to_treelets args(
220        bvh,
221        treelet_node_data,
222        scratch_startpoints);
223}
224
225
226////////////////////////////////////////////////////////////////
227// constructing treelets
228// phase 3 local work: group per treelet, sort the startpoints in treelets by length of the path
229metakernel finalize_treelets_in_groups(
230    qword bvh,
231    qword scratch_startpoints,
232    qword ptrNumTreelets )
233{
234    REG0 = load_qword(ptrNumTreelets);
235
236    DISPATCHDIM_X = REG0.lo;
237    DISPATCHDIM_Y = 1;
238    DISPATCHDIM_Z = 1;
239
240    dispatch_indirect finalize_treelets_in_groups args(
241        bvh,
242        scratch_startpoints);
243}
244
245
246////////////////////////////////////////////////////////////////
247// Updating treelets
248// phase 1 update vertex and generate boxes for vertices
249//
250
251const PER_GROUP_ELEMENTS_ROUNDING = 15;
252const PER_GROUP_ELEMENTS_SHIFT = 4;
253
254metakernel init_treelets_refit(qword pSquashGroupsCountToReset)
255{
256    REG1 = 0;
257    store_qword(pSquashGroupsCountToReset, REG1);
258    DISPATCHDIM_Y = 1;
259    DISPATCHDIM_Z = 1;
260    //REG4 = PER_GROUP_ELEMENTS_SHIFT;
261    //REG5.hi = PER_GROUP_ELEMENTS_ROUNDING;
262    //REG5.lo = 0;
263}
264
265metakernel update_quads(
266    qword scratch_box,
267    qword bvh,
268    qword input,
269    dword numPrimsDividedBy32,
270    qword bigSquashInput)
271{
272    //REG0 = load_qword(quads_nodes_begin_end_pair);
273    //REG1.hi = REG0.lo; // this holds inner nodes begin
274    //REG2 = REG0 - REG1;
275    //REG2 = REG2 + REG5;
276    //REG2 = REG2 >> REG4;
277    //DISPATCHDIM_X = REG2.hi;
278
279    dispatch  refit_quads(numPrimsDividedBy32, 1, 1) args(
280        bvh,
281        input,
282        scratch_box,
283        numPrimsDividedBy32,
284        bigSquashInput );
285}
286
287//
288////////////////////////////////////////////////////////////////
289
290
291////////////////////////////////////////////////////////////////
292//
293// phase 1 or 2 - update primitives as well as bottom up refit internal nodes
294// in single dispatch (in single group per tree)
295metakernel refit_tree_by_group_including_quads(
296    qword squashed_inputs,
297    dword numBuilds
298)
299{
300    dispatch refit_tree_per_group(numBuilds, 1, 1) args(
301        squashed_inputs);
302}
303//
304////////////////////////////////////////////////////////////////
305
306
307////////////////////////////////////////////////////////////////
308//
309// phase 2 bottom up refit internal nodes
310//
311metakernel refit_treelet_per_group(
312    qword bigSquashInput,
313    qword ptrNumTreelets)
314{
315    DISPATCHDIM_X = load_dword(ptrNumTreelets);
316
317    dispatch_indirect refit_treelet_per_group args(
318        bigSquashInput);
319}
320//
321////////////////////////////////////////////////////////////////
322
323#endif
324*/
325