xref: /aosp_15_r20/external/mesa3d/src/intel/vulkan/grl/gpu/morton_builder.grl (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1//
2// Copyright (C) 2009-2021 Intel Corporation
3//
4// SPDX-License-Identifier: MIT
5//
6//
7
8module morton_builder;
9
// Kernels compiled from morton/pre_sort.cl.
// Each entry binds a GRL kernel name to the OpenCL entry point named by
// its kernelFunction attribute.
kernel_module morton_kernels ("morton/pre_sort.cl")
{
    kernel opencl_build_kernel_init
        < kernelFunction="init" >;
    kernel opencl_build_morton_kernel_create_morton_codes_indirect
        < kernelFunction="create_morton_codes_indirect" >;
    kernel opencl_build_morton_kernel_init_bottom_up_indirect
        < kernelFunction="init_bottom_up_indirect" >;
}
16
// Kernels compiled from morton/post_sort.cl; linked against lsc_intrinsics.
kernel_module morton_kernels ("morton/post_sort.cl")
{
    links lsc_intrinsics;

    kernel opencl_build_morton_kernel_build_bottom_up_indirect
        < kernelFunction="build_bottom_up_indirect" >;
}
23
// Kernels compiled from morton/phase0.cl; linked against lsc_intrinsics.
kernel_module morton_kernels ("morton/phase0.cl")
{
    links lsc_intrinsics;

    kernel opencl_build_morton_kernel_parallel_build_phase0
        < kernelFunction="parallel_build_phase0" >;
    kernel opencl_build_morton_kernel_parallel_build_phase0_local_sync
        < kernelFunction="parallel_build_phase0_local_sync" >;
}
31
// Kernels compiled from morton/phase1.cl; linked against lsc_intrinsics.
// Note that the GRL kernel names differ from the OpenCL entry points
// (e.g. "..._phase1_Indirect" maps to "parallel_build_phase1_Indirect_SG").
kernel_module morton_kernels ("morton/phase1.cl")
{
    links lsc_intrinsics;

    kernel opencl_build_morton_kernel_parallel_build_phase1_Indirect
        < kernelFunction="parallel_build_phase1_Indirect_SG" >;
    kernel opencl_build_morton_kernel_parallel_build_phase1_root
        < kernelFunction="parallel_build_phase1_Indirect_global_root" >;
}
39
// Kernels compiled from morton/phase2.cl; linked against lsc_intrinsics.
kernel_module morton_kernels ("morton/phase2.cl")
{
    links lsc_intrinsics;

    kernel opencl_build_morton_kernel_parallel_build_phase2_refit
        < kernelFunction="parallel_build_phase2_refit" >;
    kernel opencl_build_morton_kernel_parallel_build_phase2_refit_local
        < kernelFunction="parallel_build_phase2_refit_local" >;
}
47
48import struct MKBuilderState "structs.grl";
49
50/*
51metakernel begin(
52    MKBuilderState state,
53    qword morton_code_buffer,
54    dword primLeafType,
55    dword numHwThreads)
56{
57    dispatch opencl_build_kernel_init(1, 1, 1) args(
58        state.build_globals
59        );
60
61    control(wait_idle);
62
63
64    dispatch opencl_build_morton_kernel_create_morton_codes(numHwThreads, 1, 1) args(
65        state.build_globals,
66        state.bvh_buffer,
67        state.build_primref_buffer,
68        morton_code_buffer);
69
70    control(wait_idle);
71
72}
73
74metakernel build_bottom_up(
75    MKBuilderState state,
76    qword buildrecords_bottom_up,
77    qword morton_code_buffer,
78    dword numHwThreads)
79{
80    dispatch opencl_build_morton_kernel_init_bottom_up(numHwThreads, 1, 1) args(
81        state.build_globals,
82        buildrecords_bottom_up);
83
84    control(wait_idle);
85
86    dispatch opencl_build_morton_kernel_build_bottom_up(numHwThreads, 1, 1) args(
87        state.build_globals,
88        buildrecords_bottom_up,
89        morton_code_buffer);
90
91    control(wait_idle);
92
93}
94
95
96metakernel parallel_build(
97    MKBuilderState state,
98    qword buildrecords_bottom_up,
99    qword morton_code_buffer,
100    dword numHwThreads)
101{
102    dispatch opencl_build_morton_kernel_parallel_build_phase0(1, 1, 1) args(
103        state.build_globals,
104        buildrecords_bottom_up,
105        state.bvh_buffer);
106
107    control(wait_idle);
108
109    dispatch opencl_build_morton_kernel_parallel_build_phase1(numHwThreads, 1, 1) args(
110        state.build_globals,
111        morton_code_buffer,
112        state.build_primref_buffer,
113        buildrecords_bottom_up,
114        state.bvh_buffer);
115
116   control(wait_idle);
117
118}
119
120*/
121
// Pre-sort stage of the Morton builder.
//
// Runs the "init" kernel on state.build_globals, then indirectly dispatches
// the Morton-code creation kernel with an X dimension of
// ceil(*num_primrefs_counter / 16).
//
// buildrecords_bottom_up is only referenced by the disabled dispatch below;
// it stays in the signature so existing callers keep working.
metakernel NewMorton_pre_sort(
    qword num_primrefs_counter,
    MKBuilderState state,
    qword morton_code_buffer,
    qword morton_code_buffer_tmp,
    qword buildrecords_bottom_up,
    dword use_new_morton_sort)
{
    // REG0 = ((count + 15) & ~15) >> 4, i.e. count rounded up to a multiple
    // of 16 and then divided by 16.
    {
        REG0 = load_dword( num_primrefs_counter );
        REG1 = 15;
        REG2 = 4;

        // JDB TODO:  TGL will need to do this computation in the EU and store it in globals
        REG0 = REG0 + REG1;
        REG1 = ~REG1;
        REG0 = REG0 & REG1;
        REG0 = REG0 >> REG2;
    }

    dispatch opencl_build_kernel_init(1, 1, 1) args(
        state.build_globals );

    // Indirect dispatch dimensions: (ceil(count / 16), 1, 1).
    DISPATCHDIM_X = REG0.lo;
    DISPATCHDIM_Y = 1;
    DISPATCHDIM_Z = 1;

    /*
    // Disabled: the new bottom-up kernel does not need this.
    dispatch_indirect opencl_build_morton_kernel_init_bottom_up_indirect args(
        state.build_globals,
        buildrecords_bottom_up);
    */

    dispatch_indirect opencl_build_morton_kernel_create_morton_codes_indirect args(
        state.build_globals,
        state.bvh_buffer,
        state.build_primref_buffer,
        morton_code_buffer,
        morton_code_buffer_tmp,
        use_new_morton_sort);
}
165
166
167
// Post-sort stage of the Morton builder, run as one metakernel:
//   1. indirect bottom-up build, X = ceil(*num_primrefs_counter / 16)
//   2. phase 0 (single group)
//   3. indirect phase 1, X = *num_buildrecords_counter
// A wait_idle separates the stages and follows the last one.
//
// NOTE(review): the phase 0 dispatch here passes three arguments, while
// NewMorton_phase0 passes a fourth (morton_p0_refit_startpoints) to the same
// kernel. Confirm against the kernel signature in morton/phase0.cl whether
// this metakernel is still usable.
metakernel NewMorton_post_sort(
    qword num_primrefs_counter,
    qword num_buildrecords_counter,
    MKBuilderState state,
    qword buildrecords_bottom_up,
    qword morton_code_buffer )
{
    // REG0 = ((count + 15) & ~15) >> 4, i.e. count rounded up to a multiple
    // of 16 and then divided by 16.
    {
        REG0 = load_dword( num_primrefs_counter );
        REG1 = 15;
        REG2 = 4;

        // JDB TODO:  TGL will need to do this computation in the EU and store it in globals
        REG0 = REG0 + REG1;
        REG1 = ~REG1;
        REG0 = REG0 & REG1;
        REG0 = REG0 >> REG2;
    }

    // Indirect dispatch dimensions: (ceil(count / 16), 1, 1).
    DISPATCHDIM_X = REG0.lo;
    DISPATCHDIM_Y = 1;
    DISPATCHDIM_Z = 1;

    dispatch_indirect opencl_build_morton_kernel_build_bottom_up_indirect args(
        state.build_globals,
        buildrecords_bottom_up,
        morton_code_buffer);

    /*
    // Disabled fixed-size variant of the bottom-up dispatch:
    dispatch opencl_build_morton_kernel_build_bottom_up(16, 1, 1) args(
        state.build_globals,
        buildrecords_bottom_up,
        morton_code_buffer);
    */

    control(wait_idle);

    dispatch opencl_build_morton_kernel_parallel_build_phase0(1, 1, 1) args(
        state.build_globals,
        buildrecords_bottom_up,
        state.bvh_buffer);

    control(wait_idle);

    // Only X is rewritten for phase 1; Y and Z keep the value 1 set above.
    DISPATCHDIM_X = load_dword( num_buildrecords_counter );

    dispatch_indirect opencl_build_morton_kernel_parallel_build_phase1_Indirect args(
        state.build_globals,
        morton_code_buffer,
        state.build_primref_buffer,
        buildrecords_bottom_up,
        state.bvh_buffer);

    control(wait_idle);
}
225
// Indirectly dispatches the bottom-up build kernel with dimensions
// (ceil(*num_primrefs_counter / 16), 1, 1).
metakernel NewMorton_bottom_up(
    qword num_primrefs_counter,
    MKBuilderState state,
    qword buildrecords_bottom_up,
    qword morton_code_buffer )
{
    // REG0 = ((count + 15) & ~15) >> 4, i.e. count rounded up to a multiple
    // of 16 and then divided by 16.
    {
        REG0 = load_dword( num_primrefs_counter );
        REG1 = 15;
        REG2 = 4;

        // JDB TODO:  TGL will need to do this computation in the EU and store it in globals
        REG0 = REG0 + REG1;
        REG1 = ~REG1;
        REG0 = REG0 & REG1;
        REG0 = REG0 >> REG2;
    }

    DISPATCHDIM_X = REG0.lo;
    DISPATCHDIM_Y = 1;
    DISPATCHDIM_Z = 1;

    dispatch_indirect opencl_build_morton_kernel_build_bottom_up_indirect args(
        state.build_globals,
        buildrecords_bottom_up,
        morton_code_buffer);
}
253
254
// Dispatches the phase 0 kernel as a single (1, 1, 1) group, passing the
// build globals, the bottom-up build records, the BVH buffer and
// morton_p0_refit_startpoints.
metakernel NewMorton_phase0(
    MKBuilderState state,
    qword buildrecords_bottom_up,
    qword morton_p0_refit_startpoints)
{
    dispatch opencl_build_morton_kernel_parallel_build_phase0(1, 1, 1) args(
        state.build_globals,
        buildrecords_bottom_up,
        state.bvh_buffer,
        morton_p0_refit_startpoints);
}
267
// Dispatches the "local sync" variant of the phase 0 kernel as a single
// (1, 1, 1) group, passing the build globals, the bottom-up build records,
// the BVH buffer and p0_boxless_nodes.
metakernel NewMorton_phase0_local_sync(
    MKBuilderState state,
    qword buildrecords_bottom_up,
    qword p0_boxless_nodes)
{
    dispatch opencl_build_morton_kernel_parallel_build_phase0_local_sync(1, 1, 1) args(
        state.build_globals,
        buildrecords_bottom_up,
        state.bvh_buffer,
        p0_boxless_nodes);
}
280
281
// Indirectly dispatches the phase 1 kernel with dimensions
// (*num_buildrecords_counter, 1, 1).
//
// Y and Z are written explicitly, as the other indirect dispatches in this
// file do, so the launch does not depend on whatever value a previously
// executed metakernel left in those dispatch-dimension registers.
metakernel NewMorton_phase1(
    qword num_buildrecords_counter,
    MKBuilderState state,
    qword buildrecords_bottom_up,
    qword morton_code_buffer)
{

    DISPATCHDIM_X = load_dword( num_buildrecords_counter );
    DISPATCHDIM_Y = 1;
    DISPATCHDIM_Z = 1;

    dispatch_indirect opencl_build_morton_kernel_parallel_build_phase1_Indirect args(
        state.build_globals,
        morton_code_buffer,
        state.build_primref_buffer,
        buildrecords_bottom_up,
        state.bvh_buffer);
}
298
// Dispatches the phase 1 "root" kernel as a single (1, 1, 1) group.
//
// num_buildrecords_counter is not read by this metakernel; it is kept in
// the signature so existing callers keep working.
metakernel NewMorton_phase1_root(
    qword num_buildrecords_counter,
    MKBuilderState state,
    qword buildrecords_bottom_up,
    qword morton_code_buffer)
{
    dispatch opencl_build_morton_kernel_parallel_build_phase1_root(1, 1, 1) args(
        state.build_globals,
        morton_code_buffer,
        state.build_primref_buffer,
        buildrecords_bottom_up,
        state.bvh_buffer);
}
312
// Indirectly dispatches the phase 2 refit kernel with dimensions
// (*num_leaves_counter, 1, 1).
//
// Y and Z are written explicitly, as the other indirect dispatches in this
// file do, so the launch does not depend on whatever value a previously
// executed metakernel left in those dispatch-dimension registers.
metakernel NewMorton_phase2(
    qword num_leaves_counter,
    MKBuilderState state,
    qword bottom_node_ids )
{

    DISPATCHDIM_X = load_dword( num_leaves_counter );
    DISPATCHDIM_Y = 1;
    DISPATCHDIM_Z = 1;

    dispatch_indirect opencl_build_morton_kernel_parallel_build_phase2_refit args(
        state.bvh_buffer,
        bottom_node_ids);
}
325
// Dispatches the "local" variant of the phase 2 refit kernel as a single
// (1, 1, 1) group, passing the build globals, the BVH buffer and
// p0_boxless_nodes.
metakernel NewMorton_phase2_local(
    MKBuilderState state,
    qword p0_boxless_nodes)
{
    dispatch opencl_build_morton_kernel_parallel_build_phase2_refit_local(1, 1, 1) args(
        state.build_globals,
        state.bvh_buffer,
        p0_boxless_nodes);
}
336