xref: /aosp_15_r20/external/mesa3d/src/intel/vulkan/grl/gpu/radix_sort.grl (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1//
2// Copyright (C) 2009-2021 Intel Corporation
3//
4// SPDX-License-Identifier: MIT
5//
6//
7
8module radix_sort;
9
// Kernel bindings for the Morton-code radix sort.
// Each entry binds a GRL kernel name to the entry point named by
// kernelFunction in morton_radix_sort.cl.
kernel_module radix_kernels ("morton_radix_sort.cl")
{
    links lsc_intrinsics;

    // Bin / reduce / scatter kernels.
    kernel opencl_build_morton_kernel_sort_bin_items < kernelFunction="sort_morton_codes_bin_items">;
    kernel opencl_build_morton_kernel_sort_reduce_bins < kernelFunction="sort_morton_codes_reduce_bins">;
    kernel opencl_build_morton_kernel_sort_scatter_items < kernelFunction="sort_morton_codes_scatter_items">;

    // Merged sort kernel.
    kernel opencl_build_morton_codes_sort_merged < kernelFunction="sort_morton_codes_merged">;

    // Wide bin reduction, split into a partial-sum kernel and an add-reduce kernel.
    kernel opencl_build_morton_kernel_sort_reduce_bins_wide_partial_sum < kernelFunction="sort_morton_codes_reduce_bins_wide_partial_sum">;
    kernel opencl_build_morton_kernel_sort_reduce_bins_wide_add_reduce < kernelFunction="sort_morton_codes_reduce_bins_wide_add_reduce">;
}
22
// Runs the bin, reduce and scatter kernels back to back, issuing a
// wait_idle control after each dispatch.
//
// The bin and scatter kernels are dispatched with dimensions (threads, 1, 1);
// the reduce kernel is dispatched with (1, 1, 1).
//
// NOTE(review): the argument lists used here differ from the ones the
// dedicated metakernels in this file pass to the same kernels:
//   - sort_bin_items passes 7 arguments to the bin kernel; this passes 8.
//   - sort_reduce_bins passes 4 arguments to the reduce kernel; this passes 2.
//   - sort_scatter_items passes 7 arguments to the scatter kernel; this passes 8.
// Confirm against the kernel signatures in morton_radix_sort.cl which form
// is current before relying on this metakernel.
metakernel sort(
    qword build_globals,
    dword shift,
    qword global_histogram,
    qword input0,
    qword input1,
    dword input0_offset,
    dword input1_offset,
    dword iteration,
    dword threads)
{
    // Stage 1: bin items.
    dispatch opencl_build_morton_kernel_sort_bin_items (threads, 1, 1) args(
        build_globals, shift, global_histogram,
        input0, input1,
        input0_offset, input1_offset,
        iteration);
    control(wait_idle);

    // Stage 2: reduce bins.
    dispatch opencl_build_morton_kernel_sort_reduce_bins (1, 1, 1) args(
        threads, global_histogram);
    control(wait_idle);

    // Stage 3: scatter items.
    dispatch opencl_build_morton_kernel_sort_scatter_items (threads, 1, 1) args(
        build_globals, shift, global_histogram,
        input0, input1,
        input0_offset, input1_offset,
        iteration);
    control(wait_idle);
}
65
// Dispatches the bin-items kernel on its own, with dimensions (threads, 1, 1).
// No control/barrier is issued here.
//
// 'threads' is used both as the X dispatch dimension and as a kernel argument.
metakernel sort_bin_items(
    qword build_globals,
    qword global_histogram,
    qword wg_flags,
    qword input0,
    dword iteration,
    dword threads,
    dword update_wg_flags)
{
    dispatch opencl_build_morton_kernel_sort_bin_items (threads, 1, 1) args(
        build_globals, global_histogram, wg_flags, input0,
        iteration, threads, update_wg_flags);
}
86
// Dispatches the reduce-bins kernel on its own, with fixed dimensions (1, 1, 1).
// No control/barrier is issued here.
//
// Kernel argument order is (build_globals, threads, global_histogram,
// iteration), which differs from this metakernel's parameter order.
metakernel sort_reduce_bins(
    qword build_globals,
    qword global_histogram,
    dword threads,
    dword iteration)
{
    dispatch opencl_build_morton_kernel_sort_reduce_bins (1, 1, 1) args(
        build_globals, threads, global_histogram, iteration);
}
99
// Dispatches the scatter-items kernel on its own, with dimensions
// (threads, 1, 1). No control/barrier is issued here.
//
// 'threads' is used both as the X dispatch dimension and as a kernel argument.
metakernel sort_scatter_items(
    qword build_globals,
    qword global_histogram,
    qword input0,
    qword input1,
    dword iteration,
    dword threads,
    dword update_morton_sort_in_flight)
{
    dispatch opencl_build_morton_kernel_sort_scatter_items (threads, 1, 1) args(
        build_globals, global_histogram, input0, input1,
        iteration, threads, update_morton_sort_in_flight);
}
119
// Dispatches the merged sort kernel (opencl_build_morton_codes_sort_merged)
// with dimensions (threads, 1, 1). No control/barrier is issued here.
//
// 'threads' is used both as the X dispatch dimension and as a kernel argument.
metakernel sort_bin_items_merged(
    qword build_globals,
    qword global_histogram,
    qword input0,
    dword iteration,
    dword threads)
{
    dispatch opencl_build_morton_codes_sort_merged (threads, 1, 1) args(
        build_globals, global_histogram, input0, iteration, threads);
}
135
// Wide bin reduction in two dispatches separated by a wait_idle control:
//   1. partial_sum, which additionally receives wg_flags;
//   2. add_reduce, which receives the same arguments minus wg_flags.
// Both dispatches use dimensions (threads_groups, 1, 1).
// No control is issued after the second dispatch.
metakernel sort_reduce_bins_wide(
    qword build_globals,
    qword global_histogram,
    qword global_histogram_tmp,
    qword wg_flags,
    dword threads,
    dword threads_groups,
    dword iteration)
{
    // Pass 1: partial sums.
    dispatch opencl_build_morton_kernel_sort_reduce_bins_wide_partial_sum (threads_groups, 1, 1) args(
        build_globals, threads, threads_groups,
        global_histogram, global_histogram_tmp,
        wg_flags, iteration);

    control(wait_idle);

    // Pass 2: add-reduce.
    dispatch opencl_build_morton_kernel_sort_reduce_bins_wide_add_reduce (threads_groups, 1, 1) args(
        build_globals, threads, threads_groups,
        global_histogram, global_histogram_tmp,
        iteration);
}
164