//
// Copyright (C) 2009-2021 Intel Corporation
//
// SPDX-License-Identifier: MIT
//
//

// Metakernel definitions for the morton-code radix sort.
// Each metakernel below wraps one or more dispatches of the kernels
// declared in radix_kernels.
module radix_sort;

// Maps the kernel names used by the metakernels in this file to the
// kernelFunction names in morton_radix_sort.cl.
kernel_module radix_kernels ("morton_radix_sort.cl")
{
    links lsc_intrinsics;

    kernel opencl_build_morton_kernel_sort_bin_items     < kernelFunction="sort_morton_codes_bin_items">;
    kernel opencl_build_morton_kernel_sort_reduce_bins   < kernelFunction="sort_morton_codes_reduce_bins">;
    kernel opencl_build_morton_kernel_sort_scatter_items < kernelFunction="sort_morton_codes_scatter_items">;

    kernel opencl_build_morton_codes_sort_merged         < kernelFunction="sort_morton_codes_merged">;

    kernel opencl_build_morton_kernel_sort_reduce_bins_wide_partial_sum < kernelFunction="sort_morton_codes_reduce_bins_wide_partial_sum">;
    kernel opencl_build_morton_kernel_sort_reduce_bins_wide_add_reduce  < kernelFunction="sort_morton_codes_reduce_bins_wide_add_reduce">;
}

// Runs the three sort kernels back to back: bin_items, then reduce_bins,
// then scatter_items, with a control(wait_idle) after each dispatch.
// bin_items and scatter_items are dispatched with dimensions (threads, 1, 1);
// reduce_bins is dispatched with (1, 1, 1).
//
// NOTE(review): the argument lists passed here do not match the ones the
// standalone metakernels below pass to the very same kernels
// (bin_items: 8 args here vs 7 in sort_bin_items; reduce_bins: 2 args here
// vs 4 in sort_reduce_bins; scatter_items: 8 args here vs 7 in
// sort_scatter_items). Both sets cannot match the kernel signatures, so this
// metakernel looks stale - confirm against morton_radix_sort.cl before use.
metakernel sort(
    qword build_globals,
    dword shift,
    qword global_histogram,
    qword input0,
    qword input1,
    dword input0_offset,
    dword input1_offset,
    dword iteration,
    dword threads)
{
    dispatch opencl_build_morton_kernel_sort_bin_items (threads, 1, 1) args(
        build_globals,
        shift,
        global_histogram,
        input0,
        input1,
        input0_offset,
        input1_offset,
        iteration);

    control(wait_idle);

    dispatch opencl_build_morton_kernel_sort_reduce_bins (1, 1, 1) args(
        threads,
        global_histogram);

    control(wait_idle);

    dispatch opencl_build_morton_kernel_sort_scatter_items (threads, 1, 1) args(
        build_globals,
        shift,
        global_histogram,
        input0,
        input1,
        input0_offset,
        input1_offset,
        iteration);

    control(wait_idle);

}

// Single dispatch of the bin_items kernel with dimensions (threads, 1, 1).
// 'threads' is used both as the dispatch dimension and as a kernel argument.
// No control(wait_idle) is issued by this metakernel.
metakernel sort_bin_items(
    qword build_globals,
    qword global_histogram,
    qword wg_flags,
    qword input0,
    dword iteration,
    dword threads,
    dword update_wg_flags
    )
{
    dispatch opencl_build_morton_kernel_sort_bin_items (threads, 1, 1) args(
        build_globals,
        global_histogram,
        wg_flags,
        input0,
        iteration,
        threads,
        update_wg_flags
        );
}

// Single dispatch of the reduce_bins kernel with dimensions (1, 1, 1).
// 'threads' is forwarded as a kernel argument only; it does not size the
// dispatch.
metakernel sort_reduce_bins(
    qword build_globals,
    qword global_histogram,
    dword threads,
    dword iteration)
{
    dispatch opencl_build_morton_kernel_sort_reduce_bins (1, 1, 1) args(
        build_globals,
        threads,
        global_histogram,
        iteration);
}

// Single dispatch of the scatter_items kernel with dimensions (threads, 1, 1).
// 'threads' is used both as the dispatch dimension and as a kernel argument.
metakernel sort_scatter_items(
    qword build_globals,
    qword global_histogram,
    qword input0,
    qword input1,
    dword iteration,
    dword threads,
    dword update_morton_sort_in_flight )
{
    dispatch opencl_build_morton_kernel_sort_scatter_items (threads, 1, 1) args(
        build_globals,
        global_histogram,
        input0,
        input1,
        iteration,
        threads,
        update_morton_sort_in_flight
        );
}

// Single dispatch of the merged sort kernel with dimensions (threads, 1, 1).
// Unlike sort_bin_items, no wg_flags / update_wg_flags arguments are passed.
metakernel sort_bin_items_merged(
    qword build_globals,
    qword global_histogram,
    qword input0,
    dword iteration,
    dword threads)
{
    dispatch opencl_build_morton_codes_sort_merged (threads, 1, 1) args(
        build_globals,
        global_histogram,
        input0,
        iteration,
        threads
        );
}

// Two-dispatch variant of the bin reduction: partial_sum followed by
// add_reduce, both with dimensions (threads_groups, 1, 1), separated by a
// control(wait_idle). Both kernels receive global_histogram and
// global_histogram_tmp; wg_flags is passed to partial_sum only.
metakernel sort_reduce_bins_wide(
    qword build_globals,
    qword global_histogram,
    qword global_histogram_tmp,
    qword wg_flags,
    dword threads,
    dword threads_groups,
    dword iteration)
{
    dispatch opencl_build_morton_kernel_sort_reduce_bins_wide_partial_sum (threads_groups, 1, 1) args(
        build_globals,
        threads,
        threads_groups,
        global_histogram,
        global_histogram_tmp,
        wg_flags,
        iteration);

    control(wait_idle);

    dispatch opencl_build_morton_kernel_sort_reduce_bins_wide_add_reduce (threads_groups, 1, 1) args(
        build_globals,
        threads,
        threads_groups,
        global_histogram,
        global_histogram_tmp,
        iteration);
}