//
// Copyright (C) 2009-2021 Intel Corporation
//
// SPDX-License-Identifier: MIT
//
//

module morton_builder;

// Kernel declarations, grouped by the OpenCL source file that provides each
// entry point. The identifier after `kernel` is the name used by the
// metakernels below; `kernelFunction` is the function name inside the .cl file.

kernel_module morton_kernels ("morton/pre_sort.cl")
{
    kernel opencl_build_kernel_init                                  < kernelFunction="init" >;
    kernel opencl_build_morton_kernel_create_morton_codes_indirect   < kernelFunction="create_morton_codes_indirect" >;
    kernel opencl_build_morton_kernel_init_bottom_up_indirect        < kernelFunction="init_bottom_up_indirect" >;
}

kernel_module morton_kernels ("morton/post_sort.cl")
{
    links lsc_intrinsics;

    kernel opencl_build_morton_kernel_build_bottom_up_indirect       < kernelFunction="build_bottom_up_indirect" >;
}

kernel_module morton_kernels ("morton/phase0.cl")
{
    links lsc_intrinsics;

    kernel opencl_build_morton_kernel_parallel_build_phase0              < kernelFunction="parallel_build_phase0" >;
    kernel opencl_build_morton_kernel_parallel_build_phase0_local_sync   < kernelFunction="parallel_build_phase0_local_sync" >;
}

kernel_module morton_kernels ("morton/phase1.cl")
{
    links lsc_intrinsics;

    kernel opencl_build_morton_kernel_parallel_build_phase1_Indirect     < kernelFunction="parallel_build_phase1_Indirect_SG" >;
    kernel opencl_build_morton_kernel_parallel_build_phase1_root         < kernelFunction="parallel_build_phase1_Indirect_global_root" >;
}

kernel_module morton_kernels ("morton/phase2.cl")
{
    links lsc_intrinsics;

    kernel opencl_build_morton_kernel_parallel_build_phase2_refit        < kernelFunction="parallel_build_phase2_refit" >;
    kernel opencl_build_morton_kernel_parallel_build_phase2_refit_local  < kernelFunction="parallel_build_phase2_refit_local" >;
}

import struct MKBuilderState "structs.grl";

// Disabled metakernels, kept for reference only. They name kernels that are
// not declared in the kernel_module blocks above (for example
// opencl_build_morton_kernel_create_morton_codes and
// opencl_build_morton_kernel_parallel_build_phase1), so they cannot be
// re-enabled as-is.
/*
metakernel begin(
    MKBuilderState state,
    qword morton_code_buffer,
    dword primLeafType,
    dword numHwThreads)
{
    dispatch opencl_build_kernel_init(1, 1, 1) args(
        state.build_globals
        );

    control(wait_idle);


    dispatch opencl_build_morton_kernel_create_morton_codes(numHwThreads, 1, 1) args(
        state.build_globals,
        state.bvh_buffer,
        state.build_primref_buffer,
        morton_code_buffer);

    control(wait_idle);

}

metakernel build_bottom_up(
    MKBuilderState state,
    qword buildrecords_bottom_up,
    qword morton_code_buffer,
    dword numHwThreads)
{
    dispatch opencl_build_morton_kernel_init_bottom_up(numHwThreads, 1, 1) args(
        state.build_globals,
        buildrecords_bottom_up);

    control(wait_idle);

    dispatch opencl_build_morton_kernel_build_bottom_up(numHwThreads, 1, 1) args(
        state.build_globals,
        buildrecords_bottom_up,
        morton_code_buffer);

    control(wait_idle);

}


metakernel parallel_build(
    MKBuilderState state,
    qword buildrecords_bottom_up,
    qword morton_code_buffer,
    dword numHwThreads)
{
    dispatch opencl_build_morton_kernel_parallel_build_phase0(1, 1, 1) args(
        state.build_globals,
        buildrecords_bottom_up,
        state.bvh_buffer);

    control(wait_idle);

    dispatch opencl_build_morton_kernel_parallel_build_phase1(numHwThreads, 1, 1) args(
        state.build_globals,
        morton_code_buffer,
        state.build_primref_buffer,
        buildrecords_bottom_up,
        state.bvh_buffer);

    control(wait_idle);

}

*/

// NewMorton_pre_sort
//
// Runs the `init` kernel as a single (1, 1, 1) dispatch, then launches
// create_morton_codes_indirect with an X dimension of
// ceil(load_dword(num_primrefs_counter) / 16) and Y = Z = 1.
//
// NOTE(review): `buildrecords_bottom_up` is only referenced by the
// commented-out init_bottom_up dispatch below; it is otherwise unused here.
metakernel NewMorton_pre_sort(
    qword num_primrefs_counter,
    MKBuilderState state,
    qword morton_code_buffer,
    qword morton_code_buffer_tmp,
    qword buildrecords_bottom_up,
    dword use_new_morton_sort)
{
    // REG0 = ((count + 15) & ~15) >> 4, i.e. count divided by 16, rounded up.
    {
        REG1 = 15;
        REG2 = 4;
        REG0 = load_dword( num_primrefs_counter );

        REG0 = REG0 + REG1;           // JDB TODO: TGL will need to do this computation in the EU and store it in globals
        REG1 = ~REG1;
        REG0 = REG0 & REG1;
        REG0 = REG0 >> REG2;
    }

    dispatch opencl_build_kernel_init(1, 1, 1) args( state.build_globals );

    DISPATCHDIM_X = REG0.lo;
    DISPATCHDIM_Y = 1;
    DISPATCHDIM_Z = 1;

    /*
    // new bottom-up kernel does not need this
    dispatch_indirect opencl_build_morton_kernel_init_bottom_up_indirect args(
        state.build_globals,
        buildrecords_bottom_up);
    */
    dispatch_indirect opencl_build_morton_kernel_create_morton_codes_indirect args(
        state.build_globals,
        state.bvh_buffer,
        state.build_primref_buffer,
        morton_code_buffer,
        morton_code_buffer_tmp,
        use_new_morton_sort);
}

// NewMorton_post_sort
//
// Issues three dispatches in sequence, each followed by control(wait_idle):
//   1. build_bottom_up_indirect, X = ceil(load_dword(num_primrefs_counter) / 16)
//   2. parallel_build_phase0 as a single (1, 1, 1) dispatch
//   3. parallel_build_phase1_Indirect, X = load_dword(num_buildrecords_counter)
metakernel NewMorton_post_sort(
    qword num_primrefs_counter,
    qword num_buildrecords_counter,
    MKBuilderState state,
    qword buildrecords_bottom_up,
    qword morton_code_buffer )
{
    // REG0 = ((count + 15) & ~15) >> 4, i.e. count divided by 16, rounded up.
    {
        REG1 = 15;
        REG2 = 4;
        REG0 = load_dword( num_primrefs_counter );

        REG0 = REG0 + REG1;           // JDB TODO: TGL will need to do this computation in the EU and store it in globals
        REG1 = ~REG1;
        REG0 = REG0 & REG1;
        REG0 = REG0 >> REG2;
    }

    DISPATCHDIM_X = REG0.lo;
    DISPATCHDIM_Y = 1;
    DISPATCHDIM_Z = 1;

    dispatch_indirect opencl_build_morton_kernel_build_bottom_up_indirect args(
        state.build_globals,
        buildrecords_bottom_up,
        morton_code_buffer);


    /*
    dispatch opencl_build_morton_kernel_build_bottom_up(16, 1, 1) args(
        state.build_globals,
        buildrecords_bottom_up,
        morton_code_buffer);
    */

    control(wait_idle);

    // NOTE(review): NewMorton_phase0 passes a fourth argument
    // (morton_p0_refit_startpoints) to this same kernel; only three are
    // passed here. Confirm against the kernel's signature in morton/phase0.cl.
    dispatch opencl_build_morton_kernel_parallel_build_phase0(1, 1, 1) args(
        state.build_globals,
        buildrecords_bottom_up,
        state.bvh_buffer);

    control(wait_idle);

    // Only X is rewritten for this dispatch; Y and Z were assigned 1 earlier
    // in this metakernel.
    DISPATCHDIM_X = load_dword( num_buildrecords_counter );

    dispatch_indirect opencl_build_morton_kernel_parallel_build_phase1_Indirect args(
        state.build_globals,
        morton_code_buffer,
        state.build_primref_buffer,
        buildrecords_bottom_up,
        state.bvh_buffer);

    control(wait_idle);
}

// NewMorton_bottom_up
//
// Launches build_bottom_up_indirect with
// X = ceil(load_dword(num_primrefs_counter) / 16) and Y = Z = 1.
metakernel NewMorton_bottom_up(
    qword num_primrefs_counter,
    MKBuilderState state,
    qword buildrecords_bottom_up,
    qword morton_code_buffer )
{
    // REG0 = ((count + 15) & ~15) >> 4, i.e. count divided by 16, rounded up.
    {
        REG1 = 15;
        REG2 = 4;
        REG0 = load_dword( num_primrefs_counter );

        REG0 = REG0 + REG1;           // JDB TODO: TGL will need to do this computation in the EU and store it in globals
        REG1 = ~REG1;
        REG0 = REG0 & REG1;
        REG0 = REG0 >> REG2;
    }

    DISPATCHDIM_X = REG0.lo;
    DISPATCHDIM_Y = 1;
    DISPATCHDIM_Z = 1;

    dispatch_indirect opencl_build_morton_kernel_build_bottom_up_indirect args(
        state.build_globals,
        buildrecords_bottom_up,
        morton_code_buffer);
}

// NewMorton_phase0
//
// Single (1, 1, 1) dispatch of parallel_build_phase0.
metakernel NewMorton_phase0(
    MKBuilderState state,
    qword buildrecords_bottom_up,
    qword morton_p0_refit_startpoints)
{
    dispatch opencl_build_morton_kernel_parallel_build_phase0(1, 1, 1) args(
        state.build_globals,
        buildrecords_bottom_up,
        state.bvh_buffer,
        morton_p0_refit_startpoints);
}

// NewMorton_phase0_local_sync
//
// Single (1, 1, 1) dispatch of parallel_build_phase0_local_sync.
metakernel NewMorton_phase0_local_sync(
    MKBuilderState state,
    qword buildrecords_bottom_up,
    qword p0_boxless_nodes)
{
    dispatch opencl_build_morton_kernel_parallel_build_phase0_local_sync(1, 1, 1) args(
        state.build_globals,
        buildrecords_bottom_up,
        state.bvh_buffer,
        p0_boxless_nodes);
}

// NewMorton_phase1
//
// Launches parallel_build_phase1_Indirect with
// X = load_dword(num_buildrecords_counter) and Y = Z = 1.
metakernel NewMorton_phase1(
    qword num_buildrecords_counter,
    MKBuilderState state,
    qword buildrecords_bottom_up,
    qword morton_code_buffer)
{
    DISPATCHDIM_X = load_dword( num_buildrecords_counter );
    // Set Y and Z explicitly, as the other indirect dispatches in this file
    // do, so this metakernel does not rely on values written by an earlier one.
    DISPATCHDIM_Y = 1;
    DISPATCHDIM_Z = 1;

    dispatch_indirect opencl_build_morton_kernel_parallel_build_phase1_Indirect args(
        state.build_globals,
        morton_code_buffer,
        state.build_primref_buffer,
        buildrecords_bottom_up,
        state.bvh_buffer);
}

// NewMorton_phase1_root
//
// Single (1, 1, 1) dispatch of parallel_build_phase1_Indirect_global_root.
//
// NOTE(review): `num_buildrecords_counter` is not referenced in the body.
metakernel NewMorton_phase1_root(
    qword num_buildrecords_counter,
    MKBuilderState state,
    qword buildrecords_bottom_up,
    qword morton_code_buffer)
{
    dispatch opencl_build_morton_kernel_parallel_build_phase1_root(1, 1, 1) args(
        state.build_globals,
        morton_code_buffer,
        state.build_primref_buffer,
        buildrecords_bottom_up,
        state.bvh_buffer);
}

// NewMorton_phase2
//
// Launches parallel_build_phase2_refit with
// X = load_dword(num_leaves_counter) and Y = Z = 1.
metakernel NewMorton_phase2(
    qword num_leaves_counter,
    MKBuilderState state,
    qword bottom_node_ids )
{
    DISPATCHDIM_X = load_dword( num_leaves_counter );
    // Set Y and Z explicitly, as the other indirect dispatches in this file
    // do, so this metakernel does not rely on values written by an earlier one.
    DISPATCHDIM_Y = 1;
    DISPATCHDIM_Z = 1;

    dispatch_indirect opencl_build_morton_kernel_parallel_build_phase2_refit args(
        state.bvh_buffer,
        bottom_node_ids);
}

// NewMorton_phase2_local
//
// Single (1, 1, 1) dispatch of parallel_build_phase2_refit_local.
metakernel NewMorton_phase2_local(
    MKBuilderState state,
    qword p0_boxless_nodes)
{
    dispatch opencl_build_morton_kernel_parallel_build_phase2_refit_local(1, 1, 1) args(
        state.build_globals,
        state.bvh_buffer,
        p0_boxless_nodes);
}