//
// Copyright (C) 2009-2021 Intel Corporation
//
// SPDX-License-Identifier: MIT
//
//

module build_refit;

// Kernels from bvh_build_refit.cl that the metakernels in this file dispatch.
kernel_module morton_kernels ("bvh_build_refit.cl")
{
    links lsc_intrinsics;

    kernel update_instance_leaves < kernelFunction="update_instance_leaves" >;
    kernel refit_indirect_sg < kernelFunction="Refit_indirect_sg" >;
    kernel update_instance_leaves_indirect < kernelFunction="update_instance_leaves_indirect" >;
}

// Divisor used to turn an instance count into a group count below.
const INSTANCE_LEAF_GROUP_SIZE = 16;
const REFIT_GROUP_SIZE = 8;

// Dispatches update_instance_leaves with
// ceil(num_instances / INSTANCE_LEAF_GROUP_SIZE) groups in X.
// num_instances only sizes the dispatch; it is not forwarded to the kernel.
metakernel update_instance_leaves(
    qword bvh,
    qword dxrInstancesArray,
    qword dxrInstancesPtrArray,
    qword instance_leaf_aabbs,
    dword num_instances )
{
    define num_groups (num_instances + INSTANCE_LEAF_GROUP_SIZE - 1) / INSTANCE_LEAF_GROUP_SIZE;

    dispatch update_instance_leaves(num_groups, 1, 1) args(
        bvh,
        dxrInstancesArray,
        dxrInstancesPtrArray,
        instance_leaf_aabbs);
}

// Indirect variant: the count is read from memory at indirectBuildRangeInfo
// and the group count is computed in command-streamer registers:
//     DISPATCHDIM_X = (count + 15) >> 4
metakernel update_instance_leaves_indirect(
    qword bvh,
    qword dxrInstancesArray,
    qword dxrInstancesPtrArray,
    qword instance_leaf_aabbs,
    qword indirectBuildRangeInfo)
{
    define num_groups REG0;
    define groupsize_1 REG1; // groupsize - 1
    define C_4 REG2;

    // init with primitiveCount
    // Only 32 bits are loaded, but the add and shift below work on the whole
    // register. Clear the upper half explicitly (same pattern as
    // refit_indirect_sg) so that no stale high bits can be shifted into .lo,
    // in case load_dword does not zero-extend.
    num_groups.lo = load_dword(indirectBuildRangeInfo);
    num_groups.hi = 0;
    groupsize_1 = 15; // INSTANCE_LEAF_GROUP_SIZE - 1
    C_4 = 4; // log_2(INSTANCE_LEAF_GROUP_SIZE)

    num_groups = num_groups + groupsize_1;
    num_groups = num_groups >> C_4; // num_groups / INSTANCE_LEAF_GROUP_SIZE;

    DISPATCHDIM_X = num_groups.lo;
    DISPATCHDIM_Y = 1;
    DISPATCHDIM_Z = 1;

    dispatch_indirect update_instance_leaves_indirect args(
        bvh,
        dxrInstancesArray,
        dxrInstancesPtrArray,
        instance_leaf_aabbs,
        indirectBuildRangeInfo);
}

/*
metakernel refit(
    qword bvh,
    qword geomDesc,
    qword instance_aabbs,
    dword dispatchSize )
{
    define num_groups (dispatchSize + REFIT_GROUP_SIZE - 1) / REFIT_GROUP_SIZE;

    dispatch
refit(num_groups, 1, 1) args(
        bvh,
        geomDesc,
        instance_aabbs);
}

const REFIT_SIMD_SIZE = 8;
const REFIT_SIMD_SIZE_SHIFT = 3;

metakernel refit_indirect(
    qword bvh,
    qword bvh_inner_nodes_start_value,
    qword bvh_inner_nodes_end,
    qword geomDesc,
    qword instance_aabbs )
{
    define cRoundingSIMD REG4;
    define TWO REG3;
    define ONE REG5;
    cRoundingSIMD = (REFIT_SIMD_SIZE - 1);

    TWO = 2;
    ONE = 1;

    REG0 = bvh_inner_nodes_start_value;
    REG1 = load_dword(bvh_inner_nodes_end);
    REG1.hi = 0;
    REG2 = REG1 - REG0;
    REG2 = REG2 + cRoundingSIMD;
    REG2 = REG2 >> TWO; // JDB: >>3 must be implemented as >>2 then >>1 because command streamer
    REG2 = REG2 >> ONE; // only supports pow2 shifts because somebody wanted to save area.

    DISPATCHDIM_X = REG2.lo;
    DISPATCHDIM_Y = 1;
    DISPATCHDIM_Z = 1;

    dispatch_indirect refit_indirect args(
        bvh,
        geomDesc,
        instance_aabbs);

}
*/

// Indirect dispatch of the refit_indirect_sg kernel.
//
// The X dimension is the low dword of
//     (dword stored at bvh_inner_nodes_end) - bvh_inner_nodes_start_value
// with no rounding or division applied. bvh_inner_nodes_start_value is used
// as an immediate value, while bvh_inner_nodes_end is an address that is
// dereferenced. Neither of the two is forwarded to the kernel.
metakernel refit_indirect_sg(
    qword bvh,
    qword bvh_inner_nodes_start_value,
    qword bvh_inner_nodes_end,
    qword geomDesc,
    qword instance_aabbs )
{
    define range_begin REG0;
    define range_end REG1;
    define range_size REG2;

    range_begin = bvh_inner_nodes_start_value;

    // Zero-extend the 32-bit end value before the 64-bit subtraction.
    range_end.lo = load_dword(bvh_inner_nodes_end);
    range_end.hi = 0;

    range_size = range_end - range_begin;

    DISPATCHDIM_X = range_size.lo;
    DISPATCHDIM_Y = 1;
    DISPATCHDIM_Z = 1;

    dispatch_indirect refit_indirect_sg args(
        bvh,
        geomDesc,
        instance_aabbs);
}
/*
////////////////////////////////////////////////////////////////
// constructing treelets
// phase 1: mark nodes that will be roots of bottom treelets
// also for each node leave a number of startpoints that are under it and max depth of the path from the node
metakernel find_refit_treelets(
    qword bvh,
    qword treelet_node_data,
    qword scratch_startpoints,
    qword startpointAlloc,
    qword bvh_inner_nodes_start_value,
    qword bvh_inner_nodes_end )
{
    define cRoundingSIMD REG4;
define TWO REG3; 162 define ONE REG5; 163 cRoundingSIMD = (REFIT_SIMD_SIZE - 1); 164 165 TWO = 2; 166 ONE = 1; 167 168 REG0 = bvh_inner_nodes_start_value; 169 REG1.lo = load_dword(bvh_inner_nodes_end); 170 REG1.hi = 0; 171 REG2 = REG1 - REG0; 172 REG2 = REG2 + cRoundingSIMD; 173 REG2 = REG2 >> TWO; // JDB: >>3 must be implemented as >>2 then >>1 because command streamer 174 REG2 = REG2 >> ONE; // only supports pow2 shifts because somebody wanted to save area. 175 176 DISPATCHDIM_X = REG2.lo; 177 DISPATCHDIM_Y = 1; 178 DISPATCHDIM_Z = 1; 179 180 dispatch_indirect find_refit_treelets args( 181 bvh, 182 treelet_node_data, 183 scratch_startpoints, 184 startpointAlloc); 185} 186 187 188//////////////////////////////////////////////////////////////// 189// constructing treelets 190// phase 2 totally parallel, run threads up to assign startpoints to given treelet 191// 192metakernel assign_refit_startpoints_to_treelets( 193 qword bvh, 194 qword treelet_node_data, 195 qword scratch_startpoints, 196 qword bvh_inner_nodes_start_value, 197 qword bvh_inner_nodes_end ) 198{ 199 define cRoundingSIMD REG4; 200 define TWO REG3; 201 define ONE REG5; 202 cRoundingSIMD = (REFIT_SIMD_SIZE - 1); 203 204 TWO = 2; 205 ONE = 1; 206 207 REG0 = bvh_inner_nodes_start_value; 208 REG1.lo = load_dword(bvh_inner_nodes_end); 209 REG1.hi = 0; 210 REG2 = REG1 - REG0; 211 REG2 = REG2 + cRoundingSIMD; 212 REG2 = REG2 >> TWO; // JDB: >>3 must be implemented as >>2 then >>1 because command streamer 213 REG2 = REG2 >> ONE; // only supports pow2 shifts because somebody wanted to save area. 
    DISPATCHDIM_X = REG2.lo;
    DISPATCHDIM_Y = 1;
    DISPATCHDIM_Z = 1;

    dispatch_indirect assign_refit_startpoints_to_treelets args(
        bvh,
        treelet_node_data,
        scratch_startpoints);
}


////////////////////////////////////////////////////////////////
// constructing treelets
// phase 3 local work: group per treelet, sort the startpoints in treelets
// by length of the path
metakernel finalize_treelets_in_groups(
    qword bvh,
    qword scratch_startpoints,
    qword ptrNumTreelets )
{
    REG0 = load_qword(ptrNumTreelets);

    DISPATCHDIM_X = REG0.lo;
    DISPATCHDIM_Y = 1;
    DISPATCHDIM_Z = 1;

    dispatch_indirect finalize_treelets_in_groups args(
        bvh,
        scratch_startpoints);
}


////////////////////////////////////////////////////////////////
// Updating treelets
// phase 1 update vertex and generate boxes for vertices
//

const PER_GROUP_ELEMENTS_ROUNDING = 15;
const PER_GROUP_ELEMENTS_SHIFT = 4;

metakernel init_treelets_refit(qword pSquashGroupsCountToReset)
{
    REG1 = 0;
    store_qword(pSquashGroupsCountToReset, REG1);
    DISPATCHDIM_Y = 1;
    DISPATCHDIM_Z = 1;
    //REG4 = PER_GROUP_ELEMENTS_SHIFT;
    //REG5.hi = PER_GROUP_ELEMENTS_ROUNDING;
    //REG5.lo = 0;
}

metakernel update_quads(
    qword scratch_box,
    qword bvh,
    qword input,
    dword numPrimsDividedBy32,
    qword bigSquashInput)
{
    //REG0 = load_qword(quads_nodes_begin_end_pair);
    //REG1.hi = REG0.lo; // this holds inner nodes begin
    //REG2 = REG0 - REG1;
    //REG2 = REG2 + REG5;
    //REG2 = REG2 >> REG4;
    //DISPATCHDIM_X = REG2.hi;

    dispatch refit_quads(numPrimsDividedBy32, 1, 1) args(
        bvh,
        input,
        scratch_box,
        numPrimsDividedBy32,
        bigSquashInput );
}

//
////////////////////////////////////////////////////////////////


////////////////////////////////////////////////////////////////
//
// phase 1
or 2 - update primitives as well as bottom up refit internal nodes 294// in single dispatch (in single group per tree) 295metakernel refit_tree_by_group_including_quads( 296 qword squashed_inputs, 297 dword numBuilds 298) 299{ 300 dispatch refit_tree_per_group(numBuilds, 1, 1) args( 301 squashed_inputs); 302} 303// 304//////////////////////////////////////////////////////////////// 305 306 307//////////////////////////////////////////////////////////////// 308// 309// phase 2 bottom up refit internal nodes 310// 311metakernel refit_treelet_per_group( 312 qword bigSquashInput, 313 qword ptrNumTreelets) 314{ 315 DISPATCHDIM_X = load_dword(ptrNumTreelets); 316 317 dispatch_indirect refit_treelet_per_group args( 318 bigSquashInput); 319} 320// 321//////////////////////////////////////////////////////////////// 322 323#endif 324*/ 325