//
// Copyright (C) 2009-2021 Intel Corporation
//
// SPDX-License-Identifier: MIT
//
//

module atomic_update;

// Kernels implemented in atomic_update.cl that the metakernels below launch.
kernel_module atomic_update ("atomic_update.cl")
{
    links lsc_intrinsics;

    kernel init_refit_scratch                   < kernelFunction = "init_refit_scratch" >;
    kernel traverse_aabbs_quad                  < kernelFunction = "traverse_aabbs_quad" >;
    kernel write_inner_nodes                    < kernelFunction = "write_inner_nodes" >;
    kernel build_fatleaf_table                  < kernelFunction = "build_fatleaf_table" >;
    kernel build_innernode_table                < kernelFunction = "build_innernode_table" >;

    kernel update_single_group_quads            < kernelFunction = "update_single_group_quads" >;

    kernel build_fatleaf_table_new_update       < kernelFunction = "build_fatleaf_table_new_update" >;
    kernel fixup_quad_table                     < kernelFunction = "fixup_quad_table" >;
    kernel traverse_aabbs_new_update            < kernelFunction = "traverse_aabbs_new_update" >;
    kernel traverse_aabbs_new_update_single_geo < kernelFunction = "traverse_aabbs_new_update_single_geo" >;
}

import struct MKBuilderState "structs.grl";

// Register setup only (no dispatch): preloads the constants and the Y/Z
// dispatch dimensions that "init_refit_scratch" consumes, so that a batching
// loop can issue "init_refit_scratch" repeatedly without reloading them.
metakernel init_refit_scratch_metakernel_registers()
{
    DISPATCHDIM_Y = 1;
    DISPATCHDIM_Z = 1;

    REG0.hi = 0;    // upper half of the 64-bit count register stays zero
    REG1    = 3;    // fixed offset subtracted from the loaded value
    REG2    = 63;   // round-up term for the divide by 64
    REG3    = 4;    // first shift amount
    REG4    = 2;    // second shift amount
}

// Indirect dispatch of "init_refit_scratch" with
//   DISPATCHDIM_X = ( dword[bvh_base + 12] - 3 + 63 ) / 64.
// REG0.hi, REG1..REG4 and DISPATCHDIM_Y/Z are not written here; they are read
// as loaded by init_refit_scratch_metakernel_registers().
metakernel init_refit_scratch( qword bvh_base, qword scratch ) //, dword max_inner_nodes )
{
    define FIXED_OFFSET REG1;   // 3
    define ROUND_UP_63  REG2;   // 63
    define SHIFT_4      REG3;   // 4
    define SHIFT_2      REG4;   // 2

    REG0.lo = load_dword( bvh_base + 12 );  // TODO: DON'T HARDCODE!!

    REG0 = REG0 - FIXED_OFFSET;     // nodedataCurr - fixed offset
    REG0 = REG0 + ROUND_UP_63;      // round up to the next multiple of 64
    REG0 = REG0 >> SHIFT_4;         // the divide by 64 is done as two shifts:
    REG0 = REG0 >> SHIFT_2;         //   >> 4 then >> 2  ==  >> 6

    DISPATCHDIM_X = REG0.lo;

    // equivalent direct form: ( (max_inner_nodes+63)/64, 1, 1 )
    dispatch_indirect init_refit_scratch
        args( bvh_base, scratch );
}

// Indirect dispatch of "build_fatleaf_table" followed by
// "build_innernode_table", both with the same dimensions:
//   ( ( dword[bvh_base + 12] - 3 + 63 ) / 64, 1, 1 ).
// This metakernel loads every register it uses (REG0..REG4).
metakernel build_node_tables( qword bvh_base )
{
    DISPATCHDIM_Y = 1;
    DISPATCHDIM_Z = 1;

    REG1 = 2;       // second shift amount
    REG2 = 63;      // round-up term
    REG3 = 4;       // first shift amount
    REG4 = 3;       // fixed offset... TODO: DON'T HARDCODE!!
    REG0 = load_dword( bvh_base + 12 );     // TODO: DON'T HARDCODE!!

    REG0 = REG0 - REG4;     // nodedataCurr - fixed offset
    REG0 = REG0 + REG2;     // + 63
    REG0 = REG0 >> REG3;    // >> 4
    REG0 = REG0 >> REG1;    // >> 2, i.e. >> 6 in total ( / 64 )

    DISPATCHDIM_X = REG0.lo;

    // equivalent direct form for both: ( (max_inner_nodes+63)/64, 1, 1 )
    dispatch_indirect build_fatleaf_table
        args( bvh_base );
    dispatch_indirect build_innernode_table
        args( bvh_base );
}

// Same dimension computation as build_node_tables(), but the fat-leaf table is
// built by "build_fatleaf_table_new_update", which additionally receives
// state.build_globals.
metakernel build_node_tables_new_update( MKBuilderState state, qword bvh_base )
{
    DISPATCHDIM_Y = 1;
    DISPATCHDIM_Z = 1;

    REG1 = 2;       // second shift amount
    REG2 = 63;      // round-up term
    REG3 = 4;       // first shift amount
    REG4 = 3;       // fixed offset... TODO: DON'T HARDCODE!!
    REG0 = load_dword( bvh_base + 12 );     // TODO: DON'T HARDCODE!!

    REG0 = REG0 - REG4;     // nodedataCurr - fixed offset
    REG0 = REG0 + REG2;     // + 63
    REG0 = REG0 >> REG3;    // >> 4
    REG0 = REG0 >> REG1;    // >> 2, i.e. >> 6 in total ( / 64 )

    DISPATCHDIM_X = REG0.lo;

    // equivalent direct form for both: ( (max_inner_nodes+63)/64, 1, 1 )
    dispatch_indirect build_fatleaf_table_new_update
        args( state.build_globals, bvh_base );
    dispatch_indirect build_innernode_table
        args( bvh_base );
}

// Direct dispatch of "fixup_quad_table" with fixed dimensions (2,1,1).
metakernel fixup_quad_table( qword bvh_base )
{
    dispatch fixup_quad_table( 2, 1, 1 )
        args( bvh_base );
}

// Register setup only (no dispatch): preloads the constants and the Y/Z
// dispatch dimensions for use in a batching loop by "traverse_aabbs_quad" and
// "write_inner_nodes". REG3 (4) and REG6 (255) are also the registers that
// "traverse_aabbs_new_update" and "traverse_aabbs_new_update_single_geo" read.
metakernel init_traverse_aabbs_quad_and_write_inner_nodes()
{
    DISPATCHDIM_Y = 1;
    DISPATCHDIM_Z = 1;

    REG0.hi = 0;    // upper half of the 64-bit count register stays zero
    REG1    = 1;    // shift amount 1
    REG2    = 31;   // round-up term for the divide by 32
    REG3    = 4;    // shift amount 4
    REG4    = 2;    // shift amount 2
    REG5    = 7;    // round-up term for the divide by 8
    REG6    = 255;  // round-up term for the divide by 256
}

// Indirect dispatch of "traverse_aabbs_quad" with
//   DISPATCHDIM_X = ( dword[bvh_base + 64] + 31 ) / 32.
// REG0.hi, REG1..REG3 and DISPATCHDIM_Y/Z are not written here; they are read
// as loaded by init_traverse_aabbs_quad_and_write_inner_nodes().
metakernel traverse_aabbs_quad( qword bvh_base, qword scratch, qword geos ) //, dword max_inner_nodes )
{
    define SHIFT_1     REG1;    // 1
    define ROUND_UP_31 REG2;    // 31
    define SHIFT_4     REG3;    // 4

    REG0.lo = load_dword( bvh_base + 64 );  // TODO: DOn't hardcode!

    REG0 = REG0 + ROUND_UP_31;      // round up to the next multiple of 32
    REG0 = REG0 >> SHIFT_4;         // the divide by 32 is done as two shifts:
    REG0 = REG0 >> SHIFT_1;         //   >> 4 then >> 1  ==  >> 5

    DISPATCHDIM_X = REG0.lo;

    // equivalent direct form: ( (count+31)/32, 1, 1 )
    dispatch_indirect traverse_aabbs_quad
        args( bvh_base, scratch, geos );
}

// Indirect dispatch of "write_inner_nodes" with
//   DISPATCHDIM_X = ( dword[bvh_base + 68] + 7 ) / 8.
// REG0.hi, REG1, REG4, REG5 and DISPATCHDIM_Y/Z are not written here; they are
// read as loaded by init_traverse_aabbs_quad_and_write_inner_nodes().
metakernel write_inner_nodes( qword bvh_base, qword scratch ) //, dword max_inner_nodes )
{
    define SHIFT_1    REG1;     // 1
    define SHIFT_2    REG4;     // 2
    define ROUND_UP_7 REG5;     // 7

    REG0.lo = load_dword( bvh_base + 68 );  // TODO: DOn't hardcode!

    REG0 = REG0 + ROUND_UP_7;       // round up to the next multiple of 8
    REG0 = REG0 >> SHIFT_2;         // the divide by 8 is done as two shifts:
    REG0 = REG0 >> SHIFT_1;         //   >> 2 then >> 1  ==  >> 3

    DISPATCHDIM_X = REG0.lo;

    // equivalent direct form: ( (max_inner_nodes+7)/8, 1, 1 )
    dispatch_indirect write_inner_nodes
        args( bvh_base, scratch );
}

// Direct dispatch of "update_single_group_quads" as a single group (1,1,1).
metakernel update_single_group_quads( qword bvh_base, qword geos, qword aabbs )
{
    dispatch update_single_group_quads( 1, 1, 1 )
        args( bvh_base, geos, aabbs );
}

// Indirect dispatch of "traverse_aabbs_new_update" with
//   DISPATCHDIM_X = ( dword[bvh_base + 84] + 255 ) / 256.
// Only REG0.lo and DISPATCHDIM_X are written here. REG0.hi, REG3, REG6 and
// DISPATCHDIM_Y/Z are used as-is; the values needed (0, 4, 255, 1, 1) match
// what init_traverse_aabbs_quad_and_write_inner_nodes() loads.
metakernel traverse_aabbs_new_update( qword bvh_base, qword geos, qword scratch )
{
    define SHIFT_4      REG3;   // 4
    define ROUND_UP_255 REG6;   // 255

    REG0.lo = load_dword( bvh_base + 84 );  // TODO: DOn't hardcode!

    REG0 = REG0 + ROUND_UP_255;     // round up to the next multiple of 256
    REG0 = REG0 >> SHIFT_4;         // the divide by 256 is done as two shifts:
    REG0 = REG0 >> SHIFT_4;         //   >> 4 then >> 4  ==  >> 8

    DISPATCHDIM_X = REG0.lo;

    // equivalent direct form: ( (max_inner_nodes+255)/256, 1, 1 )
    dispatch_indirect traverse_aabbs_new_update
        args( bvh_base, geos, scratch );
}

// Indirect dispatch of "traverse_aabbs_new_update_single_geo" with
//   DISPATCHDIM_X = ( dword[bvh_base + 84] + 255 ) / 256.
// Only REG0.lo and DISPATCHDIM_X are written here. REG0.hi, REG3, REG6 and
// DISPATCHDIM_Y/Z are used as-is; the values needed (0, 4, 255, 1, 1) match
// what init_traverse_aabbs_quad_and_write_inner_nodes() loads.
metakernel traverse_aabbs_new_update_single_geo( qword bvh_base, qword vertices, qword geos, qword scratch, dword vertex_format )
{
    define SHIFT_4      REG3;   // 4
    define ROUND_UP_255 REG6;   // 255

    REG0.lo = load_dword( bvh_base + 84 );  // TODO: DOn't hardcode!

    REG0 = REG0 + ROUND_UP_255;     // round up to the next multiple of 256
    REG0 = REG0 >> SHIFT_4;         // the divide by 256 is done as two shifts:
    REG0 = REG0 >> SHIFT_4;         //   >> 4 then >> 4  ==  >> 8

    DISPATCHDIM_X = REG0.lo;

    // equivalent direct form: ( (max_inner_nodes+255)/256, 1, 1 )
    dispatch_indirect traverse_aabbs_new_update_single_geo
        args( bvh_base, vertices, geos, scratch, vertex_format );
}