xref: /aosp_15_r20/external/mesa3d/src/intel/vulkan/grl/gpu/atomic_update.grl (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1//
2// Copyright (C) 2009-2021 Intel Corporation
3//
4// SPDX-License-Identifier: MIT
5//
6//
7
8module atomic_update;
9
// Declares the kernel module backed by "atomic_update.cl", linked with lsc_intrinsics.
// Every entry exposes one kernel under a GRL name that is identical to its
// kernelFunction string; the metakernels in this file dispatch kernels by that name.
10kernel_module atomic_update ("atomic_update.cl")
11{
12    links lsc_intrinsics;
13    kernel init_refit_scratch   < kernelFunction = "init_refit_scratch"  >;
14    kernel traverse_aabbs_quad  < kernelFunction = "traverse_aabbs_quad" >;
15    kernel write_inner_nodes    < kernelFunction = "write_inner_nodes"   >;
16    kernel build_fatleaf_table  < kernelFunction = "build_fatleaf_table" >;
17    kernel build_innernode_table < kernelFunction = "build_innernode_table" >;
18
19    kernel update_single_group_quads < kernelFunction = "update_single_group_quads" >;
20
21    kernel build_fatleaf_table_new_update  < kernelFunction = "build_fatleaf_table_new_update" >;
22    kernel fixup_quad_table  < kernelFunction = "fixup_quad_table" >;
23    kernel traverse_aabbs_new_update  < kernelFunction = "traverse_aabbs_new_update" >;
24    kernel traverse_aabbs_new_update_single_geo  < kernelFunction = "traverse_aabbs_new_update_single_geo" >;
25}
26
27import struct MKBuilderState "structs.grl";
28
29// this metakernel only initializes registers for use in a batching loop by "init_refit_scratch"
// Register contract with the init_refit_scratch metakernel:
//   REG1 = 3, REG2 = 63, REG3 = 4, REG4 = 2 are the values it aliases as
//   C_3, C_63, C_4 and C_2. REG0.hi is cleared and DISPATCHDIM_Y/Z are fixed
//   at 1 here because init_refit_scratch only writes REG0.lo and DISPATCHDIM_X.
// NOTE(review): build_node_tables* and init_traverse_aabbs_quad_and_write_inner_nodes
// store different values in some of REG1/REG2/REG4, so this presumably has to be
// re-run before init_refit_scratch if any of them executed in between - confirm
// against the caller.
30metakernel init_refit_scratch_metakernel_registers()
31{
32    REG0.hi = 0;  // init_refit_scratch loads only REG0.lo, then does arithmetic on all of REG0
33    REG1 = 3;     // subtracted from the loaded value (aliased as C_3)
34    REG2 = 63;    // rounding term for the divide by 64 (aliased as C_63)
35    REG3 = 4;     // first shift amount (aliased as C_4)
36    REG4 = 2;     // second shift amount (aliased as C_2); 4 + 2 gives >> 6
37
38    DISPATCHDIM_Y = 1;
39    DISPATCHDIM_Z = 1;
40}
41
// Sizes and launches the init_refit_scratch kernel with an indirect dispatch.
//
// Loads the dword at bvh_base + 12 (referred to as nodedataCurr in the comments
// below), computes (value - 3 + 63) >> 6 and stores the low dword of the result
// in DISPATCHDIM_X.
//
// Parameters:
//   bvh_base - address the dword is loaded from; also forwarded to the kernel.
//   scratch  - forwarded to the kernel unchanged.
//
// Precondition: REG0.hi, REG1..REG4 and DISPATCHDIM_Y/Z are not written here.
// In this file init_refit_scratch_metakernel_registers() is the metakernel that
// sets them to the values the C_* aliases below are named after.
42metakernel init_refit_scratch( qword bvh_base, qword scratch)//, dword max_inner_nodes )
43{
44    REG0.lo = load_dword( bvh_base + 12 ); // TODO: DON'T HARDCODE!!
      // Aliases for registers that are expected to already hold 3, 63, 4 and 2.
45    define C_3  REG1;
46    define C_63 REG2;
47    define C_4  REG3;
48    define C_2  REG4;
49
50    REG0 = REG0 - C_3; // nodedataCurr - fixed offset
51    REG0 = REG0 + C_63; // + 63, so the shift below rounds up
      // The >> 6 is applied as two shifts using the preloaded constants.
      // NOTE(review): presumably because only certain shift amounts are available - confirm.
52    REG0 = REG0 >> C_4; // >> 4
53    REG0 = REG0 >> C_2; // >> 2 == >> 6 == /64
54
55    DISPATCHDIM_X = REG0.lo;
56
57    dispatch_indirect init_refit_scratch//( (max_inner_nodes+63)/64, 1, 1 )
58        args(bvh_base,scratch);
59
60}
61
// Launches build_fatleaf_table and then build_innernode_table, both with the
// same indirect dispatch size and both taking bvh_base as their only argument.
//
// DISPATCHDIM_X = low dword of (load_dword(bvh_base + 12) - 3 + 63) >> 6;
// DISPATCHDIM_Y and DISPATCHDIM_Z are set to 1.
//
// Unlike init_refit_scratch, this metakernel writes every register it uses, so
// it does not depend on a separate register-initialization metakernel.
// NOTE(review): it leaves REG1 = 2 and REG4 = 3, which differ from the values
// the init_* metakernels in this file store in those registers.
62metakernel build_node_tables( qword bvh_base )
63{
64    REG0 = load_dword( bvh_base + 12 ); // TODO: DON'T HARDCODE!!
65    REG1 = 2;  // second shift amount
66    REG2 = 63; // rounding term for the divide by 64
67    REG3 = 4;  // first shift amount
68    REG4 = 3;  // fixed offset... TODO: DON'T HARDCODE!!
69
70    REG0 = REG0 - REG4; // nodedataCurr - fixed offset
71    REG0 = REG0 + REG2; // + 63
72    REG0 = REG0 >> REG3; // >> 4
73    REG0 = REG0 >> REG1; // >> 2 == >> 6 == /64
74
75    DISPATCHDIM_X = REG0.lo;
76    DISPATCHDIM_Y = 1;
77    DISPATCHDIM_Z = 1;
78
79    dispatch_indirect build_fatleaf_table//( (max_inner_nodes+63)/64, 1, 1 )
80        args(bvh_base);
81    dispatch_indirect build_innernode_table//( (max_inner_nodes+63)/64, 1, 1 )
82        args(bvh_base);
83}
84
// Variant of build_node_tables whose first kernel is build_fatleaf_table_new_update.
// That kernel receives (state.build_globals, bvh_base); build_innernode_table
// is then launched with bvh_base only. Both use the same indirect dispatch size.
//
// DISPATCHDIM_X = low dword of (load_dword(bvh_base + 12) - 3 + 63) >> 6;
// DISPATCHDIM_Y and DISPATCHDIM_Z are set to 1.
//
// Parameters:
//   state    - only its build_globals member is read, and it is passed through.
//   bvh_base - address the dword is loaded from; also forwarded to both kernels.
//
// Every register used is written here. NOTE(review): it leaves REG1 = 2 and
// REG4 = 3, which differ from the values the init_* metakernels in this file
// store in those registers.
85metakernel build_node_tables_new_update( MKBuilderState state, qword bvh_base )
86{
87    REG0 = load_dword( bvh_base + 12 ); // TODO: DON'T HARDCODE!!
88    REG1 = 2;  // second shift amount
89    REG2 = 63; // rounding term for the divide by 64
90    REG3 = 4;  // first shift amount
91    REG4 = 3;  // fixed offset... TODO: DON'T HARDCODE!!
92
93    REG0 = REG0 - REG4; // nodedataCurr - fixed offset
94    REG0 = REG0 + REG2; // + 63
95    REG0 = REG0 >> REG3; // >> 4
96    REG0 = REG0 >> REG1; // >> 2 == >> 6 == /64
97
98    DISPATCHDIM_X = REG0.lo;
99    DISPATCHDIM_Y = 1;
100    DISPATCHDIM_Z = 1;
101
102    dispatch_indirect build_fatleaf_table_new_update//( (max_inner_nodes+63)/64, 1, 1 )
103        args(state.build_globals, bvh_base);
104    dispatch_indirect build_innernode_table//( (max_inner_nodes+63)/64, 1, 1 )
105        args(bvh_base);
106}
107
// Launches the fixup_quad_table kernel with a fixed dispatch size of (2,1,1),
// passing bvh_base as its only argument. No registers are read or written here.
108metakernel fixup_quad_table( qword bvh_base )
109{
110    dispatch  fixup_quad_table(2,1,1)
111        args(bvh_base);
112}
113
114// this metakernel only initializes registers for use in a batching loop by "traverse_aabbs_quad" and "write_inner_nodes"
// Register values set here and where this file consumes them:
//   REG1 = 1   -> C_1   in traverse_aabbs_quad and write_inner_nodes
//   REG2 = 31  -> C_31  in traverse_aabbs_quad
//   REG3 = 4   -> C_4   in traverse_aabbs_quad and traverse_aabbs_new_update*
//   REG4 = 2   -> C_2   in write_inner_nodes
//   REG5 = 7   -> C_7   in write_inner_nodes
//   REG6 = 255 -> C_255 in traverse_aabbs_new_update*
// REG0.hi is cleared and DISPATCHDIM_Y/Z are fixed at 1 because those
// metakernels only write REG0.lo and DISPATCHDIM_X.
// NOTE(review): REG6 and REG3 are also used by traverse_aabbs_new_update and
// traverse_aabbs_new_update_single_geo, so this metakernel appears to serve
// them too - confirm against the caller.
115metakernel init_traverse_aabbs_quad_and_write_inner_nodes()
116{
117    REG0.hi = 0;
118    REG1 = 1;
119    REG2 = 31;
120    REG3 = 4;
121    REG4 = 2;
122    REG5 = 7;
123    REG6 = 255;
124    DISPATCHDIM_Y = 1;
125    DISPATCHDIM_Z = 1;
126}
127
// Sizes and launches the traverse_aabbs_quad kernel with an indirect dispatch.
//
// DISPATCHDIM_X = low dword of (load_dword(bvh_base + 64) + 31) >> 5.
//
// Parameters:
//   bvh_base - address the dword is loaded from; also forwarded to the kernel.
//   scratch, geos - forwarded to the kernel unchanged.
//
// Precondition: REG0.hi, REG1 (1), REG2 (31), REG3 (4) and DISPATCHDIM_Y/Z are
// not written here; in this file init_traverse_aabbs_quad_and_write_inner_nodes()
// is the metakernel that sets them to those values.
128metakernel traverse_aabbs_quad( qword bvh_base, qword scratch, qword geos)//, dword max_inner_nodes )
129{
130
131    REG0.lo = load_dword( bvh_base + 64 ); // TODO: DOn't hardcode!
132    define C_1  REG1;
133    define C_31 REG2;
134    define C_4  REG3;
135
136    REG0 = REG0 + C_31; // + 31, so the shift below rounds up
137    REG0 = REG0 >> C_4; // >> 4
138    REG0 = REG0 >> C_1; // >> 1 == >> 5 == /32
139
140    DISPATCHDIM_X = REG0.lo;
141
142    dispatch_indirect traverse_aabbs_quad//( (max_inner_nodes+31)/32, 1, 1 )
143        args(bvh_base,scratch,geos);
144}
145
// Sizes and launches the write_inner_nodes kernel with an indirect dispatch.
//
// DISPATCHDIM_X = low dword of (load_dword(bvh_base + 68) + 7) >> 3.
//
// Parameters:
//   bvh_base - address the dword is loaded from; also forwarded to the kernel.
//   scratch  - forwarded to the kernel unchanged.
//
// Precondition: REG0.hi, REG1 (1), REG4 (2), REG5 (7) and DISPATCHDIM_Y/Z are
// not written here; in this file init_traverse_aabbs_quad_and_write_inner_nodes()
// is the metakernel that sets them to those values.
146metakernel write_inner_nodes( qword bvh_base, qword scratch )//, dword max_inner_nodes )
147{
148    REG0.lo = load_dword( bvh_base + 68 ); // TODO: DOn't hardcode!
149    define C_1 REG1;
150    define C_2 REG4;
151    define C_7 REG5;
152
153    REG0 = REG0 + C_7;  // + 7, so the shift below rounds up
154    REG0 = REG0 >> C_2; // >> 2
155    REG0 = REG0 >> C_1; // >> 1 ==>  >> 3  (/8)
156    DISPATCHDIM_X = REG0.lo;
157
158    dispatch_indirect  write_inner_nodes//( (max_inner_nodes+7)/8, 1, 1 )
159        args(bvh_base,scratch);
160}
161
// Launches the update_single_group_quads kernel with a fixed dispatch size of
// (1,1,1), forwarding bvh_base, geos and aabbs unchanged. No registers are used.
// NOTE(review): the trailing formula comment on the dispatch line does not match
// the fixed (1,1,1) size actually used; it looks like a leftover.
162metakernel update_single_group_quads( qword bvh_base, qword geos, qword aabbs  )
163{
164    dispatch  update_single_group_quads(1,1,1) //( (max_inner_nodes+1)/2, 1, 1 )
165        args(bvh_base,geos,aabbs);
166}
167
// Sizes and launches the traverse_aabbs_new_update kernel with an indirect dispatch.
//
// DISPATCHDIM_X = low dword of (load_dword(bvh_base + 84) + 255) >> 8.
//
// Parameters:
//   bvh_base - address the dword is loaded from; also forwarded to the kernel.
//   geos, scratch - forwarded to the kernel unchanged.
//
// Precondition: REG0.hi, REG3 (4), REG6 (255) and DISPATCHDIM_Y/Z are not
// written here. In this file only init_traverse_aabbs_quad_and_write_inner_nodes()
// sets REG6 to 255 - NOTE(review): confirm that it is what callers run first.
168metakernel traverse_aabbs_new_update( qword bvh_base, qword geos, qword scratch )
169{
170    REG0.lo = load_dword( bvh_base + 84 ); // TODO: DOn't hardcode!
171    define C_255 REG6;
172    define C_4   REG3;
173
174    REG0 = REG0 + C_255; // + 255, so the shift below rounds up
175    REG0 = REG0 >> C_4; // >> 4
176    REG0 = REG0 >> C_4; // >> 4 == >> 8 == /256
177
178    DISPATCHDIM_X = REG0.lo;
179
180    dispatch_indirect traverse_aabbs_new_update//( (max_inner_nodes+255)/256, 1, 1 )
181        args(bvh_base, geos, scratch);
182}
183
// Sizes and launches the traverse_aabbs_new_update_single_geo kernel with an
// indirect dispatch.
//
// DISPATCHDIM_X = low dword of (load_dword(bvh_base + 84) + 255) >> 8.
//
// Parameters:
//   bvh_base - address the dword is loaded from; also forwarded to the kernel.
//   vertices, geos, scratch, vertex_format - forwarded to the kernel unchanged.
//
// Precondition: REG0.hi, REG3 (4), REG6 (255) and DISPATCHDIM_Y/Z are not
// written here. In this file only init_traverse_aabbs_quad_and_write_inner_nodes()
// sets REG6 to 255 - NOTE(review): confirm that it is what callers run first.
184metakernel traverse_aabbs_new_update_single_geo( qword bvh_base, qword vertices, qword geos, qword scratch, dword vertex_format )
185{
186    REG0.lo = load_dword( bvh_base + 84 ); // TODO: DOn't hardcode!
187    define C_255 REG6;
188    define C_4   REG3;
189
190    REG0 = REG0 + C_255; // + 255, so the shift below rounds up
191    REG0 = REG0 >> C_4; // >> 4
192    REG0 = REG0 >> C_4; // >> 4 == >> 8 == /256
193
194    DISPATCHDIM_X = REG0.lo;
195
196    dispatch_indirect traverse_aabbs_new_update_single_geo//( (max_inner_nodes+255)/256, 1, 1 )
197        args(bvh_base, vertices, geos, scratch, vertex_format);
198}