xref: /aosp_15_r20/external/mesa3d/src/gallium/drivers/radeonsi/si_build_pm4.h (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright 2013 Advanced Micro Devices, Inc.
3  *
4  * SPDX-License-Identifier: MIT
5  */
6 
/**
 * This file contains helpers for writing commands to command streams.
 */
10 
11 #ifndef SI_BUILD_PM4_H
12 #define SI_BUILD_PM4_H
13 
14 #include "si_pipe.h"
15 #include "sid.h"
16 
/* Open a command-stream writing scope on "cs".
 *
 * Declares the locals that the other radeon_* helpers in this file operate on:
 *   __cs             - the command buffer being written
 *   __cs_buf         - copy of cs->current.buf
 *   __cs_num         - write index into __cs_buf, starting at cs->current.cdw
 *   __cs_num_initial - the starting index, read by radeon_packets_added()
 *
 * The expansion has no trailing semicolon; the caller supplies it.
 */
#define radeon_begin(cs) struct radeon_cmdbuf *__cs = (cs); \
                         uint32_t *__cs_buf = __cs->current.buf; \
                         unsigned __cs_num = __cs->current.cdw; \
                         UNUSED unsigned __cs_num_initial = __cs_num
21 
/* Re-open a writing scope after radeon_end(), reloading the write index and
 * buffer pointer from "cs". Asserts that no scope is currently open (__cs must
 * be NULL). Requires the locals declared by radeon_begin() to be in scope.
 */
#define radeon_begin_again(cs) do { \
   assert(__cs == NULL); \
   __cs = (cs); \
   __cs_buf = __cs->current.buf; \
   __cs_num = __cs->current.cdw; \
   __cs_num_initial = __cs_num; \
} while (0)
29 
/* Close the current writing scope: store the local write index back into the
 * command buffer, check it against max_dw, and clear __cs.
 */
#define radeon_end() do { \
   /* Commit the locally tracked write index. */ \
   __cs->current.cdw = __cs_num; \
   assert(__cs->current.cdw <= __cs->current.max_dw); \
   /* Mark the scope closed; radeon_begin_again() asserts on this. */ \
   __cs = NULL; \
} while (0)
35 
/* Append one value at the current write index of __cs_buf and advance the index.
 * The expansion is fully parenthesized so that it acts as a single expression in
 * any context (for example as an operand of ?: or the comma operator).
 */
#define radeon_emit(value)  (__cs_buf[__cs_num++] = (value))
/* Non-zero if the write index has moved since the scope was opened. */
#define radeon_packets_added()  (__cs_num_initial != __cs_num)
38 
/* Close the writing scope and, if anything was written inside it, set
 * sctx->context_roll. Expects "sctx" to be in scope at the call site.
 */
#define radeon_end_update_context_roll() do { \
   radeon_end(); \
   if (radeon_packets_added()) { \
      sctx->context_roll = true; \
   } \
} while (0)
44 
/* Copy "num" 4-byte words from "values" to the current write position and
 * advance the write index by "num". "num" is evaluated once.
 */
#define radeon_emit_array(values, num) do { \
   unsigned __n = (num); \
   memcpy(&__cs_buf[__cs_num], (values), __n * 4); \
   __cs_num += __n; \
} while (0)
50 
/* Reserve "num" entries in the command buffer without writing them: store the
 * address of the current write position in *ptr, then advance the write index.
 * The caller is expected to fill the reserved entries through *ptr.
 */
#define radeon_emit_array_get_ptr(num, ptr) do { \
   *(ptr) = &__cs_buf[__cs_num]; \
   __cs_num += (num); \
} while (0)
58 
/* Packet building helpers. Don't use directly. */

/* Emit the two-word header of a register-write packet:
 *   word 0: PKT3(packet, num, 0) combined with the reset_filter_cam field
 *   word 1: offset of "reg" from prefix_name##_REG_OFFSET in 4-byte units,
 *           combined with "idx" shifted to bit 28
 * Asserts that "reg" lies in [prefix_name##_REG_OFFSET, prefix_name##_REG_END).
 * The register values themselves are emitted by the caller afterwards.
 */
#define radeon_set_reg_seq(reg, num, idx, prefix_name, packet, reset_filter_cam) do { \
   assert((reg) >= prefix_name##_REG_OFFSET && (reg) < prefix_name##_REG_END); \
   radeon_emit(PKT3_RESET_FILTER_CAM_S(reset_filter_cam) | PKT3(packet, num, 0)); \
   radeon_emit(((idx) << 28) | (((reg) - prefix_name##_REG_OFFSET) >> 2)); \
} while (0)
65 
/* Write a single register: a one-register header followed by its value. */
#define radeon_set_reg(reg, idx, value, prefix_name, packet) do { \
   /* Header for exactly one register, reset_filter_cam = 0. */ \
   radeon_set_reg_seq(reg, 1, idx, prefix_name, packet, 0); \
   /* Payload. */ \
   radeon_emit(value); \
} while (0)
70 
/* Write one register only when needed: the packet is skipped if "reg_enum" is
 * already marked in sctx->tracked_regs.reg_saved_mask and the cached value
 * equals "value". After emitting, the cache and the saved mask are updated.
 * "value" is evaluated once.
 */
#define radeon_opt_set_reg(reg, reg_enum, idx, value, prefix_name, packet) do { \
   unsigned __value = (value); \
   if (!(BITSET_TEST(sctx->tracked_regs.reg_saved_mask, (reg_enum)) && \
         sctx->tracked_regs.reg_value[(reg_enum)] == __value)) { \
      radeon_set_reg(reg, idx, __value, prefix_name, packet); \
      sctx->tracked_regs.reg_value[(reg_enum)] = __value; \
      BITSET_SET(sctx->tracked_regs.reg_saved_mask, (reg_enum)); \
   } \
} while (0)
80 
/* Set consecutive registers if any value is different. */

/* Two-register variant: emits one packet covering "reg" and the register after
 * it unless both tracked entries (reg_enum, reg_enum + 1) are marked saved and
 * hold v1 and v2. Each value argument is evaluated once.
 */
#define radeon_opt_set_reg2(reg, reg_enum, v1, v2, prefix_name, packet) do { \
   unsigned __v1 = (v1); \
   unsigned __v2 = (v2); \
   if (!(BITSET_TEST_RANGE_INSIDE_WORD(sctx->tracked_regs.reg_saved_mask, \
                                       (reg_enum), (reg_enum) + 1, 0x3) && \
         sctx->tracked_regs.reg_value[(reg_enum)] == __v1 && \
         sctx->tracked_regs.reg_value[(reg_enum) + 1] == __v2)) { \
      radeon_set_reg_seq(reg, 2, 0, prefix_name, packet, 0); \
      radeon_emit(__v1); \
      radeon_emit(__v2); \
      sctx->tracked_regs.reg_value[(reg_enum)] = __v1; \
      sctx->tracked_regs.reg_value[(reg_enum) + 1] = __v2; \
      BITSET_SET_RANGE_INSIDE_WORD(sctx->tracked_regs.reg_saved_mask, \
                                   (reg_enum), (reg_enum) + 1); \
   } \
} while (0)
97 
/* Three-register variant of the optimized setter: emits one packet for three
 * consecutive registers unless tracked entries reg_enum .. reg_enum + 2 are all
 * marked saved and already hold v1, v2, v3. Each value is evaluated once.
 */
#define radeon_opt_set_reg3(reg, reg_enum, v1, v2, v3, prefix_name, packet) do { \
   unsigned __v1 = (v1); \
   unsigned __v2 = (v2); \
   unsigned __v3 = (v3); \
   if (!(BITSET_TEST_RANGE_INSIDE_WORD(sctx->tracked_regs.reg_saved_mask, \
                                       (reg_enum), (reg_enum) + 2, 0x7) && \
         sctx->tracked_regs.reg_value[(reg_enum)] == __v1 && \
         sctx->tracked_regs.reg_value[(reg_enum) + 1] == __v2 && \
         sctx->tracked_regs.reg_value[(reg_enum) + 2] == __v3)) { \
      radeon_set_reg_seq(reg, 3, 0, prefix_name, packet, 0); \
      radeon_emit(__v1); \
      radeon_emit(__v2); \
      radeon_emit(__v3); \
      sctx->tracked_regs.reg_value[(reg_enum)] = __v1; \
      sctx->tracked_regs.reg_value[(reg_enum) + 1] = __v2; \
      sctx->tracked_regs.reg_value[(reg_enum) + 2] = __v3; \
      BITSET_SET_RANGE_INSIDE_WORD(sctx->tracked_regs.reg_saved_mask, \
                                   (reg_enum), (reg_enum) + 2); \
   } \
} while (0)
116 
/* Four-register variant of the optimized setter: emits one packet for four
 * consecutive registers unless tracked entries reg_enum .. reg_enum + 3 are all
 * marked saved and already hold v1 .. v4. Each value is evaluated once.
 */
#define radeon_opt_set_reg4(reg, reg_enum, v1, v2, v3, v4, prefix_name, packet) do { \
   unsigned __v1 = (v1); \
   unsigned __v2 = (v2); \
   unsigned __v3 = (v3); \
   unsigned __v4 = (v4); \
   if (!(BITSET_TEST_RANGE_INSIDE_WORD(sctx->tracked_regs.reg_saved_mask, \
                                       (reg_enum), (reg_enum) + 3, 0xf) && \
         sctx->tracked_regs.reg_value[(reg_enum)] == __v1 && \
         sctx->tracked_regs.reg_value[(reg_enum) + 1] == __v2 && \
         sctx->tracked_regs.reg_value[(reg_enum) + 2] == __v3 && \
         sctx->tracked_regs.reg_value[(reg_enum) + 3] == __v4)) { \
      radeon_set_reg_seq(reg, 4, 0, prefix_name, packet, 0); \
      radeon_emit(__v1); \
      radeon_emit(__v2); \
      radeon_emit(__v3); \
      radeon_emit(__v4); \
      sctx->tracked_regs.reg_value[(reg_enum)] = __v1; \
      sctx->tracked_regs.reg_value[(reg_enum) + 1] = __v2; \
      sctx->tracked_regs.reg_value[(reg_enum) + 2] = __v3; \
      sctx->tracked_regs.reg_value[(reg_enum) + 3] = __v4; \
      BITSET_SET_RANGE_INSIDE_WORD(sctx->tracked_regs.reg_saved_mask, \
                                   (reg_enum), (reg_enum) + 3); \
   } \
} while (0)
138 
/* Five-register variant of the optimized setter: emits one packet for five
 * consecutive registers unless tracked entries reg_enum .. reg_enum + 4 are all
 * marked saved and already hold v1 .. v5. Each value is evaluated once.
 */
#define radeon_opt_set_reg5(reg, reg_enum, v1, v2, v3, v4, v5, prefix_name, packet) do { \
   unsigned __v1 = (v1); \
   unsigned __v2 = (v2); \
   unsigned __v3 = (v3); \
   unsigned __v4 = (v4); \
   unsigned __v5 = (v5); \
   if (!(BITSET_TEST_RANGE_INSIDE_WORD(sctx->tracked_regs.reg_saved_mask, \
                                       (reg_enum), (reg_enum) + 4, 0x1f) && \
         sctx->tracked_regs.reg_value[(reg_enum)] == __v1 && \
         sctx->tracked_regs.reg_value[(reg_enum) + 1] == __v2 && \
         sctx->tracked_regs.reg_value[(reg_enum) + 2] == __v3 && \
         sctx->tracked_regs.reg_value[(reg_enum) + 3] == __v4 && \
         sctx->tracked_regs.reg_value[(reg_enum) + 4] == __v5)) { \
      radeon_set_reg_seq(reg, 5, 0, prefix_name, packet, 0); \
      radeon_emit(__v1); \
      radeon_emit(__v2); \
      radeon_emit(__v3); \
      radeon_emit(__v4); \
      radeon_emit(__v5); \
      sctx->tracked_regs.reg_value[(reg_enum)] = __v1; \
      sctx->tracked_regs.reg_value[(reg_enum) + 1] = __v2; \
      sctx->tracked_regs.reg_value[(reg_enum) + 2] = __v3; \
      sctx->tracked_regs.reg_value[(reg_enum) + 3] = __v4; \
      sctx->tracked_regs.reg_value[(reg_enum) + 4] = __v5; \
      BITSET_SET_RANGE_INSIDE_WORD(sctx->tracked_regs.reg_saved_mask, \
                                   (reg_enum), (reg_enum) + 4); \
   } \
} while (0)
163 
/* Six-register variant of the optimized setter: emits one packet for six
 * consecutive registers unless tracked entries reg_enum .. reg_enum + 5 are all
 * marked saved and already hold v1 .. v6. Each value is evaluated once.
 */
#define radeon_opt_set_reg6(reg, reg_enum, v1, v2, v3, v4, v5, v6, prefix_name, packet) do { \
   unsigned __v1 = (v1); \
   unsigned __v2 = (v2); \
   unsigned __v3 = (v3); \
   unsigned __v4 = (v4); \
   unsigned __v5 = (v5); \
   unsigned __v6 = (v6); \
   if (!(BITSET_TEST_RANGE_INSIDE_WORD(sctx->tracked_regs.reg_saved_mask, \
                                       (reg_enum), (reg_enum) + 5, 0x3f) && \
         sctx->tracked_regs.reg_value[(reg_enum)] == __v1 && \
         sctx->tracked_regs.reg_value[(reg_enum) + 1] == __v2 && \
         sctx->tracked_regs.reg_value[(reg_enum) + 2] == __v3 && \
         sctx->tracked_regs.reg_value[(reg_enum) + 3] == __v4 && \
         sctx->tracked_regs.reg_value[(reg_enum) + 4] == __v5 && \
         sctx->tracked_regs.reg_value[(reg_enum) + 5] == __v6)) { \
      radeon_set_reg_seq(reg, 6, 0, prefix_name, packet, 0); \
      radeon_emit(__v1); \
      radeon_emit(__v2); \
      radeon_emit(__v3); \
      radeon_emit(__v4); \
      radeon_emit(__v5); \
      radeon_emit(__v6); \
      sctx->tracked_regs.reg_value[(reg_enum)] = __v1; \
      sctx->tracked_regs.reg_value[(reg_enum) + 1] = __v2; \
      sctx->tracked_regs.reg_value[(reg_enum) + 2] = __v3; \
      sctx->tracked_regs.reg_value[(reg_enum) + 3] = __v4; \
      sctx->tracked_regs.reg_value[(reg_enum) + 4] = __v5; \
      sctx->tracked_regs.reg_value[(reg_enum) + 5] = __v6; \
      BITSET_SET_RANGE_INSIDE_WORD(sctx->tracked_regs.reg_saved_mask, \
                                   (reg_enum), (reg_enum) + 5); \
   } \
} while (0)
191 
/* Array variant of the optimized setter: compares "num" 32-bit words in
 * "values" against "saved_values"; if they differ, emits one packet with all
 * of them and copies "values" into "saved_values". Unlike the fixed-count
 * variants, this one does not consult reg_saved_mask.
 */
#define radeon_opt_set_regn(reg, values, saved_values, num, prefix_name, packet) do { \
   if (memcmp(values, saved_values, sizeof(uint32_t) * (num)) != 0) { \
      radeon_set_reg_seq(reg, num, 0, prefix_name, packet, 0); \
      radeon_emit_array(values, num); \
      /* Remember what was just written. */ \
      memcpy(saved_values, values, sizeof(uint32_t) * (num)); \
   } \
} while (0)
199 
/* Packet building helpers for CONFIG registers. */

/* Write one register in the SI_CONFIG range using PKT3_SET_CONFIG_REG. */
#define radeon_set_config_reg(reg, value) \
   radeon_set_reg((reg), 0, (value), SI_CONFIG, PKT3_SET_CONFIG_REG)
203 
/* Packet building helpers for CONTEXT registers.
 *
 * All of these bind the generic helpers to the SI_CONTEXT register range and
 * the PKT3_SET_CONTEXT_REG packet. The radeon_opt_* forms skip the packet when
 * sctx->tracked_regs already holds the same value(s).
 */

/* Header only, for "num" consecutive registers; the caller emits the values. */
#define radeon_set_context_reg_seq(reg, num) \
   radeon_set_reg_seq((reg), (num), 0, SI_CONTEXT, PKT3_SET_CONTEXT_REG, 0)

/* Unconditionally write one register. */
#define radeon_set_context_reg(reg, value) \
   radeon_set_reg((reg), 0, (value), SI_CONTEXT, PKT3_SET_CONTEXT_REG)

/* Write one register if its tracked value differs. */
#define radeon_opt_set_context_reg(reg, reg_enum, value) \
   radeon_opt_set_reg((reg), (reg_enum), 0, (value), SI_CONTEXT, PKT3_SET_CONTEXT_REG)

/* Same as above, with an explicit index field in the packet. */
#define radeon_opt_set_context_reg_idx(reg, reg_enum, idx, value) \
   radeon_opt_set_reg((reg), (reg_enum), (idx), (value), SI_CONTEXT, PKT3_SET_CONTEXT_REG)

/* Write 2..6 consecutive registers if any tracked value differs. */
#define radeon_opt_set_context_reg2(reg, reg_enum, v1, v2) \
   radeon_opt_set_reg2((reg), (reg_enum), (v1), (v2), SI_CONTEXT, PKT3_SET_CONTEXT_REG)

#define radeon_opt_set_context_reg3(reg, reg_enum, v1, v2, v3) \
   radeon_opt_set_reg3((reg), (reg_enum), (v1), (v2), (v3), SI_CONTEXT, PKT3_SET_CONTEXT_REG)

#define radeon_opt_set_context_reg4(reg, reg_enum, v1, v2, v3, v4) \
   radeon_opt_set_reg4((reg), (reg_enum), (v1), (v2), (v3), (v4), \
                       SI_CONTEXT, PKT3_SET_CONTEXT_REG)

#define radeon_opt_set_context_reg5(reg, reg_enum, v1, v2, v3, v4, v5) \
   radeon_opt_set_reg5((reg), (reg_enum), (v1), (v2), (v3), (v4), (v5), \
                       SI_CONTEXT, PKT3_SET_CONTEXT_REG)

#define radeon_opt_set_context_reg6(reg, reg_enum, v1, v2, v3, v4, v5, v6) \
   radeon_opt_set_reg6((reg), (reg_enum), (v1), (v2), (v3), (v4), (v5), (v6), \
                       SI_CONTEXT, PKT3_SET_CONTEXT_REG)

/* Write "num" consecutive registers from an array if it differs from "saved_values". */
#define radeon_opt_set_context_regn(reg, values, saved_values, num) \
   radeon_opt_set_regn((reg), (values), (saved_values), (num), \
                       SI_CONTEXT, PKT3_SET_CONTEXT_REG)
234 
/* Packet building helpers for SH registers.
 *
 * These bind the generic helpers to the SI_SH register range and the
 * PKT3_SET_SH_REG packet.
 */

/* Header only, for "num" consecutive registers; the caller emits the values. */
#define radeon_set_sh_reg_seq(reg, num) \
   radeon_set_reg_seq((reg), (num), 0, SI_SH, PKT3_SET_SH_REG, 0)

/* Unconditionally write one register. */
#define radeon_set_sh_reg(reg, value) \
   radeon_set_reg((reg), 0, (value), SI_SH, PKT3_SET_SH_REG)

/* Write one register if its tracked value differs. */
#define radeon_opt_set_sh_reg(reg, reg_enum, value) \
   radeon_opt_set_reg((reg), (reg_enum), 0, (value), SI_SH, PKT3_SET_SH_REG)

/* Write two or three consecutive registers if any tracked value differs. */
#define radeon_opt_set_sh_reg2(reg, reg_enum, v1, v2) \
   radeon_opt_set_reg2((reg), (reg_enum), (v1), (v2), SI_SH, PKT3_SET_SH_REG)

#define radeon_opt_set_sh_reg3(reg, reg_enum, v1, v2, v3) \
   radeon_opt_set_reg3((reg), (reg_enum), (v1), (v2), (v3), SI_SH, PKT3_SET_SH_REG)
250 
/* Write one SH register with an index field via PKT3_SET_SH_REG_INDEX if its
 * tracked value differs. Asserts that sctx->gfx_level is at least GFX10.
 */
#define radeon_opt_set_sh_reg_idx(reg, reg_enum, idx, value) do { \
   assert(sctx->gfx_level >= GFX10); \
   radeon_opt_set_reg((reg), (reg_enum), (idx), (value), SI_SH, PKT3_SET_SH_REG_INDEX); \
} while (0)
255 
/* Emit "va" as a single word. Asserts that "va" is either 0 or has upper 32
 * bits equal to sctx->screen->info.address32_hi.
 */
#define radeon_emit_32bit_pointer(va) do { \
   assert((va) == 0 || ((va) >> 32) == sctx->screen->info.address32_hi); \
   /* Only one word is written for the address. */ \
   radeon_emit(va); \
} while (0)
260 
/* Write desc->gpu_address as a 32-bit pointer into the SH register located at
 * sh_base + desc->shader_userdata_offset.
 */
#define radeon_emit_one_32bit_pointer(desc, sh_base) do { \
   /* One-register SH header at the descriptor's user-data slot. */ \
   radeon_set_sh_reg_seq((sh_base) + (desc)->shader_userdata_offset, 1); \
   radeon_emit_32bit_pointer((desc)->gpu_address); \
} while (0)
265 
/* Packet building helpers for UCONFIG registers.
 *
 * These bind the generic helpers to the CIK_UCONFIG register range and the
 * PKT3_SET_UCONFIG_REG packet.
 */

/* Header only, for "num" consecutive registers; the caller emits the values. */
#define radeon_set_uconfig_reg_seq(reg, num) \
   radeon_set_reg_seq((reg), (num), 0, CIK_UCONFIG, PKT3_SET_UCONFIG_REG, 0)

/* Header only, like radeon_set_uconfig_reg_seq, but the reset_filter_cam field
 * is set when sctx->gfx_level >= GFX10 and the command buffer's IP type is
 * AMD_IP_GFX.
 */
#define radeon_set_uconfig_perfctr_reg_seq(reg, num) \
   radeon_set_reg_seq((reg), (num), 0, CIK_UCONFIG, PKT3_SET_UCONFIG_REG, \
                      (sctx->gfx_level >= GFX10 && \
                       sctx->ws->cs_get_ip_type(__cs) == AMD_IP_GFX))

/* Unconditionally write one register. */
#define radeon_set_uconfig_reg(reg, value) \
   radeon_set_reg((reg), 0, (value), CIK_UCONFIG, PKT3_SET_UCONFIG_REG)

/* Write one register if its tracked value differs. */
#define radeon_opt_set_uconfig_reg(reg, reg_enum, value) \
   radeon_opt_set_reg((reg), (reg_enum), 0, (value), CIK_UCONFIG, PKT3_SET_UCONFIG_REG)
280 
/* Select the packet used for indexed UCONFIG writes: PKT3_SET_UCONFIG_REG_INDEX
 * when GFX_VERSION >= GFX10, or when GFX_VERSION == GFX9 and
 * sctx->screen->info.me_fw_version >= 26; otherwise PKT3_SET_UCONFIG_REG.
 * Expects GFX_VERSION and sctx to be in scope at the point of use.
 */
#define RESOLVE_PKT3_SET_UCONFIG_REG_INDEX \
   ((GFX_VERSION >= GFX10 || \
     (GFX_VERSION == GFX9 && sctx->screen->info.me_fw_version >= 26)) ? \
    PKT3_SET_UCONFIG_REG_INDEX : PKT3_SET_UCONFIG_REG)
284 
/* Unconditionally write one UCONFIG register with an index field; the packet
 * opcode is chosen by RESOLVE_PKT3_SET_UCONFIG_REG_INDEX.
 */
#define radeon_set_uconfig_reg_idx(reg, idx, value) \
   radeon_set_reg((reg), (idx), (value), CIK_UCONFIG, RESOLVE_PKT3_SET_UCONFIG_REG_INDEX)

/* Same, but skipped when the tracked value for "reg_enum" already matches. */
#define radeon_opt_set_uconfig_reg_idx(reg, reg_enum, idx, value) \
   radeon_opt_set_reg((reg), (reg_enum), (idx), (value), \
                      CIK_UCONFIG, RESOLVE_PKT3_SET_UCONFIG_REG_INDEX)
290 
/* Write "value" to a register below CIK_UCONFIG_REG_OFFSET using a COPY_DATA
 * packet with an immediate source and the COPY_DATA_PERF destination selector.
 * Six words are emitted in total.
 */
#define radeon_set_privileged_config_reg(reg, value) do { \
   assert((reg) < CIK_UCONFIG_REG_OFFSET); \
   /* Header and control word. */ \
   radeon_emit(PKT3(PKT3_COPY_DATA, 4, 0)); \
   radeon_emit(COPY_DATA_DST_SEL(COPY_DATA_PERF) | \
               COPY_DATA_SRC_SEL(COPY_DATA_IMM)); \
   /* Source: the immediate value. */ \
   radeon_emit(value); \
   radeon_emit(0); /* unused */ \
   /* Destination: register offset in 4-byte units. */ \
   radeon_emit((reg) >> 2); \
   radeon_emit(0); /* unused */ \
} while (0)
301 
/* GFX11 generic packet building helpers for buffered SH registers. Don't use these directly. */

/* Append one (register, value) entry to "buffer" and post-increment "reg_count".
 * Each buffer element holds two entries: entry i is stored in element i / 2,
 * slot i % 2. The register is stored as its offset from
 * prefix_name##_REG_OFFSET in 4-byte units. Asserts that "reg" is inside the
 * prefix_name range and that the buffer is not full.
 */
#define gfx11_push_reg(reg, value, prefix_name, buffer, reg_count) do { \
   unsigned __i = (reg_count)++; \
   assert((reg) >= prefix_name##_REG_OFFSET && (reg) < prefix_name##_REG_END); \
   assert(__i / 2 < ARRAY_SIZE(buffer)); \
   buffer[__i >> 1].reg_offset[__i & 1] = ((reg) - prefix_name##_REG_OFFSET) >> 2; \
   buffer[__i >> 1].reg_value[__i & 1] = value; \
} while (0)
310 
/* Buffer one register via gfx11_push_reg only when needed: skipped if
 * "reg_enum" is marked in sctx->tracked_regs.reg_saved_mask and the cached
 * value equals "value". After pushing, the cache and the mask are updated.
 */
#define gfx11_opt_push_reg(reg, reg_enum, value, prefix_name, buffer, reg_count) do { \
   unsigned __value = (value); \
   if (!(BITSET_TEST(sctx->tracked_regs.reg_saved_mask, (reg_enum)) && \
         sctx->tracked_regs.reg_value[reg_enum] == __value)) { \
      gfx11_push_reg(reg, __value, prefix_name, buffer, reg_count); \
      sctx->tracked_regs.reg_value[reg_enum] = __value; \
      BITSET_SET(sctx->tracked_regs.reg_saved_mask, (reg_enum)); \
   } \
} while (0)
320 
/* Buffer four consecutive registers (reg, reg + 4, reg + 8, reg + 12) via
 * gfx11_push_reg unless tracked entries reg_enum .. reg_enum + 3 are all marked
 * saved and already hold v1 .. v4. Each value is evaluated once.
 */
#define gfx11_opt_push_reg4(reg, reg_enum, v1, v2, v3, v4, prefix_name, buffer, reg_count) do { \
   unsigned __v1 = (v1), __v2 = (v2), __v3 = (v3), __v4 = (v4); \
   if (!(BITSET_TEST_RANGE_INSIDE_WORD(sctx->tracked_regs.reg_saved_mask, \
                                       (reg_enum), (reg_enum) + 3, 0xf) && \
         sctx->tracked_regs.reg_value[(reg_enum)] == __v1 && \
         sctx->tracked_regs.reg_value[(reg_enum) + 1] == __v2 && \
         sctx->tracked_regs.reg_value[(reg_enum) + 2] == __v3 && \
         sctx->tracked_regs.reg_value[(reg_enum) + 3] == __v4)) { \
      /* Registers are 4 bytes apart. */ \
      gfx11_push_reg((reg), __v1, prefix_name, buffer, reg_count); \
      gfx11_push_reg((reg) + 4, __v2, prefix_name, buffer, reg_count); \
      gfx11_push_reg((reg) + 8, __v3, prefix_name, buffer, reg_count); \
      gfx11_push_reg((reg) + 12, __v4, prefix_name, buffer, reg_count); \
      sctx->tracked_regs.reg_value[(reg_enum)] = __v1; \
      sctx->tracked_regs.reg_value[(reg_enum) + 1] = __v2; \
      sctx->tracked_regs.reg_value[(reg_enum) + 2] = __v3; \
      sctx->tracked_regs.reg_value[(reg_enum) + 3] = __v4; \
      BITSET_SET_RANGE_INSIDE_WORD(sctx->tracked_regs.reg_saved_mask, \
                                   (reg_enum), (reg_enum) + 3); \
   } \
} while (0)
344 
/* GFX11 packet building helpers for buffered SH registers.
 *
 * The gfx variants append to sctx->gfx11.buffered_gfx_sh_regs (counted by
 * sctx->num_buffered_gfx_sh_regs); the compute variants append to
 * sctx->gfx11.buffered_compute_sh_regs (counted by
 * sctx->num_buffered_compute_sh_regs). The opt variants skip registers whose
 * tracked value already matches.
 */
#define gfx11_push_gfx_sh_reg(reg, value) \
   gfx11_push_reg((reg), (value), SI_SH, sctx->gfx11.buffered_gfx_sh_regs, \
                  sctx->num_buffered_gfx_sh_regs)

#define gfx11_push_compute_sh_reg(reg, value) \
   gfx11_push_reg((reg), (value), SI_SH, sctx->gfx11.buffered_compute_sh_regs, \
                  sctx->num_buffered_compute_sh_regs)

#define gfx11_opt_push_gfx_sh_reg(reg, reg_enum, value) \
   gfx11_opt_push_reg((reg), (reg_enum), (value), SI_SH, \
                      sctx->gfx11.buffered_gfx_sh_regs, \
                      sctx->num_buffered_gfx_sh_regs)

#define gfx11_opt_push_compute_sh_reg(reg, reg_enum, value) \
   gfx11_opt_push_reg((reg), (reg_enum), (value), SI_SH, \
                      sctx->gfx11.buffered_compute_sh_regs, \
                      sctx->num_buffered_compute_sh_regs)
361 
/* GFX11 packet building helpers for SET_CONTEXT_REG_PAIRS_PACKED.
 * Registers are buffered on the stack and then copied to the command buffer at the end.
 */

/* Declare the on-stack staging buffer (50 pair elements) and its entry counter.
 * Note that the expansion already ends with a semicolon.
 */
#define gfx11_begin_packed_context_regs() \
   unsigned __cs_context_reg_count = 0; \
   struct gfx11_reg_pair __cs_context_regs[50];
368 
/* Stage one context register in the on-stack buffer declared by
 * gfx11_begin_packed_context_regs().
 */
#define gfx11_set_context_reg(reg, value) \
   gfx11_push_reg((reg), (value), SI_CONTEXT, __cs_context_regs, __cs_context_reg_count)

/* Stage one context register unless its tracked value already matches. */
#define gfx11_opt_set_context_reg(reg, reg_enum, value) \
   gfx11_opt_push_reg((reg), (reg_enum), (value), SI_CONTEXT, \
                      __cs_context_regs, __cs_context_reg_count)

/* Stage four consecutive context registers unless all tracked values match. */
#define gfx11_opt_set_context_reg4(reg, reg_enum, v1, v2, v3, v4) \
   gfx11_opt_push_reg4((reg), (reg_enum), (v1), (v2), (v3), (v4), SI_CONTEXT, \
                       __cs_context_regs, __cs_context_reg_count)
379 
/* Flush the staged context registers into the command buffer.
 *   - 0 entries: nothing is emitted.
 *   - 1 entry:   a plain one-register PKT3_SET_CONTEXT_REG packet is emitted.
 *   - 2 or more: an odd count is padded by re-staging the first entry, then a
 *                PKT3_SET_CONTEXT_REG_PAIRS_PACKED packet is emitted: header,
 *                entry count, and 3 words per staged pair element.
 */
#define gfx11_end_packed_context_regs() do { \
   if (__cs_context_reg_count == 1) { \
      radeon_emit(PKT3(PKT3_SET_CONTEXT_REG, 1, 0)); \
      radeon_emit(__cs_context_regs[0].reg_offset[0]); \
      radeon_emit(__cs_context_regs[0].reg_value[0]); \
   } else if (__cs_context_reg_count >= 2) { \
      /* Align the count to 2 by duplicating the first register. */ \
      if (__cs_context_reg_count % 2 != 0) { \
         gfx11_set_context_reg(SI_CONTEXT_REG_OFFSET + __cs_context_regs[0].reg_offset[0] * 4, \
                               __cs_context_regs[0].reg_value[0]); \
      } \
      assert(__cs_context_reg_count % 2 == 0); \
      unsigned __num_dw = (__cs_context_reg_count / 2) * 3; \
      radeon_emit(PKT3(PKT3_SET_CONTEXT_REG_PAIRS_PACKED, __num_dw, 0) | PKT3_RESET_FILTER_CAM_S(1)); \
      radeon_emit(__cs_context_reg_count); \
      radeon_emit_array(__cs_context_regs, __num_dw); \
   } \
} while (0)
398 
/* GFX12 generic packet building helpers for PAIRS packets. Don't use these directly. */

/* Reserve one slot for the packet header: declares "header" holding the current
 * write index and advances the index past it. gfx12_end_regs() later either
 * fills the slot or releases it. No trailing semicolon in the expansion.
 */
#define gfx12_begin_regs(header) unsigned header = (__cs_num++)
401 
/* Emit one (offset, value) pair: the offset of "reg" from "base_offset" in
 * 4-byte units, followed by "value".
 */
#define gfx12_set_reg(reg, value, base_offset) do { \
   /* Register offset word. */ \
   radeon_emit(((reg) - (base_offset)) >> 2); \
   /* Register value word. */ \
   radeon_emit(value); \
} while (0)
406 
/* Emit one (offset, value) pair only when needed: skipped if "reg_enum" is
 * marked in sctx->tracked_regs.reg_saved_mask and the cached value equals
 * "value". After emitting, the cache and the mask are updated.
 */
#define gfx12_opt_set_reg(reg, reg_enum, value, base_offset) do { \
   unsigned __value = (value); \
   if (!(BITSET_TEST(sctx->tracked_regs.reg_saved_mask, (reg_enum)) && \
         sctx->tracked_regs.reg_value[reg_enum] == __value)) { \
      gfx12_set_reg(reg, __value, base_offset); \
      sctx->tracked_regs.reg_value[reg_enum] = __value; \
      BITSET_SET(sctx->tracked_regs.reg_saved_mask, (reg_enum)); \
   } \
} while (0)
416 
/* Emit four (offset, value) pairs for reg, reg + 4, reg + 8 and reg + 12 unless
 * tracked entries reg_enum .. reg_enum + 3 are all marked saved and already
 * hold v1 .. v4. Each value is evaluated once.
 */
#define gfx12_opt_set_reg4(reg, reg_enum, v1, v2, v3, v4, base_offset) do { \
   unsigned __v1 = (v1); \
   unsigned __v2 = (v2); \
   unsigned __v3 = (v3); \
   unsigned __v4 = (v4); \
   if (!(BITSET_TEST_RANGE_INSIDE_WORD(sctx->tracked_regs.reg_saved_mask, \
                                       (reg_enum), (reg_enum) + 3, 0xf) && \
         sctx->tracked_regs.reg_value[(reg_enum)] == __v1 && \
         sctx->tracked_regs.reg_value[(reg_enum) + 1] == __v2 && \
         sctx->tracked_regs.reg_value[(reg_enum) + 2] == __v3 && \
         sctx->tracked_regs.reg_value[(reg_enum) + 3] == __v4)) { \
      /* Registers are 4 bytes apart. */ \
      gfx12_set_reg((reg), __v1, (base_offset)); \
      gfx12_set_reg((reg) + 4, __v2, (base_offset)); \
      gfx12_set_reg((reg) + 8, __v3, (base_offset)); \
      gfx12_set_reg((reg) + 12, __v4, (base_offset)); \
      sctx->tracked_regs.reg_value[(reg_enum)] = __v1; \
      sctx->tracked_regs.reg_value[(reg_enum) + 1] = __v2; \
      sctx->tracked_regs.reg_value[(reg_enum) + 2] = __v3; \
      sctx->tracked_regs.reg_value[(reg_enum) + 3] = __v4; \
      BITSET_SET_RANGE_INSIDE_WORD(sctx->tracked_regs.reg_saved_mask, \
                                   (reg_enum), (reg_enum) + 3); \
   } \
} while (0)
437 
/* Finish a PAIRS packet started with gfx12_begin_regs(header).
 * If anything was emitted after the reserved slot, the slot is filled with
 * PKT3(packet, count, 0) plus the reset-filter-cam field, where count is the
 * number of words after the header minus one. Otherwise the reserved slot is
 * given back by decrementing the write index.
 */
#define gfx12_end_regs(header, packet) do { \
   if (__cs_num != (header) + 1) { \
      unsigned __dw_count = __cs_num - (header) - 2; \
      __cs_buf[(header)] = PKT3((packet), __dw_count, 0) | PKT3_RESET_FILTER_CAM_S(1); \
   } else { \
      __cs_num--; /* no registers have been set, back off */ \
   } \
} while (0)
446 
/* GFX12 generic packet building helpers for buffered registers. Don't use these directly. */

/* Append one (offset, value) entry to sctx->gfx12.buffered_<type>_regs and
 * post-increment sctx->num_buffered_<type>_regs. The register is stored as its
 * offset from "base_offset" in 4-byte units. Asserts that the array is not full.
 */
#define gfx12_push_reg(reg, value, base_offset, type) do { \
   unsigned __i = sctx->num_buffered_##type##_regs++; \
   assert(__i < ARRAY_SIZE(sctx->gfx12.buffered_##type##_regs)); \
   /* Fill the newly claimed slot. */ \
   sctx->gfx12.buffered_##type##_regs[__i].reg_offset = ((reg) - (base_offset)) >> 2; \
   sctx->gfx12.buffered_##type##_regs[__i].reg_value = (value); \
} while (0)
454 
/* Buffer one register through gfx12_push_<type>_reg only when needed: skipped if
 * "reg_enum" is marked in sctx->tracked_regs.reg_saved_mask and the cached value
 * equals "value". After pushing, the mask and the cache are updated.
 *
 * "value" and "reg_enum" are each captured in a local exactly once, and only
 * the locals are used afterwards, so arguments with side effects are safe.
 */
#define gfx12_opt_push_reg(reg, reg_enum, value, type) do { \
   unsigned __value = (value); \
   unsigned __reg_enum = (reg_enum); \
   if (!BITSET_TEST(sctx->tracked_regs.reg_saved_mask, __reg_enum) || \
       sctx->tracked_regs.reg_value[__reg_enum] != __value) { \
      gfx12_push_##type##_reg(reg, __value); \
      BITSET_SET(sctx->tracked_regs.reg_saved_mask, __reg_enum); \
      sctx->tracked_regs.reg_value[__reg_enum] = __value; \
   } \
} while (0)
465 
/* GFX12 packet building helpers for PAIRS packets.
 *
 * Usage is bracketed: gfx12_begin_context_regs() reserves the header slot in a
 * local named __cs_context_reg_header, the set/opt_set helpers emit
 * (offset, value) pairs relative to SI_CONTEXT_REG_OFFSET, and
 * gfx12_end_context_regs() finalizes the packet as
 * PKT3_SET_CONTEXT_REG_PAIRS (or drops it if no pair was emitted).
 */
#define gfx12_begin_context_regs() \
   gfx12_begin_regs(__cs_context_reg_header)

/* Emit one pair unconditionally. */
#define gfx12_set_context_reg(reg, value) \
   gfx12_set_reg((reg), (value), SI_CONTEXT_REG_OFFSET)

/* Emit one pair unless the tracked value already matches. */
#define gfx12_opt_set_context_reg(reg, reg_enum, value) \
   gfx12_opt_set_reg((reg), (reg_enum), (value), SI_CONTEXT_REG_OFFSET)

/* Emit four consecutive pairs unless all tracked values already match. */
#define gfx12_opt_set_context_reg4(reg, reg_enum, v1, v2, v3, v4) \
   gfx12_opt_set_reg4((reg), (reg_enum), (v1), (v2), (v3), (v4), SI_CONTEXT_REG_OFFSET)

#define gfx12_end_context_regs() \
   gfx12_end_regs(__cs_context_reg_header, PKT3_SET_CONTEXT_REG_PAIRS)
481 
/* GFX12 packet building helpers for buffered registers.
 *
 * The gfx_sh variants target sctx->gfx12.buffered_gfx_sh_regs and the
 * compute_sh variants target sctx->gfx12.buffered_compute_sh_regs; offsets are
 * stored relative to SI_SH_REG_OFFSET. The opt variants skip registers whose
 * tracked value already matches.
 */
#define gfx12_push_gfx_sh_reg(reg, value) \
   gfx12_push_reg((reg), (value), SI_SH_REG_OFFSET, gfx_sh)

#define gfx12_push_compute_sh_reg(reg, value) \
   gfx12_push_reg((reg), (value), SI_SH_REG_OFFSET, compute_sh)

#define gfx12_opt_push_gfx_sh_reg(reg, reg_enum, value) \
   gfx12_opt_push_reg((reg), (reg_enum), (value), gfx_sh)

#define gfx12_opt_push_compute_sh_reg(reg, reg_enum, value) \
   gfx12_opt_push_reg((reg), (reg_enum), (value), compute_sh)
494 
/* Write one graphics SH register using whichever mechanism applies:
 *   - GFX_VERSION >= GFX12: buffered through gfx12_push_gfx_sh_reg
 *   - GFX_VERSION >= GFX11 with HAS_SH_PAIRS_PACKED: buffered through
 *     gfx11_push_gfx_sh_reg
 *   - otherwise: emitted immediately as a one-register SET_SH_REG packet
 * Expects GFX_VERSION and HAS_SH_PAIRS_PACKED to be in scope at the call site.
 */
#define radeon_set_or_push_gfx_sh_reg(reg, value) do { \
   if (GFX_VERSION >= GFX12) { \
      gfx12_push_gfx_sh_reg(reg, value); \
   } else if (GFX_VERSION >= GFX11 && HAS_SH_PAIRS_PACKED) { \
      gfx11_push_gfx_sh_reg(reg, value); \
   } else { \
      /* Header for one register followed by its value. */ \
      radeon_set_sh_reg(reg, value); \
   } \
} while (0)
505 
/* Other packet helpers. */

/* Emit a two-word PKT3_EVENT_WRITE packet for "event_type". The event index
 * field is 4 for the VS/PS/CS partial-flush events, 1 for
 * PIXEL_PIPE_STAT_CONTROL, and 0 for everything else. "event_type" is
 * evaluated once.
 */
#define radeon_event_write(event_type) do { \
   unsigned __event_type = (event_type); \
   unsigned __event_index = 0; \
   if (__event_type == V_028A90_VS_PARTIAL_FLUSH || \
       __event_type == V_028A90_PS_PARTIAL_FLUSH || \
       __event_type == V_028A90_CS_PARTIAL_FLUSH) { \
      __event_index = 4; \
   } else if (__event_type == V_028A90_PIXEL_PIPE_STAT_CONTROL) { \
      __event_index = 1; \
   } \
   radeon_emit(PKT3(PKT3_EVENT_WRITE, 0, 0)); \
   radeon_emit(EVENT_TYPE(__event_type) | EVENT_INDEX(__event_index)); \
} while (0)
516 
517 /* This should be evaluated at compile time if all parameters are constants. */
518 static ALWAYS_INLINE unsigned
si_get_user_data_base(enum amd_gfx_level gfx_level,enum si_has_tess has_tess,enum si_has_gs has_gs,enum si_has_ngg ngg,enum pipe_shader_type shader)519 si_get_user_data_base(enum amd_gfx_level gfx_level, enum si_has_tess has_tess,
520                       enum si_has_gs has_gs, enum si_has_ngg ngg,
521                       enum pipe_shader_type shader)
522 {
523    switch (shader) {
524    case PIPE_SHADER_VERTEX:
525       /* VS can be bound as VS, ES, LS, or GS. */
526       if (has_tess) {
527          if (gfx_level >= GFX10) {
528             return R_00B430_SPI_SHADER_USER_DATA_HS_0;
529          } else if (gfx_level == GFX9) {
530             return R_00B430_SPI_SHADER_USER_DATA_LS_0;
531          } else {
532             return R_00B530_SPI_SHADER_USER_DATA_LS_0;
533          }
534       } else if (gfx_level >= GFX10) {
535          if (ngg || has_gs) {
536             return R_00B230_SPI_SHADER_USER_DATA_GS_0;
537          } else {
538             return R_00B130_SPI_SHADER_USER_DATA_VS_0;
539          }
540       } else if (has_gs) {
541          return R_00B330_SPI_SHADER_USER_DATA_ES_0;
542       } else {
543          return R_00B130_SPI_SHADER_USER_DATA_VS_0;
544       }
545 
546    case PIPE_SHADER_TESS_CTRL:
547       if (gfx_level == GFX9) {
548          return R_00B430_SPI_SHADER_USER_DATA_LS_0;
549       } else {
550          return R_00B430_SPI_SHADER_USER_DATA_HS_0;
551       }
552 
553    case PIPE_SHADER_TESS_EVAL:
554       /* TES can be bound as ES, VS, or not bound. */
555       if (has_tess) {
556          if (gfx_level >= GFX10) {
557             if (ngg || has_gs) {
558                return R_00B230_SPI_SHADER_USER_DATA_GS_0;
559             } else {
560                return R_00B130_SPI_SHADER_USER_DATA_VS_0;
561             }
562          } else if (has_gs) {
563             return R_00B330_SPI_SHADER_USER_DATA_ES_0;
564          } else {
565             return R_00B130_SPI_SHADER_USER_DATA_VS_0;
566          }
567       } else {
568          return 0;
569       }
570 
571    case PIPE_SHADER_GEOMETRY:
572       if (gfx_level == GFX9) {
573          return R_00B330_SPI_SHADER_USER_DATA_ES_0;
574       } else {
575          return R_00B230_SPI_SHADER_USER_DATA_GS_0;
576       }
577 
578    default:
579       assert(0);
580       return 0;
581    }
582 }
583 
584 #endif
585