/*
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 *
 * SPDX-License-Identifier: MIT
 */

#ifndef RADV_CS_H
#define RADV_CS_H

#include <assert.h>
#include <stdint.h>
#include <string.h>

#include "radv_cmd_buffer.h"
#include "radv_radeon_winsys.h"
#include "sid.h"

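/* Ensure the CS has room for at least "needed" more dwords, growing it if necessary,
 * and reserve that space. Returns the expected cs->cdw once those dwords have been
 * emitted, so callers can assert they did not write past the reservation.
 */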
static inline unsigned
radeon_check_space(struct radeon_winsys *ws, struct radeon_cmdbuf *cs, unsigned needed)
{
   assert(cs->cdw <= cs->reserved_dw);
   if (cs->max_dw - cs->cdw < needed)
      ws->cs_grow(cs, needed);
   cs->reserved_dw = MAX2(cs->reserved_dw, cs->cdw + needed);
   return cs->cdw + needed;
}

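/* Emit the 2-dword header of a SET_*_REG packet for "num" consecutive registers starting
 * at "reg". The caller must emit exactly "num" register values afterwards.
 */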
static inline void
radeon_set_reg_seq(struct radeon_cmdbuf *cs, unsigned reg, unsigned num, unsigned idx, unsigned base_reg_offset,
                   unsigned packet, bool reset_filter_cam)
{
   assert(cs->cdw + 2 + num <= cs->reserved_dw);
   assert(num);
   radeon_emit(cs, PKT3(packet, num, 0) | PKT3_RESET_FILTER_CAM_S(reset_filter_cam));
   radeon_emit(cs, ((reg - base_reg_offset) >> 2) | (idx << 28));
}

static inline void
radeon_set_config_reg_seq(struct radeon_cmdbuf *cs, unsigned reg, unsigned num)
{
   assert(reg >= SI_CONFIG_REG_OFFSET && reg < SI_CONFIG_REG_END);
   radeon_set_reg_seq(cs, reg, num, 0, SI_CONFIG_REG_OFFSET, PKT3_SET_CONFIG_REG, false);
}

static inline void
radeon_set_config_reg(struct radeon_cmdbuf *cs, unsigned reg, unsigned value)
{
   radeon_set_config_reg_seq(cs, reg, 1);
   radeon_emit(cs, value);
}

static inline void
radeon_set_context_reg_seq(struct radeon_cmdbuf *cs, unsigned reg, unsigned num)
{
   assert(reg >= SI_CONTEXT_REG_OFFSET && reg < SI_CONTEXT_REG_END);
   radeon_set_reg_seq(cs, reg, num, 0, SI_CONTEXT_REG_OFFSET, PKT3_SET_CONTEXT_REG, false);
}

static inline void
radeon_set_context_reg(struct radeon_cmdbuf *cs, unsigned reg, unsigned value)
{
   radeon_set_context_reg_seq(cs, reg, 1);
   radeon_emit(cs, value);
}

static inline void
radeon_set_context_reg_idx(struct radeon_cmdbuf *cs, unsigned reg, unsigned idx, unsigned value)
{
   assert(reg >= SI_CONTEXT_REG_OFFSET && reg < SI_CONTEXT_REG_END);
   radeon_set_reg_seq(cs, reg, 1, idx, SI_CONTEXT_REG_OFFSET, PKT3_SET_CONTEXT_REG, false);
   radeon_emit(cs, value);
}

static inline void
radeon_set_sh_reg_seq(struct radeon_cmdbuf *cs, unsigned reg, unsigned num)
{
   assert(reg >= SI_SH_REG_OFFSET && reg < SI_SH_REG_END);
   radeon_set_reg_seq(cs, reg, num, 0, SI_SH_REG_OFFSET, PKT3_SET_SH_REG, false);
}

static inline void
radeon_set_sh_reg(struct radeon_cmdbuf *cs, unsigned reg, unsigned value)
{
   radeon_set_sh_reg_seq(cs, reg, 1);
   radeon_emit(cs, value);
}

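/* Indexed SH register write: uses SET_SH_REG_INDEX on GFX10+ and falls back to a plain
 * SET_SH_REG packet on older chips.
 */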
static inline void
radeon_set_sh_reg_idx(const struct radeon_info *info, struct radeon_cmdbuf *cs, unsigned reg, unsigned idx,
                      unsigned value)
{
   assert(reg >= SI_SH_REG_OFFSET && reg < SI_SH_REG_END);
   assert(idx);

   unsigned opcode = PKT3_SET_SH_REG_INDEX;
   if (info->gfx_level < GFX10)
      opcode = PKT3_SET_SH_REG;

   radeon_set_reg_seq(cs, reg, 1, idx, SI_SH_REG_OFFSET, opcode, false);
   radeon_emit(cs, value);
}

static inline void
radeon_set_uconfig_reg_seq(struct radeon_cmdbuf *cs, unsigned reg, unsigned num)
{
   assert(reg >= CIK_UCONFIG_REG_OFFSET && reg < CIK_UCONFIG_REG_END);
   radeon_set_reg_seq(cs, reg, num, 0, CIK_UCONFIG_REG_OFFSET, PKT3_SET_UCONFIG_REG, false);
}

static inline void
radeon_set_uconfig_perfctr_reg_seq(enum amd_gfx_level gfx_level, enum radv_queue_family qf, struct radeon_cmdbuf *cs,
                                   unsigned reg, unsigned num)
{
   /*
    * On GFX10, the ME's content addressable memory (CAM) has a bug: it can skip register
    * writes because it does not correctly take the GRBM_GFX_INDEX fields into account.
    * Setting this bit forces the write to happen.
    */
   const bool filter_cam_workaround = gfx_level >= GFX10 && qf == RADV_QUEUE_GENERAL;

   assert(reg >= CIK_UCONFIG_REG_OFFSET && reg < CIK_UCONFIG_REG_END);
   radeon_set_reg_seq(cs, reg, num, 0, CIK_UCONFIG_REG_OFFSET, PKT3_SET_UCONFIG_REG, filter_cam_workaround);
}

static inline void
radeon_set_uconfig_perfctr_reg(enum amd_gfx_level gfx_level, enum radv_queue_family qf, struct radeon_cmdbuf *cs,
                               unsigned reg, unsigned value)
{
   radeon_set_uconfig_perfctr_reg_seq(gfx_level, qf, cs, reg, 1);
   radeon_emit(cs, value);
}

static inline void
radeon_set_uconfig_reg(struct radeon_cmdbuf *cs, unsigned reg, unsigned value)
{
   radeon_set_uconfig_reg_seq(cs, reg, 1);
   radeon_emit(cs, value);
}

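/* Indexed UCONFIG register write: uses SET_UCONFIG_REG_INDEX where the firmware supports it
 * (GFX9 with ME firmware >= 26, or GFX10+), otherwise falls back to a plain SET_UCONFIG_REG.
 */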
static inline void
radeon_set_uconfig_reg_idx(const struct radeon_info *info, struct radeon_cmdbuf *cs, unsigned reg, unsigned idx,
                           unsigned value)
{
   assert(reg >= CIK_UCONFIG_REG_OFFSET && reg < CIK_UCONFIG_REG_END);
   assert(idx);

   unsigned opcode = PKT3_SET_UCONFIG_REG_INDEX;
   if (info->gfx_level < GFX9 || (info->gfx_level == GFX9 && info->me_fw_version < 26))
      opcode = PKT3_SET_UCONFIG_REG;

   radeon_set_reg_seq(cs, reg, 1, idx, CIK_UCONFIG_REG_OFFSET, opcode, false);
   radeon_emit(cs, value);
}

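/* Privileged config registers sit below the UCONFIG range and are written through a
 * COPY_DATA packet (immediate source, perf destination selector) instead of a SET_*_REG
 * packet.
 */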
static inline void
radeon_set_privileged_config_reg(struct radeon_cmdbuf *cs, unsigned reg, unsigned value)
{
   assert(reg < CIK_UCONFIG_REG_OFFSET);
   assert(cs->cdw + 6 <= cs->reserved_dw);

   radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
   radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_IMM) | COPY_DATA_DST_SEL(COPY_DATA_PERF));
   radeon_emit(cs, value);
   radeon_emit(cs, 0); /* unused */
   radeon_emit(cs, reg >> 2);
   radeon_emit(cs, 0); /* unused */
}

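/* The radeon_opt_set_context_reg* macros track the last value written to each register in
 * the command buffer and skip the write when it would be redundant, avoiding unnecessary
 * context rolls. Any write that does happen flags a context roll without a scissor emit.
 */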
#define radeon_opt_set_context_reg(cmdbuf, reg, reg_enum, value)                                                      \
   do {                                                                                                               \
      struct radv_cmd_buffer *__cmdbuf = (cmdbuf);                                                                    \
      struct radv_tracked_regs *__tracked_regs = &__cmdbuf->tracked_regs;                                             \
      const uint32_t __value = (value);                                                                               \
      if (!BITSET_TEST(__tracked_regs->reg_saved_mask, (reg_enum)) ||                                                 \
          __tracked_regs->reg_value[(reg_enum)] != __value) {                                                         \
         radeon_set_context_reg(__cmdbuf->cs, reg, __value);                                                          \
         BITSET_SET(__tracked_regs->reg_saved_mask, (reg_enum));                                                      \
         __tracked_regs->reg_value[(reg_enum)] = __value;                                                             \
         __cmdbuf->state.context_roll_without_scissor_emitted = true;                                                 \
      }                                                                                                               \
   } while (0)

#define radeon_opt_set_context_reg2(cmdbuf, reg, reg_enum, v1, v2)                                                    \
   do {                                                                                                               \
      struct radv_cmd_buffer *__cmdbuf = (cmdbuf);                                                                    \
      struct radv_tracked_regs *__tracked_regs = &__cmdbuf->tracked_regs;                                             \
      const uint32_t __v1 = (v1), __v2 = (v2);                                                                        \
      if (!BITSET_TEST_RANGE_INSIDE_WORD(__tracked_regs->reg_saved_mask, (reg_enum), (reg_enum) + 1, 0x3) ||          \
          __tracked_regs->reg_value[(reg_enum)] != __v1 || __tracked_regs->reg_value[(reg_enum) + 1] != __v2) {       \
         radeon_set_context_reg_seq(cmdbuf->cs, reg, 2);                                                              \
         radeon_emit(cmdbuf->cs, __v1);                                                                               \
         radeon_emit(cmdbuf->cs, __v2);                                                                               \
         BITSET_SET_RANGE_INSIDE_WORD(__tracked_regs->reg_saved_mask, (reg_enum), (reg_enum) + 1);                    \
         __tracked_regs->reg_value[(reg_enum)] = __v1;                                                                \
         __tracked_regs->reg_value[(reg_enum) + 1] = __v2;                                                            \
         cmdbuf->state.context_roll_without_scissor_emitted = true;                                                   \
      }                                                                                                               \
   } while (0)

#define radeon_opt_set_context_reg3(cmdbuf, reg, reg_enum, v1, v2, v3)                                                \
   do {                                                                                                               \
      struct radv_cmd_buffer *__cmdbuf = (cmdbuf);                                                                    \
      struct radv_tracked_regs *__tracked_regs = &__cmdbuf->tracked_regs;                                             \
      const uint32_t __v1 = (v1), __v2 = (v2), __v3 = (v3);                                                           \
      if (!BITSET_TEST_RANGE_INSIDE_WORD(__tracked_regs->reg_saved_mask, (reg_enum), (reg_enum) + 2, 0x7) ||          \
          __tracked_regs->reg_value[(reg_enum)] != __v1 || __tracked_regs->reg_value[(reg_enum) + 1] != __v2 ||       \
          __tracked_regs->reg_value[(reg_enum) + 2] != __v3) {                                                        \
         radeon_set_context_reg_seq(cmdbuf->cs, reg, 3);                                                              \
         radeon_emit(cmdbuf->cs, __v1);                                                                               \
         radeon_emit(cmdbuf->cs, __v2);                                                                               \
         radeon_emit(cmdbuf->cs, __v3);                                                                               \
         BITSET_SET_RANGE_INSIDE_WORD(__tracked_regs->reg_saved_mask, (reg_enum), (reg_enum) + 2);                    \
         __tracked_regs->reg_value[(reg_enum)] = __v1;                                                                \
         __tracked_regs->reg_value[(reg_enum) + 1] = __v2;                                                            \
         __tracked_regs->reg_value[(reg_enum) + 2] = __v3;                                                            \
         cmdbuf->state.context_roll_without_scissor_emitted = true;                                                   \
      }                                                                                                               \
   } while (0)

#define radeon_opt_set_context_reg4(cmdbuf, reg, reg_enum, v1, v2, v3, v4)                                            \
   do {                                                                                                               \
      struct radv_cmd_buffer *__cmdbuf = (cmdbuf);                                                                    \
      struct radv_tracked_regs *__tracked_regs = &__cmdbuf->tracked_regs;                                             \
      const uint32_t __v1 = (v1), __v2 = (v2), __v3 = (v3), __v4 = (v4);                                              \
      if (!BITSET_TEST_RANGE_INSIDE_WORD(__tracked_regs->reg_saved_mask, (reg_enum), (reg_enum) + 3, 0xf) ||          \
          __tracked_regs->reg_value[(reg_enum)] != __v1 || __tracked_regs->reg_value[(reg_enum) + 1] != __v2 ||       \
          __tracked_regs->reg_value[(reg_enum) + 2] != __v3 || __tracked_regs->reg_value[(reg_enum) + 3] != __v4) {   \
         radeon_set_context_reg_seq(cmdbuf->cs, reg, 4);                                                              \
         radeon_emit(cmdbuf->cs, __v1);                                                                               \
         radeon_emit(cmdbuf->cs, __v2);                                                                               \
         radeon_emit(cmdbuf->cs, __v3);                                                                               \
         radeon_emit(cmdbuf->cs, __v4);                                                                               \
         BITSET_SET_RANGE_INSIDE_WORD(__tracked_regs->reg_saved_mask, (reg_enum), (reg_enum) + 3);                    \
         __tracked_regs->reg_value[(reg_enum)] = __v1;                                                                \
         __tracked_regs->reg_value[(reg_enum) + 1] = __v2;                                                            \
         __tracked_regs->reg_value[(reg_enum) + 2] = __v3;                                                            \
         __tracked_regs->reg_value[(reg_enum) + 3] = __v4;                                                            \
         cmdbuf->state.context_roll_without_scissor_emitted = true;                                                   \
      }                                                                                                               \
   } while (0)

#define radeon_opt_set_context_regn(cmdbuf, reg, values, saved_values, num)                                           \
   do {                                                                                                               \
      struct radv_cmd_buffer *__cmdbuf = (cmdbuf);                                                                    \
      if (memcmp(values, saved_values, sizeof(uint32_t) * (num))) {                                                   \
         radeon_set_context_reg_seq(cmdbuf->cs, reg, num);                                                            \
         radeon_emit_array(cmdbuf->cs, values, num);                                                                  \
         memcpy(saved_values, values, sizeof(uint32_t) * (num));                                                      \
         __cmdbuf->state.context_roll_without_scissor_emitted = true;                                                 \
      }                                                                                                               \
   } while (0)

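/* Emit a packet that makes the selected queue poll a memory location until
 * ((*va & mask) OP ref) holds: WAIT_REG_MEM on the GFX/compute queues and an
 * SDMA POLL_REGMEM on the transfer queue.
 */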
ALWAYS_INLINE static void
radv_cp_wait_mem(struct radeon_cmdbuf *cs, const enum radv_queue_family qf, const uint32_t op, const uint64_t va,
                 const uint32_t ref, const uint32_t mask)
{
   assert(op == WAIT_REG_MEM_EQUAL || op == WAIT_REG_MEM_NOT_EQUAL || op == WAIT_REG_MEM_GREATER_OR_EQUAL);

   if (qf == RADV_QUEUE_GENERAL || qf == RADV_QUEUE_COMPUTE) {
      radeon_emit(cs, PKT3(PKT3_WAIT_REG_MEM, 5, false));
      radeon_emit(cs, op | WAIT_REG_MEM_MEM_SPACE(1));
      radeon_emit(cs, va);
      radeon_emit(cs, va >> 32);
      radeon_emit(cs, ref);  /* reference value */
      radeon_emit(cs, mask); /* mask */
      radeon_emit(cs, 4);    /* poll interval */
   } else if (qf == RADV_QUEUE_TRANSFER) {
      radeon_emit(cs, SDMA_PACKET(SDMA_OPCODE_POLL_REGMEM, 0, 0) | op << 28 | SDMA_POLL_MEM);
      radeon_emit(cs, va);
      radeon_emit(cs, va >> 32);
      radeon_emit(cs, ref);
      radeon_emit(cs, mask);
      radeon_emit(cs, SDMA_POLL_INTERVAL_160_CLK | SDMA_POLL_RETRY_INDEFINITELY << 16);
   } else {
      unreachable("unsupported queue family");
   }
}

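/* Emit the header of a write-data packet (WRITE_DATA on GFX/compute, SDMA linear write on
 * the transfer queue) for "count" dwords at "va". The caller emits the payload dwords and
 * should assert that cs->cdw matches the returned value afterwards.
 */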
ALWAYS_INLINE static unsigned
radv_cs_write_data_head(const struct radv_device *device, struct radeon_cmdbuf *cs, const enum radv_queue_family qf,
                        const unsigned engine_sel, const uint64_t va, const unsigned count, const bool predicating)
{
   /* Return the correct cdw at the end of the packet so the caller can assert it. */
   const unsigned cdw_end = radeon_check_space(device->ws, cs, 4 + count);

   if (qf == RADV_QUEUE_GENERAL || qf == RADV_QUEUE_COMPUTE) {
      radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 2 + count, predicating));
      radeon_emit(cs, S_370_DST_SEL(V_370_MEM) | S_370_WR_CONFIRM(1) | S_370_ENGINE_SEL(engine_sel));
      radeon_emit(cs, va);
      radeon_emit(cs, va >> 32);
   } else if (qf == RADV_QUEUE_TRANSFER) {
      /* Vulkan transfer queues don't support conditional rendering, so we can ignore predication here.
       * Furthermore, we can ignore the engine selection here, as it is meaningless to the SDMA engine.
       */
      radeon_emit(cs, SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0));
      radeon_emit(cs, va);
      radeon_emit(cs, va >> 32);
      radeon_emit(cs, count - 1);
   } else {
      unreachable("unsupported queue family");
   }

   return cdw_end;
}

ALWAYS_INLINE static void
radv_cs_write_data(const struct radv_device *device, struct radeon_cmdbuf *cs, const enum radv_queue_family qf,
                   const unsigned engine_sel, const uint64_t va, const unsigned count, const uint32_t *dwords,
                   const bool predicating)
{
   ASSERTED const unsigned cdw_end = radv_cs_write_data_head(device, cs, qf, engine_sel, va, count, predicating);
   radeon_emit_array(cs, dwords, count);
   assert(cs->cdw == cdw_end);
}

void radv_cs_emit_write_event_eop(struct radeon_cmdbuf *cs, enum amd_gfx_level gfx_level, enum radv_queue_family qf,
                                  unsigned event, unsigned event_flags, unsigned dst_sel, unsigned data_sel,
                                  uint64_t va, uint32_t new_fence, uint64_t gfx9_eop_bug_va);

void radv_cs_emit_cache_flush(struct radeon_winsys *ws, struct radeon_cmdbuf *cs, enum amd_gfx_level gfx_level,
                              uint32_t *flush_cnt, uint64_t flush_va, enum radv_queue_family qf,
                              enum radv_cmd_flush_bits flush_bits, enum rgp_flush_bits *sqtt_flush_bits,
                              uint64_t gfx9_eop_bug_va);

void radv_emit_cond_exec(const struct radv_device *device, struct radeon_cmdbuf *cs, uint64_t va, uint32_t count);

void radv_cs_write_data_imm(struct radeon_cmdbuf *cs, unsigned engine_sel, uint64_t va, uint32_t imm);

#endif /* RADV_CS_H */