/*
 * Copyright 2012 Advanced Micro Devices, Inc.
 * Copyright 2024 Valve Corporation
 *
 * SPDX-License-Identifier: MIT
 */

#include "ac_cmdbuf.h"
#include "ac_pm4.h"
#include "ac_shader_util.h"

#include "sid.h"

#include "util/u_math.h"

#define SI_GS_PER_ES 128

static void
gfx6_init_compute_preamble_state(const struct ac_preamble_state *state,
                                 struct ac_pm4_state *pm4)
{
   const struct radeon_info *info = pm4->info;
   const uint32_t compute_cu_en = S_00B858_SH0_CU_EN(info->spi_cu_en) |
                                  S_00B858_SH1_CU_EN(info->spi_cu_en);

   ac_pm4_set_reg(pm4, R_00B834_COMPUTE_PGM_HI, S_00B834_DATA(info->address32_hi >> 8));

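   /* Enable the requested CUs on every shader engine that exists; SEs past
    * info->max_se get an empty mask.
    */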
   for (unsigned i = 0; i < 2; ++i)
      ac_pm4_set_reg(pm4, R_00B858_COMPUTE_STATIC_THREAD_MGMT_SE0 + i * 4,
                     i < info->max_se ? compute_cu_en : 0x0);

   if (info->gfx_level >= GFX7) {
      for (unsigned i = 2; i < 4; ++i)
         ac_pm4_set_reg(pm4, R_00B864_COMPUTE_STATIC_THREAD_MGMT_SE2 + (i - 2) * 4,
                        i < info->max_se ? compute_cu_en : 0x0);
   }

   if (info->gfx_level >= GFX9)
      ac_pm4_set_reg(pm4, R_0301EC_CP_COHER_START_DELAY, 0);

   /* Set the pointer to border colors. */
   if (info->gfx_level >= GFX7) {
      ac_pm4_set_reg(pm4, R_030E00_TA_CS_BC_BASE_ADDR, state->border_color_va >> 8);
      ac_pm4_set_reg(pm4, R_030E04_TA_CS_BC_BASE_ADDR_HI,
                     S_030E04_ADDRESS(state->border_color_va >> 40));
   } else if (info->gfx_level == GFX6) {
      ac_pm4_set_reg(pm4, R_00950C_TA_CS_BC_BASE_ADDR, state->border_color_va >> 8);
   }
}

static void
gfx10_init_compute_preamble_state(const struct ac_preamble_state *state,
                                  struct ac_pm4_state *pm4)
{
   const struct radeon_info *info = pm4->info;
   const uint32_t compute_cu_en = S_00B858_SH0_CU_EN(info->spi_cu_en) |
                                  S_00B858_SH1_CU_EN(info->spi_cu_en);

   if (info->gfx_level < GFX11)
      ac_pm4_set_reg(pm4, R_0301EC_CP_COHER_START_DELAY, 0x20);
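   /* Set the pointer to border colors. */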
   ac_pm4_set_reg(pm4, R_030E00_TA_CS_BC_BASE_ADDR, state->border_color_va >> 8);
   ac_pm4_set_reg(pm4, R_030E04_TA_CS_BC_BASE_ADDR_HI, S_030E04_ADDRESS(state->border_color_va >> 40));

   ac_pm4_set_reg(pm4, R_00B834_COMPUTE_PGM_HI, S_00B834_DATA(info->address32_hi >> 8));

   for (unsigned i = 0; i < 2; ++i)
      ac_pm4_set_reg(pm4, R_00B858_COMPUTE_STATIC_THREAD_MGMT_SE0 + i * 4,
                     i < info->max_se ? compute_cu_en : 0x0);

   for (unsigned i = 2; i < 4; ++i)
      ac_pm4_set_reg(pm4, R_00B864_COMPUTE_STATIC_THREAD_MGMT_SE2 + (i - 2) * 4,
                     i < info->max_se ? compute_cu_en : 0x0);

   ac_pm4_set_reg(pm4, R_00B890_COMPUTE_USER_ACCUM_0, 0);
   ac_pm4_set_reg(pm4, R_00B894_COMPUTE_USER_ACCUM_1, 0);
   ac_pm4_set_reg(pm4, R_00B898_COMPUTE_USER_ACCUM_2, 0);
   ac_pm4_set_reg(pm4, R_00B89C_COMPUTE_USER_ACCUM_3, 0);

   if (info->gfx_level >= GFX11) {
      for (unsigned i = 4; i < 8; ++i)
         ac_pm4_set_reg(pm4, R_00B8AC_COMPUTE_STATIC_THREAD_MGMT_SE4 + (i - 4) * 4,
                        i < info->max_se ? compute_cu_en : 0x0);

      /* How many threads should go to 1 SE before moving onto the next. Think of GL1 cache hits.
       * Only these values are valid: 0 (disabled), 64, 128, 256, 512
       * Recommendation: 64 = RT, 256 = non-RT (run benchmarks to be sure)
       */
      ac_pm4_set_reg(pm4, R_00B8BC_COMPUTE_DISPATCH_INTERLEAVE,
                     S_00B8BC_INTERLEAVE(state->gfx11.compute_dispatch_interleave));
   }

   ac_pm4_set_reg(pm4, R_00B9F4_COMPUTE_DISPATCH_TUNNEL, 0);
}

static void
gfx12_init_compute_preamble_state(const struct ac_preamble_state *state,
                                  struct ac_pm4_state *pm4)
{
   const struct radeon_info *info = pm4->info;
   const uint32_t compute_cu_en = S_00B858_SH0_CU_EN(info->spi_cu_en) |
                                  S_00B858_SH1_CU_EN(info->spi_cu_en);
   const uint32_t num_se = info->max_se;

   ac_pm4_set_reg(pm4, R_030E00_TA_CS_BC_BASE_ADDR, state->border_color_va >> 8);
   ac_pm4_set_reg(pm4, R_030E04_TA_CS_BC_BASE_ADDR_HI, S_030E04_ADDRESS(state->border_color_va >> 40));

   ac_pm4_set_reg(pm4, R_00B82C_COMPUTE_PERFCOUNT_ENABLE, 0);
   ac_pm4_set_reg(pm4, R_00B834_COMPUTE_PGM_HI, S_00B834_DATA(info->address32_hi >> 8));
   ac_pm4_set_reg(pm4, R_00B838_COMPUTE_DISPATCH_PKT_ADDR_LO, 0);
   ac_pm4_set_reg(pm4, R_00B83C_COMPUTE_DISPATCH_PKT_ADDR_HI, 0);
   ac_pm4_set_reg(pm4, R_00B858_COMPUTE_STATIC_THREAD_MGMT_SE0, compute_cu_en);
   ac_pm4_set_reg(pm4, R_00B85C_COMPUTE_STATIC_THREAD_MGMT_SE1, num_se > 1 ? compute_cu_en : 0);
   ac_pm4_set_reg(pm4, R_00B864_COMPUTE_STATIC_THREAD_MGMT_SE2, num_se > 2 ? compute_cu_en : 0);
   ac_pm4_set_reg(pm4, R_00B868_COMPUTE_STATIC_THREAD_MGMT_SE3, num_se > 3 ? compute_cu_en : 0);
   ac_pm4_set_reg(pm4, R_00B88C_COMPUTE_STATIC_THREAD_MGMT_SE8, num_se > 8 ? compute_cu_en : 0);
   ac_pm4_set_reg(pm4, R_00B890_COMPUTE_USER_ACCUM_0, 0);
   ac_pm4_set_reg(pm4, R_00B894_COMPUTE_USER_ACCUM_1, 0);
   ac_pm4_set_reg(pm4, R_00B898_COMPUTE_USER_ACCUM_2, 0);
   ac_pm4_set_reg(pm4, R_00B89C_COMPUTE_USER_ACCUM_3, 0);
   ac_pm4_set_reg(pm4, R_00B8AC_COMPUTE_STATIC_THREAD_MGMT_SE4, num_se > 4 ? compute_cu_en : 0);
   ac_pm4_set_reg(pm4, R_00B8B0_COMPUTE_STATIC_THREAD_MGMT_SE5, num_se > 5 ? compute_cu_en : 0);
   ac_pm4_set_reg(pm4, R_00B8B4_COMPUTE_STATIC_THREAD_MGMT_SE6, num_se > 6 ? compute_cu_en : 0);
   ac_pm4_set_reg(pm4, R_00B8B8_COMPUTE_STATIC_THREAD_MGMT_SE7, num_se > 7 ? compute_cu_en : 0);
   ac_pm4_set_reg(pm4, R_00B9F4_COMPUTE_DISPATCH_TUNNEL, 0);
}

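/* A minimal usage sketch (an assumption, not taken from this file): the caller
 * owns an ac_pm4_state whose pm4->info points at the chip's radeon_info, fills
 * in the ac_preamble_state fields consumed here, and then emits the resulting
 * packets with its own queue/IB machinery.
 *
 *    struct ac_preamble_state preamble = {
 *       .border_color_va = border_color_buffer_va,   // hypothetical buffer VA
 *    };
 *    ac_init_compute_preamble_state(&preamble, pm4);
 */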
void
ac_init_compute_preamble_state(const struct ac_preamble_state *state,
                               struct ac_pm4_state *pm4)
{
   const struct radeon_info *info = pm4->info;

   if (info->gfx_level >= GFX12) {
      gfx12_init_compute_preamble_state(state, pm4);
   } else if (info->gfx_level >= GFX10) {
      gfx10_init_compute_preamble_state(state, pm4);
   } else {
      gfx6_init_compute_preamble_state(state, pm4);
   }
}

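/* GRBM_GFX_INDEX selects which SE/SH/instance subsequent register writes target;
 * setting the broadcast bits makes writes go to all of them again.
 */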
static void
ac_set_grbm_gfx_index(const struct radeon_info *info, struct ac_pm4_state *pm4, unsigned value)
{
   const unsigned reg = info->gfx_level >= GFX7 ? R_030800_GRBM_GFX_INDEX : R_00802C_GRBM_GFX_INDEX;
   ac_pm4_set_reg(pm4, reg, value);
}

static void
ac_set_grbm_gfx_index_se(const struct radeon_info *info, struct ac_pm4_state *pm4, unsigned se)
{
   assert(se == ~0 || se < info->max_se);
   ac_set_grbm_gfx_index(info, pm4,
                         (se == ~0 ? S_030800_SE_BROADCAST_WRITES(1) : S_030800_SE_INDEX(se)) |
                         S_030800_SH_BROADCAST_WRITES(1) |
                         S_030800_INSTANCE_BROADCAST_WRITES(1));
}

static void
ac_write_harvested_raster_configs(const struct radeon_info *info, struct ac_pm4_state *pm4,
                                  unsigned raster_config, unsigned raster_config_1)
{
   const unsigned num_se = MAX2(info->max_se, 1);
   unsigned raster_config_se[4];
   unsigned se;

   ac_get_harvested_configs(info, raster_config, &raster_config_1, raster_config_se);

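   /* Write a per-SE PA_SC_RASTER_CONFIG, then restore broadcast writes. */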
   for (se = 0; se < num_se; se++) {
      ac_set_grbm_gfx_index_se(info, pm4, se);
      ac_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, raster_config_se[se]);
   }
   ac_set_grbm_gfx_index(info, pm4, ~0);

   if (info->gfx_level >= GFX7) {
      ac_pm4_set_reg(pm4, R_028354_PA_SC_RASTER_CONFIG_1, raster_config_1);
   }
}

static void
ac_set_raster_config(const struct radeon_info *info, struct ac_pm4_state *pm4)
{
   const unsigned num_rb = MIN2(info->max_render_backends, 16);
   const uint64_t rb_mask = info->enabled_rb_mask;
   unsigned raster_config, raster_config_1;

   ac_get_raster_config(info, &raster_config, &raster_config_1, NULL);

   if (!rb_mask || util_bitcount64(rb_mask) >= num_rb) {
      /* Always use the default config when all backends are enabled
       * (or when we failed to determine the enabled backends).
       */
      ac_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, raster_config);
      if (info->gfx_level >= GFX7)
         ac_pm4_set_reg(pm4, R_028354_PA_SC_RASTER_CONFIG_1, raster_config_1);
   } else {
      ac_write_harvested_raster_configs(info, pm4, raster_config, raster_config_1);
   }
}

static void
gfx6_init_graphics_preamble_state(const struct ac_preamble_state *state,
                                  struct ac_pm4_state *pm4)
{
   const struct radeon_info *info = pm4->info;

   /* Graphics registers. */
   /* CLEAR_STATE doesn't restore these correctly. */
   ac_pm4_set_reg(pm4, R_028240_PA_SC_GENERIC_SCISSOR_TL, S_028240_WINDOW_OFFSET_DISABLE(1));
   ac_pm4_set_reg(pm4, R_028244_PA_SC_GENERIC_SCISSOR_BR,
                  S_028244_BR_X(16384) | S_028244_BR_Y(16384));

   ac_pm4_set_reg(pm4, R_028A18_VGT_HOS_MAX_TESS_LEVEL, fui(64));
   if (!info->has_clear_state)
      ac_pm4_set_reg(pm4, R_028A1C_VGT_HOS_MIN_TESS_LEVEL, fui(0));

   if (!info->has_clear_state) {
      ac_pm4_set_reg(pm4, R_028820_PA_CL_NANINF_CNTL, 0);
      ac_pm4_set_reg(pm4, R_028AC0_DB_SRESULTS_COMPARE_STATE0, 0x0);
      ac_pm4_set_reg(pm4, R_028AC4_DB_SRESULTS_COMPARE_STATE1, 0x0);
      ac_pm4_set_reg(pm4, R_028AC8_DB_PRELOAD_CONTROL, 0x0);
      ac_pm4_set_reg(pm4, R_028A8C_VGT_PRIMITIVEID_RESET, 0x0);
      ac_pm4_set_reg(pm4, R_028B98_VGT_STRMOUT_BUFFER_CONFIG, 0x0);
      ac_pm4_set_reg(pm4, R_028A5C_VGT_GS_PER_VS, 0x2);
      ac_pm4_set_reg(pm4, R_028AB8_VGT_VTX_CNT_EN, 0x0);
   }

   ac_pm4_set_reg(pm4, R_028080_TA_BC_BASE_ADDR, state->border_color_va >> 8);
   if (info->gfx_level >= GFX7)
      ac_pm4_set_reg(pm4, R_028084_TA_BC_BASE_ADDR_HI, S_028084_ADDRESS(state->border_color_va >> 40));

   if (info->gfx_level == GFX6) {
      ac_pm4_set_reg(pm4, R_008A14_PA_CL_ENHANCE,
                     S_008A14_NUM_CLIP_SEQ(3) | S_008A14_CLIP_VTX_REORDER_ENA(1));
   }

   if (info->gfx_level >= GFX7) {
      ac_pm4_set_reg(pm4, R_030A00_PA_SU_LINE_STIPPLE_VALUE, 0);
      ac_pm4_set_reg(pm4, R_030A04_PA_SC_LINE_STIPPLE_STATE, 0);
   } else {
      ac_pm4_set_reg(pm4, R_008A60_PA_SU_LINE_STIPPLE_VALUE, 0);
      ac_pm4_set_reg(pm4, R_008B10_PA_SC_LINE_STIPPLE_STATE, 0);
   }

   /* If any sample location uses the -8 coordinate, the EXCLUSION fields should be set to 0. */
   ac_pm4_set_reg(pm4, R_02882C_PA_SU_PRIM_FILTER_CNTL,
                  S_02882C_XMAX_RIGHT_EXCLUSION(info->gfx_level >= GFX7) |
                  S_02882C_YMAX_BOTTOM_EXCLUSION(info->gfx_level >= GFX7));

   if (info->gfx_level <= GFX7 || !info->has_clear_state) {
      ac_pm4_set_reg(pm4, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL, 14);
      ac_pm4_set_reg(pm4, R_028C5C_VGT_OUT_DEALLOC_CNTL, 16);

      /* CLEAR_STATE doesn't clear these correctly on certain generations.
       * I don't know why. Deduced by trial and error.
       */
      ac_pm4_set_reg(pm4, R_028B28_VGT_STRMOUT_DRAW_OPAQUE_OFFSET, 0);
      ac_pm4_set_reg(pm4, R_028204_PA_SC_WINDOW_SCISSOR_TL, S_028204_WINDOW_OFFSET_DISABLE(1));
      ac_pm4_set_reg(pm4, R_028030_PA_SC_SCREEN_SCISSOR_TL, 0);
   }

   if (info->gfx_level >= GFX7) {
      ac_pm4_set_reg_idx3(pm4, R_00B01C_SPI_SHADER_PGM_RSRC3_PS,
                          ac_apply_cu_en(S_00B01C_CU_EN(0xffffffff) |
                                         S_00B01C_WAVE_LIMIT_GFX7(0x3F),
                                         C_00B01C_CU_EN, 0, info));
   }

   if (info->gfx_level <= GFX8) {
      ac_set_raster_config(info, pm4);

      /* FIXME calculate these values somehow ??? */
      ac_pm4_set_reg(pm4, R_028A54_VGT_GS_PER_ES, SI_GS_PER_ES);
      ac_pm4_set_reg(pm4, R_028A58_VGT_ES_PER_GS, 0x40);

      /* These registers, when written, also overwrite the CLEAR_STATE
       * context, so we can't rely on CLEAR_STATE setting them.
       * It would be an issue if there was another UMD changing them.
       */
      ac_pm4_set_reg(pm4, R_028400_VGT_MAX_VTX_INDX, ~0);
      ac_pm4_set_reg(pm4, R_028404_VGT_MIN_VTX_INDX, 0);
      ac_pm4_set_reg(pm4, R_028408_VGT_INDX_OFFSET, 0);
   }

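   /* Upper bits of the 32-bit address space that shader binaries live in;
    * GFX9 keeps these registers at different offsets due to its merged
    * LS->HS and ES->GS stages.
    */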
   if (info->gfx_level == GFX9) {
      ac_pm4_set_reg(pm4, R_00B414_SPI_SHADER_PGM_HI_LS,
                     S_00B414_MEM_BASE(info->address32_hi >> 8));
      ac_pm4_set_reg(pm4, R_00B214_SPI_SHADER_PGM_HI_ES,
                     S_00B214_MEM_BASE(info->address32_hi >> 8));
   } else {
      ac_pm4_set_reg(pm4, R_00B524_SPI_SHADER_PGM_HI_LS,
                     S_00B524_MEM_BASE(info->address32_hi >> 8));
   }

   if (info->gfx_level >= GFX7 && info->gfx_level <= GFX8) {
      ac_pm4_set_reg(pm4, R_00B51C_SPI_SHADER_PGM_RSRC3_LS,
                     ac_apply_cu_en(S_00B51C_CU_EN(0xffff) | S_00B51C_WAVE_LIMIT(0x3F),
                                    C_00B51C_CU_EN, 0, info));
      ac_pm4_set_reg(pm4, R_00B41C_SPI_SHADER_PGM_RSRC3_HS, S_00B41C_WAVE_LIMIT(0x3F));
      ac_pm4_set_reg(pm4, R_00B31C_SPI_SHADER_PGM_RSRC3_ES,
                     ac_apply_cu_en(S_00B31C_CU_EN(0xffff) | S_00B31C_WAVE_LIMIT(0x3F),
                                    C_00B31C_CU_EN, 0, info));

      /* If this is 0, Bonaire can hang even if GS isn't being used.
       * Other chips are unaffected. These are suboptimal values,
       * but we don't use on-chip GS.
       */
      ac_pm4_set_reg(pm4, R_028A44_VGT_GS_ONCHIP_CNTL,
                     S_028A44_ES_VERTS_PER_SUBGRP(64) | S_028A44_GS_PRIMS_PER_SUBGRP(4));
   }

   if (info->gfx_level >= GFX8) {
      unsigned vgt_tess_distribution;

      if (info->gfx_level == GFX9) {
         vgt_tess_distribution = S_028B50_ACCUM_ISOLINE(12) |
                                 S_028B50_ACCUM_TRI(30) |
                                 S_028B50_ACCUM_QUAD(24) |
                                 S_028B50_DONUT_SPLIT_GFX9(24) |
                                 S_028B50_TRAP_SPLIT(6);
      } else {
         vgt_tess_distribution = S_028B50_ACCUM_ISOLINE(32) |
                                 S_028B50_ACCUM_TRI(11) |
                                 S_028B50_ACCUM_QUAD(11) |
                                 S_028B50_DONUT_SPLIT_GFX81(16);

         /* Testing with Unigine Heaven extreme tessellation yielded best results
          * with TRAP_SPLIT = 3.
          */
         if (info->family == CHIP_FIJI || info->family >= CHIP_POLARIS10)
            vgt_tess_distribution |= S_028B50_TRAP_SPLIT(3);
      }

      ac_pm4_set_reg(pm4, R_028B50_VGT_TESS_DISTRIBUTION, vgt_tess_distribution);
   }

   ac_pm4_set_reg(pm4, R_028AA0_VGT_INSTANCE_STEP_RATE_0, 1);

   if (info->gfx_level == GFX9) {
      ac_pm4_set_reg(pm4, R_030920_VGT_MAX_VTX_INDX, ~0);
      ac_pm4_set_reg(pm4, R_030924_VGT_MIN_VTX_INDX, 0);
      ac_pm4_set_reg(pm4, R_030928_VGT_INDX_OFFSET, 0);

      ac_pm4_set_reg(pm4, R_028060_DB_DFSM_CONTROL, S_028060_PUNCHOUT_MODE(V_028060_FORCE_OFF));

      ac_pm4_set_reg_idx3(pm4, R_00B41C_SPI_SHADER_PGM_RSRC3_HS,
                          ac_apply_cu_en(S_00B41C_CU_EN(0xffff) | S_00B41C_WAVE_LIMIT(0x3F),
                                         C_00B41C_CU_EN, 0, info));

      ac_pm4_set_reg(pm4, R_028C48_PA_SC_BINNER_CNTL_1,
                     S_028C48_MAX_ALLOC_COUNT(info->pbb_max_alloc_count - 1) |
                     S_028C48_MAX_PRIM_PER_BATCH(1023));

      ac_pm4_set_reg(pm4, R_028AAC_VGT_ESGS_RING_ITEMSIZE, 1);
      ac_pm4_set_reg(pm4, R_030968_VGT_INSTANCE_BASE_ID, 0);
   }
}

static void
gfx10_init_graphics_preamble_state(const struct ac_preamble_state *state,
                                   struct ac_pm4_state *pm4)
{
   const struct radeon_info *info = pm4->info;
   unsigned meta_write_policy, meta_read_policy, color_write_policy, color_read_policy;
   unsigned zs_write_policy, zs_read_policy;
   unsigned cache_no_alloc = info->gfx_level >= GFX11 ? V_02807C_CACHE_NOA_GFX11 :
                                                        V_02807C_CACHE_NOA_GFX10;

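   /* Choose GL2 cache policies for color, depth/stencil, and metadata traffic:
    * LRU (cached) when the RB->GL2 path is meant to be cached, streaming /
    * no-alloc otherwise.
    */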
   if (state->gfx10.cache_rb_gl2) {
      color_write_policy = V_028410_CACHE_LRU_WR;
      color_read_policy = V_028410_CACHE_LRU_RD;
      zs_write_policy = V_02807C_CACHE_LRU_WR;
      zs_read_policy = V_02807C_CACHE_LRU_RD;
      meta_write_policy = V_02807C_CACHE_LRU_WR;
      meta_read_policy = V_02807C_CACHE_LRU_RD;
   } else {
      color_write_policy = V_028410_CACHE_STREAM;
      color_read_policy = cache_no_alloc;
      zs_write_policy = V_02807C_CACHE_STREAM;
      zs_read_policy = cache_no_alloc;

      /* Enable CMASK/HTILE/DCC caching in L2 for small chips. */
      if (info->max_render_backends <= 4) {
         meta_write_policy = V_02807C_CACHE_LRU_WR; /* cache writes */
         meta_read_policy = V_02807C_CACHE_LRU_RD;  /* cache reads */
      } else {
         meta_write_policy = V_02807C_CACHE_STREAM; /* write combine */
         meta_read_policy = cache_no_alloc;         /* don't cache reads that miss */
      }
   }

   const unsigned cu_mask_ps = info->gfx_level >= GFX10_3 ? ac_gfx103_get_cu_mask_ps(info) : ~0u;
   ac_pm4_set_reg_idx3(pm4, R_00B01C_SPI_SHADER_PGM_RSRC3_PS,
                       ac_apply_cu_en(S_00B01C_CU_EN(cu_mask_ps) |
                                      S_00B01C_WAVE_LIMIT_GFX7(0x3F) |
                                      S_00B01C_LDS_GROUP_SIZE_GFX11(info->gfx_level >= GFX11),
                                      C_00B01C_CU_EN, 0, info));
   ac_pm4_set_reg(pm4, R_00B0C0_SPI_SHADER_REQ_CTRL_PS,
                  S_00B0C0_SOFT_GROUPING_EN(1) |
                  S_00B0C0_NUMBER_OF_REQUESTS_PER_CU(4 - 1));
   ac_pm4_set_reg(pm4, R_00B0C8_SPI_SHADER_USER_ACCUM_PS_0, 0);
   ac_pm4_set_reg(pm4, R_00B0CC_SPI_SHADER_USER_ACCUM_PS_1, 0);
   ac_pm4_set_reg(pm4, R_00B0D0_SPI_SHADER_USER_ACCUM_PS_2, 0);
   ac_pm4_set_reg(pm4, R_00B0D4_SPI_SHADER_USER_ACCUM_PS_3, 0);

   if (info->gfx_level < GFX11) {
      /* Shader registers - VS. */
      ac_pm4_set_reg_idx3(pm4, R_00B104_SPI_SHADER_PGM_RSRC4_VS,
                          ac_apply_cu_en(S_00B104_CU_EN(0xffff), /* CUs 16-31 */
                                         C_00B104_CU_EN, 16, info));
      ac_pm4_set_reg(pm4, R_00B1C0_SPI_SHADER_REQ_CTRL_VS, 0);
      ac_pm4_set_reg(pm4, R_00B1C8_SPI_SHADER_USER_ACCUM_VS_0, 0);
      ac_pm4_set_reg(pm4, R_00B1CC_SPI_SHADER_USER_ACCUM_VS_1, 0);
      ac_pm4_set_reg(pm4, R_00B1D0_SPI_SHADER_USER_ACCUM_VS_2, 0);
      ac_pm4_set_reg(pm4, R_00B1D4_SPI_SHADER_USER_ACCUM_VS_3, 0);

      /* Shader registers - PS. */
      unsigned cu_mask_ps = info->gfx_level >= GFX10_3 ? ac_gfx103_get_cu_mask_ps(info) : ~0u;
      ac_pm4_set_reg_idx3(pm4, R_00B004_SPI_SHADER_PGM_RSRC4_PS,
                          ac_apply_cu_en(S_00B004_CU_EN(cu_mask_ps >> 16), /* CUs 16-31 */
                                         C_00B004_CU_EN, 16, info));

      /* Shader registers - HS. */
      ac_pm4_set_reg_idx3(pm4, R_00B404_SPI_SHADER_PGM_RSRC4_HS,
                          ac_apply_cu_en(S_00B404_CU_EN(0xffff), /* CUs 16-31 */
                                         C_00B404_CU_EN, 16, info));
   }

   /* Shader registers - GS. */
   ac_pm4_set_reg(pm4, R_00B2C8_SPI_SHADER_USER_ACCUM_ESGS_0, 0);
   ac_pm4_set_reg(pm4, R_00B2CC_SPI_SHADER_USER_ACCUM_ESGS_1, 0);
   ac_pm4_set_reg(pm4, R_00B2D0_SPI_SHADER_USER_ACCUM_ESGS_2, 0);
   ac_pm4_set_reg(pm4, R_00B2D4_SPI_SHADER_USER_ACCUM_ESGS_3, 0);
   ac_pm4_set_reg(pm4, R_00B324_SPI_SHADER_PGM_HI_ES,
                  S_00B324_MEM_BASE(info->address32_hi >> 8));

   ac_pm4_set_reg_idx3(pm4, R_00B41C_SPI_SHADER_PGM_RSRC3_HS,
                       ac_apply_cu_en(S_00B41C_CU_EN(0xffff) | S_00B41C_WAVE_LIMIT(0x3F),
                                      C_00B41C_CU_EN, 0, info));
   ac_pm4_set_reg(pm4, R_00B4C8_SPI_SHADER_USER_ACCUM_LSHS_0, 0);
   ac_pm4_set_reg(pm4, R_00B4CC_SPI_SHADER_USER_ACCUM_LSHS_1, 0);
   ac_pm4_set_reg(pm4, R_00B4D0_SPI_SHADER_USER_ACCUM_LSHS_2, 0);
   ac_pm4_set_reg(pm4, R_00B4D4_SPI_SHADER_USER_ACCUM_LSHS_3, 0);
   ac_pm4_set_reg(pm4, R_00B524_SPI_SHADER_PGM_HI_LS,
                  S_00B524_MEM_BASE(info->address32_hi >> 8));

   /* Context registers. */
   if (info->gfx_level < GFX11) {
      ac_pm4_set_reg(pm4, R_028038_DB_DFSM_CONTROL, S_028038_PUNCHOUT_MODE(V_028038_FORCE_OFF));
   }

   ac_pm4_set_reg(pm4, R_02807C_DB_RMI_L2_CACHE_CONTROL,
                  S_02807C_Z_WR_POLICY(zs_write_policy) |
                  S_02807C_S_WR_POLICY(zs_write_policy) |
                  S_02807C_HTILE_WR_POLICY(meta_write_policy) |
                  S_02807C_ZPCPSD_WR_POLICY(V_02807C_CACHE_STREAM) | /* occlusion query writes */
                  S_02807C_Z_RD_POLICY(zs_read_policy) |
                  S_02807C_S_RD_POLICY(zs_read_policy) |
                  S_02807C_HTILE_RD_POLICY(meta_read_policy));
   ac_pm4_set_reg(pm4, R_028080_TA_BC_BASE_ADDR, state->border_color_va >> 8);
   ac_pm4_set_reg(pm4, R_028084_TA_BC_BASE_ADDR_HI, S_028084_ADDRESS(state->border_color_va >> 40));

   ac_pm4_set_reg(pm4, R_028410_CB_RMI_GL2_CACHE_CONTROL,
                  (info->gfx_level >= GFX11 ?
                      S_028410_COLOR_WR_POLICY_GFX11(color_write_policy) |
                      S_028410_COLOR_RD_POLICY(color_read_policy) |
                      S_028410_DCC_WR_POLICY_GFX11(meta_write_policy) |
                      S_028410_DCC_RD_POLICY(meta_read_policy)
                   :
                      S_028410_COLOR_WR_POLICY_GFX10(color_write_policy) |
                      S_028410_COLOR_RD_POLICY(color_read_policy)) |
                  S_028410_FMASK_WR_POLICY(color_write_policy) |
                  S_028410_FMASK_RD_POLICY(color_read_policy) |
                  S_028410_CMASK_WR_POLICY(meta_write_policy) |
                  S_028410_CMASK_RD_POLICY(meta_read_policy) |
                  S_028410_DCC_WR_POLICY_GFX10(meta_write_policy) |
                  S_028410_DCC_RD_POLICY(meta_read_policy));

   if (info->gfx_level >= GFX10_3)
      ac_pm4_set_reg(pm4, R_028750_SX_PS_DOWNCONVERT_CONTROL, 0xff);

   /* If any sample location uses the -8 coordinate, the EXCLUSION fields should be set to 0. */
   ac_pm4_set_reg(pm4, R_02882C_PA_SU_PRIM_FILTER_CNTL,
                  S_02882C_XMAX_RIGHT_EXCLUSION(1) |
                  S_02882C_YMAX_BOTTOM_EXCLUSION(1));
   ac_pm4_set_reg(pm4, R_028830_PA_SU_SMALL_PRIM_FILTER_CNTL,
                  S_028830_SMALL_PRIM_FILTER_ENABLE(1));

   ac_pm4_set_reg(pm4, R_028A18_VGT_HOS_MAX_TESS_LEVEL, fui(64));
   ac_pm4_set_reg(pm4, R_028AAC_VGT_ESGS_RING_ITEMSIZE, 1);
   ac_pm4_set_reg(pm4, R_028B50_VGT_TESS_DISTRIBUTION,
                  info->gfx_level >= GFX11 ?
                     S_028B50_ACCUM_ISOLINE(128) |
                     S_028B50_ACCUM_TRI(128) |
                     S_028B50_ACCUM_QUAD(128) |
                     S_028B50_DONUT_SPLIT_GFX9(24) |
                     S_028B50_TRAP_SPLIT(6)
                  :
                     S_028B50_ACCUM_ISOLINE(12) |
                     S_028B50_ACCUM_TRI(30) |
                     S_028B50_ACCUM_QUAD(24) |
                     S_028B50_DONUT_SPLIT_GFX9(24) |
                     S_028B50_TRAP_SPLIT(6));

   /* GFX11+ shouldn't subtract 1 from pbb_max_alloc_count. */
   unsigned gfx10_one = info->gfx_level < GFX11;
   ac_pm4_set_reg(pm4, R_028C48_PA_SC_BINNER_CNTL_1,
                  S_028C48_MAX_ALLOC_COUNT(info->pbb_max_alloc_count - gfx10_one) |
                  S_028C48_MAX_PRIM_PER_BATCH(1023));

   if (info->gfx_level >= GFX11_5)
      ac_pm4_set_reg(pm4, R_028C54_PA_SC_BINNER_CNTL_2,
                     S_028C54_ENABLE_PING_PONG_BIN_ORDER(1));

   /* Break up a pixel wave if it contains deallocs for more than
    * half the parameter cache.
    *
    * To avoid a deadlock where pixel waves aren't launched
    * because they're waiting for more pixels while the frontend
    * is stuck waiting for PC space, the maximum allowed value is
    * the size of the PC minus the largest possible allocation for
    * a single primitive shader subgroup.
    */
   ac_pm4_set_reg(pm4, R_028C50_PA_SC_NGG_MODE_CNTL,
                  S_028C50_MAX_DEALLOCS_IN_WAVE(info->gfx_level >= GFX11 ? 16 : 512));
   if (info->gfx_level < GFX11)
      ac_pm4_set_reg(pm4, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL, 14); /* Reuse for legacy (non-NGG) only. */

   /* Uconfig registers. */
   ac_pm4_set_reg(pm4, R_030924_GE_MIN_VTX_INDX, 0);
   ac_pm4_set_reg(pm4, R_030928_GE_INDX_OFFSET, 0);
   if (info->gfx_level >= GFX11) {
      /* This is changed by draws for indexed draws, but we need to set DISABLE_FOR_AUTO_INDEX
       * here, which disables primitive restart for all non-indexed draws, so that those draws
       * won't have to set this state.
       */
      ac_pm4_set_reg(pm4, R_03092C_GE_MULTI_PRIM_IB_RESET_EN, S_03092C_DISABLE_FOR_AUTO_INDEX(1));
   }
   ac_pm4_set_reg(pm4, R_030964_GE_MAX_VTX_INDX, ~0);
   ac_pm4_set_reg(pm4, R_030968_VGT_INSTANCE_BASE_ID, 0);
   ac_pm4_set_reg(pm4, R_03097C_GE_STEREO_CNTL, 0);
   ac_pm4_set_reg(pm4, R_030988_GE_USER_VGPR_EN, 0);

   ac_pm4_set_reg(pm4, R_030A00_PA_SU_LINE_STIPPLE_VALUE, 0);
   ac_pm4_set_reg(pm4, R_030A04_PA_SC_LINE_STIPPLE_STATE, 0);

   if (info->gfx_level >= GFX11) {
      uint64_t rb_mask = BITFIELD64_MASK(info->max_render_backends);

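      /* PIXEL_PIPE_STAT_CONTROL selects which RB instances contribute to the
       * pixel pipe statistics counters; enable every RB that exists.
       */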
      ac_pm4_cmd_add(pm4, PKT3(PKT3_EVENT_WRITE, 2, 0));
      ac_pm4_cmd_add(pm4, EVENT_TYPE(V_028A90_PIXEL_PIPE_STAT_CONTROL) | EVENT_INDEX(1));
      ac_pm4_cmd_add(pm4, PIXEL_PIPE_STATE_CNTL_COUNTER_ID(0) |
                          PIXEL_PIPE_STATE_CNTL_STRIDE(2) |
                          PIXEL_PIPE_STATE_CNTL_INSTANCE_EN_LO(rb_mask));
      ac_pm4_cmd_add(pm4, PIXEL_PIPE_STATE_CNTL_INSTANCE_EN_HI(rb_mask));
   }
}

static void
gfx12_init_graphics_preamble_state(const struct ac_preamble_state *state,
                                   struct ac_pm4_state *pm4)
{
   const struct radeon_info *info = pm4->info;
   unsigned color_write_policy, color_read_policy;
   enum gfx12_store_temporal_hint color_write_temporal_hint, zs_write_temporal_hint;
   enum gfx12_load_temporal_hint color_read_temporal_hint, zs_read_temporal_hint;

   if (state->gfx10.cache_rb_gl2) {
      color_write_policy = V_028410_CACHE_LRU_WR;
      color_read_policy = V_028410_CACHE_LRU_RD;
      color_write_temporal_hint = gfx12_store_regular_temporal;
      color_read_temporal_hint = gfx12_load_regular_temporal;
      zs_write_temporal_hint = gfx12_store_regular_temporal;
      zs_read_temporal_hint = gfx12_load_regular_temporal;
   } else {
      color_write_policy = V_028410_CACHE_STREAM;
      color_read_policy = V_02807C_CACHE_NOA_GFX11;
      color_write_temporal_hint = gfx12_store_near_non_temporal_far_regular_temporal;
      color_read_temporal_hint = gfx12_load_near_non_temporal_far_regular_temporal;
      zs_write_temporal_hint = gfx12_store_near_non_temporal_far_regular_temporal;
      zs_read_temporal_hint = gfx12_load_near_non_temporal_far_regular_temporal;
   }

   /* Shader registers - PS */
   ac_pm4_set_reg_idx3(pm4, R_00B018_SPI_SHADER_PGM_RSRC3_PS,
                       ac_apply_cu_en(S_00B018_CU_EN(0xffff),
                                      C_00B018_CU_EN, 0, info));
   ac_pm4_set_reg(pm4, R_00B0C0_SPI_SHADER_REQ_CTRL_PS,
                  S_00B0C0_SOFT_GROUPING_EN(1) |
                  S_00B0C0_NUMBER_OF_REQUESTS_PER_CU(4 - 1));
   ac_pm4_set_reg(pm4, R_00B0C8_SPI_SHADER_USER_ACCUM_PS_0, 0);
   ac_pm4_set_reg(pm4, R_00B0CC_SPI_SHADER_USER_ACCUM_PS_1, 0);
   ac_pm4_set_reg(pm4, R_00B0D0_SPI_SHADER_USER_ACCUM_PS_2, 0);
   ac_pm4_set_reg(pm4, R_00B0D4_SPI_SHADER_USER_ACCUM_PS_3, 0);

   /* Shader registers - GS */
   ac_pm4_set_reg(pm4, R_00B218_SPI_SHADER_PGM_HI_ES,
                  S_00B324_MEM_BASE(info->address32_hi >> 8));
   ac_pm4_set_reg_idx3(pm4, R_00B21C_SPI_SHADER_PGM_RSRC3_GS,
                       ac_apply_cu_en(0xfffffdfd, 0, 0, info));
   ac_pm4_set_reg(pm4, R_00B2C8_SPI_SHADER_USER_ACCUM_ESGS_0, 0);
   ac_pm4_set_reg(pm4, R_00B2CC_SPI_SHADER_USER_ACCUM_ESGS_1, 0);
   ac_pm4_set_reg(pm4, R_00B2D0_SPI_SHADER_USER_ACCUM_ESGS_2, 0);
   ac_pm4_set_reg(pm4, R_00B2D4_SPI_SHADER_USER_ACCUM_ESGS_3, 0);

   /* Shader registers - HS */
   ac_pm4_set_reg(pm4, R_00B418_SPI_SHADER_PGM_HI_LS,
                  S_00B524_MEM_BASE(info->address32_hi >> 8));
   ac_pm4_set_reg_idx3(pm4, R_00B41C_SPI_SHADER_PGM_RSRC3_HS,
                       ac_apply_cu_en(0xffffffff, 0, 0, info));
   ac_pm4_set_reg(pm4, R_00B4C8_SPI_SHADER_USER_ACCUM_LSHS_0, 0);
   ac_pm4_set_reg(pm4, R_00B4CC_SPI_SHADER_USER_ACCUM_LSHS_1, 0);
   ac_pm4_set_reg(pm4, R_00B4D0_SPI_SHADER_USER_ACCUM_LSHS_2, 0);
   ac_pm4_set_reg(pm4, R_00B4D4_SPI_SHADER_USER_ACCUM_LSHS_3, 0);

   /* Context registers */
   ac_pm4_set_reg(pm4, R_02800C_DB_RENDER_OVERRIDE, S_02800C_FORCE_STENCIL_READ(1));
   ac_pm4_set_reg(pm4, R_028040_DB_GL1_INTERFACE_CONTROL, 0);
   ac_pm4_set_reg(pm4, R_028048_DB_MEM_TEMPORAL,
                  S_028048_Z_TEMPORAL_READ(zs_read_temporal_hint) |
                  S_028048_Z_TEMPORAL_WRITE(zs_write_temporal_hint) |
                  S_028048_STENCIL_TEMPORAL_READ(zs_read_temporal_hint) |
                  S_028048_STENCIL_TEMPORAL_WRITE(zs_write_temporal_hint) |
                  S_028048_OCCLUSION_TEMPORAL_WRITE(gfx12_store_regular_temporal));
   ac_pm4_set_reg(pm4, R_028064_DB_VIEWPORT_CONTROL, 0);
   ac_pm4_set_reg(pm4, R_028068_DB_SPI_VRS_CENTER_LOCATION, 0);
   ac_pm4_set_reg(pm4, R_028080_TA_BC_BASE_ADDR, state->border_color_va >> 8);
   ac_pm4_set_reg(pm4, R_028084_TA_BC_BASE_ADDR_HI, S_028084_ADDRESS(state->border_color_va >> 40));
   ac_pm4_set_reg(pm4, R_02808C_DB_STENCIL_OPVAL, S_02808C_OPVAL(1) | S_02808C_OPVAL_BF(1));
   ac_pm4_set_reg(pm4, R_0280F8_SC_MEM_TEMPORAL,
                  S_0280F8_VRS_TEMPORAL_READ(gfx12_load_regular_temporal) |
                  S_0280F8_VRS_TEMPORAL_WRITE(gfx12_store_regular_temporal) |
                  S_0280F8_HIZ_TEMPORAL_READ(gfx12_load_regular_temporal) |
                  S_0280F8_HIZ_TEMPORAL_WRITE(gfx12_store_regular_temporal) |
                  S_0280F8_HIS_TEMPORAL_READ(gfx12_load_regular_temporal) |
                  S_0280F8_HIS_TEMPORAL_WRITE(gfx12_store_regular_temporal));
   ac_pm4_set_reg(pm4, R_0280FC_SC_MEM_SPEC_READ,
                  S_0280FC_VRS_SPECULATIVE_READ(gfx12_spec_read_force_on) |
                  S_0280FC_HIZ_SPECULATIVE_READ(gfx12_spec_read_force_on) |
                  S_0280FC_HIS_SPECULATIVE_READ(gfx12_spec_read_force_on));

   /* We don't need to initialize PA_SC_VPORT_* because we don't enable
    * IMPLICIT_VPORT_SCISSOR_ENABLE, but it might be useful for Vulkan.
    *
    * If you set IMPLICIT_VPORT_SCISSOR_ENABLE, PA_SC_VPORT_* will take effect and allows
    * setting a scissor that covers the whole viewport. If you set VPORT_SCISSOR_ENABLE,
    * PA_SC_VPORT_SCISSOR_* will take effect and allows setting a user scissor. If you set
    * both enable bits, the hw will use the intersection of both. It allows separating implicit
    * viewport scissors from user scissors.
    */
   ac_pm4_set_reg(pm4, R_028180_PA_SC_SCREEN_SCISSOR_TL, 0);
   ac_pm4_set_reg(pm4, R_028184_PA_SC_SCREEN_SCISSOR_BR,
                  S_028184_BR_X(65535) | S_028184_BR_Y(65535)); /* inclusive bounds */
   ac_pm4_set_reg(pm4, R_028204_PA_SC_WINDOW_SCISSOR_TL, 0);
   ac_pm4_set_reg(pm4, R_028240_PA_SC_GENERIC_SCISSOR_TL, 0);
   ac_pm4_set_reg(pm4, R_028244_PA_SC_GENERIC_SCISSOR_BR,
                  S_028244_BR_X(65535) | S_028244_BR_Y(65535)); /* inclusive bounds */
   ac_pm4_set_reg(pm4, R_028358_PA_SC_SCREEN_EXTENT_CONTROL, 0);
   ac_pm4_set_reg(pm4, R_02835C_PA_SC_TILE_STEERING_OVERRIDE,
                  info->pa_sc_tile_steering_override);
   ac_pm4_set_reg(pm4, R_0283E0_PA_SC_VRS_INFO, 0);

   ac_pm4_set_reg(pm4, R_028410_CB_RMI_GL2_CACHE_CONTROL,
                  S_028410_COLOR_WR_POLICY_GFX11(color_write_policy) |
                  S_028410_COLOR_RD_POLICY(color_read_policy));
   ac_pm4_set_reg(pm4, R_0286E4_SPI_BARYC_SSAA_CNTL, S_0286E4_COVERED_CENTROID_IS_CENTER(1));
   ac_pm4_set_reg(pm4, R_028750_SX_PS_DOWNCONVERT_CONTROL, 0xff);
   ac_pm4_set_reg(pm4, R_0287D4_PA_CL_POINT_X_RAD, 0);
   ac_pm4_set_reg(pm4, R_0287D8_PA_CL_POINT_Y_RAD, 0);
   ac_pm4_set_reg(pm4, R_0287DC_PA_CL_POINT_SIZE, 0);
   ac_pm4_set_reg(pm4, R_0287E0_PA_CL_POINT_CULL_RAD, 0);
   ac_pm4_set_reg(pm4, R_028820_PA_CL_NANINF_CNTL, 0);
   ac_pm4_set_reg(pm4, R_028824_PA_SU_LINE_STIPPLE_CNTL, 0);
   ac_pm4_set_reg(pm4, R_028828_PA_SU_LINE_STIPPLE_SCALE, 0);
   /* If any sample location uses the -8 coordinate, the EXCLUSION fields should be set to 0. */
   ac_pm4_set_reg(pm4, R_02882C_PA_SU_PRIM_FILTER_CNTL,
                  S_02882C_XMAX_RIGHT_EXCLUSION(1) |
                  S_02882C_YMAX_BOTTOM_EXCLUSION(1));
   ac_pm4_set_reg(pm4, R_028830_PA_SU_SMALL_PRIM_FILTER_CNTL,
                  S_028830_SMALL_PRIM_FILTER_ENABLE(1) |
                  S_028830_SC_1XMSAA_COMPATIBLE_DISABLE(1) /* use sample locations even for MSAA 1x */);
   ac_pm4_set_reg(pm4, R_02883C_PA_SU_OVER_RASTERIZATION_CNTL, 0);
   ac_pm4_set_reg(pm4, R_028840_PA_STEREO_CNTL, S_028840_STEREO_MODE(1));

   ac_pm4_set_reg(pm4, R_028A18_VGT_HOS_MAX_TESS_LEVEL, fui(64));
   ac_pm4_set_reg(pm4, R_028A1C_VGT_HOS_MIN_TESS_LEVEL, fui(0));
   ac_pm4_set_reg(pm4, R_028A50_GE_SE_ENHANCE, 0);
   ac_pm4_set_reg(pm4, R_028A70_GE_IA_ENHANCE, 0);
   ac_pm4_set_reg(pm4, R_028A80_GE_WD_ENHANCE, 0);
   ac_pm4_set_reg(pm4, R_028A9C_VGT_REUSE_OFF, 0);
   ac_pm4_set_reg(pm4, R_028AA0_VGT_DRAW_PAYLOAD_CNTL, 0);
   ac_pm4_set_reg(pm4, R_028ABC_DB_HTILE_SURFACE, 0);

   ac_pm4_set_reg(pm4, R_028B28_VGT_STRMOUT_DRAW_OPAQUE_OFFSET, 0);
   ac_pm4_set_reg(pm4, R_028B50_VGT_TESS_DISTRIBUTION,
                  S_028B50_ACCUM_ISOLINE(128) |
                  S_028B50_ACCUM_TRI(128) |
                  S_028B50_ACCUM_QUAD(128) |
                  S_028B50_DONUT_SPLIT_GFX9(24) |
                  S_028B50_TRAP_SPLIT(6));
   ac_pm4_set_reg(pm4, R_028BC0_PA_SC_HISZ_RENDER_OVERRIDE, 0);

   ac_pm4_set_reg(pm4, R_028C40_PA_SC_BINNER_OUTPUT_TIMEOUT_COUNTER, 0x800);
   ac_pm4_set_reg(pm4, R_028C48_PA_SC_BINNER_CNTL_1,
                  S_028C48_MAX_ALLOC_COUNT(254) |
                  S_028C48_MAX_PRIM_PER_BATCH(511));
   ac_pm4_set_reg(pm4, R_028C4C_PA_SC_BINNER_CNTL_2, S_028C4C_ENABLE_PING_PONG_BIN_ORDER(1));
   ac_pm4_set_reg(pm4, R_028C50_PA_SC_NGG_MODE_CNTL, S_028C50_MAX_DEALLOCS_IN_WAVE(64));
   ac_pm4_set_reg(pm4, R_028C58_PA_SC_SHADER_CONTROL,
                  S_028C58_REALIGN_DQUADS_AFTER_N_WAVES(1));

   for (unsigned i = 0; i < 8; i++) {
      ac_pm4_set_reg(pm4, R_028F00_CB_MEM0_INFO + i * 4,
                     S_028F00_TEMPORAL_READ(color_read_temporal_hint) |
                     S_028F00_TEMPORAL_WRITE(color_write_temporal_hint));
   }

   /* Uconfig registers. */
   ac_pm4_set_reg(pm4, R_030924_GE_MIN_VTX_INDX, 0);
   ac_pm4_set_reg(pm4, R_030928_GE_INDX_OFFSET, 0);
   /* This is changed by draws for indexed draws, but we need to set DISABLE_FOR_AUTO_INDEX
    * here, which disables primitive restart for all non-indexed draws, so that those draws
    * won't have to set this state.
    */
   ac_pm4_set_reg(pm4, R_03092C_GE_MULTI_PRIM_IB_RESET_EN, S_03092C_DISABLE_FOR_AUTO_INDEX(1));
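   /* GS throttle defaults; ENABLE_THROTTLE(0) leaves the throttle itself off,
    * so the other fields only matter if it gets enabled later.
    */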
   ac_pm4_set_reg(pm4, R_030950_GE_GS_THROTTLE,
                  S_030950_T0(0x1) |
                  S_030950_T1(0x4) |
                  S_030950_T2(0x3) |
                  S_030950_STALL_CYCLES(0x40) |
                  S_030950_FACTOR1(0x2) |
                  S_030950_FACTOR2(0x3) |
                  S_030950_ENABLE_THROTTLE(0) |
                  S_030950_NUM_INIT_GRPS(0xff));
   ac_pm4_set_reg(pm4, R_030964_GE_MAX_VTX_INDX, ~0);
   ac_pm4_set_reg(pm4, R_030968_VGT_INSTANCE_BASE_ID, 0);
   ac_pm4_set_reg(pm4, R_03097C_GE_STEREO_CNTL, 0);
   ac_pm4_set_reg(pm4, R_030980_GE_USER_VGPR_EN, 0);
   ac_pm4_set_reg(pm4, R_0309B4_VGT_PRIMITIVEID_RESET, 0);
   ac_pm4_set_reg(pm4, R_03098C_GE_VRS_RATE, 0);
   ac_pm4_set_reg(pm4, R_030A00_PA_SU_LINE_STIPPLE_VALUE, 0);
   ac_pm4_set_reg(pm4, R_030A04_PA_SC_LINE_STIPPLE_STATE, 0);

   ac_pm4_set_reg(pm4, R_031128_SPI_GRP_LAUNCH_GUARANTEE_ENABLE, 0x8A4D);
   ac_pm4_set_reg(pm4, R_03112C_SPI_GRP_LAUNCH_GUARANTEE_CTRL, 0x1123);

   uint64_t rb_mask = BITFIELD64_MASK(info->max_render_backends);

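   /* Same as on GFX11: select which RB instances feed the pixel pipe
    * statistics counters (all RBs that exist).
    */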
   ac_pm4_cmd_add(pm4, PKT3(PKT3_EVENT_WRITE, 2, 0));
   ac_pm4_cmd_add(pm4, EVENT_TYPE(V_028A90_PIXEL_PIPE_STAT_CONTROL) | EVENT_INDEX(1));
   ac_pm4_cmd_add(pm4, PIXEL_PIPE_STATE_CNTL_COUNTER_ID(0) |
                       PIXEL_PIPE_STATE_CNTL_STRIDE(2) |
                       PIXEL_PIPE_STATE_CNTL_INSTANCE_EN_LO(rb_mask));
   ac_pm4_cmd_add(pm4, PIXEL_PIPE_STATE_CNTL_INSTANCE_EN_HI(rb_mask));
}

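/* Graphics counterpart of ac_init_compute_preamble_state(): picks the per-chip
 * implementation above. Besides border_color_va, the only preamble field the
 * graphics paths in this file read is state->gfx10.cache_rb_gl2 (GFX10+),
 * which selects the cached vs. streaming RB->GL2 policies set up earlier.
 */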
void
ac_init_graphics_preamble_state(const struct ac_preamble_state *state,
                                struct ac_pm4_state *pm4)
{
   const struct radeon_info *info = pm4->info;

   if (info->gfx_level >= GFX12) {
      gfx12_init_graphics_preamble_state(state, pm4);
   } else if (info->gfx_level >= GFX10) {
      gfx10_init_graphics_preamble_state(state, pm4);
   } else {
      gfx6_init_graphics_preamble_state(state, pm4);
   }
}