/*
 * Copyright © 2022 Valve Corporation
 *
 * SPDX-License-Identifier: MIT
 */

#include "ac_nir.h"
#include "nir.h"
#include "nir_builder.h"
#include "radv_constants.h"
#include "radv_nir.h"
#include "radv_pipeline_graphics.h"
#include "radv_shader.h"
#include "radv_shader_args.h"
#include "sid.h"

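/* Extract a bitfield from an SGPR argument, using the field's FIELD__SHIFT and FIELD__MASK definitions. */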
#define GET_SGPR_FIELD_NIR(arg, field)                                                                                 \
   ac_nir_unpack_arg(b, &s->args->ac, arg, field##__SHIFT, util_bitcount(field##__MASK))

typedef struct {
   enum amd_gfx_level gfx_level;
   const struct radv_shader_args *args;
   const struct radv_shader_info *info;
   const struct radv_graphics_state_key *gfx_state;
   uint32_t address32_hi;
   nir_def *gsvs_ring[4];
} lower_abi_state;

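/* Load a 16-byte ring buffer descriptor from the ring_offsets array.
 * Task shaders read it from the separate task_ring_offsets argument.
 */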
static nir_def *
load_ring(nir_builder *b, unsigned ring, lower_abi_state *s)
{
   struct ac_arg arg =
      b->shader->info.stage == MESA_SHADER_TASK ? s->args->task_ring_offsets : s->args->ac.ring_offsets;

   nir_def *ring_offsets = ac_nir_load_arg(b, &s->args->ac, arg);
   ring_offsets = nir_pack_64_2x32_split(b, nir_channel(b, ring_offsets, 0), nir_channel(b, ring_offsets, 1));
   return nir_load_smem_amd(b, 4, ring_offsets, nir_imm_int(b, ring * 16u), .align_mul = 4u);
}

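/* Test a single flag in the NGG culling settings SGPR. */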
static nir_def *
nggc_bool_setting(nir_builder *b, unsigned mask, lower_abi_state *s)
{
   nir_def *settings = ac_nir_load_arg(b, &s->args->ac, s->args->ngg_culling_settings);
   return nir_test_mask(b, settings, mask);
}

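/* Test a single flag in the shader query state SGPR. */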
static nir_def *
shader_query_bool_setting(nir_builder *b, unsigned mask, lower_abi_state *s)
{
   nir_def *settings = ac_nir_load_arg(b, &s->args->ac, s->args->shader_query_state);
   return nir_test_mask(b, settings, mask);
}

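/* Lower one RADV-specific ABI intrinsic to shader arguments, ring buffer descriptors or compile-time constants. */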
static bool
lower_abi_instr(nir_builder *b, nir_intrinsic_instr *intrin, void *state)
{
   lower_abi_state *s = (lower_abi_state *)state;
   gl_shader_stage stage = b->shader->info.stage;

   b->cursor = nir_before_instr(&intrin->instr);

   nir_def *replacement = NULL;
   bool progress = true;

   switch (intrin->intrinsic) {
   case nir_intrinsic_load_ring_tess_factors_amd:
      replacement = load_ring(b, RING_HS_TESS_FACTOR, s);
      break;
   case nir_intrinsic_load_ring_tess_factors_offset_amd:
      replacement = ac_nir_load_arg(b, &s->args->ac, s->args->ac.tcs_factor_offset);
      break;
   case nir_intrinsic_load_ring_tess_offchip_amd:
      replacement = load_ring(b, RING_HS_TESS_OFFCHIP, s);
      break;
   case nir_intrinsic_load_ring_tess_offchip_offset_amd:
      replacement = ac_nir_load_arg(b, &s->args->ac, s->args->ac.tess_offchip_offset);
      break;
   case nir_intrinsic_load_tcs_num_patches_amd:
      if (s->info->num_tess_patches) {
         replacement = nir_imm_int(b, s->info->num_tess_patches);
      } else {
         nir_def *n = GET_SGPR_FIELD_NIR(s->args->tcs_offchip_layout, TCS_OFFCHIP_LAYOUT_NUM_PATCHES);
         replacement = nir_iadd_imm_nuw(b, n, 1);
      }
      break;
   case nir_intrinsic_load_tcs_tess_levels_to_tes_amd:
      if (s->info->outputs_linked) {
         replacement = nir_imm_bool(b, s->info->tcs.tes_reads_tess_factors);
      } else {
         replacement =
            nir_ine_imm(b, GET_SGPR_FIELD_NIR(s->args->tcs_offchip_layout, TCS_OFFCHIP_LAYOUT_TES_READS_TF), 0);
      }
      break;
   case nir_intrinsic_load_tcs_primitive_mode_amd:
      if (s->info->outputs_linked) {
         replacement = nir_imm_int(b, s->info->tes._primitive_mode);
      } else {
         replacement = GET_SGPR_FIELD_NIR(s->args->tcs_offchip_layout, TCS_OFFCHIP_LAYOUT_PRIMITIVE_MODE);
      }
      break;
   case nir_intrinsic_load_ring_esgs_amd:
      replacement = load_ring(b, stage == MESA_SHADER_GEOMETRY ? RING_ESGS_GS : RING_ESGS_VS, s);
      break;
   case nir_intrinsic_load_ring_gsvs_amd:
      if (stage == MESA_SHADER_VERTEX)
         replacement = load_ring(b, RING_GSVS_VS, s);
      else
         replacement = s->gsvs_ring[nir_intrinsic_stream_id(intrin)];
      break;
   case nir_intrinsic_load_ring_gs2vs_offset_amd:
      replacement = ac_nir_load_arg(b, &s->args->ac, s->args->ac.gs2vs_offset);
      break;
   case nir_intrinsic_load_ring_es2gs_offset_amd:
      replacement = ac_nir_load_arg(b, &s->args->ac, s->args->ac.es2gs_offset);
      break;

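   /* Patch the attribute ring descriptor stride: 16 bytes (one vec4) per exported parameter. */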
   case nir_intrinsic_load_ring_attr_amd:
      replacement = load_ring(b, RING_PS_ATTR, s);

      /* Note, the HW always assumes there is at least 1 per-vertex param. */
      const unsigned total_num_params = MAX2(1, s->info->outinfo.param_exports) + s->info->outinfo.prim_param_exports;

      nir_def *dword1 = nir_channel(b, replacement, 1);
      dword1 = nir_ior_imm(b, dword1, S_008F04_STRIDE(16 * total_num_params));
      replacement = nir_vector_insert_imm(b, replacement, dword1, 1);
      break;

   case nir_intrinsic_load_ring_attr_offset_amd: {
      nir_def *ring_attr_offset = ac_nir_load_arg(b, &s->args->ac, s->args->ac.gs_attr_offset);
      replacement = nir_ishl_imm(b, nir_ubfe_imm(b, ring_attr_offset, 0, 15), 9); /* 512b increments. */
      break;
   }

   case nir_intrinsic_load_tess_rel_patch_id_amd:
      if (stage == MESA_SHADER_TESS_CTRL) {
         replacement = nir_extract_u8(b, ac_nir_load_arg(b, &s->args->ac, s->args->ac.tcs_rel_ids), nir_imm_int(b, 0));
      } else if (stage == MESA_SHADER_TESS_EVAL) {
         /* Setting an upper bound like this will actually make it possible
          * to optimize some multiplications (in address calculations) so that
          * constant additions can be added to the const offset in memory load instructions.
          */
         nir_def *arg = ac_nir_load_arg(b, &s->args->ac, s->args->ac.tes_rel_patch_id);

         if (s->info->tes.tcs_vertices_out) {
            nir_intrinsic_instr *load_arg = nir_instr_as_intrinsic(arg->parent_instr);
            nir_intrinsic_set_arg_upper_bound_u32_amd(load_arg, 2048 / MAX2(s->info->tes.tcs_vertices_out, 1));
         }

         replacement = arg;
      } else {
         unreachable("invalid tessellation shader stage");
      }
      break;
   case nir_intrinsic_load_patch_vertices_in:
      if (stage == MESA_SHADER_TESS_CTRL) {
         if (s->gfx_state->ts.patch_control_points) {
            replacement = nir_imm_int(b, s->gfx_state->ts.patch_control_points);
         } else {
            nir_def *n = GET_SGPR_FIELD_NIR(s->args->tcs_offchip_layout, TCS_OFFCHIP_LAYOUT_PATCH_CONTROL_POINTS);
            replacement = nir_iadd_imm_nuw(b, n, 1);
         }
      } else if (stage == MESA_SHADER_TESS_EVAL) {
         if (s->info->tes.tcs_vertices_out) {
            replacement = nir_imm_int(b, s->info->tes.tcs_vertices_out);
         } else {
            nir_def *n = GET_SGPR_FIELD_NIR(s->args->tcs_offchip_layout, TCS_OFFCHIP_LAYOUT_OUT_PATCH_CP);
            replacement = nir_iadd_imm_nuw(b, n, 1);
         }
      } else
         unreachable("invalid tessellation shader stage");
      break;
   case nir_intrinsic_load_gs_vertex_offset_amd:
      replacement = ac_nir_load_arg(b, &s->args->ac, s->args->ac.gs_vtx_offset[nir_intrinsic_base(intrin)]);
      break;
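   /* gs_tg_info packs the workgroup's ES vertex count in bits [20:12] and its GS primitive count in bits [30:22]. */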
   case nir_intrinsic_load_workgroup_num_input_vertices_amd:
      replacement = nir_ubfe_imm(b, ac_nir_load_arg(b, &s->args->ac, s->args->ac.gs_tg_info), 12, 9);
      break;
   case nir_intrinsic_load_workgroup_num_input_primitives_amd:
      replacement = nir_ubfe_imm(b, ac_nir_load_arg(b, &s->args->ac, s->args->ac.gs_tg_info), 22, 9);
      break;
   case nir_intrinsic_load_packed_passthrough_primitive_amd:
      /* NGG passthrough mode: the HW already packs the primitive export value to a single register.
       */
      replacement = ac_nir_load_arg(b, &s->args->ac, s->args->ac.gs_vtx_offset[0]);
      break;
   case nir_intrinsic_load_pipeline_stat_query_enabled_amd:
      replacement = shader_query_bool_setting(b, radv_shader_query_pipeline_stat, s);
      break;
   case nir_intrinsic_load_prim_gen_query_enabled_amd:
      replacement = shader_query_bool_setting(b, radv_shader_query_prim_gen, s);
      break;
   case nir_intrinsic_load_prim_xfb_query_enabled_amd:
      replacement = shader_query_bool_setting(b, radv_shader_query_prim_xfb, s);
      break;
   case nir_intrinsic_load_merged_wave_info_amd:
      replacement = ac_nir_load_arg(b, &s->args->ac, s->args->ac.merged_wave_info);
      break;
   case nir_intrinsic_load_cull_any_enabled_amd: {
      nir_def *gs_tg_info = ac_nir_load_arg(b, &s->args->ac, s->args->ac.gs_tg_info);

      /* Consider a workgroup small if it contains less than 16 triangles.
       *
       * The gs_tg_info[30:22] is the number of primitives, which we know is non-zero,
       * so the below is equivalent to: "ult(ubfe(gs_tg_info, 22, 9), 16)", but
       * ACO can optimize out the comparison to zero (see try_optimize_scc_nocompare).
       */
      nir_def *small_workgroup = nir_ieq_imm(b, nir_iand_imm(b, gs_tg_info, BITFIELD_RANGE(22 + 4, 9 - 4)), 0);

      nir_def *mask =
         nir_bcsel(b, small_workgroup, nir_imm_int(b, radv_nggc_none),
                   nir_imm_int(b, radv_nggc_front_face | radv_nggc_back_face | radv_nggc_small_primitives));
      nir_def *settings = ac_nir_load_arg(b, &s->args->ac, s->args->ngg_culling_settings);
      replacement = nir_ine_imm(b, nir_iand(b, settings, mask), 0);
      break;
   }
   case nir_intrinsic_load_cull_front_face_enabled_amd:
      replacement = nggc_bool_setting(b, radv_nggc_front_face, s);
      break;
   case nir_intrinsic_load_cull_back_face_enabled_amd:
      replacement = nggc_bool_setting(b, radv_nggc_back_face, s);
      break;
   case nir_intrinsic_load_cull_ccw_amd:
      replacement = nggc_bool_setting(b, radv_nggc_face_is_ccw, s);
      break;
   case nir_intrinsic_load_cull_small_primitives_enabled_amd:
      replacement = nggc_bool_setting(b, radv_nggc_small_primitives, s);
      break;
   case nir_intrinsic_load_cull_small_prim_precision_amd: {
      /* To save space, only the exponent is stored in the high 8 bits.
       * We calculate the precision from those 8 bits:
       * exponent = nggc_settings >> 24
       * precision = 1.0 * 2 ^ exponent
       */
      nir_def *settings = ac_nir_load_arg(b, &s->args->ac, s->args->ngg_culling_settings);
      nir_def *exponent = nir_ishr_imm(b, settings, 24u);
      replacement = nir_ldexp(b, nir_imm_float(b, 1.0f), exponent);
      break;
   }

   case nir_intrinsic_load_viewport_xy_scale_and_offset: {
      nir_def *comps[] = {
         ac_nir_load_arg(b, &s->args->ac, s->args->ngg_viewport_scale[0]),
         ac_nir_load_arg(b, &s->args->ac, s->args->ngg_viewport_scale[1]),
         ac_nir_load_arg(b, &s->args->ac, s->args->ngg_viewport_translate[0]),
         ac_nir_load_arg(b, &s->args->ac, s->args->ngg_viewport_translate[1]),
      };
      replacement = nir_vec(b, comps, 4);
      break;
   }

   case nir_intrinsic_load_ring_task_draw_amd:
      replacement = load_ring(b, RING_TS_DRAW, s);
      break;
   case nir_intrinsic_load_ring_task_payload_amd:
      replacement = load_ring(b, RING_TS_PAYLOAD, s);
      break;
   case nir_intrinsic_load_ring_mesh_scratch_amd:
      replacement = load_ring(b, RING_MS_SCRATCH, s);
      break;
   case nir_intrinsic_load_ring_mesh_scratch_offset_amd:
      /* gs_tg_info[0:11] is ordered_wave_id. Multiply by the ring entry size. */
      replacement = nir_imul_imm(b, nir_iand_imm(b, ac_nir_load_arg(b, &s->args->ac, s->args->ac.gs_tg_info), 0xfff),
                                 RADV_MESH_SCRATCH_ENTRY_BYTES);
      break;
   case nir_intrinsic_load_task_ring_entry_amd:
      replacement = ac_nir_load_arg(b, &s->args->ac, s->args->ac.task_ring_entry);
      break;
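   /* LS/HS vertex stride in bytes: 16 per linked output, plus one extra dword when there is at least one output. */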
   case nir_intrinsic_load_lshs_vertex_stride_amd: {
      if (stage == MESA_SHADER_VERTEX) {
         replacement = nir_imm_int(b, get_tcs_input_vertex_stride(s->info->vs.num_linked_outputs));
      } else {
         assert(stage == MESA_SHADER_TESS_CTRL);
         if (s->info->inputs_linked) {
            replacement = nir_imm_int(b, get_tcs_input_vertex_stride(s->info->tcs.num_linked_inputs));
         } else {
            nir_def *num_ls_out = GET_SGPR_FIELD_NIR(s->args->tcs_offchip_layout, TCS_OFFCHIP_LAYOUT_NUM_LS_OUTPUTS);
            nir_def *extra_dw = nir_bcsel(b, nir_ieq_imm(b, num_ls_out, 0), nir_imm_int(b, 0), nir_imm_int(b, 4));
            replacement = nir_iadd_nuw(b, nir_ishl_imm(b, num_ls_out, 4), extra_dw);
         }
      }
      break;
   }
   case nir_intrinsic_load_esgs_vertex_stride_amd: {
      /* Emulate VGT_ESGS_RING_ITEMSIZE on GFX9+ to reduce context register writes. */
      assert(s->gfx_level >= GFX9);
      if (s->info->merged_shader_compiled_separately) {
         replacement = ac_nir_load_arg(b, &s->args->ac, s->args->vgt_esgs_ring_itemsize);
      } else {
         const unsigned stride =
            s->info->is_ngg ? s->info->ngg_info.vgt_esgs_ring_itemsize : s->info->gs_ring_info.esgs_itemsize;
         replacement = nir_imm_int(b, stride);
      }
      break;
   }
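   /* The per-patch HS outputs start after the per-vertex outputs of all patches; compute that starting offset. */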
   case nir_intrinsic_load_hs_out_patch_data_offset_amd: {
      nir_def *num_tcs_outputs, *out_vertices_per_patch;

      if (stage == MESA_SHADER_TESS_CTRL) {
         num_tcs_outputs = nir_imm_int(b, s->info->tcs.num_linked_outputs);
         out_vertices_per_patch = nir_imm_int(b, s->info->tcs.tcs_vertices_out);
      } else {
         if (s->info->inputs_linked) {
            out_vertices_per_patch = nir_imm_int(b, s->info->tes.tcs_vertices_out);
            num_tcs_outputs = nir_imm_int(b, s->info->tes.num_linked_inputs);
         } else {
            nir_def *n = GET_SGPR_FIELD_NIR(s->args->tcs_offchip_layout, TCS_OFFCHIP_LAYOUT_OUT_PATCH_CP);
            out_vertices_per_patch = nir_iadd_imm_nuw(b, n, 1);
            num_tcs_outputs = GET_SGPR_FIELD_NIR(s->args->tcs_offchip_layout, TCS_OFFCHIP_LAYOUT_NUM_HS_OUTPUTS);
         }
      }

      nir_def *per_vertex_output_patch_size =
         nir_imul(b, out_vertices_per_patch, nir_imul_imm(b, num_tcs_outputs, 16u));

      if (s->info->num_tess_patches) {
         unsigned num_patches = s->info->num_tess_patches;
         replacement = nir_imul_imm(b, per_vertex_output_patch_size, num_patches);
      } else {
         nir_def *n = GET_SGPR_FIELD_NIR(s->args->tcs_offchip_layout, TCS_OFFCHIP_LAYOUT_NUM_PATCHES);
         nir_def *num_patches = nir_iadd_imm_nuw(b, n, 1);
         replacement = nir_imul(b, per_vertex_output_patch_size, num_patches);
      }
      break;
   }
   case nir_intrinsic_load_sample_positions_amd: {
      uint32_t sample_pos_offset = (RING_PS_SAMPLE_POSITIONS * 16) - 8;

      nir_def *ring_offsets = ac_nir_load_arg(b, &s->args->ac, s->args->ac.ring_offsets);
      nir_def *addr = nir_pack_64_2x32(b, ring_offsets);
      nir_def *sample_id = nir_umin(b, intrin->src[0].ssa, nir_imm_int(b, 7));
      nir_def *offset = nir_ishl_imm(b, sample_id, 3); /* 2 floats containing samplepos.xy */

      nir_const_value *const_num_samples = nir_src_as_const_value(intrin->src[1]);
      if (const_num_samples) {
         sample_pos_offset += (const_num_samples->u32 << 3);
      } else {
         offset = nir_iadd(b, offset, nir_ishl_imm(b, intrin->src[1].ssa, 3));
      }

      replacement =
         nir_load_global_amd(b, 2, 32, addr, offset, .base = sample_pos_offset, .access = ACCESS_NON_WRITEABLE);
      break;
   }
   case nir_intrinsic_load_rasterization_samples_amd:
      if (s->gfx_state->dynamic_rasterization_samples) {
         replacement = GET_SGPR_FIELD_NIR(s->args->ps_state, PS_STATE_NUM_SAMPLES);
      } else {
         replacement = nir_imm_int(b, s->gfx_state->ms.rasterization_samples);
      }
      break;
   case nir_intrinsic_load_provoking_vtx_in_prim_amd: {
      if (s->gfx_state->dynamic_provoking_vtx_mode) {
         replacement = ac_nir_load_arg(b, &s->args->ac, s->args->ngg_provoking_vtx);
      } else {
         unsigned provoking_vertex = 0;
         if (s->gfx_state->rs.provoking_vtx_last) {
            if (stage == MESA_SHADER_VERTEX) {
               provoking_vertex = radv_get_num_vertices_per_prim(s->gfx_state) - 1;
            } else if (stage == MESA_SHADER_GEOMETRY) {
               provoking_vertex = b->shader->info.gs.vertices_in - 1;
            } else {
               /* TES won't use this intrinsic, because it can get the primitive id directly
                * instead of using this intrinsic to pass the primitive id through LDS.
                */
               unreachable("load_provoking_vtx_in_prim_amd is only supported in VS and GS");
            }
         }

         replacement = nir_imm_int(b, provoking_vertex);
      }
      break;
   }
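   /* Shader query counters are accumulated with GDS atomics. */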
   case nir_intrinsic_atomic_add_gs_emit_prim_count_amd:
      nir_gds_atomic_add_amd(b, 32, intrin->src[0].ssa, nir_imm_int(b, RADV_SHADER_QUERY_GS_PRIM_EMIT_OFFSET),
                             nir_imm_int(b, 0x100));
      break;
   case nir_intrinsic_atomic_add_gen_prim_count_amd: {
      uint32_t offset = stage == MESA_SHADER_MESH ? RADV_SHADER_QUERY_MS_PRIM_GEN_OFFSET
                                                  : RADV_SHADER_QUERY_PRIM_GEN_OFFSET(nir_intrinsic_stream_id(intrin));

      nir_gds_atomic_add_amd(b, 32, intrin->src[0].ssa, nir_imm_int(b, offset), nir_imm_int(b, 0x100));
      break;
   }
   case nir_intrinsic_atomic_add_xfb_prim_count_amd:
      nir_gds_atomic_add_amd(b, 32, intrin->src[0].ssa,
                             nir_imm_int(b, RADV_SHADER_QUERY_PRIM_XFB_OFFSET(nir_intrinsic_stream_id(intrin))),
                             nir_imm_int(b, 0x100));
      break;
   case nir_intrinsic_atomic_add_shader_invocation_count_amd: {
      uint32_t offset;

      if (stage == MESA_SHADER_MESH) {
         offset = RADV_SHADER_QUERY_MS_INVOCATION_OFFSET;
      } else if (stage == MESA_SHADER_TASK) {
         offset = RADV_SHADER_QUERY_TS_INVOCATION_OFFSET;
      } else {
         offset = RADV_SHADER_QUERY_GS_INVOCATION_OFFSET;
      }

      nir_gds_atomic_add_amd(b, 32, intrin->src[0].ssa, nir_imm_int(b, offset), nir_imm_int(b, 0x100));
      break;
   }
   case nir_intrinsic_load_streamout_config_amd:
      replacement = ac_nir_load_arg(b, &s->args->ac, s->args->ac.streamout_config);
      break;
   case nir_intrinsic_load_streamout_write_index_amd:
      replacement = ac_nir_load_arg(b, &s->args->ac, s->args->ac.streamout_write_index);
      break;
   case nir_intrinsic_load_streamout_buffer_amd: {
      nir_def *ptr = nir_pack_64_2x32_split(b, ac_nir_load_arg(b, &s->args->ac, s->args->streamout_buffers),
                                            nir_imm_int(b, s->address32_hi));
      replacement = nir_load_smem_amd(b, 4, ptr, nir_imm_int(b, nir_intrinsic_base(intrin) * 16));
      break;
   }
   case nir_intrinsic_load_streamout_offset_amd:
      replacement = ac_nir_load_arg(b, &s->args->ac, s->args->ac.streamout_offset[nir_intrinsic_base(intrin)]);
      break;
   case nir_intrinsic_load_xfb_state_address_gfx12_amd:
      replacement = nir_pack_64_2x32_split(b, ac_nir_load_arg(b, &s->args->ac, s->args->streamout_state),
                                           nir_imm_int(b, s->address32_hi));
      break;
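   /* NGG LDS layout: known at compile time, unless the merged shader parts are compiled separately,
    * in which case it is passed through the ngg_lds_layout SGPR.
    */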
   case nir_intrinsic_load_lds_ngg_gs_out_vertex_base_amd:
      if (s->info->merged_shader_compiled_separately) {
         replacement = GET_SGPR_FIELD_NIR(s->args->ngg_lds_layout, NGG_LDS_LAYOUT_GS_OUT_VERTEX_BASE);
      } else {
         replacement = nir_imm_int(b, s->info->ngg_info.esgs_ring_size);
      }
      break;
   case nir_intrinsic_load_lds_ngg_scratch_base_amd:
      if (s->info->merged_shader_compiled_separately) {
         replacement = GET_SGPR_FIELD_NIR(s->args->ngg_lds_layout, NGG_LDS_LAYOUT_SCRATCH_BASE);
      } else {
         replacement = nir_imm_int(b, s->info->ngg_info.scratch_lds_base);
      }
      break;
   case nir_intrinsic_load_num_vertices_per_primitive_amd: {
      unsigned num_vertices;

      if (stage == MESA_SHADER_VERTEX) {
         /* For dynamic primitive topology with streamout. */
         if (s->info->vs.dynamic_num_verts_per_prim) {
            replacement = ac_nir_load_arg(b, &s->args->ac, s->args->num_verts_per_prim);
         } else {
            replacement = nir_imm_int(b, radv_get_num_vertices_per_prim(s->gfx_state));
         }
      } else if (stage == MESA_SHADER_TESS_EVAL) {
         if (s->info->tes.point_mode) {
            num_vertices = 1;
         } else if (s->info->tes._primitive_mode == TESS_PRIMITIVE_ISOLINES) {
            num_vertices = 2;
         } else {
            num_vertices = 3;
         }
         replacement = nir_imm_int(b, num_vertices);
      } else {
         assert(stage == MESA_SHADER_GEOMETRY);
         switch (s->info->gs.output_prim) {
         case MESA_PRIM_POINTS:
            num_vertices = 1;
            break;
         case MESA_PRIM_LINE_STRIP:
            num_vertices = 2;
            break;
         case MESA_PRIM_TRIANGLE_STRIP:
            num_vertices = 3;
            break;
         default:
            unreachable("invalid GS output primitive");
            break;
         }
         replacement = nir_imm_int(b, num_vertices);
      }
      break;
   }
   case nir_intrinsic_load_ordered_id_amd:
      replacement = ac_nir_unpack_arg(b, &s->args->ac, s->args->ac.gs_tg_info, 0, 12);
      break;
   case nir_intrinsic_load_force_vrs_rates_amd:
      replacement = ac_nir_load_arg(b, &s->args->ac, s->args->ac.force_vrs_rates);
      break;
   case nir_intrinsic_load_fully_covered: {
      nir_def *sample_coverage = ac_nir_load_arg(b, &s->args->ac, s->args->ac.sample_coverage);
      replacement = nir_ine_imm(b, sample_coverage, 0);
      break;
   }
   case nir_intrinsic_load_barycentric_optimize_amd: {
      nir_def *prim_mask = ac_nir_load_arg(b, &s->args->ac, s->args->ac.prim_mask);
      /* enabled when bit 31 is set */
      replacement = nir_ilt_imm(b, prim_mask, 0);
      break;
   }
   case nir_intrinsic_load_poly_line_smooth_enabled: {
      nir_def *line_rast_mode = GET_SGPR_FIELD_NIR(s->args->ps_state, PS_STATE_LINE_RAST_MODE);
      replacement = nir_ieq_imm(b, line_rast_mode, VK_LINE_RASTERIZATION_MODE_RECTANGULAR_SMOOTH_KHR);
      break;
   }
   case nir_intrinsic_load_initial_edgeflags_amd:
      replacement = nir_imm_int(b, 0);
      break;
   case nir_intrinsic_load_provoking_vtx_amd:
      replacement = ac_nir_load_arg(b, &s->args->ac, s->args->ac.load_provoking_vtx);
      break;
   case nir_intrinsic_load_rasterization_primitive_amd:
      assert(s->gfx_state->unknown_rast_prim);
      /* Load the primitive topology from a user SGPR when it's unknown at compile time (GPL). */
      replacement = GET_SGPR_FIELD_NIR(s->args->ps_state, PS_STATE_RAST_PRIM);
      break;
   default:
      progress = false;
      break;
   }

   if (!progress)
      return false;

   if (replacement)
      nir_def_rewrite_uses(&intrin->def, replacement);

   nir_instr_remove(&intrin->instr);
   nir_instr_free(&intrin->instr);

   return true;
}

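/* Build the GSVS ring descriptor for the given vertex stream: offset the base address by the
 * stream's byte offset and patch the stride and num_records fields of the descriptor.
 */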
static nir_def *
load_gsvs_ring(nir_builder *b, lower_abi_state *s, unsigned stream_id)
{
   nir_def *ring = load_ring(b, RING_GSVS_GS, s);
   unsigned stream_offset = 0;
   unsigned stride = 0;
   for (unsigned i = 0; i <= stream_id; i++) {
      stride = 4 * s->info->gs.num_stream_output_components[i] * s->info->gs.vertices_out;
      if (i < stream_id)
         stream_offset += stride * s->info->wave_size;
   }

   /* Limit on the stride field for <= GFX7. */
   assert(stride < (1 << 14));

   if (stream_offset) {
      nir_def *addr = nir_pack_64_2x32_split(b, nir_channel(b, ring, 0), nir_channel(b, ring, 1));
      addr = nir_iadd_imm(b, addr, stream_offset);
      ring = nir_vector_insert_imm(b, ring, nir_unpack_64_2x32_split_x(b, addr), 0);
      ring = nir_vector_insert_imm(b, ring, nir_unpack_64_2x32_split_y(b, addr), 1);
   }

   ring = nir_vector_insert_imm(b, ring, nir_ior_imm(b, nir_channel(b, ring, 1), S_008F04_STRIDE(stride)), 1);
   return nir_vector_insert_imm(b, ring, nir_imm_int(b, s->info->wave_size), 2);
}

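/* Lower RADV-specific ABI intrinsics to loads of shader arguments, ring buffer descriptors
 * and compile-time constants.
 */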
void
radv_nir_lower_abi(nir_shader *shader, enum amd_gfx_level gfx_level, const struct radv_shader_stage *stage,
                   const struct radv_graphics_state_key *gfx_state, uint32_t address32_hi)
{
   lower_abi_state state = {
      .gfx_level = gfx_level,
      .info = &stage->info,
      .args = &stage->args,
      .gfx_state = gfx_state,
      .address32_hi = address32_hi,
   };

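   /* Legacy (non-NGG) GS: preload the GSVS ring descriptor for each active vertex stream
    * at the top of the entrypoint.
    */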
   if (shader->info.stage == MESA_SHADER_GEOMETRY && !stage->info.is_ngg) {
      nir_function_impl *impl = nir_shader_get_entrypoint(shader);

      nir_builder b = nir_builder_at(nir_before_impl(impl));

      u_foreach_bit (i, shader->info.gs.active_stream_mask)
         state.gsvs_ring[i] = load_gsvs_ring(&b, &state, i);
   }

   nir_shader_intrinsics_pass(shader, lower_abi_instr, nir_metadata_control_flow, &state);
}