xref: /aosp_15_r20/external/mesa3d/src/amd/common/ac_nir.h (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright © 2021 Valve Corporation
3  *
4  * SPDX-License-Identifier: MIT
5  */
6 
7 
8 #ifndef AC_NIR_H
9 #define AC_NIR_H
10 
11 #include "ac_hw_stage.h"
12 #include "ac_shader_args.h"
13 #include "ac_shader_util.h"
14 #include "nir.h"
15 
16 #ifdef __cplusplus
17 extern "C" {
18 #endif
19 
/* Parameter export slot encodings.
 *
 * Values 0..31 are offsets (only the two endpoints are named), 64..67 select
 * one of the default values, and 255 is the deprecated "undefined" marker.
 */
20 enum
21 {
22    /* SPI_PS_INPUT_CNTL_i.OFFSET[0:4] */
23    AC_EXP_PARAM_OFFSET_0 = 0,
24    AC_EXP_PARAM_OFFSET_31 = 31,
25    /* SPI_PS_INPUT_CNTL_i.DEFAULT_VAL[0:1] */
26    AC_EXP_PARAM_DEFAULT_VAL_0000 = 64,
27    AC_EXP_PARAM_DEFAULT_VAL_0001, /* = 65 (implicit) */
28    AC_EXP_PARAM_DEFAULT_VAL_1110, /* = 66 (implicit) */
29    AC_EXP_PARAM_DEFAULT_VAL_1111, /* = 67 (implicit) */
30    AC_EXP_PARAM_UNDEFINED = 255, /* deprecated, use AC_EXP_PARAM_DEFAULT_VAL_0000 instead */
31 };
32 
/* Single-bit flags; each occupies its own bit so they can be OR-ed together.
 * NOTE(review): presumably these describe export instructions, judging by the
 * AC_EXP_ prefix - confirm at the use sites.
 */
33 enum {
34    AC_EXP_FLAG_COMPRESSED = (1 << 0),
35    AC_EXP_FLAG_DONE       = (1 << 1),
36    AC_EXP_FLAG_VALID_MASK = (1 << 2),
37 };
38 
39 /* Maps I/O semantics to the actual location used by the lowering pass.
 *
 * Callback type: takes one unsigned semantic and returns an unsigned location.
 * It is the `map` parameter of the ac_nir_lower_*_to_mem passes declared in
 * this header.
 */
40 typedef unsigned (*ac_nir_map_io_driver_location)(unsigned semantic);
41 
42 /* Forward declaration of nir_builder so we don't have to include nir_builder.h here */
43 struct nir_builder;
44 typedef struct nir_builder nir_builder;
45 
46 /* Executed by ac_nir_cull when the current primitive is accepted.
 *
 * Callback type: receives the builder and an opaque `state` pointer and
 * returns nothing. ac_nir_cull itself is not declared in this header.
 * NOTE(review): `state` is presumably caller-provided context passed through
 * unchanged - confirm at the ac_nir_cull declaration.
 */
47 typedef void (*ac_nir_cull_accepted)(nir_builder *b, void *state);
48 
/* Declaration only; the definition lives outside this header.
 *
 * Same parameters as ac_nir_load_arg() plus `relative_index`.
 * NOTE(review): presumably `relative_index` selects an argument slot relative
 * to `arg` - confirm in the definition.
 */
49 nir_def *
50 ac_nir_load_arg_at_offset(nir_builder *b, const struct ac_shader_args *ac_args,
51                           struct ac_arg arg, unsigned relative_index);
52 
/* Convenience wrapper around ac_nir_load_arg_at_offset().
 *
 * Forwards `b`, `ac_args` and `arg` unchanged and passes 0 as the
 * relative_index, returning whatever ac_nir_load_arg_at_offset() returns.
 */
53 static inline nir_def *
54 ac_nir_load_arg(nir_builder *b, const struct ac_shader_args *ac_args, struct ac_arg arg)
55 {
56    return ac_nir_load_arg_at_offset(b, ac_args, arg, 0);
57 }
58 
/* Declaration only. Takes the same (b, ac_args, arg) triple as the load
 * helpers plus the value `val`.
 * NOTE(review): presumably stores `val` as the new value of `arg` - confirm
 * in the definition.
 */
59 void ac_nir_store_arg(nir_builder *b, const struct ac_shader_args *ac_args, struct ac_arg arg,
60                       nir_def *val);
61 
/* Declaration only.
 * NOTE(review): presumably extracts a `bitwidth`-bit field starting at bit
 * `rshift` of the argument - confirm in the definition.
 */
62 nir_def *
63 ac_nir_unpack_arg(nir_builder *b, const struct ac_shader_args *ac_args, struct ac_arg arg,
64                   unsigned rshift, unsigned bitwidth);
65 
/* Shader passes, declarations only.
 * NOTE(review): the bool results presumably report whether the shader was
 * changed, as is usual for NIR passes - confirm in the definitions.
 */
66 bool ac_nir_lower_sin_cos(nir_shader *shader);
67 
/* The top-level `const` on the by-value gfx_level/hw_stage parameters has no
 * effect on callers; only `ac_args` is const-qualified through the pointer.
 */
68 bool ac_nir_lower_intrinsics_to_args(nir_shader *shader, const enum amd_gfx_level gfx_level,
69                                      const enum ac_hw_stage hw_stage,
70                                      const struct ac_shader_args *ac_args);
71 
/* Both array parameters are adjusted to plain pointers by the language;
 * NUM_TOTAL_VARYING_SLOTS documents the expected element count but is not
 * enforced by the compiler. Neither array is const.
 * NOTE(review): presumably both are written by the pass - confirm.
 */
72 bool ac_nir_optimize_outputs(nir_shader *nir, bool sprite_tex_disallowed,
73                              int8_t slot_remap[NUM_TOTAL_VARYING_SLOTS],
74                              uint8_t param_export_index[NUM_TOTAL_VARYING_SLOTS]);
75 
/* Declaration only. `map` is the semantic-to-location callback.
 * NOTE(review): presumably rewrites the outputs of an LS-stage shader into
 * memory accesses, with the two uint64_t parameters acting as per-slot
 * bitmasks - confirm in the definition.
 */
76 void
77 ac_nir_lower_ls_outputs_to_mem(nir_shader *ls,
78                                ac_nir_map_io_driver_location map,
79                                bool tcs_in_out_eq,
80                                uint64_t tcs_inputs_read,
81                                uint64_t tcs_temp_only_inputs);
82 
/* Declaration only. Shares `map`, `tcs_in_out_eq` and `tcs_temp_only_inputs`
 * with ac_nir_lower_ls_outputs_to_mem().
 * NOTE(review): presumably the two passes must be given matching values for
 * those parameters - confirm with the callers.
 */
83 void
84 ac_nir_lower_hs_inputs_to_mem(nir_shader *shader,
85                               ac_nir_map_io_driver_location map,
86                               bool tcs_in_out_eq,
87                               uint64_t tcs_temp_only_inputs);
88 
/* Declaration only. Note the differing widths: `tes_inputs_read` is 64-bit
 * while `tes_patch_inputs_read` is 32-bit.
 * NOTE(review): presumably both are bitmasks of slots read by the TES -
 * confirm in the definition.
 */
89 void
90 ac_nir_lower_hs_outputs_to_mem(nir_shader *shader,
91                                ac_nir_map_io_driver_location map,
92                                enum amd_gfx_level gfx_level,
93                                uint64_t tes_inputs_read,
94                                uint32_t tes_patch_inputs_read,
95                                unsigned wave_size,
96                                bool pass_tessfactors_by_reg);
97 
/* Declaration only; takes just the shader and the location-mapping callback. */
98 void
99 ac_nir_lower_tes_inputs_to_mem(nir_shader *shader,
100                                ac_nir_map_io_driver_location map);
101 
/* Declaration only.
 * NOTE(review): `esgs_itemsize` has no unit stated here and `gs_inputs_read`
 * is presumably a bitmask of slots - confirm both in the definition.
 */
102 void
103 ac_nir_lower_es_outputs_to_mem(nir_shader *shader,
104                                ac_nir_map_io_driver_location map,
105                                enum amd_gfx_level gfx_level,
106                                unsigned esgs_itemsize,
107                                uint64_t gs_inputs_read);
108 
/* Declaration only. */
109 void
110 ac_nir_lower_gs_inputs_to_mem(nir_shader *shader,
111                               ac_nir_map_io_driver_location map,
112                               enum amd_gfx_level gfx_level,
113                               bool triangle_strip_adjacency_fix);
114 
/* Declaration only.
 * NOTE(review): the bool result presumably reports whether the shader was
 * changed - confirm in the definition.
 */
115 bool
116 ac_nir_lower_indirect_derefs(nir_shader *shader,
117                              enum amd_gfx_level gfx_level);
118 
/* Option bundle passed by const pointer to ac_nir_lower_ngg_nogs() and
 * ac_nir_lower_ngg_gs().
 *
 * The leading fields carry no section comment; the "VS" and "GS" groups below
 * are labelled by the original authors.
 * NOTE(review): presumably the unlabelled fields apply to both passes and the
 * labelled ones only to the matching pass - confirm in the definitions.
 */
119 typedef struct {
120    enum radeon_family family;
121    enum amd_gfx_level gfx_level;
122 
123    unsigned max_workgroup_size;
124    unsigned wave_size;
125    uint8_t clip_cull_dist_mask;
126    const uint8_t *vs_output_param_offset; /* GFX11+ */
127    bool has_param_exports;
128    bool can_cull;
129    bool disable_streamout;
130    bool has_gen_prim_query;
131    bool has_xfb_prim_query;
132    bool use_gfx12_xfb_intrinsic;
133    bool has_gs_invocations_query;
134    bool has_gs_primitives_query;
135    bool kill_pointsize;
136    bool kill_layer;
137    bool force_vrs;
138 
139    /* VS */
140    unsigned num_vertices_per_primitive;
141    bool early_prim_export;
142    bool passthrough;
143    bool use_edgeflags;
144    bool export_primitive_id;
145    uint32_t instance_rate_inputs;
146    uint32_t user_clip_plane_enable_mask;
147 
148    /* GS */
149    unsigned gs_out_vtx_bytes;
150 } ac_nir_lower_ngg_options;
151 
/* Declarations only. Both passes read their configuration from a const
 * ac_nir_lower_ngg_options.
 */
152 void
153 ac_nir_lower_ngg_nogs(nir_shader *shader, const ac_nir_lower_ngg_options *options);
154 
155 void
156 ac_nir_lower_ngg_gs(nir_shader *shader, const ac_nir_lower_ngg_options *options);
157 
/* Declaration only. Unlike the two passes above, this one takes individual
 * parameters rather than an ac_nir_lower_ngg_options.
 * `out_needs_scratch_ring` is a non-const bool pointer.
 * NOTE(review): presumably it is an output flag written by the pass - confirm
 * in the definition, including whether NULL is accepted.
 */
158 void
159 ac_nir_lower_ngg_ms(nir_shader *shader,
160                     enum amd_gfx_level gfx_level,
161                     uint32_t clipdist_enable_mask,
162                     const uint8_t *vs_output_param_offset,
163                     bool has_param_exports,
164                     bool *out_needs_scratch_ring,
165                     unsigned wave_size,
166                     unsigned workgroup_size,
167                     bool multiview,
168                     bool has_query,
169                     bool fast_launch_2);
170 
/* Declarations only. The task and mesh passes share the
 * `task_payload_entry_bytes` and `task_num_entries` parameters.
 * NOTE(review): presumably both passes must be given the same values for a
 * task/mesh pair - confirm with the callers.
 */
171 void
172 ac_nir_lower_task_outputs_to_mem(nir_shader *shader,
173                                  unsigned task_payload_entry_bytes,
174                                  unsigned task_num_entries,
175                                  bool has_query);
176 
177 void
178 ac_nir_lower_mesh_inputs_to_mem(nir_shader *shader,
179                                 unsigned task_payload_entry_bytes,
180                                 unsigned task_num_entries);
181 
/* Declarations only.
 * NOTE(review): the bool results presumably report whether the shader was
 * changed - confirm in the definitions.
 */
182 bool
183 ac_nir_lower_global_access(nir_shader *shader);
184 
185 bool ac_nir_lower_resinfo(nir_shader *nir, enum amd_gfx_level gfx_level);
186 bool ac_nir_lower_image_opcodes(nir_shader *nir);
187 
/* GS output description, passed by pointer to ac_nir_create_gs_copy_shader()
 * and ac_nir_lower_legacy_gs().
 *
 * Every member is a pointer; the struct holds no storage of its own.
 * `streams` and `usage_mask` each come in three variants: plain, 16bit_lo and
 * 16bit_hi.
 * NOTE(review): array lengths and ownership are not stated here; presumably
 * each array is indexed by output slot - confirm at the producer.
 */
188 typedef struct ac_nir_gs_output_info {
189    const uint8_t *streams;
190    const uint8_t *streams_16bit_lo;
191    const uint8_t *streams_16bit_hi;
192 
193    const uint8_t *usage_mask;
194    const uint8_t *usage_mask_16bit_lo;
195    const uint8_t *usage_mask_16bit_hi;
196 
197    /* type for each 16bit slot component */
   /* Each is a pointer to arrays of 4 nir_alu_type and is not const. */
198    nir_alu_type (*types_16bit_lo)[4];
199    nir_alu_type (*types_16bit_hi)[4];
200 } ac_nir_gs_output_info;
201 
/* Declaration only. Takes the GS by const pointer and returns a nir_shader
 * pointer.
 * NOTE(review): presumably the result is a newly created shader owned by the
 * caller - confirm in the definition.
 * `output_info` is not const-qualified.
 */
202 nir_shader *
203 ac_nir_create_gs_copy_shader(const nir_shader *gs_nir,
204                              enum amd_gfx_level gfx_level,
205                              uint32_t clip_cull_mask,
206                              const uint8_t *param_offsets,
207                              bool has_param_exports,
208                              bool disable_streamout,
209                              bool kill_pointsize,
210                              bool kill_layer,
211                              bool force_vrs,
212                              ac_nir_gs_output_info *output_info);
213 
/* Declaration only. Its parameter list matches ac_nir_create_gs_copy_shader()
 * except that it takes `export_primitive_id`, has no `output_info`, and
 * modifies `nir` in place (void return, non-const shader).
 */
214 void
215 ac_nir_lower_legacy_vs(nir_shader *nir,
216                        enum amd_gfx_level gfx_level,
217                        uint32_t clip_cull_mask,
218                        const uint8_t *param_offsets,
219                        bool has_param_exports,
220                        bool export_primitive_id,
221                        bool disable_streamout,
222                        bool kill_pointsize,
223                        bool kill_layer,
224                        bool force_vrs);
225 
/* Declaration only. Operates on a nir_builder rather than a whole shader.
 *
 * `vertex_count[4]` and `primitive_count[4]` are adjusted to `nir_def **` by
 * the language; the 4 documents the expected element count but is not
 * enforced.
 * NOTE(review): presumably one entry per vertex stream, with the bool result
 * reporting whether any query code was emitted - confirm in the definition.
 */
226 bool
227 ac_nir_gs_shader_query(nir_builder *b,
228                        bool has_gen_prim_query,
229                        bool has_gs_invocations_query,
230                        bool has_gs_primitives_query,
231                        unsigned num_vertices_per_primitive,
232                        unsigned wave_size,
233                        nir_def *vertex_count[4],
234                        nir_def *primitive_count[4]);
235 
/* Declaration only. `output_info` is not const-qualified. */
236 void
237 ac_nir_lower_legacy_gs(nir_shader *nir,
238                        bool has_gen_prim_query,
239                        bool has_pipeline_stats_query,
240                        ac_nir_gs_output_info *output_info);
241 
/* Options for ac_nir_lower_subdword_loads(): two independent variable-mode
 * masks, chosen by the number of components being loaded.
 */
242 typedef struct {
243    /* Which load instructions to lower depending on whether the number of
244     * components being loaded is 1 or more than 1.
245     */
246    nir_variable_mode modes_1_comp;  /* lower 1-component loads for these */
247    nir_variable_mode modes_N_comps; /* lower multi-component loads for these */
248 } ac_nir_lower_subdword_options;
249 
/* Declaration only. The options struct is passed by value here, whereas the
 * other option structs in this header are passed by const pointer.
 */
250 bool ac_nir_lower_subdword_loads(nir_shader *nir, ac_nir_lower_subdword_options options);
251 
/* Option bundle passed by const pointer to ac_nir_lower_ps().
 *
 * The original authors label the last two groups "OpenGL only" and
 * "Vulkan only"; the preceding fields carry no such restriction.
 * NOTE(review): several `unsigned` fields (spi_shader_col_format,
 * color_is_int8, color_is_int10, enable_mrt_output_nan_fixup) are presumably
 * per-colour-buffer bitfields or masks - confirm in the definition.
 */
252 typedef struct {
253    enum radeon_family family;
254    enum amd_gfx_level gfx_level;
255 
256    bool use_aco;
257    bool uses_discard;
258    bool alpha_to_coverage_via_mrtz;
259    bool dual_src_blend_swizzle;
260    unsigned spi_shader_col_format;
261    unsigned color_is_int8;
262    unsigned color_is_int10;
263 
264    bool bc_optimize_for_persp;
265    bool bc_optimize_for_linear;
266    bool force_persp_sample_interp;
267    bool force_linear_sample_interp;
268    bool force_persp_center_interp;
269    bool force_linear_center_interp;
270    unsigned ps_iter_samples;
271 
272    /* OpenGL only */
273    bool clamp_color;
274    bool alpha_to_one;
275    bool kill_samplemask;
276    enum compare_func alpha_func;
277    unsigned broadcast_last_cbuf;
278 
279    /* Vulkan only */
280    unsigned enable_mrt_output_nan_fixup;
281    bool no_color_export;
282    bool no_depth_export;
283 } ac_nir_lower_ps_options;
284 
/* Declaration only; modifies `nir` in place and returns nothing. */
285 void
286 ac_nir_lower_ps(nir_shader *nir, const ac_nir_lower_ps_options *options);
287 
/* Option bundle passed by const pointer to ac_nir_lower_tex(). */
288 typedef struct {
289    enum amd_gfx_level gfx_level;
290 
291    /* If true, round the layer component of the coordinates source to the nearest
292     * integer for all array ops. This is always done for cube array ops.
293     */
294    bool lower_array_layer_round_even;
295 
296    /* Fix derivatives of constants and FS inputs in control flow.
297     *
298     * Ignores interpolateAtSample()/interpolateAtOffset(), dynamically indexed input loads,
299     * pervertexEXT input loads, textureGather() with implicit LOD and 16-bit derivatives and
300     * texture samples with nir_tex_src_min_lod.
301     *
302     * The layer must also be a constant or FS input.
303     */
304    bool fix_derivs_in_divergent_cf;
   /* NOTE(review): presumably a VGPR budget used by the fix_derivs_in_divergent_cf
    * path - confirm in the definition.
    */
305    unsigned max_wqm_vgprs;
306 } ac_nir_lower_tex_options;
307 
/* Declaration only.
 * NOTE(review): the bool result presumably reports whether the shader was
 * changed - confirm in the definition.
 */
308 bool
309 ac_nir_lower_tex(nir_shader *nir, const ac_nir_lower_tex_options *options);
310 
/* Declaration only.
 * NOTE(review): presumably `uvec4` must be a 4-component unsigned vector, per
 * the parameter name - confirm in the definition.
 */
311 void
312 ac_nir_store_debug_log_amd(nir_builder *b, nir_def *uvec4);
313 
/* Declaration only.
 * NOTE(review): the bool result presumably reports whether the shader was
 * changed - confirm in the definition.
 */
314 bool
315 ac_nir_opt_pack_half(nir_shader *shader, enum amd_gfx_level gfx_level);
316 
/* Declarations only. Both return an unsigned value; the first takes a
 * producer/consumer shader pair, the second a single instruction.
 * NOTE(review): presumably the two use the same cost scale so their results
 * can be compared - confirm in the definitions.
 */
317 unsigned
318 ac_nir_varying_expression_max_cost(nir_shader *producer, nir_shader *consumer);
319 
320 unsigned
321 ac_nir_varying_estimate_instr_cost(nir_instr *instr);
322 
/* Declaration only.
 * NOTE(review): the bool result presumably reports whether the shader was
 * changed - confirm in the definition.
 */
323 bool
324 ac_nir_opt_shared_append(nir_shader *shader);
325 
326 #ifdef __cplusplus
327 }
328 #endif
329 
330 #endif /* AC_NIR_H */
331