1 /* 2 * Copyright © 2016 Red Hat. 3 * Copyright © 2016 Bas Nieuwenhuizen 4 * 5 * based in part on anv driver which is: 6 * Copyright © 2015 Intel Corporation 7 * 8 * SPDX-License-Identifier: MIT 9 */ 10 11 #ifndef RADV_SHADER_INFO_H 12 #define RADV_SHADER_INFO_H 13 14 #include <inttypes.h> 15 #include <stdbool.h> 16 17 #include "radv_constants.h" 18 #include "radv_shader_args.h" 19 20 struct radv_device; 21 struct nir_shader; 22 struct radv_shader_layout; 23 struct radv_shader_stage_key; 24 enum radv_pipeline_type; 25 struct radv_shader_stage; 26 27 enum radv_shader_type { 28 RADV_SHADER_TYPE_DEFAULT = 0, 29 RADV_SHADER_TYPE_GS_COPY, 30 RADV_SHADER_TYPE_TRAP_HANDLER, 31 RADV_SHADER_TYPE_RT_PROLOG, 32 }; 33 34 struct radv_vs_output_info { 35 uint8_t vs_output_param_offset[VARYING_SLOT_MAX]; 36 uint8_t clip_dist_mask; 37 uint8_t cull_dist_mask; 38 uint8_t param_exports; 39 uint8_t prim_param_exports; 40 bool writes_pointsize; 41 bool writes_layer; 42 bool writes_layer_per_primitive; 43 bool writes_viewport_index; 44 bool writes_viewport_index_per_primitive; 45 bool writes_primitive_shading_rate; 46 bool writes_primitive_shading_rate_per_primitive; 47 bool export_prim_id; 48 unsigned pos_exports; 49 }; 50 51 struct radv_streamout_info { 52 uint16_t num_outputs; 53 uint16_t strides[MAX_SO_BUFFERS]; 54 uint32_t enabled_stream_buffers_mask; 55 }; 56 57 struct radv_legacy_gs_info { 58 uint32_t gs_inst_prims_in_subgroup; 59 uint32_t es_verts_per_subgroup; 60 uint32_t gs_prims_per_subgroup; 61 uint32_t esgs_itemsize; 62 uint32_t lds_size; 63 uint32_t esgs_ring_size; 64 uint32_t gsvs_ring_size; 65 }; 66 67 struct gfx10_ngg_info { 68 uint16_t ngg_emit_size; /* in dwords */ 69 uint32_t hw_max_esverts; 70 uint32_t max_gsprims; 71 uint32_t max_out_verts; 72 uint32_t prim_amp_factor; 73 uint32_t vgt_esgs_ring_itemsize; 74 uint32_t esgs_ring_size; 75 uint32_t scratch_lds_base; 76 uint32_t lds_size; 77 bool max_vert_out_per_gs_instance; 78 }; 79 80 struct radv_shader_info { 81 uint64_t inline_push_constant_mask; 82 bool can_inline_all_push_constants; 83 bool loads_push_constants; 84 bool loads_dynamic_offsets; 85 uint32_t desc_set_used_mask; 86 bool uses_view_index; 87 bool uses_invocation_id; 88 bool uses_prim_id; 89 uint8_t wave_size; 90 uint8_t ballot_bit_size; 91 struct radv_userdata_locations user_sgprs_locs; 92 bool is_ngg; 93 bool is_ngg_passthrough; 94 bool has_ngg_culling; 95 bool has_ngg_early_prim_export; 96 bool has_prim_query; 97 bool has_xfb_query; 98 uint32_t num_tess_patches; 99 uint32_t esgs_itemsize; /* Only for VS or TES as ES */ 100 struct radv_vs_output_info outinfo; 101 unsigned workgroup_size; 102 bool force_vrs_per_vertex; 103 gl_shader_stage stage; 104 gl_shader_stage next_stage; 105 enum radv_shader_type type; 106 uint32_t user_data_0; 107 bool inputs_linked; 108 bool outputs_linked; 109 bool merged_shader_compiled_separately; /* GFX9+ */ 110 bool force_indirect_desc_sets; 111 uint64_t gs_inputs_read; /* Mask of GS inputs read (only used by linked ES) */ 112 113 struct { 114 uint8_t output_usage_mask[VARYING_SLOT_VAR31 + 1]; 115 bool needs_draw_id; 116 bool needs_instance_id; 117 bool as_es; 118 bool as_ls; 119 bool tcs_in_out_eq; 120 uint64_t tcs_temp_only_input_mask; 121 uint8_t num_linked_outputs; 122 bool needs_base_instance; 123 bool use_per_attribute_vb_descs; 124 uint32_t vb_desc_usage_mask; 125 uint32_t input_slot_usage_mask; 126 bool has_prolog; 127 bool dynamic_inputs; 128 bool dynamic_num_verts_per_prim; 129 uint32_t num_outputs; /* For NGG streamout only */ 130 uint64_t hs_inputs_read; /* Mask of HS inputs read (only used by linked LS) */ 131 } vs; 132 struct { 133 uint8_t output_usage_mask[VARYING_SLOT_VAR31 + 1]; 134 uint8_t num_stream_output_components[4]; 135 uint8_t output_streams[VARYING_SLOT_VAR31 + 1]; 136 uint8_t max_stream; 137 unsigned gsvs_vertex_size; 138 unsigned max_gsvs_emit_size; 139 unsigned vertices_in; 140 unsigned vertices_out; 141 unsigned input_prim; 142 unsigned output_prim; 143 unsigned invocations; 144 unsigned es_type; /* GFX9: VS or TES */ 145 uint8_t num_linked_inputs; 146 bool has_pipeline_stat_query; 147 } gs; 148 struct { 149 uint8_t output_usage_mask[VARYING_SLOT_VAR31 + 1]; 150 bool as_es; 151 enum tess_primitive_mode _primitive_mode; 152 enum gl_tess_spacing spacing; 153 bool ccw; 154 bool point_mode; 155 bool reads_tess_factors; 156 unsigned tcs_vertices_out; 157 uint8_t num_linked_inputs; /* Number of reserved per-vertex input slots in VRAM. */ 158 uint8_t num_linked_patch_inputs; /* Number of reserved per-patch input slots in VRAM. */ 159 uint8_t num_linked_outputs; 160 uint32_t num_outputs; /* For NGG streamout only */ 161 } tes; 162 struct { 163 bool uses_sample_shading; 164 bool needs_sample_positions; 165 bool needs_poly_line_smooth; 166 bool writes_memory; 167 bool writes_z; 168 bool writes_stencil; 169 bool writes_sample_mask; 170 bool writes_mrt0_alpha; 171 bool exports_mrtz_via_epilog; 172 bool has_pcoord; 173 bool prim_id_input; 174 bool layer_input; 175 bool viewport_index_input; 176 uint8_t input_clips_culls_mask; 177 uint32_t input_mask; 178 uint32_t input_per_primitive_mask; 179 uint32_t float32_shaded_mask; 180 uint32_t explicit_shaded_mask; 181 uint32_t explicit_strict_shaded_mask; 182 uint32_t float16_shaded_mask; 183 uint32_t float16_hi_shaded_mask; 184 uint32_t num_interp; 185 uint32_t num_prim_interp; 186 bool can_discard; 187 bool early_fragment_test; 188 bool post_depth_coverage; 189 bool reads_sample_mask_in; 190 bool reads_front_face; 191 bool reads_sample_id; 192 bool reads_frag_shading_rate; 193 bool reads_barycentric_model; 194 bool reads_persp_sample; 195 bool reads_persp_center; 196 bool reads_persp_centroid; 197 bool reads_linear_sample; 198 bool reads_linear_center; 199 bool reads_linear_centroid; 200 bool reads_fully_covered; 201 uint8_t reads_frag_coord_mask; 202 uint8_t reads_sample_pos_mask; 203 uint8_t depth_layout; 204 bool allow_flat_shading; 205 bool pops; /* Uses Primitive Ordered Pixel Shading (fragment shader interlock) */ 206 bool pops_is_per_sample; 207 bool mrt0_is_dual_src; 208 unsigned spi_ps_input_ena; 209 unsigned spi_ps_input_addr; 210 unsigned colors_written; 211 unsigned spi_shader_col_format; 212 unsigned cb_shader_mask; 213 uint8_t color0_written; 214 bool load_provoking_vtx; 215 bool load_rasterization_prim; 216 bool force_sample_iter_shading_rate; 217 bool uses_fbfetch_output; 218 bool has_epilog; 219 } ps; 220 struct { 221 bool uses_grid_size; 222 bool uses_block_id[3]; 223 bool uses_thread_id[3]; 224 bool uses_local_invocation_idx; 225 unsigned block_size[3]; 226 227 bool uses_rt; 228 bool uses_full_subgroups; 229 bool linear_taskmesh_dispatch; 230 bool has_query; /* Task shader only */ 231 232 bool regalloc_hang_bug; 233 } cs; 234 struct { 235 uint64_t tes_inputs_read; 236 uint64_t tes_patch_inputs_read; 237 unsigned tcs_vertices_out; 238 uint32_t num_lds_blocks; 239 uint8_t num_linked_inputs; /* Number of reserved per-vertex input slots in LDS. */ 240 uint8_t num_linked_outputs; /* Number of reserved per-vertex output slots in VRAM. */ 241 uint8_t num_linked_patch_outputs; /* Number of reserved per-patch output slots in VRAM. */ 242 uint8_t num_lds_per_vertex_outputs; /* Number of reserved per-vertex output slots in LDS. */ 243 uint8_t num_lds_per_patch_outputs; /* Number of reserved per-patch output slots in LDS. */ 244 bool tes_reads_tess_factors : 1; 245 } tcs; 246 struct { 247 enum mesa_prim output_prim; 248 bool needs_ms_scratch_ring; 249 bool has_task; /* If mesh shader is used together with a task shader. */ 250 bool has_query; 251 } ms; 252 253 struct radv_streamout_info so; 254 255 struct radv_legacy_gs_info gs_ring_info; 256 struct gfx10_ngg_info ngg_info; 257 258 /* Precomputed register values. */ 259 struct { 260 uint32_t pgm_lo; 261 uint32_t pgm_rsrc1; 262 uint32_t pgm_rsrc2; 263 uint32_t pgm_rsrc3; 264 265 struct { 266 uint32_t spi_shader_late_alloc_vs; 267 uint32_t spi_shader_pgm_rsrc3_vs; 268 uint32_t vgt_reuse_off; 269 } vs; 270 271 struct { 272 uint32_t vgt_esgs_ring_itemsize; 273 uint32_t vgt_gs_instance_cnt; 274 uint32_t vgt_gs_max_prims_per_subgroup; 275 uint32_t vgt_gs_vert_itemsize[4]; 276 uint32_t vgt_gsvs_ring_itemsize; 277 uint32_t vgt_gsvs_ring_offset[3]; 278 } gs; 279 280 struct { 281 uint32_t ge_cntl; /* Not fully precomputed. */ 282 uint32_t ge_max_output_per_subgroup; 283 uint32_t ge_ngg_subgrp_cntl; 284 uint32_t spi_shader_idx_format; 285 uint32_t vgt_primitiveid_en; 286 } ngg; 287 288 struct { 289 uint32_t spi_shader_gs_meshlet_dim; 290 uint32_t spi_shader_gs_meshlet_exp_alloc; 291 } ms; 292 293 struct { 294 uint32_t db_shader_control; 295 uint32_t pa_sc_shader_control; 296 uint32_t spi_ps_in_control; 297 uint32_t spi_shader_z_format; 298 uint32_t spi_gs_out_config_ps; 299 uint32_t pa_sc_hisz_control; 300 } ps; 301 302 struct { 303 uint32_t compute_num_thread_x; 304 uint32_t compute_num_thread_y; 305 uint32_t compute_num_thread_z; 306 uint32_t compute_resource_limits; 307 } cs; 308 309 /* Common registers between stages. */ 310 uint32_t vgt_gs_max_vert_out; 311 uint32_t vgt_gs_onchip_cntl; 312 uint32_t spi_shader_pgm_rsrc3_gs; 313 uint32_t spi_shader_pgm_rsrc4_gs; 314 uint32_t ge_pc_alloc; 315 uint32_t pa_cl_vs_out_cntl; 316 uint32_t spi_vs_out_config; 317 uint32_t spi_shader_pos_format; 318 uint32_t vgt_gs_instance_cnt; 319 } regs; 320 }; 321 322 void radv_nir_shader_info_init(gl_shader_stage stage, gl_shader_stage next_stage, struct radv_shader_info *info); 323 324 void radv_nir_shader_info_pass(struct radv_device *device, const struct nir_shader *nir, 325 const struct radv_shader_layout *layout, const struct radv_shader_stage_key *stage_key, 326 const struct radv_graphics_state_key *gfx_state, 327 const enum radv_pipeline_type pipeline_type, bool consider_force_vrs, 328 struct radv_shader_info *info); 329 330 void gfx10_get_ngg_info(const struct radv_device *device, struct radv_shader_info *es_info, 331 struct radv_shader_info *gs_info, struct gfx10_ngg_info *out); 332 333 void radv_nir_shader_info_link(struct radv_device *device, const struct radv_graphics_state_key *gfx_state, 334 struct radv_shader_stage *stages); 335 336 enum ac_hw_stage radv_select_hw_stage(const struct radv_shader_info *const info, const enum amd_gfx_level gfx_level); 337 338 uint64_t radv_gather_unlinked_io_mask(const uint64_t nir_mask); 339 340 uint64_t radv_gather_unlinked_patch_io_mask(const uint64_t nir_io_mask, const uint32_t nir_patch_io_mask); 341 342 #endif /* RADV_SHADER_INFO_H */ 343