1 /*
2  * Copyright © 2016 Red Hat.
3  * Copyright © 2016 Bas Nieuwenhuizen
4  *
5  * based in part on anv driver which is:
6  * Copyright © 2015 Intel Corporation
7  *
8  * SPDX-License-Identifier: MIT
9  */
10 
11 #include "radv_pipeline.h"
12 #include "meta/radv_meta.h"
13 #include "nir/nir.h"
14 #include "nir/nir_builder.h"
15 #include "nir/nir_serialize.h"
16 #include "nir/radv_nir.h"
17 #include "spirv/nir_spirv.h"
18 #include "util/disk_cache.h"
19 #include "util/os_time.h"
20 #include "util/u_atomic.h"
21 #include "radv_cs.h"
22 #include "radv_debug.h"
23 #include "radv_pipeline_rt.h"
24 #include "radv_rmv.h"
25 #include "radv_shader.h"
26 #include "radv_shader_args.h"
27 #include "vk_pipeline.h"
28 #include "vk_render_pass.h"
29 #include "vk_util.h"
30 
31 #include "util/u_debug.h"
32 #include "ac_binary.h"
33 #include "ac_nir.h"
34 #include "ac_shader_util.h"
35 #include "aco_interface.h"
36 #include "sid.h"
37 #include "vk_format.h"
38 #include "vk_nir_convert_ycbcr.h"
39 #include "vk_ycbcr_conversion.h"
40 #if AMD_LLVM_AVAILABLE
41 #include "ac_llvm_util.h"
42 #endif
43 
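/* Whether the shader expects its descriptor sets to be passed indirectly, i.e. whether the
 * AC_UD_INDIRECT_DESCRIPTOR_SETS user SGPR was allocated.
 */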
44 bool
45 radv_shader_need_indirect_descriptor_sets(const struct radv_shader *shader)
46 {
47    const struct radv_userdata_info *loc = radv_get_user_sgpr_info(shader, AC_UD_INDIRECT_DESCRIPTOR_SETS);
48    return loc->sgpr_idx != -1;
49 }
50 
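/* Whether internal shader representations should be kept for this pipeline: requested through the
 * create flags, forced by the RADV_DEBUG_DUMP_SHADERS debug flag, or because the device keeps
 * shader info.
 */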
51 bool
52 radv_pipeline_capture_shaders(const struct radv_device *device, VkPipelineCreateFlags2KHR flags)
53 {
54    const struct radv_physical_device *pdev = radv_device_physical(device);
55    const struct radv_instance *instance = radv_physical_device_instance(pdev);
56 
57    return (flags & VK_PIPELINE_CREATE_2_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR) ||
58           (instance->debug_flags & RADV_DEBUG_DUMP_SHADERS) || device->keep_shader_info;
59 }
60 
61 bool
62 radv_pipeline_capture_shader_stats(const struct radv_device *device, VkPipelineCreateFlags2KHR flags)
63 {
64    const struct radv_physical_device *pdev = radv_device_physical(device);
65    const struct radv_instance *instance = radv_physical_device_instance(pdev);
66 
67    return (flags & VK_PIPELINE_CREATE_2_CAPTURE_STATISTICS_BIT_KHR) ||
68           (instance->debug_flags & RADV_DEBUG_DUMP_SHADER_STATS) || device->keep_shader_info;
69 }
70 
71 void
72 radv_pipeline_init(struct radv_device *device, struct radv_pipeline *pipeline, enum radv_pipeline_type type)
73 {
74    vk_object_base_init(&device->vk, &pipeline->base, VK_OBJECT_TYPE_PIPELINE);
75 
76    pipeline->type = type;
77 }
78 
79 void
80 radv_pipeline_destroy(struct radv_device *device, struct radv_pipeline *pipeline,
81                       const VkAllocationCallbacks *allocator)
82 {
83    if (pipeline->cache_object)
84       vk_pipeline_cache_object_unref(&device->vk, pipeline->cache_object);
85 
86    switch (pipeline->type) {
87    case RADV_PIPELINE_GRAPHICS:
88       radv_destroy_graphics_pipeline(device, radv_pipeline_to_graphics(pipeline));
89       break;
90    case RADV_PIPELINE_GRAPHICS_LIB:
91       radv_destroy_graphics_lib_pipeline(device, radv_pipeline_to_graphics_lib(pipeline));
92       break;
93    case RADV_PIPELINE_COMPUTE:
94       radv_destroy_compute_pipeline(device, radv_pipeline_to_compute(pipeline));
95       break;
96    case RADV_PIPELINE_RAY_TRACING:
97       radv_destroy_ray_tracing_pipeline(device, radv_pipeline_to_ray_tracing(pipeline));
98       break;
99    default:
100       unreachable("invalid pipeline type");
101    }
102 
103    radv_rmv_log_resource_destroy(device, (uint64_t)radv_pipeline_to_handle(pipeline));
104    vk_object_base_finish(&pipeline->base);
105    vk_free2(&device->vk.alloc, allocator, pipeline);
106 }
107 
108 VKAPI_ATTR void VKAPI_CALL
109 radv_DestroyPipeline(VkDevice _device, VkPipeline _pipeline, const VkAllocationCallbacks *pAllocator)
110 {
111    VK_FROM_HANDLE(radv_device, device, _device);
112    VK_FROM_HANDLE(radv_pipeline, pipeline, _pipeline);
113 
114    if (!_pipeline)
115       return;
116 
117    radv_pipeline_destroy(device, pipeline, pAllocator);
118 }
119 
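/* Build the per-stage shader key from the pipeline create flags and the stage create info,
 * including robustness state, shader version overrides and required subgroup size.
 */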
120 struct radv_shader_stage_key
121 radv_pipeline_get_shader_key(const struct radv_device *device, const VkPipelineShaderStageCreateInfo *stage,
122                              VkPipelineCreateFlags2KHR flags, const void *pNext)
123 {
124    const struct radv_physical_device *pdev = radv_device_physical(device);
125    const struct radv_instance *instance = radv_physical_device_instance(pdev);
126    gl_shader_stage s = vk_to_mesa_shader_stage(stage->stage);
127    struct vk_pipeline_robustness_state rs;
128    struct radv_shader_stage_key key = {0};
129 
130    key.keep_statistic_info = radv_pipeline_capture_shader_stats(device, flags);
131 
132    if (flags & VK_PIPELINE_CREATE_2_DISABLE_OPTIMIZATION_BIT_KHR)
133       key.optimisations_disabled = 1;
134 
135    if (flags & VK_PIPELINE_CREATE_2_VIEW_INDEX_FROM_DEVICE_INDEX_BIT_KHR)
136       key.view_index_from_device_index = 1;
137 
138    if (flags & VK_PIPELINE_CREATE_INDIRECT_BINDABLE_BIT_NV)
139       key.indirect_bindable = 1;
140 
141    if (stage->stage & RADV_GRAPHICS_STAGE_BITS) {
142       key.version = instance->drirc.override_graphics_shader_version;
143    } else if (stage->stage & RADV_RT_STAGE_BITS) {
144       key.version = instance->drirc.override_ray_tracing_shader_version;
145    } else {
146       assert(stage->stage == VK_SHADER_STAGE_COMPUTE_BIT);
147       key.version = instance->drirc.override_compute_shader_version;
148    }
149 
150    vk_pipeline_robustness_state_fill(&device->vk, &rs, pNext, stage->pNext);
151 
152    radv_set_stage_key_robustness(&rs, s, &key);
153 
154    const VkPipelineShaderStageRequiredSubgroupSizeCreateInfo *const subgroup_size =
155       vk_find_struct_const(stage->pNext, PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO);
156 
157    if (subgroup_size) {
158       if (subgroup_size->requiredSubgroupSize == 32)
159          key.subgroup_required_size = RADV_REQUIRED_WAVE32;
160       else if (subgroup_size->requiredSubgroupSize == 64)
161          key.subgroup_required_size = RADV_REQUIRED_WAVE64;
162       else
163          unreachable("Unsupported required subgroup size.");
164    }
165 
166    if (stage->flags & VK_PIPELINE_SHADER_STAGE_CREATE_REQUIRE_FULL_SUBGROUPS_BIT) {
167       key.subgroup_require_full = 1;
168    }
169 
170    return key;
171 }
172 
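/* Fill a radv_shader_stage from the stage create info: record the SPIR-V source (shader module or
 * inline VkShaderModuleCreateInfo), entrypoint, specialization info, stage key and layout, and
 * compute the per-stage SHA1 used for caching.
 */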
173 void
174 radv_pipeline_stage_init(VkPipelineCreateFlags2KHR pipeline_flags,
175                          const VkPipelineShaderStageCreateInfo *sinfo,
176                          const struct radv_pipeline_layout *pipeline_layout,
177                          const struct radv_shader_stage_key *stage_key, struct radv_shader_stage *out_stage)
178 {
179    const VkShaderModuleCreateInfo *minfo = vk_find_struct_const(sinfo->pNext, SHADER_MODULE_CREATE_INFO);
180    const VkPipelineShaderStageModuleIdentifierCreateInfoEXT *iinfo =
181       vk_find_struct_const(sinfo->pNext, PIPELINE_SHADER_STAGE_MODULE_IDENTIFIER_CREATE_INFO_EXT);
182 
183    if (sinfo->module == VK_NULL_HANDLE && !minfo && !iinfo)
184       return;
185 
186    memset(out_stage, 0, sizeof(*out_stage));
187 
188    out_stage->stage = vk_to_mesa_shader_stage(sinfo->stage);
189    out_stage->next_stage = MESA_SHADER_NONE;
190    out_stage->entrypoint = sinfo->pName;
191    out_stage->spec_info = sinfo->pSpecializationInfo;
192    out_stage->feedback.flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT;
193    out_stage->key = *stage_key;
194 
195    if (sinfo->module != VK_NULL_HANDLE) {
196       struct vk_shader_module *module = vk_shader_module_from_handle(sinfo->module);
197 
198       out_stage->spirv.data = module->data;
199       out_stage->spirv.size = module->size;
200       out_stage->spirv.object = &module->base;
201 
202       if (module->nir)
203          out_stage->internal_nir = module->nir;
204    } else if (minfo) {
205       out_stage->spirv.data = (const char *)minfo->pCode;
206       out_stage->spirv.size = minfo->codeSize;
207    }
208 
209    radv_shader_layout_init(pipeline_layout, out_stage->stage, &out_stage->layout);
210 
211    vk_pipeline_hash_shader_stage(pipeline_flags, sinfo, NULL, out_stage->shader_sha1);
212 }
213 
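/* Copy the parts of the pipeline layout that the compiler needs (set layouts, dynamic offsets,
 * push constant size) into the per-stage radv_shader_layout.
 */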
214 void
215 radv_shader_layout_init(const struct radv_pipeline_layout *pipeline_layout, gl_shader_stage stage,
216                         struct radv_shader_layout *layout)
217 {
218    layout->num_sets = pipeline_layout->num_sets;
219    for (unsigned i = 0; i < pipeline_layout->num_sets; i++) {
220       layout->set[i].layout = pipeline_layout->set[i].layout;
221       layout->set[i].dynamic_offset_start = pipeline_layout->set[i].dynamic_offset_start;
222    }
223 
224    layout->push_constant_size = pipeline_layout->push_constant_size;
225    layout->use_dynamic_descriptors = pipeline_layout->dynamic_offset_count &&
226                                      (pipeline_layout->dynamic_shader_stages & mesa_to_vk_shader_stage(stage));
227 }
228 
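/* Callback for nir_vk_lower_ycbcr_tex: return the immutable YCbCr sampler state for the given
 * set/binding/array index, or NULL if the binding has no YCbCr samplers.
 */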
229 static const struct vk_ycbcr_conversion_state *
230 ycbcr_conversion_lookup(const void *data, uint32_t set, uint32_t binding, uint32_t array_index)
231 {
232    const struct radv_shader_layout *layout = data;
233 
234    const struct radv_descriptor_set_layout *set_layout = layout->set[set].layout;
235    const struct vk_ycbcr_conversion_state *ycbcr_samplers = radv_immutable_ycbcr_samplers(set_layout, binding);
236 
237    if (!ycbcr_samplers)
238       return NULL;
239 
240    return ycbcr_samplers + array_index;
241 }
242 
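/* Callback for nir_lower_bit_size: return the bit size (32) that a scalar 8/16-bit ALU
 * instruction should be widened to, or 0 to keep its original bit size.
 */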
243 static unsigned
244 lower_bit_size_callback(const nir_instr *instr, void *_)
245 {
246    struct radv_device *device = _;
247    const struct radv_physical_device *pdev = radv_device_physical(device);
248    enum amd_gfx_level chip = pdev->info.gfx_level;
249 
250    if (instr->type != nir_instr_type_alu)
251       return 0;
252    nir_alu_instr *alu = nir_instr_as_alu(instr);
253 
254    /* If an instruction is not scalarized by this point,
255     * it can be emitted as a packed instruction. */
256    if (alu->def.num_components > 1)
257       return 0;
258 
259    if (alu->def.bit_size & (8 | 16)) {
260       unsigned bit_size = alu->def.bit_size;
261       switch (alu->op) {
262       case nir_op_bitfield_select:
263       case nir_op_imul_high:
264       case nir_op_umul_high:
265       case nir_op_uadd_carry:
266       case nir_op_usub_borrow:
267          return 32;
268       case nir_op_iabs:
269       case nir_op_imax:
270       case nir_op_umax:
271       case nir_op_imin:
272       case nir_op_umin:
273       case nir_op_ishr:
274       case nir_op_ushr:
275       case nir_op_ishl:
276       case nir_op_isign:
277       case nir_op_uadd_sat:
278       case nir_op_usub_sat:
279          return (bit_size == 8 || !(chip >= GFX8 && alu->def.divergent)) ? 32 : 0;
280       case nir_op_iadd_sat:
281       case nir_op_isub_sat:
282          return bit_size == 8 || !alu->def.divergent ? 32 : 0;
283 
284       default:
285          return 0;
286       }
287    }
288 
289    if (nir_src_bit_size(alu->src[0].src) & (8 | 16)) {
290       unsigned bit_size = nir_src_bit_size(alu->src[0].src);
291       switch (alu->op) {
292       case nir_op_bit_count:
293       case nir_op_find_lsb:
294       case nir_op_ufind_msb:
295          return 32;
296       case nir_op_ilt:
297       case nir_op_ige:
298       case nir_op_ieq:
299       case nir_op_ine:
300       case nir_op_ult:
301       case nir_op_uge:
302       case nir_op_bitz:
303       case nir_op_bitnz:
304          return (bit_size == 8 || !(chip >= GFX8 && alu->def.divergent)) ? 32 : 0;
305       default:
306          return 0;
307       }
308    }
309 
310    return 0;
311 }
312 
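/* Vectorization width callback: allow 2-wide vectors only for 16-bit ALU instructions that map to
 * packed math on GFX9+, otherwise keep instructions scalar.
 */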
313 static uint8_t
314 opt_vectorize_callback(const nir_instr *instr, const void *_)
315 {
316    if (instr->type != nir_instr_type_alu)
317       return 0;
318 
319    const struct radv_device *device = _;
320    const struct radv_physical_device *pdev = radv_device_physical(device);
321    enum amd_gfx_level chip = pdev->info.gfx_level;
322    if (chip < GFX9)
323       return 1;
324 
325    const nir_alu_instr *alu = nir_instr_as_alu(instr);
326    const unsigned bit_size = alu->def.bit_size;
327    if (bit_size != 16)
328       return 1;
329 
330    return aco_nir_op_supports_packed_math_16bit(alu) ? 2 : 1;
331 }
332 
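/* Callback for nir_lower_non_uniform_access that reports which source components the lowering has
 * to consider for a non-uniform resource handle.
 */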
333 static nir_component_mask_t
334 non_uniform_access_callback(const nir_src *src, void *_)
335 {
336    if (src->ssa->num_components == 1)
337       return 0x1;
338    return nir_chase_binding(*src).success ? 0x2 : 0x3;
339 }
340 
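/* Run the lowering/optimization passes that depend on the pipeline state, the shader stage and
 * the target GPU, preparing the NIR for the backend compiler (ACO or LLVM).
 */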
341 void
342 radv_postprocess_nir(struct radv_device *device, const struct radv_graphics_state_key *gfx_state,
343                      struct radv_shader_stage *stage)
344 {
345    const struct radv_physical_device *pdev = radv_device_physical(device);
346    const struct radv_instance *instance = radv_physical_device_instance(pdev);
347    enum amd_gfx_level gfx_level = pdev->info.gfx_level;
348    bool progress;
349 
350    /* Wave and workgroup size should already be filled. */
351    assert(stage->info.wave_size && stage->info.workgroup_size);
352 
353    if (stage->stage == MESA_SHADER_FRAGMENT) {
354       if (!stage->key.optimisations_disabled) {
355          NIR_PASS(_, stage->nir, nir_opt_cse);
356       }
357       NIR_PASS(_, stage->nir, radv_nir_lower_fs_intrinsics, stage, gfx_state);
358    }
359 
360    /* LLVM could support more of these in theory. */
361    bool use_llvm = radv_use_llvm_for_stage(pdev, stage->stage);
362    bool has_inverse_ballot = true;
363 #if AMD_LLVM_AVAILABLE
364    has_inverse_ballot = !use_llvm || LLVM_VERSION_MAJOR >= 17;
365 #endif
366    radv_nir_opt_tid_function_options tid_options = {
367       .use_masked_swizzle_amd = true,
368       .use_dpp16_shift_amd = !use_llvm && gfx_level >= GFX8,
369       .use_clustered_rotate = !use_llvm,
370       .hw_subgroup_size = stage->info.wave_size,
371       .hw_ballot_bit_size = has_inverse_ballot ? stage->info.wave_size : 0,
372       .hw_ballot_num_comp = has_inverse_ballot ? 1 : 0,
373    };
374    NIR_PASS(_, stage->nir, radv_nir_opt_tid_function, &tid_options);
375 
376    enum nir_lower_non_uniform_access_type lower_non_uniform_access_types =
377       nir_lower_non_uniform_ubo_access | nir_lower_non_uniform_ssbo_access | nir_lower_non_uniform_texture_access |
378       nir_lower_non_uniform_image_access;
379 
380    /* In practice, most shaders do not have non-uniform-qualified
381     * accesses (see
382     * https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/17558#note_1475069)
383     * so a cheaper check, which fails for most shaders, is run first.
384     */
385    if (nir_has_non_uniform_access(stage->nir, lower_non_uniform_access_types)) {
386       if (!stage->key.optimisations_disabled) {
387          NIR_PASS(_, stage->nir, nir_opt_non_uniform_access);
388       }
389 
390       if (!radv_use_llvm_for_stage(pdev, stage->stage)) {
391          nir_lower_non_uniform_access_options options = {
392             .types = lower_non_uniform_access_types,
393             .callback = &non_uniform_access_callback,
394             .callback_data = NULL,
395          };
396          NIR_PASS(_, stage->nir, nir_lower_non_uniform_access, &options);
397       }
398    }
399    NIR_PASS(_, stage->nir, nir_lower_memory_model);
400 
401    nir_load_store_vectorize_options vectorize_opts = {
402       .modes = nir_var_mem_ssbo | nir_var_mem_ubo | nir_var_mem_push_const | nir_var_mem_shared | nir_var_mem_global |
403                nir_var_shader_temp,
404       .callback = ac_nir_mem_vectorize_callback,
405       .cb_data = &gfx_level,
406       .robust_modes = 0,
407       /* On GFX6, read2/write2 is out-of-bounds if the offset register is negative, even if
408        * the final offset is not.
409        */
410       .has_shared2_amd = gfx_level >= GFX7,
411    };
412 
413    if (stage->key.uniform_robustness2)
414       vectorize_opts.robust_modes |= nir_var_mem_ubo;
415 
416    if (stage->key.storage_robustness2)
417       vectorize_opts.robust_modes |= nir_var_mem_ssbo;
418 
419    if (!stage->key.optimisations_disabled) {
420       progress = false;
421       NIR_PASS(progress, stage->nir, nir_opt_load_store_vectorize, &vectorize_opts);
422       if (progress) {
423          NIR_PASS(_, stage->nir, nir_copy_prop);
424          NIR_PASS(_, stage->nir, nir_opt_shrink_stores, !instance->drirc.disable_shrink_image_store);
425 
426          /* Ensure vectorized load_push_constant intrinsics still have constant offsets, for
427           * radv_nir_apply_pipeline_layout. */
428          if (stage->args.ac.inline_push_const_mask)
429             NIR_PASS(_, stage->nir, nir_opt_constant_folding);
430 
431          /* Gather info again, to update whether 8/16-bit are used. */
432          nir_shader_gather_info(stage->nir, nir_shader_get_entrypoint(stage->nir));
433       }
434    }
435 
436    NIR_PASS(
437       _, stage->nir, ac_nir_lower_subdword_loads,
438       (ac_nir_lower_subdword_options){.modes_1_comp = nir_var_mem_ubo | nir_var_mem_push_const,
439                                       .modes_N_comps = nir_var_mem_ubo | nir_var_mem_push_const | nir_var_mem_ssbo});
440 
441    progress = false;
442    NIR_PASS(progress, stage->nir, nir_vk_lower_ycbcr_tex, ycbcr_conversion_lookup, &stage->layout);
443    /* Gather info again in case nir_vk_lower_ycbcr_tex emitted resinfo instructions. */
444    if (progress)
445       nir_shader_gather_info(stage->nir, nir_shader_get_entrypoint(stage->nir));
446 
447    bool fix_derivs_in_divergent_cf =
448       stage->stage == MESA_SHADER_FRAGMENT && !radv_use_llvm_for_stage(pdev, stage->stage);
449    if (fix_derivs_in_divergent_cf) {
450       NIR_PASS(_, stage->nir, nir_convert_to_lcssa, true, true);
451       nir_divergence_analysis(stage->nir);
452    }
453    NIR_PASS(_, stage->nir, ac_nir_lower_tex,
454             &(ac_nir_lower_tex_options){
455                .gfx_level = gfx_level,
456                .lower_array_layer_round_even = !pdev->info.conformant_trunc_coord || device->disable_trunc_coord,
457                .fix_derivs_in_divergent_cf = fix_derivs_in_divergent_cf,
458                .max_wqm_vgprs = 64, // TODO: improve spiller and RA support for linear VGPRs
459             });
460    if (fix_derivs_in_divergent_cf)
461       NIR_PASS(_, stage->nir, nir_opt_remove_phis); /* cleanup LCSSA phis */
462 
463    if (stage->nir->info.uses_resource_info_query)
464       NIR_PASS(_, stage->nir, ac_nir_lower_resinfo, gfx_level);
465 
466    NIR_PASS_V(stage->nir, radv_nir_apply_pipeline_layout, device, stage);
467 
468    if (!stage->key.optimisations_disabled) {
469       NIR_PASS(_, stage->nir, nir_opt_shrink_vectors, true);
470    }
471 
472    NIR_PASS(_, stage->nir, nir_lower_alu_width, opt_vectorize_callback, device);
473 
474    nir_move_options sink_opts = nir_move_const_undef | nir_move_copies;
475 
476    if (!stage->key.optimisations_disabled) {
477       NIR_PASS(_, stage->nir, nir_opt_licm);
478       if (stage->stage != MESA_SHADER_FRAGMENT || !pdev->cache_key.disable_sinking_load_input_fs)
479          sink_opts |= nir_move_load_input;
480 
481       NIR_PASS(_, stage->nir, nir_opt_sink, sink_opts);
482       NIR_PASS(_, stage->nir, nir_opt_move, nir_move_load_input | nir_move_const_undef | nir_move_copies);
483    }
484 
485    /* Lower VS inputs. We need to do this after nir_opt_sink, because
486     * load_input can be reordered, but buffer loads can't.
487     */
488    if (stage->stage == MESA_SHADER_VERTEX) {
489       NIR_PASS(_, stage->nir, radv_nir_lower_vs_inputs, stage, gfx_state, &pdev->info);
490    }
491 
492    /* Lower I/O intrinsics to memory instructions. */
493    bool is_last_vgt_stage = radv_is_last_vgt_stage(stage);
494    bool io_to_mem = radv_nir_lower_io_to_mem(device, stage);
495    bool lowered_ngg = stage->info.is_ngg && is_last_vgt_stage;
496    if (lowered_ngg) {
497       radv_lower_ngg(device, stage, gfx_state);
498    } else if (is_last_vgt_stage) {
499       if (stage->stage != MESA_SHADER_GEOMETRY) {
500          NIR_PASS_V(stage->nir, ac_nir_lower_legacy_vs, gfx_level,
501                     stage->info.outinfo.clip_dist_mask | stage->info.outinfo.cull_dist_mask,
502                     stage->info.outinfo.vs_output_param_offset, stage->info.outinfo.param_exports,
503                     stage->info.outinfo.export_prim_id, false, false, false, stage->info.force_vrs_per_vertex);
504 
505       } else {
506          bool emulate_ngg_gs_query_pipeline_stat = pdev->emulate_ngg_gs_query_pipeline_stat;
507 
508          ac_nir_gs_output_info gs_out_info = {
509             .streams = stage->info.gs.output_streams,
510             .usage_mask = stage->info.gs.output_usage_mask,
511          };
512          NIR_PASS_V(stage->nir, ac_nir_lower_legacy_gs, false, emulate_ngg_gs_query_pipeline_stat, &gs_out_info);
513       }
514    } else if (stage->stage == MESA_SHADER_FRAGMENT) {
515       ac_nir_lower_ps_options options = {
516          .gfx_level = gfx_level,
517          .family = pdev->info.family,
518          .use_aco = !radv_use_llvm_for_stage(pdev, stage->stage),
519          .uses_discard = true,
520          .alpha_func = COMPARE_FUNC_ALWAYS,
521          .no_color_export = stage->info.ps.has_epilog,
522          .no_depth_export = stage->info.ps.exports_mrtz_via_epilog,
523 
524          .bc_optimize_for_persp = G_0286CC_PERSP_CENTER_ENA(stage->info.ps.spi_ps_input_ena) &&
525                                   G_0286CC_PERSP_CENTROID_ENA(stage->info.ps.spi_ps_input_ena),
526          .bc_optimize_for_linear = G_0286CC_LINEAR_CENTER_ENA(stage->info.ps.spi_ps_input_ena) &&
527                                    G_0286CC_LINEAR_CENTROID_ENA(stage->info.ps.spi_ps_input_ena),
528       };
529 
530       if (!options.no_color_export) {
531          options.dual_src_blend_swizzle = gfx_state->ps.epilog.mrt0_is_dual_src && gfx_level >= GFX11;
532          options.color_is_int8 = gfx_state->ps.epilog.color_is_int8;
533          options.color_is_int10 = gfx_state->ps.epilog.color_is_int10;
534          options.enable_mrt_output_nan_fixup =
535             gfx_state->ps.epilog.enable_mrt_output_nan_fixup && !stage->nir->info.internal;
536          /* Need to filter out unwritten color slots. */
537          options.spi_shader_col_format = gfx_state->ps.epilog.spi_shader_col_format & stage->info.ps.colors_written;
538          options.alpha_to_one = gfx_state->ps.epilog.alpha_to_one;
539       }
540 
541       if (!options.no_depth_export) {
542          /* Unlike gfx_state.ps.alpha_to_coverage_via_mrtz,
543           * radv_shader_info.ps.writes_mrt0_alpha also requires that a depth/stencil/sample_mask
544           * export exists. ac_nir_lower_ps() requires this field to reflect whether alpha via
545           * mrtz is really present.
546           */
547          options.alpha_to_coverage_via_mrtz = stage->info.ps.writes_mrt0_alpha;
548       }
549 
550       NIR_PASS_V(stage->nir, ac_nir_lower_ps, &options);
551    }
552 
553    if (radv_shader_should_clear_lds(device, stage->nir)) {
554       const unsigned chunk_size = 16; /* max single store size */
555       const unsigned shared_size = ALIGN(stage->nir->info.shared_size, chunk_size);
556       NIR_PASS(_, stage->nir, nir_clear_shared_memory, shared_size, chunk_size);
557    }
558 
559    NIR_PASS(_, stage->nir, nir_lower_int64);
560 
561    NIR_PASS(_, stage->nir, nir_opt_idiv_const, 8);
562 
563    NIR_PASS(_, stage->nir, nir_lower_idiv,
564             &(nir_lower_idiv_options){
565                .allow_fp16 = gfx_level >= GFX9,
566             });
567 
568    if (radv_use_llvm_for_stage(pdev, stage->stage))
569       NIR_PASS_V(stage->nir, nir_lower_io_to_scalar, nir_var_mem_global, NULL, NULL);
570 
571    NIR_PASS(_, stage->nir, ac_nir_lower_global_access);
572    NIR_PASS_V(stage->nir, ac_nir_lower_intrinsics_to_args, gfx_level, radv_select_hw_stage(&stage->info, gfx_level),
573               &stage->args.ac);
574    NIR_PASS_V(stage->nir, radv_nir_lower_abi, gfx_level, stage, gfx_state, pdev->info.address32_hi);
575    radv_optimize_nir_algebraic(
576       stage->nir, io_to_mem || lowered_ngg || stage->stage == MESA_SHADER_COMPUTE || stage->stage == MESA_SHADER_TASK,
577       gfx_level >= GFX7);
578 
579    NIR_PASS(_, stage->nir, nir_lower_fp16_casts, nir_lower_fp16_split_fp64);
580 
581    if (stage->nir->info.bit_sizes_int & (8 | 16)) {
582       if (gfx_level >= GFX8) {
583          NIR_PASS(_, stage->nir, nir_convert_to_lcssa, true, true);
584          nir_divergence_analysis(stage->nir);
585       }
586 
587       if (nir_lower_bit_size(stage->nir, lower_bit_size_callback, device)) {
588          NIR_PASS(_, stage->nir, nir_opt_constant_folding);
589       }
590 
591       if (gfx_level >= GFX8)
592          NIR_PASS(_, stage->nir, nir_opt_remove_phis); /* cleanup LCSSA phis */
593    }
594    if (gfx_level >= GFX9) {
595       bool separate_g16 = gfx_level >= GFX10;
596       struct nir_opt_tex_srcs_options opt_srcs_options[] = {
597          {
598             .sampler_dims = ~(BITFIELD_BIT(GLSL_SAMPLER_DIM_CUBE) | BITFIELD_BIT(GLSL_SAMPLER_DIM_BUF)),
599             .src_types = (1 << nir_tex_src_coord) | (1 << nir_tex_src_lod) | (1 << nir_tex_src_bias) |
600                          (1 << nir_tex_src_min_lod) | (1 << nir_tex_src_ms_index) |
601                          (separate_g16 ? 0 : (1 << nir_tex_src_ddx) | (1 << nir_tex_src_ddy)),
602          },
603          {
604             .sampler_dims = ~BITFIELD_BIT(GLSL_SAMPLER_DIM_CUBE),
605             .src_types = (1 << nir_tex_src_ddx) | (1 << nir_tex_src_ddy),
606          },
607       };
608       struct nir_opt_16bit_tex_image_options opt_16bit_options = {
609          .rounding_mode = nir_rounding_mode_undef,
610          .opt_tex_dest_types = nir_type_float | nir_type_int | nir_type_uint,
611          .opt_image_dest_types = nir_type_float | nir_type_int | nir_type_uint,
612          .integer_dest_saturates = true,
613          .opt_image_store_data = true,
614          .opt_image_srcs = true,
615          .opt_srcs_options_count = separate_g16 ? 2 : 1,
616          .opt_srcs_options = opt_srcs_options,
617       };
618       NIR_PASS(_, stage->nir, nir_opt_16bit_tex_image, &opt_16bit_options);
619 
620       if (!stage->key.optimisations_disabled &&
621           ((stage->nir->info.bit_sizes_int | stage->nir->info.bit_sizes_float) & 16)) {
622          NIR_PASS(_, stage->nir, nir_opt_vectorize, opt_vectorize_callback, device);
623       }
624    }
625 
626    /* cleanup passes */
627    NIR_PASS(_, stage->nir, nir_lower_alu_width, opt_vectorize_callback, device);
628 
629    /* This pass changes the global float control mode to RTZ, so it can't be used
630     * with LLVM (which only supports RTNE), nor with RT pipelines, where the mode needs
631     * to match across separately compiled stages.
632     */
633    if (!radv_use_llvm_for_stage(pdev, stage->stage) && !gl_shader_stage_is_rt(stage->stage))
634       NIR_PASS(_, stage->nir, ac_nir_opt_pack_half, gfx_level);
635 
636    NIR_PASS(_, stage->nir, nir_lower_load_const_to_scalar);
637    NIR_PASS(_, stage->nir, nir_copy_prop);
638    NIR_PASS(_, stage->nir, nir_opt_dce);
639 
640    if (!stage->key.optimisations_disabled) {
641       sink_opts |= nir_move_comparisons | nir_move_load_ubo | nir_move_load_ssbo | nir_move_alu;
642       NIR_PASS(_, stage->nir, nir_opt_sink, sink_opts);
643 
644       nir_move_options move_opts = nir_move_const_undef | nir_move_load_ubo | nir_move_load_input |
645                                    nir_move_comparisons | nir_move_copies | nir_move_alu;
646       NIR_PASS(_, stage->nir, nir_opt_move, move_opts);
647 
648       /* Run nir_opt_move again to make sure that comparisons are as close as possible to the first use to prevent SCC
649        * spilling.
650        */
651       NIR_PASS(_, stage->nir, nir_opt_move, nir_move_comparisons);
652    }
653 }
654 
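/* LDS is only cleared for compute-like stages (compute, mesh, task) that actually use shared
 * memory, and only when the clear_lds drirc option is enabled.
 */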
655 bool
656 radv_shader_should_clear_lds(const struct radv_device *device, const nir_shader *shader)
657 {
658    const struct radv_physical_device *pdev = radv_device_physical(device);
659    const struct radv_instance *instance = radv_physical_device_instance(pdev);
660 
661    return (shader->info.stage == MESA_SHADER_COMPUTE || shader->info.stage == MESA_SHADER_MESH ||
662            shader->info.stage == MESA_SHADER_TASK) &&
663           shader->info.shared_size > 0 && instance->drirc.clear_lds;
664 }
665 
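/* Number of executables exposed via VK_KHR_pipeline_executable_properties: one per compiled
 * shader (including ray tracing stages), plus one for the GS copy shader when present.
 */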
666 static uint32_t
667 radv_get_executable_count(struct radv_pipeline *pipeline)
668 {
669    uint32_t ret = 0;
670 
671    if (pipeline->type == RADV_PIPELINE_RAY_TRACING) {
672       struct radv_ray_tracing_pipeline *rt_pipeline = radv_pipeline_to_ray_tracing(pipeline);
673       for (uint32_t i = 0; i < rt_pipeline->stage_count; i++)
674          ret += rt_pipeline->stages[i].shader ? 1 : 0;
675    }
676 
677    for (int i = 0; i < MESA_VULKAN_SHADER_STAGES; ++i) {
678       if (!pipeline->shaders[i])
679          continue;
680 
681       ret += 1u;
682       if (i == MESA_SHADER_GEOMETRY && pipeline->gs_copy_shader) {
683          ret += 1u;
684       }
685    }
686 
687    return ret;
688 }
689 
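/* Map an executable index back to its shader and stage, walking ray tracing stages first and then
 * the regular stages (the GS copy shader is counted right after GS).
 */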
690 static struct radv_shader *
691 radv_get_shader_from_executable_index(struct radv_pipeline *pipeline, int index, gl_shader_stage *stage)
692 {
693    if (pipeline->type == RADV_PIPELINE_RAY_TRACING) {
694       struct radv_ray_tracing_pipeline *rt_pipeline = radv_pipeline_to_ray_tracing(pipeline);
695       for (uint32_t i = 0; i < rt_pipeline->stage_count; i++) {
696          struct radv_ray_tracing_stage *rt_stage = &rt_pipeline->stages[i];
697          if (!rt_stage->shader)
698             continue;
699 
700          if (!index) {
701             *stage = rt_stage->stage;
702             return rt_stage->shader;
703          }
704 
705          index--;
706       }
707    }
708 
709    for (int i = 0; i < MESA_VULKAN_SHADER_STAGES; ++i) {
710       if (!pipeline->shaders[i])
711          continue;
712       if (!index) {
713          *stage = i;
714          return pipeline->shaders[i];
715       }
716 
717       --index;
718 
719       if (i == MESA_SHADER_GEOMETRY && pipeline->gs_copy_shader) {
720          if (!index) {
721             *stage = i;
722             return pipeline->gs_copy_shader;
723          }
724          --index;
725       }
726    }
727 
728    *stage = -1;
729    return NULL;
730 }
731 
732 /* Basically strlcpy (which does not exist on Linux) specialized for
733  * descriptions. */
734 static void
735 desc_copy(char *desc, const char *src)
736 {
737    int len = strlen(src);
738    assert(len < VK_MAX_DESCRIPTION_SIZE);
739    memcpy(desc, src, len);
740    memset(desc + len, 0, VK_MAX_DESCRIPTION_SIZE - len);
741 }
742 
743 VKAPI_ATTR VkResult VKAPI_CALL
744 radv_GetPipelineExecutablePropertiesKHR(VkDevice _device, const VkPipelineInfoKHR *pPipelineInfo,
745                                         uint32_t *pExecutableCount, VkPipelineExecutablePropertiesKHR *pProperties)
746 {
747    VK_FROM_HANDLE(radv_pipeline, pipeline, pPipelineInfo->pipeline);
748    const uint32_t total_count = radv_get_executable_count(pipeline);
749 
750    if (!pProperties) {
751       *pExecutableCount = total_count;
752       return VK_SUCCESS;
753    }
754 
755    const uint32_t count = MIN2(total_count, *pExecutableCount);
756    for (uint32_t executable_idx = 0; executable_idx < count; executable_idx++) {
757       gl_shader_stage stage;
758       struct radv_shader *shader = radv_get_shader_from_executable_index(pipeline, executable_idx, &stage);
759 
760       pProperties[executable_idx].stages = mesa_to_vk_shader_stage(stage);
761 
762       const char *name = _mesa_shader_stage_to_string(stage);
763       const char *description = NULL;
764       switch (stage) {
765       case MESA_SHADER_VERTEX:
766          description = "Vulkan Vertex Shader";
767          break;
768       case MESA_SHADER_TESS_CTRL:
769          if (!pipeline->shaders[MESA_SHADER_VERTEX]) {
770             pProperties[executable_idx].stages |= VK_SHADER_STAGE_VERTEX_BIT;
771             name = "vertex + tessellation control";
772             description = "Combined Vulkan Vertex and Tessellation Control Shaders";
773          } else {
774             description = "Vulkan Tessellation Control Shader";
775          }
776          break;
777       case MESA_SHADER_TESS_EVAL:
778          description = "Vulkan Tessellation Evaluation Shader";
779          break;
780       case MESA_SHADER_GEOMETRY:
781          if (shader->info.type == RADV_SHADER_TYPE_GS_COPY) {
782             name = "geometry copy";
783             description = "Extra shader stage that loads the GS output ringbuffer into the rasterizer";
784             break;
785          }
786 
787          if (pipeline->shaders[MESA_SHADER_TESS_CTRL] && !pipeline->shaders[MESA_SHADER_TESS_EVAL]) {
788             pProperties[executable_idx].stages |= VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT;
789             name = "tessellation evaluation + geometry";
790             description = "Combined Vulkan Tessellation Evaluation and Geometry Shaders";
791          } else if (!pipeline->shaders[MESA_SHADER_TESS_CTRL] && !pipeline->shaders[MESA_SHADER_VERTEX]) {
792             pProperties[executable_idx].stages |= VK_SHADER_STAGE_VERTEX_BIT;
793             name = "vertex + geometry";
794             description = "Combined Vulkan Vertex and Geometry Shaders";
795          } else {
796             description = "Vulkan Geometry Shader";
797          }
798          break;
799       case MESA_SHADER_FRAGMENT:
800          description = "Vulkan Fragment Shader";
801          break;
802       case MESA_SHADER_COMPUTE:
803          description = "Vulkan Compute Shader";
804          break;
805       case MESA_SHADER_MESH:
806          description = "Vulkan Mesh Shader";
807          break;
808       case MESA_SHADER_TASK:
809          description = "Vulkan Task Shader";
810          break;
811       case MESA_SHADER_RAYGEN:
812          description = "Vulkan Ray Generation Shader";
813          break;
814       case MESA_SHADER_ANY_HIT:
815          description = "Vulkan Any-Hit Shader";
816          break;
817       case MESA_SHADER_CLOSEST_HIT:
818          description = "Vulkan Closest-Hit Shader";
819          break;
820       case MESA_SHADER_MISS:
821          description = "Vulkan Miss Shader";
822          break;
823       case MESA_SHADER_INTERSECTION:
824          description = "Shader responsible for traversing the acceleration structure";
825          break;
826       case MESA_SHADER_CALLABLE:
827          description = "Vulkan Callable Shader";
828          break;
829       default:
830          unreachable("Unsupported shader stage");
831       }
832 
833       pProperties[executable_idx].subgroupSize = shader->info.wave_size;
834       desc_copy(pProperties[executable_idx].name, name);
835       desc_copy(pProperties[executable_idx].description, description);
836    }
837 
838    VkResult result = *pExecutableCount < total_count ? VK_INCOMPLETE : VK_SUCCESS;
839    *pExecutableCount = count;
840    return result;
841 }
842 
843 VKAPI_ATTR VkResult VKAPI_CALL
844 radv_GetPipelineExecutableStatisticsKHR(VkDevice _device, const VkPipelineExecutableInfoKHR *pExecutableInfo,
845                                         uint32_t *pStatisticCount, VkPipelineExecutableStatisticKHR *pStatistics)
846 {
847    VK_FROM_HANDLE(radv_device, device, _device);
848    VK_FROM_HANDLE(radv_pipeline, pipeline, pExecutableInfo->pipeline);
849    gl_shader_stage stage;
850    struct radv_shader *shader =
851       radv_get_shader_from_executable_index(pipeline, pExecutableInfo->executableIndex, &stage);
852 
853    const struct radv_physical_device *pdev = radv_device_physical(device);
854    const enum amd_gfx_level gfx_level = pdev->info.gfx_level;
855 
856    unsigned lds_increment =
857       gfx_level >= GFX11 && stage == MESA_SHADER_FRAGMENT ? 1024 : pdev->info.lds_encode_granularity;
858 
859    VkPipelineExecutableStatisticKHR *s = pStatistics;
860    VkPipelineExecutableStatisticKHR *end = s + (pStatistics ? *pStatisticCount : 0);
861    VkResult result = VK_SUCCESS;
862 
863    if (s < end) {
864       desc_copy(s->name, "Driver pipeline hash");
865       desc_copy(s->description, "Driver pipeline hash used by RGP");
866       s->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
867       s->value.u64 = pipeline->pipeline_hash;
868    }
869    ++s;
870 
871    if (s < end) {
872       desc_copy(s->name, "SGPRs");
873       desc_copy(s->description, "Number of SGPR registers allocated per subgroup");
874       s->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
875       s->value.u64 = shader->config.num_sgprs;
876    }
877    ++s;
878 
879    if (s < end) {
880       desc_copy(s->name, "VGPRs");
881       desc_copy(s->description, "Number of VGPR registers allocated per subgroup");
882       s->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
883       s->value.u64 = shader->config.num_vgprs;
884    }
885    ++s;
886 
887    if (s < end) {
888       desc_copy(s->name, "Spilled SGPRs");
889       desc_copy(s->description, "Number of SGPR registers spilled per subgroup");
890       s->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
891       s->value.u64 = shader->config.spilled_sgprs;
892    }
893    ++s;
894 
895    if (s < end) {
896       desc_copy(s->name, "Spilled VGPRs");
897       desc_copy(s->description, "Number of VGPR registers spilled per subgroup");
898       s->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
899       s->value.u64 = shader->config.spilled_vgprs;
900    }
901    ++s;
902 
903    if (s < end) {
904       desc_copy(s->name, "Code size");
905       desc_copy(s->description, "Code size in bytes");
906       s->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
907       s->value.u64 = shader->exec_size;
908    }
909    ++s;
910 
911    if (s < end) {
912       desc_copy(s->name, "LDS size");
913       desc_copy(s->description, "LDS size in bytes per workgroup");
914       s->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
915       s->value.u64 = shader->config.lds_size * lds_increment;
916    }
917    ++s;
918 
919    if (s < end) {
920       desc_copy(s->name, "Scratch size");
921       desc_copy(s->description, "Private memory in bytes per subgroup");
922       s->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
923       s->value.u64 = shader->config.scratch_bytes_per_wave;
924    }
925    ++s;
926 
927    if (s < end) {
928       desc_copy(s->name, "Subgroups per SIMD");
929       desc_copy(s->description, "The maximum number of subgroups in flight on a SIMD unit");
930       s->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
931       s->value.u64 = shader->max_waves;
932    }
933    ++s;
934 
935    if (s < end) {
936       desc_copy(s->name, "Combined inputs");
937       desc_copy(s->description, "Number of input slots reserved for the shader (including merged stages)");
938       s->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
939       s->value.u64 = 0;
940 
941       switch (stage) {
942       case MESA_SHADER_VERTEX:
943          if (gfx_level <= GFX8 || (!shader->info.vs.as_es && !shader->info.vs.as_ls)) {
944             /* VS inputs when VS is a separate stage */
945             s->value.u64 += util_bitcount(shader->info.vs.input_slot_usage_mask);
946          }
947          break;
948 
949       case MESA_SHADER_TESS_CTRL:
950          if (gfx_level >= GFX9) {
951             /* VS inputs when pipeline has tess */
952             s->value.u64 += util_bitcount(shader->info.vs.input_slot_usage_mask);
953          }
954 
955          /* VS -> TCS inputs */
956          s->value.u64 += shader->info.tcs.num_linked_inputs;
957          break;
958 
959       case MESA_SHADER_TESS_EVAL:
960          if (gfx_level <= GFX8 || !shader->info.tes.as_es) {
961             /* TCS -> TES inputs when TES is a separate stage */
962             s->value.u64 += shader->info.tes.num_linked_inputs + shader->info.tes.num_linked_patch_inputs;
963          }
964          break;
965 
966       case MESA_SHADER_GEOMETRY:
967          /* The IO stats of the GS copy shader are already reflected by GS and FS, so leave it empty. */
968          if (shader->info.type == RADV_SHADER_TYPE_GS_COPY)
969             break;
970 
971          if (gfx_level >= GFX9) {
972             if (shader->info.gs.es_type == MESA_SHADER_VERTEX) {
973                /* VS inputs when pipeline has GS but no tess */
974                s->value.u64 += util_bitcount(shader->info.vs.input_slot_usage_mask);
975             } else if (shader->info.gs.es_type == MESA_SHADER_TESS_EVAL) {
976                /* TCS -> TES inputs when pipeline has GS */
977                s->value.u64 += shader->info.tes.num_linked_inputs + shader->info.tes.num_linked_patch_inputs;
978             }
979          }
980 
981          /* VS -> GS or TES -> GS inputs */
982          s->value.u64 += shader->info.gs.num_linked_inputs;
983          break;
984 
985       case MESA_SHADER_FRAGMENT:
986          s->value.u64 += shader->info.ps.num_interp + shader->info.ps.num_prim_interp;
987          break;
988 
989       default:
990          /* Other stages don't have IO or we are not interested in them. */
991          break;
992       }
993    }
994    ++s;
995 
996    if (s < end) {
997       desc_copy(s->name, "Combined outputs");
998       desc_copy(s->description, "Number of output slots reserved for the shader (including merged stages)");
999       s->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
1000       s->value.u64 = 0;
1001 
1002       switch (stage) {
1003       case MESA_SHADER_VERTEX:
1004          if (!shader->info.vs.as_ls && !shader->info.vs.as_es) {
1005             /* VS -> FS outputs. */
1006             s->value.u64 += shader->info.outinfo.pos_exports + shader->info.outinfo.param_exports +
1007                             shader->info.outinfo.prim_param_exports;
1008          } else if (gfx_level <= GFX8) {
1009             /* VS -> TCS, VS -> GS outputs on GFX6-8 */
1010             s->value.u64 += shader->info.vs.num_linked_outputs;
1011          }
1012          break;
1013 
1014       case MESA_SHADER_TESS_CTRL:
1015          if (gfx_level >= GFX9) {
1016             /* VS -> TCS outputs on GFX9+ */
1017             s->value.u64 += shader->info.vs.num_linked_outputs;
1018          }
1019 
1020          /* TCS -> TES outputs */
1021          s->value.u64 += shader->info.tcs.num_linked_outputs + shader->info.tcs.num_linked_patch_outputs;
1022          break;
1023 
1024       case MESA_SHADER_TESS_EVAL:
1025          if (!shader->info.tes.as_es) {
1026             /* TES -> FS outputs */
1027             s->value.u64 += shader->info.outinfo.pos_exports + shader->info.outinfo.param_exports +
1028                             shader->info.outinfo.prim_param_exports;
1029          } else if (gfx_level <= GFX8) {
1030             /* TES -> GS outputs on GFX6-8 */
1031             s->value.u64 += shader->info.tes.num_linked_outputs;
1032          }
1033          break;
1034 
1035       case MESA_SHADER_GEOMETRY:
1036          /* The IO stats of the GS copy shader are already reflected by GS and FS, so leave it empty. */
1037          if (shader->info.type == RADV_SHADER_TYPE_GS_COPY)
1038             break;
1039 
1040          if (gfx_level >= GFX9) {
1041             if (shader->info.gs.es_type == MESA_SHADER_VERTEX) {
1042                /* VS -> GS outputs on GFX9+ */
1043                s->value.u64 += shader->info.vs.num_linked_outputs;
1044             } else if (shader->info.gs.es_type == MESA_SHADER_TESS_EVAL) {
1045                /* TES -> GS outputs on GFX9+ */
1046                s->value.u64 += shader->info.tes.num_linked_outputs;
1047             }
1048          }
1049 
1050          if (shader->info.is_ngg) {
1051             /* GS -> FS outputs (GFX10+ NGG) */
1052             s->value.u64 += shader->info.outinfo.pos_exports + shader->info.outinfo.param_exports +
1053                             shader->info.outinfo.prim_param_exports;
1054          } else {
1055             /* GS -> FS outputs (GFX6-10.3 legacy) */
1056             s->value.u64 += shader->info.gs.gsvs_vertex_size / 16;
1057          }
1058          break;
1059 
1060       case MESA_SHADER_MESH:
1061          /* MS -> FS outputs */
1062          s->value.u64 += shader->info.outinfo.pos_exports + shader->info.outinfo.param_exports +
1063                          shader->info.outinfo.prim_param_exports;
1064          break;
1065 
1066       case MESA_SHADER_FRAGMENT:
1067          s->value.u64 += shader->info.ps.colors_written + !!shader->info.ps.writes_z +
1068                          !!shader->info.ps.writes_stencil + !!shader->info.ps.writes_sample_mask +
1069                          !!shader->info.ps.writes_mrt0_alpha;
1070          break;
1071 
1072       default:
1073          /* Other stages don't have IO or we are not interested in them. */
1074          break;
1075       }
1076    }
1077    ++s;
1078 
1079    if (shader->statistics) {
1080       for (unsigned i = 0; i < aco_num_statistics; i++) {
1081          const struct aco_compiler_statistic_info *info = &aco_statistic_infos[i];
1082          if (s < end) {
1083             desc_copy(s->name, info->name);
1084             desc_copy(s->description, info->desc);
1085             s->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
1086             s->value.u64 = shader->statistics[i];
1087          }
1088          ++s;
1089       }
1090    }
1091 
1092    if (!pStatistics)
1093       *pStatisticCount = s - pStatistics;
1094    else if (s > end) {
1095       *pStatisticCount = end - pStatistics;
1096       result = VK_INCOMPLETE;
1097    } else {
1098       *pStatisticCount = s - pStatistics;
1099    }
1100 
1101    return result;
1102 }
1103 
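/* Copy a NUL-terminated text representation into the application-provided buffer, following the
 * usual Vulkan two-call size-query convention.
 */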
1104 static VkResult
1105 radv_copy_representation(void *data, size_t *data_size, const char *src)
1106 {
1107    size_t total_size = strlen(src) + 1;
1108 
1109    if (!data) {
1110       *data_size = total_size;
1111       return VK_SUCCESS;
1112    }
1113 
1114    size_t size = MIN2(total_size, *data_size);
1115 
1116    memcpy(data, src, size);
1117    if (size)
1118       *((char *)data + size - 1) = 0;
1119    return size < total_size ? VK_INCOMPLETE : VK_SUCCESS;
1120 }
1121 
1122 VKAPI_ATTR VkResult VKAPI_CALL
1123 radv_GetPipelineExecutableInternalRepresentationsKHR(
1124    VkDevice _device, const VkPipelineExecutableInfoKHR *pExecutableInfo, uint32_t *pInternalRepresentationCount,
1125    VkPipelineExecutableInternalRepresentationKHR *pInternalRepresentations)
1126 {
1127    VK_FROM_HANDLE(radv_device, device, _device);
1128    VK_FROM_HANDLE(radv_pipeline, pipeline, pExecutableInfo->pipeline);
1129    const struct radv_physical_device *pdev = radv_device_physical(device);
1130    gl_shader_stage stage;
1131    struct radv_shader *shader =
1132       radv_get_shader_from_executable_index(pipeline, pExecutableInfo->executableIndex, &stage);
1133 
1134    VkPipelineExecutableInternalRepresentationKHR *p = pInternalRepresentations;
1135    VkPipelineExecutableInternalRepresentationKHR *end =
1136       p + (pInternalRepresentations ? *pInternalRepresentationCount : 0);
1137    VkResult result = VK_SUCCESS;
1138    /* optimized NIR */
1139    if (p < end) {
1140       p->isText = true;
1141       desc_copy(p->name, "NIR Shader(s)");
1142       desc_copy(p->description, "The optimized NIR shader(s)");
1143       if (radv_copy_representation(p->pData, &p->dataSize, shader->nir_string) != VK_SUCCESS)
1144          result = VK_INCOMPLETE;
1145    }
1146    ++p;
1147 
1148    /* backend IR */
1149    if (p < end) {
1150       p->isText = true;
1151       if (radv_use_llvm_for_stage(pdev, stage)) {
1152          desc_copy(p->name, "LLVM IR");
1153          desc_copy(p->description, "The LLVM IR after some optimizations");
1154       } else {
1155          desc_copy(p->name, "ACO IR");
1156          desc_copy(p->description, "The ACO IR after some optimizations");
1157       }
1158       if (radv_copy_representation(p->pData, &p->dataSize, shader->ir_string) != VK_SUCCESS)
1159          result = VK_INCOMPLETE;
1160    }
1161    ++p;
1162 
1163    /* Disassembler */
1164    if (p < end && shader->disasm_string) {
1165       p->isText = true;
1166       desc_copy(p->name, "Assembly");
1167       desc_copy(p->description, "Final Assembly");
1168       if (radv_copy_representation(p->pData, &p->dataSize, shader->disasm_string) != VK_SUCCESS)
1169          result = VK_INCOMPLETE;
1170    }
1171    ++p;
1172 
1173    if (!pInternalRepresentations)
1174       *pInternalRepresentationCount = p - pInternalRepresentations;
1175    else if (p > end) {
1176       result = VK_INCOMPLETE;
1177       *pInternalRepresentationCount = end - pInternalRepresentations;
1178    } else {
1179       *pInternalRepresentationCount = p - pInternalRepresentations;
1180    }
1181 
1182    return result;
1183 }
1184 
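/* ralloc destructor for the shader module copies created by radv_copy_shader_stage_create_info. */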
1185 static void
1186 vk_shader_module_finish(void *_module)
1187 {
1188    struct vk_shader_module *module = _module;
1189    vk_object_base_finish(&module->base);
1190 }
1191 
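/* Deep-copy the shader stage create infos (including shader modules, specialization data and
 * entrypoint names) into mem_ctx so they stay valid after the original create info is gone.
 */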
1192 VkPipelineShaderStageCreateInfo *
1193 radv_copy_shader_stage_create_info(struct radv_device *device, uint32_t stageCount,
1194                                    const VkPipelineShaderStageCreateInfo *pStages, void *mem_ctx)
1195 {
1196    VkPipelineShaderStageCreateInfo *new_stages;
1197 
1198    size_t size = sizeof(VkPipelineShaderStageCreateInfo) * stageCount;
1199    new_stages = ralloc_size(mem_ctx, size);
1200    if (!new_stages)
1201       return NULL;
1202 
1203    if (size)
1204       memcpy(new_stages, pStages, size);
1205 
1206    for (uint32_t i = 0; i < stageCount; i++) {
1207       VK_FROM_HANDLE(vk_shader_module, module, new_stages[i].module);
1208 
1209       const VkShaderModuleCreateInfo *minfo = vk_find_struct_const(pStages[i].pNext, SHADER_MODULE_CREATE_INFO);
1210 
1211       if (module) {
1212          struct vk_shader_module *new_module = ralloc_size(mem_ctx, sizeof(struct vk_shader_module) + module->size);
1213          if (!new_module)
1214             return NULL;
1215 
1216          ralloc_set_destructor(new_module, vk_shader_module_finish);
1217          vk_object_base_init(&device->vk, &new_module->base, VK_OBJECT_TYPE_SHADER_MODULE);
1218 
1219          new_module->nir = NULL;
1220          memcpy(new_module->hash, module->hash, sizeof(module->hash));
1221          new_module->size = module->size;
1222          memcpy(new_module->data, module->data, module->size);
1223 
1224          module = new_module;
1225       } else if (minfo) {
1226          module = ralloc_size(mem_ctx, sizeof(struct vk_shader_module) + minfo->codeSize);
1227          if (!module)
1228             return NULL;
1229 
1230          vk_shader_module_init(&device->vk, module, minfo);
1231       }
1232 
1233       if (module) {
1234          const VkSpecializationInfo *spec = new_stages[i].pSpecializationInfo;
1235          if (spec) {
1236             VkSpecializationInfo *new_spec = ralloc(mem_ctx, VkSpecializationInfo);
1237             if (!new_spec)
1238                return NULL;
1239 
1240             new_spec->mapEntryCount = spec->mapEntryCount;
1241             uint32_t map_entries_size = sizeof(VkSpecializationMapEntry) * spec->mapEntryCount;
1242             new_spec->pMapEntries = ralloc_size(mem_ctx, map_entries_size);
1243             if (!new_spec->pMapEntries)
1244                return NULL;
1245             memcpy((void *)new_spec->pMapEntries, spec->pMapEntries, map_entries_size);
1246 
1247             new_spec->dataSize = spec->dataSize;
1248             new_spec->pData = ralloc_size(mem_ctx, spec->dataSize);
1249             if (!new_spec->pData)
1250                return NULL;
1251             memcpy((void *)new_spec->pData, spec->pData, spec->dataSize);
1252 
1253             new_stages[i].pSpecializationInfo = new_spec;
1254          }
1255 
1256          new_stages[i].module = vk_shader_module_to_handle(module);
1257          new_stages[i].pName = ralloc_strdup(mem_ctx, new_stages[i].pName);
1258          if (!new_stages[i].pName)
1259             return NULL;
1260          new_stages[i].pNext = NULL;
1261       }
1262    }
1263 
1264    return new_stages;
1265 }
1266 
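/* Seed the pipeline hash with state that affects all shaders: the device cache hash and the
 * pipeline layout hash.
 */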
1267 void
1268 radv_pipeline_hash(const struct radv_device *device, const struct radv_pipeline_layout *pipeline_layout,
1269                    struct mesa_sha1 *ctx)
1270 {
1271    _mesa_sha1_update(ctx, device->cache_hash, sizeof(device->cache_hash));
1272    if (pipeline_layout)
1273       _mesa_sha1_update(ctx, pipeline_layout->hash, sizeof(pipeline_layout->hash));
1274 }
1275 
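/* Hash a single shader stage: the vk-level stage hash (SPIR-V, entrypoint, specialization info)
 * combined with the RADV stage key.
 */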
1276 void
1277 radv_pipeline_hash_shader_stage(VkPipelineCreateFlags2KHR pipeline_flags,
1278                                 const VkPipelineShaderStageCreateInfo *sinfo,
1279                                 const struct radv_shader_stage_key *stage_key, struct mesa_sha1 *ctx)
1280 {
1281    unsigned char shader_sha1[SHA1_DIGEST_LENGTH];
1282 
1283    vk_pipeline_hash_shader_stage(pipeline_flags, sinfo, NULL, shader_sha1);
1284 
1285    _mesa_sha1_update(ctx, shader_sha1, sizeof(shader_sha1));
1286    _mesa_sha1_update(ctx, stage_key, sizeof(*stage_key));
1287 }
1288