/*
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 *
 * based in part on anv driver which is:
 * Copyright © 2015 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 */

#include "radv_pipeline.h"
#include "meta/radv_meta.h"
#include "nir/nir.h"
#include "nir/nir_builder.h"
#include "nir/nir_serialize.h"
#include "nir/radv_nir.h"
#include "spirv/nir_spirv.h"
#include "util/disk_cache.h"
#include "util/os_time.h"
#include "util/u_atomic.h"
#include "radv_cs.h"
#include "radv_debug.h"
#include "radv_pipeline_rt.h"
#include "radv_rmv.h"
#include "radv_shader.h"
#include "radv_shader_args.h"
#include "vk_pipeline.h"
#include "vk_render_pass.h"
#include "vk_util.h"

#include "util/u_debug.h"
#include "ac_binary.h"
#include "ac_nir.h"
#include "ac_shader_util.h"
#include "aco_interface.h"
#include "sid.h"
#include "vk_format.h"
#include "vk_nir_convert_ycbcr.h"
#include "vk_ycbcr_conversion.h"
#if AMD_LLVM_AVAILABLE
#include "ac_llvm_util.h"
#endif

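/* Returns whether the shader was compiled to access its descriptor sets indirectly, i.e. whether
 * the AC_UD_INDIRECT_DESCRIPTOR_SETS user SGPR was assigned an SGPR index.
 */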
bool
radv_shader_need_indirect_descriptor_sets(const struct radv_shader *shader)
{
   const struct radv_userdata_info *loc = radv_get_user_sgpr_info(shader, AC_UD_INDIRECT_DESCRIPTOR_SETS);
   return loc->sgpr_idx != -1;
}

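/* Shader IRs are captured when the app requests internal representations, when RADV_DEBUG=shaders
 * is set, or when device->keep_shader_info is enabled.
 */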
bool
radv_pipeline_capture_shaders(const struct radv_device *device, VkPipelineCreateFlags2KHR flags)
{
   const struct radv_physical_device *pdev = radv_device_physical(device);
   const struct radv_instance *instance = radv_physical_device_instance(pdev);

   return (flags & VK_PIPELINE_CREATE_2_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR) ||
          (instance->debug_flags & RADV_DEBUG_DUMP_SHADERS) || device->keep_shader_info;
}

bool
radv_pipeline_capture_shader_stats(const struct radv_device *device, VkPipelineCreateFlags2KHR flags)
{
   const struct radv_physical_device *pdev = radv_device_physical(device);
   const struct radv_instance *instance = radv_physical_device_instance(pdev);

   return (flags & VK_PIPELINE_CREATE_2_CAPTURE_STATISTICS_BIT_KHR) ||
          (instance->debug_flags & RADV_DEBUG_DUMP_SHADER_STATS) || device->keep_shader_info;
}

void
radv_pipeline_init(struct radv_device *device, struct radv_pipeline *pipeline, enum radv_pipeline_type type)
{
   vk_object_base_init(&device->vk, &pipeline->base, VK_OBJECT_TYPE_PIPELINE);

   pipeline->type = type;
}

void
radv_pipeline_destroy(struct radv_device *device, struct radv_pipeline *pipeline,
                      const VkAllocationCallbacks *allocator)
{
   if (pipeline->cache_object)
      vk_pipeline_cache_object_unref(&device->vk, pipeline->cache_object);

   switch (pipeline->type) {
   case RADV_PIPELINE_GRAPHICS:
      radv_destroy_graphics_pipeline(device, radv_pipeline_to_graphics(pipeline));
      break;
   case RADV_PIPELINE_GRAPHICS_LIB:
      radv_destroy_graphics_lib_pipeline(device, radv_pipeline_to_graphics_lib(pipeline));
      break;
   case RADV_PIPELINE_COMPUTE:
      radv_destroy_compute_pipeline(device, radv_pipeline_to_compute(pipeline));
      break;
   case RADV_PIPELINE_RAY_TRACING:
      radv_destroy_ray_tracing_pipeline(device, radv_pipeline_to_ray_tracing(pipeline));
      break;
   default:
      unreachable("invalid pipeline type");
   }

   radv_rmv_log_resource_destroy(device, (uint64_t)radv_pipeline_to_handle(pipeline));
   vk_object_base_finish(&pipeline->base);
   vk_free2(&device->vk.alloc, allocator, pipeline);
}

VKAPI_ATTR void VKAPI_CALL
radv_DestroyPipeline(VkDevice _device, VkPipeline _pipeline, const VkAllocationCallbacks *pAllocator)
{
   VK_FROM_HANDLE(radv_device, device, _device);
   VK_FROM_HANDLE(radv_pipeline, pipeline, _pipeline);

   if (!_pipeline)
      return;

   radv_pipeline_destroy(device, pipeline, pAllocator);
}

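/* Build the per-stage shader key from the pipeline create flags, the robustness state, the
 * required subgroup size and the drirc shader version overrides.
 */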
struct radv_shader_stage_key
radv_pipeline_get_shader_key(const struct radv_device *device, const VkPipelineShaderStageCreateInfo *stage,
                             VkPipelineCreateFlags2KHR flags, const void *pNext)
{
   const struct radv_physical_device *pdev = radv_device_physical(device);
   const struct radv_instance *instance = radv_physical_device_instance(pdev);
   gl_shader_stage s = vk_to_mesa_shader_stage(stage->stage);
   struct vk_pipeline_robustness_state rs;
   struct radv_shader_stage_key key = {0};

   key.keep_statistic_info = radv_pipeline_capture_shader_stats(device, flags);

   if (flags & VK_PIPELINE_CREATE_2_DISABLE_OPTIMIZATION_BIT_KHR)
      key.optimisations_disabled = 1;

   if (flags & VK_PIPELINE_CREATE_2_VIEW_INDEX_FROM_DEVICE_INDEX_BIT_KHR)
      key.view_index_from_device_index = 1;

   if (flags & VK_PIPELINE_CREATE_INDIRECT_BINDABLE_BIT_NV)
      key.indirect_bindable = 1;

   if (stage->stage & RADV_GRAPHICS_STAGE_BITS) {
      key.version = instance->drirc.override_graphics_shader_version;
   } else if (stage->stage & RADV_RT_STAGE_BITS) {
      key.version = instance->drirc.override_ray_tracing_shader_version;
   } else {
      assert(stage->stage == VK_SHADER_STAGE_COMPUTE_BIT);
      key.version = instance->drirc.override_compute_shader_version;
   }

   vk_pipeline_robustness_state_fill(&device->vk, &rs, pNext, stage->pNext);

   radv_set_stage_key_robustness(&rs, s, &key);

   const VkPipelineShaderStageRequiredSubgroupSizeCreateInfo *const subgroup_size =
      vk_find_struct_const(stage->pNext, PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO);

   if (subgroup_size) {
      if (subgroup_size->requiredSubgroupSize == 32)
         key.subgroup_required_size = RADV_REQUIRED_WAVE32;
      else if (subgroup_size->requiredSubgroupSize == 64)
         key.subgroup_required_size = RADV_REQUIRED_WAVE64;
      else
         unreachable("Unsupported required subgroup size.");
   }

   if (stage->flags & VK_PIPELINE_SHADER_STAGE_CREATE_REQUIRE_FULL_SUBGROUPS_BIT) {
      key.subgroup_require_full = 1;
   }

   return key;
}

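/* Fill a radv_shader_stage from a VkPipelineShaderStageCreateInfo: grab the SPIR-V either from the
 * shader module or from an inlined VkShaderModuleCreateInfo, initialize the stage layout and
 * compute the per-stage SHA-1.
 */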
void
radv_pipeline_stage_init(VkPipelineCreateFlags2KHR pipeline_flags,
                         const VkPipelineShaderStageCreateInfo *sinfo,
                         const struct radv_pipeline_layout *pipeline_layout,
                         const struct radv_shader_stage_key *stage_key, struct radv_shader_stage *out_stage)
{
   const VkShaderModuleCreateInfo *minfo = vk_find_struct_const(sinfo->pNext, SHADER_MODULE_CREATE_INFO);
   const VkPipelineShaderStageModuleIdentifierCreateInfoEXT *iinfo =
      vk_find_struct_const(sinfo->pNext, PIPELINE_SHADER_STAGE_MODULE_IDENTIFIER_CREATE_INFO_EXT);

   if (sinfo->module == VK_NULL_HANDLE && !minfo && !iinfo)
      return;

   memset(out_stage, 0, sizeof(*out_stage));

   out_stage->stage = vk_to_mesa_shader_stage(sinfo->stage);
   out_stage->next_stage = MESA_SHADER_NONE;
   out_stage->entrypoint = sinfo->pName;
   out_stage->spec_info = sinfo->pSpecializationInfo;
   out_stage->feedback.flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT;
   out_stage->key = *stage_key;

   if (sinfo->module != VK_NULL_HANDLE) {
      struct vk_shader_module *module = vk_shader_module_from_handle(sinfo->module);

      out_stage->spirv.data = module->data;
      out_stage->spirv.size = module->size;
      out_stage->spirv.object = &module->base;

      if (module->nir)
         out_stage->internal_nir = module->nir;
   } else if (minfo) {
      out_stage->spirv.data = (const char *)minfo->pCode;
      out_stage->spirv.size = minfo->codeSize;
   }

   radv_shader_layout_init(pipeline_layout, out_stage->stage, &out_stage->layout);

   vk_pipeline_hash_shader_stage(pipeline_flags, sinfo, NULL, out_stage->shader_sha1);
}

void
radv_shader_layout_init(const struct radv_pipeline_layout *pipeline_layout, gl_shader_stage stage,
                        struct radv_shader_layout *layout)
{
   layout->num_sets = pipeline_layout->num_sets;
   for (unsigned i = 0; i < pipeline_layout->num_sets; i++) {
      layout->set[i].layout = pipeline_layout->set[i].layout;
      layout->set[i].dynamic_offset_start = pipeline_layout->set[i].dynamic_offset_start;
   }

   layout->push_constant_size = pipeline_layout->push_constant_size;
   layout->use_dynamic_descriptors = pipeline_layout->dynamic_offset_count &&
                                     (pipeline_layout->dynamic_shader_stages & mesa_to_vk_shader_stage(stage));
}

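/* Callback for nir_vk_lower_ycbcr_tex: returns the YCbCr conversion state of the immutable sampler
 * at (set, binding, array_index), or NULL if the binding has no YCbCr samplers.
 */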
static const struct vk_ycbcr_conversion_state *
ycbcr_conversion_lookup(const void *data, uint32_t set, uint32_t binding, uint32_t array_index)
{
   const struct radv_shader_layout *layout = data;

   const struct radv_descriptor_set_layout *set_layout = layout->set[set].layout;
   const struct vk_ycbcr_conversion_state *ycbcr_samplers = radv_immutable_ycbcr_samplers(set_layout, binding);

   if (!ycbcr_samplers)
      return NULL;

   return ycbcr_samplers + array_index;
}

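/* Callback for nir_lower_bit_size: returns 32 for scalar 8-bit and 16-bit ALU ops that should be
 * performed in 32 bits on the given GFX level, and 0 to keep the original bit size.
 */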
static unsigned
lower_bit_size_callback(const nir_instr *instr, void *_)
{
   struct radv_device *device = _;
   const struct radv_physical_device *pdev = radv_device_physical(device);
   enum amd_gfx_level chip = pdev->info.gfx_level;

   if (instr->type != nir_instr_type_alu)
      return 0;
   nir_alu_instr *alu = nir_instr_as_alu(instr);

   /* If an instruction is not scalarized by this point,
    * it can be emitted as a packed instruction. */
   if (alu->def.num_components > 1)
      return 0;

   if (alu->def.bit_size & (8 | 16)) {
      unsigned bit_size = alu->def.bit_size;
      switch (alu->op) {
      case nir_op_bitfield_select:
      case nir_op_imul_high:
      case nir_op_umul_high:
      case nir_op_uadd_carry:
      case nir_op_usub_borrow:
         return 32;
      case nir_op_iabs:
      case nir_op_imax:
      case nir_op_umax:
      case nir_op_imin:
      case nir_op_umin:
      case nir_op_ishr:
      case nir_op_ushr:
      case nir_op_ishl:
      case nir_op_isign:
      case nir_op_uadd_sat:
      case nir_op_usub_sat:
         return (bit_size == 8 || !(chip >= GFX8 && alu->def.divergent)) ? 32 : 0;
      case nir_op_iadd_sat:
      case nir_op_isub_sat:
         return bit_size == 8 || !alu->def.divergent ? 32 : 0;

      default:
         return 0;
      }
   }

   if (nir_src_bit_size(alu->src[0].src) & (8 | 16)) {
      unsigned bit_size = nir_src_bit_size(alu->src[0].src);
      switch (alu->op) {
      case nir_op_bit_count:
      case nir_op_find_lsb:
      case nir_op_ufind_msb:
         return 32;
      case nir_op_ilt:
      case nir_op_ige:
      case nir_op_ieq:
      case nir_op_ine:
      case nir_op_ult:
      case nir_op_uge:
      case nir_op_bitz:
      case nir_op_bitnz:
         return (bit_size == 8 || !(chip >= GFX8 && alu->def.divergent)) ? 32 : 0;
      default:
         return 0;
      }
   }

   return 0;
}

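/* Vectorization width callback: allow 2-wide 16-bit ALU ops on GFX9+ when ACO supports the packed
 * form, otherwise keep everything at width 1.
 */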
static uint8_t
opt_vectorize_callback(const nir_instr *instr, const void *_)
{
   if (instr->type != nir_instr_type_alu)
      return 0;

   const struct radv_device *device = _;
   const struct radv_physical_device *pdev = radv_device_physical(device);
   enum amd_gfx_level chip = pdev->info.gfx_level;
   if (chip < GFX9)
      return 1;

   const nir_alu_instr *alu = nir_instr_as_alu(instr);
   const unsigned bit_size = alu->def.bit_size;
   if (bit_size != 16)
      return 1;

   return aco_nir_op_supports_packed_math_16bit(alu) ? 2 : 1;
}

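/* Callback for nir_lower_non_uniform_access: returns the component mask of a descriptor source
 * that needs the non-uniform handling; single-component sources only need their one component,
 * otherwise only the second component when the binding can be chased, or both when it cannot.
 */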
static nir_component_mask_t
non_uniform_access_callback(const nir_src *src, void *_)
{
   if (src->ssa->num_components == 1)
      return 0x1;
   return nir_chase_binding(*src).success ? 0x2 : 0x3;
}

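/* Backend-agnostic late NIR processing: runs after the shader info and arguments have been
 * gathered, and lowers I/O, descriptors, ABI intrinsics and small bit sizes before the shader is
 * handed to ACO or LLVM.
 */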
void
radv_postprocess_nir(struct radv_device *device, const struct radv_graphics_state_key *gfx_state,
                     struct radv_shader_stage *stage)
{
   const struct radv_physical_device *pdev = radv_device_physical(device);
   const struct radv_instance *instance = radv_physical_device_instance(pdev);
   enum amd_gfx_level gfx_level = pdev->info.gfx_level;
   bool progress;

   /* Wave and workgroup size should already be filled. */
   assert(stage->info.wave_size && stage->info.workgroup_size);

   if (stage->stage == MESA_SHADER_FRAGMENT) {
      if (!stage->key.optimisations_disabled) {
         NIR_PASS(_, stage->nir, nir_opt_cse);
      }
      NIR_PASS(_, stage->nir, radv_nir_lower_fs_intrinsics, stage, gfx_state);
   }

   /* LLVM could support more of these in theory. */
   bool use_llvm = radv_use_llvm_for_stage(pdev, stage->stage);
   bool has_inverse_ballot = true;
#if AMD_LLVM_AVAILABLE
   has_inverse_ballot = !use_llvm || LLVM_VERSION_MAJOR >= 17;
#endif
   radv_nir_opt_tid_function_options tid_options = {
      .use_masked_swizzle_amd = true,
      .use_dpp16_shift_amd = !use_llvm && gfx_level >= GFX8,
      .use_clustered_rotate = !use_llvm,
      .hw_subgroup_size = stage->info.wave_size,
      .hw_ballot_bit_size = has_inverse_ballot ? stage->info.wave_size : 0,
      .hw_ballot_num_comp = has_inverse_ballot ? 1 : 0,
   };
   NIR_PASS(_, stage->nir, radv_nir_opt_tid_function, &tid_options);

   enum nir_lower_non_uniform_access_type lower_non_uniform_access_types =
      nir_lower_non_uniform_ubo_access | nir_lower_non_uniform_ssbo_access | nir_lower_non_uniform_texture_access |
      nir_lower_non_uniform_image_access;

   /* In practice, most shaders do not have non-uniform-qualified
    * accesses (see
    * https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/17558#note_1475069),
    * so a cheaper check, which usually fails, is run first.
    */
   if (nir_has_non_uniform_access(stage->nir, lower_non_uniform_access_types)) {
      if (!stage->key.optimisations_disabled) {
         NIR_PASS(_, stage->nir, nir_opt_non_uniform_access);
      }

      if (!radv_use_llvm_for_stage(pdev, stage->stage)) {
         nir_lower_non_uniform_access_options options = {
            .types = lower_non_uniform_access_types,
            .callback = &non_uniform_access_callback,
            .callback_data = NULL,
         };
         NIR_PASS(_, stage->nir, nir_lower_non_uniform_access, &options);
      }
   }
   NIR_PASS(_, stage->nir, nir_lower_memory_model);

   nir_load_store_vectorize_options vectorize_opts = {
      .modes = nir_var_mem_ssbo | nir_var_mem_ubo | nir_var_mem_push_const | nir_var_mem_shared | nir_var_mem_global |
               nir_var_shader_temp,
      .callback = ac_nir_mem_vectorize_callback,
      .cb_data = &gfx_level,
      .robust_modes = 0,
      /* On GFX6, read2/write2 is out-of-bounds if the offset register is negative, even if
       * the final offset is not.
       */
      .has_shared2_amd = gfx_level >= GFX7,
   };

   if (stage->key.uniform_robustness2)
      vectorize_opts.robust_modes |= nir_var_mem_ubo;

   if (stage->key.storage_robustness2)
      vectorize_opts.robust_modes |= nir_var_mem_ssbo;

   if (!stage->key.optimisations_disabled) {
      progress = false;
      NIR_PASS(progress, stage->nir, nir_opt_load_store_vectorize, &vectorize_opts);
      if (progress) {
         NIR_PASS(_, stage->nir, nir_copy_prop);
         NIR_PASS(_, stage->nir, nir_opt_shrink_stores, !instance->drirc.disable_shrink_image_store);

         /* Ensure vectorized load_push_constant intrinsics still have constant offsets, for
          * radv_nir_apply_pipeline_layout. */
         if (stage->args.ac.inline_push_const_mask)
            NIR_PASS(_, stage->nir, nir_opt_constant_folding);

         /* Gather info again, to update whether 8/16-bit are used. */
         nir_shader_gather_info(stage->nir, nir_shader_get_entrypoint(stage->nir));
      }
   }

   NIR_PASS(
      _, stage->nir, ac_nir_lower_subdword_loads,
      (ac_nir_lower_subdword_options){.modes_1_comp = nir_var_mem_ubo | nir_var_mem_push_const,
                                      .modes_N_comps = nir_var_mem_ubo | nir_var_mem_push_const | nir_var_mem_ssbo});

   progress = false;
   NIR_PASS(progress, stage->nir, nir_vk_lower_ycbcr_tex, ycbcr_conversion_lookup, &stage->layout);
   /* Gather info again in case nir_vk_lower_ycbcr_tex emitted resinfo instructions. */
   if (progress)
      nir_shader_gather_info(stage->nir, nir_shader_get_entrypoint(stage->nir));

   bool fix_derivs_in_divergent_cf =
      stage->stage == MESA_SHADER_FRAGMENT && !radv_use_llvm_for_stage(pdev, stage->stage);
   if (fix_derivs_in_divergent_cf) {
      NIR_PASS(_, stage->nir, nir_convert_to_lcssa, true, true);
      nir_divergence_analysis(stage->nir);
   }
   NIR_PASS(_, stage->nir, ac_nir_lower_tex,
            &(ac_nir_lower_tex_options){
               .gfx_level = gfx_level,
               .lower_array_layer_round_even = !pdev->info.conformant_trunc_coord || device->disable_trunc_coord,
               .fix_derivs_in_divergent_cf = fix_derivs_in_divergent_cf,
               .max_wqm_vgprs = 64, // TODO: improve spiller and RA support for linear VGPRs
            });
   if (fix_derivs_in_divergent_cf)
      NIR_PASS(_, stage->nir, nir_opt_remove_phis); /* cleanup LCSSA phis */

   if (stage->nir->info.uses_resource_info_query)
      NIR_PASS(_, stage->nir, ac_nir_lower_resinfo, gfx_level);

   NIR_PASS_V(stage->nir, radv_nir_apply_pipeline_layout, device, stage);

   if (!stage->key.optimisations_disabled) {
      NIR_PASS(_, stage->nir, nir_opt_shrink_vectors, true);
   }

   NIR_PASS(_, stage->nir, nir_lower_alu_width, opt_vectorize_callback, device);

   nir_move_options sink_opts = nir_move_const_undef | nir_move_copies;

   if (!stage->key.optimisations_disabled) {
      NIR_PASS(_, stage->nir, nir_opt_licm);
      if (stage->stage != MESA_SHADER_FRAGMENT || !pdev->cache_key.disable_sinking_load_input_fs)
         sink_opts |= nir_move_load_input;

      NIR_PASS(_, stage->nir, nir_opt_sink, sink_opts);
      NIR_PASS(_, stage->nir, nir_opt_move, nir_move_load_input | nir_move_const_undef | nir_move_copies);
   }

   /* Lower VS inputs. We need to do this after nir_opt_sink, because
    * load_input can be reordered, but buffer loads can't.
    */
   if (stage->stage == MESA_SHADER_VERTEX) {
      NIR_PASS(_, stage->nir, radv_nir_lower_vs_inputs, stage, gfx_state, &pdev->info);
   }

   /* Lower I/O intrinsics to memory instructions. */
   bool is_last_vgt_stage = radv_is_last_vgt_stage(stage);
   bool io_to_mem = radv_nir_lower_io_to_mem(device, stage);
   bool lowered_ngg = stage->info.is_ngg && is_last_vgt_stage;
   if (lowered_ngg) {
      radv_lower_ngg(device, stage, gfx_state);
   } else if (is_last_vgt_stage) {
      if (stage->stage != MESA_SHADER_GEOMETRY) {
         NIR_PASS_V(stage->nir, ac_nir_lower_legacy_vs, gfx_level,
                    stage->info.outinfo.clip_dist_mask | stage->info.outinfo.cull_dist_mask,
                    stage->info.outinfo.vs_output_param_offset, stage->info.outinfo.param_exports,
                    stage->info.outinfo.export_prim_id, false, false, false, stage->info.force_vrs_per_vertex);

      } else {
         bool emulate_ngg_gs_query_pipeline_stat = pdev->emulate_ngg_gs_query_pipeline_stat;

         ac_nir_gs_output_info gs_out_info = {
            .streams = stage->info.gs.output_streams,
            .usage_mask = stage->info.gs.output_usage_mask,
         };
         NIR_PASS_V(stage->nir, ac_nir_lower_legacy_gs, false, emulate_ngg_gs_query_pipeline_stat, &gs_out_info);
      }
   } else if (stage->stage == MESA_SHADER_FRAGMENT) {
      ac_nir_lower_ps_options options = {
         .gfx_level = gfx_level,
         .family = pdev->info.family,
         .use_aco = !radv_use_llvm_for_stage(pdev, stage->stage),
         .uses_discard = true,
         .alpha_func = COMPARE_FUNC_ALWAYS,
         .no_color_export = stage->info.ps.has_epilog,
         .no_depth_export = stage->info.ps.exports_mrtz_via_epilog,

         .bc_optimize_for_persp = G_0286CC_PERSP_CENTER_ENA(stage->info.ps.spi_ps_input_ena) &&
                                  G_0286CC_PERSP_CENTROID_ENA(stage->info.ps.spi_ps_input_ena),
         .bc_optimize_for_linear = G_0286CC_LINEAR_CENTER_ENA(stage->info.ps.spi_ps_input_ena) &&
                                   G_0286CC_LINEAR_CENTROID_ENA(stage->info.ps.spi_ps_input_ena),
      };

      if (!options.no_color_export) {
         options.dual_src_blend_swizzle = gfx_state->ps.epilog.mrt0_is_dual_src && gfx_level >= GFX11;
         options.color_is_int8 = gfx_state->ps.epilog.color_is_int8;
         options.color_is_int10 = gfx_state->ps.epilog.color_is_int10;
         options.enable_mrt_output_nan_fixup =
            gfx_state->ps.epilog.enable_mrt_output_nan_fixup && !stage->nir->info.internal;
         /* Need to filter out unwritten color slots. */
         options.spi_shader_col_format = gfx_state->ps.epilog.spi_shader_col_format & stage->info.ps.colors_written;
         options.alpha_to_one = gfx_state->ps.epilog.alpha_to_one;
      }

      if (!options.no_depth_export) {
         /* Unlike gfx_state.ps.alpha_to_coverage_via_mrtz, radv_shader_info.ps.writes_mrt0_alpha
          * also requires a depth/stencil/sample_mask export to be present. ac_nir_lower_ps()
          * requires this field to reflect whether alpha via mrtz is really present.
          */
         options.alpha_to_coverage_via_mrtz = stage->info.ps.writes_mrt0_alpha;
      }

      NIR_PASS_V(stage->nir, ac_nir_lower_ps, &options);
   }

   if (radv_shader_should_clear_lds(device, stage->nir)) {
      const unsigned chunk_size = 16; /* max single store size */
      const unsigned shared_size = ALIGN(stage->nir->info.shared_size, chunk_size);
      NIR_PASS(_, stage->nir, nir_clear_shared_memory, shared_size, chunk_size);
   }

   NIR_PASS(_, stage->nir, nir_lower_int64);

   NIR_PASS(_, stage->nir, nir_opt_idiv_const, 8);

   NIR_PASS(_, stage->nir, nir_lower_idiv,
            &(nir_lower_idiv_options){
               .allow_fp16 = gfx_level >= GFX9,
            });

   if (radv_use_llvm_for_stage(pdev, stage->stage))
      NIR_PASS_V(stage->nir, nir_lower_io_to_scalar, nir_var_mem_global, NULL, NULL);

   NIR_PASS(_, stage->nir, ac_nir_lower_global_access);
   NIR_PASS_V(stage->nir, ac_nir_lower_intrinsics_to_args, gfx_level, radv_select_hw_stage(&stage->info, gfx_level),
              &stage->args.ac);
   NIR_PASS_V(stage->nir, radv_nir_lower_abi, gfx_level, stage, gfx_state, pdev->info.address32_hi);
   radv_optimize_nir_algebraic(
      stage->nir, io_to_mem || lowered_ngg || stage->stage == MESA_SHADER_COMPUTE || stage->stage == MESA_SHADER_TASK,
      gfx_level >= GFX7);

   NIR_PASS(_, stage->nir, nir_lower_fp16_casts, nir_lower_fp16_split_fp64);

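   /* lower_bit_size_callback checks ALU divergence on GFX8+, so run divergence analysis (in LCSSA
    * form) before lowering small bit sizes; the LCSSA phis are cleaned up again afterwards.
    */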
   if (stage->nir->info.bit_sizes_int & (8 | 16)) {
      if (gfx_level >= GFX8) {
         NIR_PASS(_, stage->nir, nir_convert_to_lcssa, true, true);
         nir_divergence_analysis(stage->nir);
      }

      if (nir_lower_bit_size(stage->nir, lower_bit_size_callback, device)) {
         NIR_PASS(_, stage->nir, nir_opt_constant_folding);
      }

      if (gfx_level >= GFX8)
         NIR_PASS(_, stage->nir, nir_opt_remove_phis); /* cleanup LCSSA phis */
   }
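   /* On GFX9+, try to shrink texture/image sources and destinations to 16 bits where possible,
    * then re-vectorize any 16-bit ALU ops this creates.
    */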
   if (gfx_level >= GFX9) {
      bool separate_g16 = gfx_level >= GFX10;
      struct nir_opt_tex_srcs_options opt_srcs_options[] = {
         {
            .sampler_dims = ~(BITFIELD_BIT(GLSL_SAMPLER_DIM_CUBE) | BITFIELD_BIT(GLSL_SAMPLER_DIM_BUF)),
            .src_types = (1 << nir_tex_src_coord) | (1 << nir_tex_src_lod) | (1 << nir_tex_src_bias) |
                         (1 << nir_tex_src_min_lod) | (1 << nir_tex_src_ms_index) |
                         (separate_g16 ? 0 : (1 << nir_tex_src_ddx) | (1 << nir_tex_src_ddy)),
         },
         {
            .sampler_dims = ~BITFIELD_BIT(GLSL_SAMPLER_DIM_CUBE),
            .src_types = (1 << nir_tex_src_ddx) | (1 << nir_tex_src_ddy),
         },
      };
      struct nir_opt_16bit_tex_image_options opt_16bit_options = {
         .rounding_mode = nir_rounding_mode_undef,
         .opt_tex_dest_types = nir_type_float | nir_type_int | nir_type_uint,
         .opt_image_dest_types = nir_type_float | nir_type_int | nir_type_uint,
         .integer_dest_saturates = true,
         .opt_image_store_data = true,
         .opt_image_srcs = true,
         .opt_srcs_options_count = separate_g16 ? 2 : 1,
         .opt_srcs_options = opt_srcs_options,
      };
      NIR_PASS(_, stage->nir, nir_opt_16bit_tex_image, &opt_16bit_options);

      if (!stage->key.optimisations_disabled &&
          ((stage->nir->info.bit_sizes_int | stage->nir->info.bit_sizes_float) & 16)) {
         NIR_PASS(_, stage->nir, nir_opt_vectorize, opt_vectorize_callback, device);
      }
   }

   /* cleanup passes */
   NIR_PASS(_, stage->nir, nir_lower_alu_width, opt_vectorize_callback, device);

   /* This pass changes the global float control mode to RTZ, so it can't be used
    * with LLVM, which only supports RTNE, or with RT, where the mode needs to match
    * across separately compiled stages.
    */
   if (!radv_use_llvm_for_stage(pdev, stage->stage) && !gl_shader_stage_is_rt(stage->stage))
      NIR_PASS(_, stage->nir, ac_nir_opt_pack_half, gfx_level);

   NIR_PASS(_, stage->nir, nir_lower_load_const_to_scalar);
   NIR_PASS(_, stage->nir, nir_copy_prop);
   NIR_PASS(_, stage->nir, nir_opt_dce);

   if (!stage->key.optimisations_disabled) {
      sink_opts |= nir_move_comparisons | nir_move_load_ubo | nir_move_load_ssbo | nir_move_alu;
      NIR_PASS(_, stage->nir, nir_opt_sink, sink_opts);

      nir_move_options move_opts = nir_move_const_undef | nir_move_load_ubo | nir_move_load_input |
                                   nir_move_comparisons | nir_move_copies | nir_move_alu;
      NIR_PASS(_, stage->nir, nir_opt_move, move_opts);

      /* Run nir_opt_move again to make sure that comparisons are as close as possible to their
       * first use, to prevent SCC spilling.
       */
      NIR_PASS(_, stage->nir, nir_opt_move, nir_move_comparisons);
   }
}

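/* Whether LDS should be zero-initialized for this shader: only compute-like stages (compute, mesh,
 * task) that actually use shared memory, and only when the clear_lds drirc option is enabled.
 */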
bool
radv_shader_should_clear_lds(const struct radv_device *device, const nir_shader *shader)
{
   const struct radv_physical_device *pdev = radv_device_physical(device);
   const struct radv_instance *instance = radv_physical_device_instance(pdev);

   return (shader->info.stage == MESA_SHADER_COMPUTE || shader->info.stage == MESA_SHADER_MESH ||
           shader->info.stage == MESA_SHADER_TASK) &&
          shader->info.shared_size > 0 && instance->drirc.clear_lds;
}

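/* Number of "pipeline executables" exposed through VK_KHR_pipeline_executable_properties: one per
 * compiled shader, plus the GS copy shader and, for ray tracing pipelines, one per RT stage that
 * owns a shader.
 */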
static uint32_t
radv_get_executable_count(struct radv_pipeline *pipeline)
{
   uint32_t ret = 0;

   if (pipeline->type == RADV_PIPELINE_RAY_TRACING) {
      struct radv_ray_tracing_pipeline *rt_pipeline = radv_pipeline_to_ray_tracing(pipeline);
      for (uint32_t i = 0; i < rt_pipeline->stage_count; i++)
         ret += rt_pipeline->stages[i].shader ? 1 : 0;
   }

   for (int i = 0; i < MESA_VULKAN_SHADER_STAGES; ++i) {
      if (!pipeline->shaders[i])
         continue;

      ret += 1u;
      if (i == MESA_SHADER_GEOMETRY && pipeline->gs_copy_shader) {
         ret += 1u;
      }
   }

   return ret;
}

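/* Map an executable index back to the corresponding radv_shader, walking RT stages first and then
 * the per-stage shaders (with the GS copy shader counted right after the geometry shader).
 */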
static struct radv_shader *
radv_get_shader_from_executable_index(struct radv_pipeline *pipeline, int index, gl_shader_stage *stage)
{
   if (pipeline->type == RADV_PIPELINE_RAY_TRACING) {
      struct radv_ray_tracing_pipeline *rt_pipeline = radv_pipeline_to_ray_tracing(pipeline);
      for (uint32_t i = 0; i < rt_pipeline->stage_count; i++) {
         struct radv_ray_tracing_stage *rt_stage = &rt_pipeline->stages[i];
         if (!rt_stage->shader)
            continue;

         if (!index) {
            *stage = rt_stage->stage;
            return rt_stage->shader;
         }

         index--;
      }
   }

   for (int i = 0; i < MESA_VULKAN_SHADER_STAGES; ++i) {
      if (!pipeline->shaders[i])
         continue;
      if (!index) {
         *stage = i;
         return pipeline->shaders[i];
      }

      --index;

      if (i == MESA_SHADER_GEOMETRY && pipeline->gs_copy_shader) {
         if (!index) {
            *stage = i;
            return pipeline->gs_copy_shader;
         }
         --index;
      }
   }

   *stage = -1;
   return NULL;
}

/* Basically strlcpy (which does not exist on Linux), specialized for
 * descriptions. */
static void
desc_copy(char *desc, const char *src)
{
   int len = strlen(src);
   assert(len < VK_MAX_DESCRIPTION_SIZE);
   memcpy(desc, src, len);
   memset(desc + len, 0, VK_MAX_DESCRIPTION_SIZE - len);
}

VKAPI_ATTR VkResult VKAPI_CALL
radv_GetPipelineExecutablePropertiesKHR(VkDevice _device, const VkPipelineInfoKHR *pPipelineInfo,
                                        uint32_t *pExecutableCount, VkPipelineExecutablePropertiesKHR *pProperties)
{
   VK_FROM_HANDLE(radv_pipeline, pipeline, pPipelineInfo->pipeline);
   const uint32_t total_count = radv_get_executable_count(pipeline);

   if (!pProperties) {
      *pExecutableCount = total_count;
      return VK_SUCCESS;
   }

   const uint32_t count = MIN2(total_count, *pExecutableCount);
   for (uint32_t executable_idx = 0; executable_idx < count; executable_idx++) {
      gl_shader_stage stage;
      struct radv_shader *shader = radv_get_shader_from_executable_index(pipeline, executable_idx, &stage);

      pProperties[executable_idx].stages = mesa_to_vk_shader_stage(stage);

      const char *name = _mesa_shader_stage_to_string(stage);
      const char *description = NULL;
      switch (stage) {
      case MESA_SHADER_VERTEX:
         description = "Vulkan Vertex Shader";
         break;
      case MESA_SHADER_TESS_CTRL:
         if (!pipeline->shaders[MESA_SHADER_VERTEX]) {
            pProperties[executable_idx].stages |= VK_SHADER_STAGE_VERTEX_BIT;
            name = "vertex + tessellation control";
            description = "Combined Vulkan Vertex and Tessellation Control Shaders";
         } else {
            description = "Vulkan Tessellation Control Shader";
         }
         break;
      case MESA_SHADER_TESS_EVAL:
         description = "Vulkan Tessellation Evaluation Shader";
         break;
      case MESA_SHADER_GEOMETRY:
         if (shader->info.type == RADV_SHADER_TYPE_GS_COPY) {
            name = "geometry copy";
            description = "Extra shader stage that loads the GS output ringbuffer into the rasterizer";
            break;
         }

         if (pipeline->shaders[MESA_SHADER_TESS_CTRL] && !pipeline->shaders[MESA_SHADER_TESS_EVAL]) {
            pProperties[executable_idx].stages |= VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT;
            name = "tessellation evaluation + geometry";
            description = "Combined Vulkan Tessellation Evaluation and Geometry Shaders";
         } else if (!pipeline->shaders[MESA_SHADER_TESS_CTRL] && !pipeline->shaders[MESA_SHADER_VERTEX]) {
            pProperties[executable_idx].stages |= VK_SHADER_STAGE_VERTEX_BIT;
            name = "vertex + geometry";
            description = "Combined Vulkan Vertex and Geometry Shaders";
         } else {
            description = "Vulkan Geometry Shader";
         }
         break;
      case MESA_SHADER_FRAGMENT:
         description = "Vulkan Fragment Shader";
         break;
      case MESA_SHADER_COMPUTE:
         description = "Vulkan Compute Shader";
         break;
      case MESA_SHADER_MESH:
         description = "Vulkan Mesh Shader";
         break;
      case MESA_SHADER_TASK:
         description = "Vulkan Task Shader";
         break;
      case MESA_SHADER_RAYGEN:
         description = "Vulkan Ray Generation Shader";
         break;
      case MESA_SHADER_ANY_HIT:
         description = "Vulkan Any-Hit Shader";
         break;
      case MESA_SHADER_CLOSEST_HIT:
         description = "Vulkan Closest-Hit Shader";
         break;
      case MESA_SHADER_MISS:
         description = "Vulkan Miss Shader";
         break;
      case MESA_SHADER_INTERSECTION:
         description = "Shader responsible for traversing the acceleration structure";
         break;
      case MESA_SHADER_CALLABLE:
         description = "Vulkan Callable Shader";
         break;
      default:
         unreachable("Unsupported shader stage");
      }

      pProperties[executable_idx].subgroupSize = shader->info.wave_size;
      desc_copy(pProperties[executable_idx].name, name);
      desc_copy(pProperties[executable_idx].description, description);
   }

   VkResult result = *pExecutableCount < total_count ? VK_INCOMPLETE : VK_SUCCESS;
   *pExecutableCount = count;
   return result;
}

VKAPI_ATTR VkResult VKAPI_CALL
radv_GetPipelineExecutableStatisticsKHR(VkDevice _device, const VkPipelineExecutableInfoKHR *pExecutableInfo,
                                        uint32_t *pStatisticCount, VkPipelineExecutableStatisticKHR *pStatistics)
{
   VK_FROM_HANDLE(radv_device, device, _device);
   VK_FROM_HANDLE(radv_pipeline, pipeline, pExecutableInfo->pipeline);
   gl_shader_stage stage;
   struct radv_shader *shader =
      radv_get_shader_from_executable_index(pipeline, pExecutableInfo->executableIndex, &stage);

   const struct radv_physical_device *pdev = radv_device_physical(device);
   const enum amd_gfx_level gfx_level = pdev->info.gfx_level;

   unsigned lds_increment =
      gfx_level >= GFX11 && stage == MESA_SHADER_FRAGMENT ? 1024 : pdev->info.lds_encode_granularity;

   VkPipelineExecutableStatisticKHR *s = pStatistics;
   VkPipelineExecutableStatisticKHR *end = s + (pStatistics ? *pStatisticCount : 0);
   VkResult result = VK_SUCCESS;

   if (s < end) {
      desc_copy(s->name, "Driver pipeline hash");
      desc_copy(s->description, "Driver pipeline hash used by RGP");
      s->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
      s->value.u64 = pipeline->pipeline_hash;
   }
   ++s;

   if (s < end) {
      desc_copy(s->name, "SGPRs");
      desc_copy(s->description, "Number of SGPR registers allocated per subgroup");
      s->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
      s->value.u64 = shader->config.num_sgprs;
   }
   ++s;

   if (s < end) {
      desc_copy(s->name, "VGPRs");
      desc_copy(s->description, "Number of VGPR registers allocated per subgroup");
      s->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
      s->value.u64 = shader->config.num_vgprs;
   }
   ++s;

   if (s < end) {
      desc_copy(s->name, "Spilled SGPRs");
      desc_copy(s->description, "Number of SGPR registers spilled per subgroup");
      s->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
      s->value.u64 = shader->config.spilled_sgprs;
   }
   ++s;

   if (s < end) {
      desc_copy(s->name, "Spilled VGPRs");
      desc_copy(s->description, "Number of VGPR registers spilled per subgroup");
      s->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
      s->value.u64 = shader->config.spilled_vgprs;
   }
   ++s;

   if (s < end) {
      desc_copy(s->name, "Code size");
      desc_copy(s->description, "Code size in bytes");
      s->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
      s->value.u64 = shader->exec_size;
   }
   ++s;

   if (s < end) {
      desc_copy(s->name, "LDS size");
      desc_copy(s->description, "LDS size in bytes per workgroup");
      s->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
      s->value.u64 = shader->config.lds_size * lds_increment;
   }
   ++s;

   if (s < end) {
      desc_copy(s->name, "Scratch size");
      desc_copy(s->description, "Private memory in bytes per subgroup");
      s->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
      s->value.u64 = shader->config.scratch_bytes_per_wave;
   }
   ++s;

   if (s < end) {
      desc_copy(s->name, "Subgroups per SIMD");
      desc_copy(s->description, "The maximum number of subgroups in flight on a SIMD unit");
      s->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
      s->value.u64 = shader->max_waves;
   }
   ++s;

   if (s < end) {
      desc_copy(s->name, "Combined inputs");
      desc_copy(s->description, "Number of input slots reserved for the shader (including merged stages)");
      s->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
      s->value.u64 = 0;

      switch (stage) {
      case MESA_SHADER_VERTEX:
         if (gfx_level <= GFX8 || (!shader->info.vs.as_es && !shader->info.vs.as_ls)) {
            /* VS inputs when VS is a separate stage */
            s->value.u64 += util_bitcount(shader->info.vs.input_slot_usage_mask);
         }
         break;

      case MESA_SHADER_TESS_CTRL:
         if (gfx_level >= GFX9) {
            /* VS inputs when pipeline has tess */
            s->value.u64 += util_bitcount(shader->info.vs.input_slot_usage_mask);
         }

         /* VS -> TCS inputs */
         s->value.u64 += shader->info.tcs.num_linked_inputs;
         break;

      case MESA_SHADER_TESS_EVAL:
         if (gfx_level <= GFX8 || !shader->info.tes.as_es) {
            /* TCS -> TES inputs when TES is a separate stage */
            s->value.u64 += shader->info.tes.num_linked_inputs + shader->info.tes.num_linked_patch_inputs;
         }
         break;

      case MESA_SHADER_GEOMETRY:
         /* The IO stats of the GS copy shader are already reflected by GS and FS, so leave it empty. */
         if (shader->info.type == RADV_SHADER_TYPE_GS_COPY)
            break;

         if (gfx_level >= GFX9) {
            if (shader->info.gs.es_type == MESA_SHADER_VERTEX) {
               /* VS inputs when pipeline has GS but no tess */
               s->value.u64 += util_bitcount(shader->info.vs.input_slot_usage_mask);
            } else if (shader->info.gs.es_type == MESA_SHADER_TESS_EVAL) {
               /* TCS -> TES inputs when pipeline has GS */
               s->value.u64 += shader->info.tes.num_linked_inputs + shader->info.tes.num_linked_patch_inputs;
            }
         }

         /* VS -> GS or TES -> GS inputs */
         s->value.u64 += shader->info.gs.num_linked_inputs;
         break;

      case MESA_SHADER_FRAGMENT:
         s->value.u64 += shader->info.ps.num_interp + shader->info.ps.num_prim_interp;
         break;

      default:
         /* Other stages don't have IO or we are not interested in them. */
         break;
      }
   }
   ++s;

   if (s < end) {
      desc_copy(s->name, "Combined outputs");
      desc_copy(s->description, "Number of output slots reserved for the shader (including merged stages)");
      s->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
      s->value.u64 = 0;

      switch (stage) {
      case MESA_SHADER_VERTEX:
         if (!shader->info.vs.as_ls && !shader->info.vs.as_es) {
            /* VS -> FS outputs. */
            s->value.u64 += shader->info.outinfo.pos_exports + shader->info.outinfo.param_exports +
                            shader->info.outinfo.prim_param_exports;
         } else if (gfx_level <= GFX8) {
            /* VS -> TCS, VS -> GS outputs on GFX6-8 */
            s->value.u64 += shader->info.vs.num_linked_outputs;
         }
         break;

      case MESA_SHADER_TESS_CTRL:
         if (gfx_level >= GFX9) {
            /* VS -> TCS outputs on GFX9+ */
            s->value.u64 += shader->info.vs.num_linked_outputs;
         }

         /* TCS -> TES outputs */
         s->value.u64 += shader->info.tcs.num_linked_outputs + shader->info.tcs.num_linked_patch_outputs;
         break;

      case MESA_SHADER_TESS_EVAL:
         if (!shader->info.tes.as_es) {
            /* TES -> FS outputs */
            s->value.u64 += shader->info.outinfo.pos_exports + shader->info.outinfo.param_exports +
                            shader->info.outinfo.prim_param_exports;
         } else if (gfx_level <= GFX8) {
            /* TES -> GS outputs on GFX6-8 */
            s->value.u64 += shader->info.tes.num_linked_outputs;
         }
         break;

      case MESA_SHADER_GEOMETRY:
         /* The IO stats of the GS copy shader are already reflected by GS and FS, so leave it empty. */
         if (shader->info.type == RADV_SHADER_TYPE_GS_COPY)
            break;

         if (gfx_level >= GFX9) {
            if (shader->info.gs.es_type == MESA_SHADER_VERTEX) {
               /* VS -> GS outputs on GFX9+ */
               s->value.u64 += shader->info.vs.num_linked_outputs;
            } else if (shader->info.gs.es_type == MESA_SHADER_TESS_EVAL) {
               /* TES -> GS outputs on GFX9+ */
               s->value.u64 += shader->info.tes.num_linked_outputs;
            }
         }

         if (shader->info.is_ngg) {
            /* GS -> FS outputs (GFX10+ NGG) */
            s->value.u64 += shader->info.outinfo.pos_exports + shader->info.outinfo.param_exports +
                            shader->info.outinfo.prim_param_exports;
         } else {
            /* GS -> FS outputs (GFX6-10.3 legacy) */
            s->value.u64 += shader->info.gs.gsvs_vertex_size / 16;
         }
         break;

      case MESA_SHADER_MESH:
         /* MS -> FS outputs */
         s->value.u64 += shader->info.outinfo.pos_exports + shader->info.outinfo.param_exports +
                         shader->info.outinfo.prim_param_exports;
         break;

      case MESA_SHADER_FRAGMENT:
         s->value.u64 += shader->info.ps.colors_written + !!shader->info.ps.writes_z +
                         !!shader->info.ps.writes_stencil + !!shader->info.ps.writes_sample_mask +
                         !!shader->info.ps.writes_mrt0_alpha;
         break;

      default:
         /* Other stages don't have IO or we are not interested in them. */
         break;
      }
   }
   ++s;

   if (shader->statistics) {
      for (unsigned i = 0; i < aco_num_statistics; i++) {
         const struct aco_compiler_statistic_info *info = &aco_statistic_infos[i];
         if (s < end) {
            desc_copy(s->name, info->name);
            desc_copy(s->description, info->desc);
            s->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
            s->value.u64 = shader->statistics[i];
         }
         ++s;
      }
   }

   if (!pStatistics)
      *pStatisticCount = s - pStatistics;
   else if (s > end) {
      *pStatisticCount = end - pStatistics;
      result = VK_INCOMPLETE;
   } else {
      *pStatisticCount = s - pStatistics;
   }

   return result;
}

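/* Copy a NUL-terminated internal representation into the user buffer, clamping to *data_size and
 * returning VK_INCOMPLETE when it does not fit.
 */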
static VkResult
radv_copy_representation(void *data, size_t *data_size, const char *src)
{
   size_t total_size = strlen(src) + 1;

   if (!data) {
      *data_size = total_size;
      return VK_SUCCESS;
   }

   size_t size = MIN2(total_size, *data_size);

   memcpy(data, src, size);
   if (size)
      *((char *)data + size - 1) = 0;
   return size < total_size ? VK_INCOMPLETE : VK_SUCCESS;
}

VKAPI_ATTR VkResult VKAPI_CALL
radv_GetPipelineExecutableInternalRepresentationsKHR(
   VkDevice _device, const VkPipelineExecutableInfoKHR *pExecutableInfo, uint32_t *pInternalRepresentationCount,
   VkPipelineExecutableInternalRepresentationKHR *pInternalRepresentations)
{
   VK_FROM_HANDLE(radv_device, device, _device);
   VK_FROM_HANDLE(radv_pipeline, pipeline, pExecutableInfo->pipeline);
   const struct radv_physical_device *pdev = radv_device_physical(device);
   gl_shader_stage stage;
   struct radv_shader *shader =
      radv_get_shader_from_executable_index(pipeline, pExecutableInfo->executableIndex, &stage);

   VkPipelineExecutableInternalRepresentationKHR *p = pInternalRepresentations;
   VkPipelineExecutableInternalRepresentationKHR *end =
      p + (pInternalRepresentations ? *pInternalRepresentationCount : 0);
   VkResult result = VK_SUCCESS;
   /* optimized NIR */
   if (p < end) {
      p->isText = true;
      desc_copy(p->name, "NIR Shader(s)");
      desc_copy(p->description, "The optimized NIR shader(s)");
      if (radv_copy_representation(p->pData, &p->dataSize, shader->nir_string) != VK_SUCCESS)
         result = VK_INCOMPLETE;
   }
   ++p;

   /* backend IR */
   if (p < end) {
      p->isText = true;
      if (radv_use_llvm_for_stage(pdev, stage)) {
         desc_copy(p->name, "LLVM IR");
         desc_copy(p->description, "The LLVM IR after some optimizations");
      } else {
         desc_copy(p->name, "ACO IR");
         desc_copy(p->description, "The ACO IR after some optimizations");
      }
      if (radv_copy_representation(p->pData, &p->dataSize, shader->ir_string) != VK_SUCCESS)
         result = VK_INCOMPLETE;
   }
   ++p;

   /* Disassembler */
   if (p < end && shader->disasm_string) {
      p->isText = true;
      desc_copy(p->name, "Assembly");
      desc_copy(p->description, "Final Assembly");
      if (radv_copy_representation(p->pData, &p->dataSize, shader->disasm_string) != VK_SUCCESS)
         result = VK_INCOMPLETE;
   }
   ++p;

   if (!pInternalRepresentations)
      *pInternalRepresentationCount = p - pInternalRepresentations;
   else if (p > end) {
      result = VK_INCOMPLETE;
      *pInternalRepresentationCount = end - pInternalRepresentations;
   } else {
      *pInternalRepresentationCount = p - pInternalRepresentations;
   }

   return result;
}

static void
vk_shader_module_finish(void *_module)
{
   struct vk_shader_module *module = _module;
   vk_object_base_finish(&module->base);
}

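/* Deep-copy the pStages array into the given ralloc context so the create info can outlive the
 * application-owned memory: shader modules (or inlined module create info), specialization info
 * and entry point names are all duplicated, and the pNext chains are dropped.
 */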
VkPipelineShaderStageCreateInfo *
radv_copy_shader_stage_create_info(struct radv_device *device, uint32_t stageCount,
                                   const VkPipelineShaderStageCreateInfo *pStages, void *mem_ctx)
{
   VkPipelineShaderStageCreateInfo *new_stages;

   size_t size = sizeof(VkPipelineShaderStageCreateInfo) * stageCount;
   new_stages = ralloc_size(mem_ctx, size);
   if (!new_stages)
      return NULL;

   if (size)
      memcpy(new_stages, pStages, size);

   for (uint32_t i = 0; i < stageCount; i++) {
      VK_FROM_HANDLE(vk_shader_module, module, new_stages[i].module);

      const VkShaderModuleCreateInfo *minfo = vk_find_struct_const(pStages[i].pNext, SHADER_MODULE_CREATE_INFO);

      if (module) {
         struct vk_shader_module *new_module = ralloc_size(mem_ctx, sizeof(struct vk_shader_module) + module->size);
         if (!new_module)
            return NULL;

         ralloc_set_destructor(new_module, vk_shader_module_finish);
         vk_object_base_init(&device->vk, &new_module->base, VK_OBJECT_TYPE_SHADER_MODULE);

         new_module->nir = NULL;
         memcpy(new_module->hash, module->hash, sizeof(module->hash));
         new_module->size = module->size;
         memcpy(new_module->data, module->data, module->size);

         module = new_module;
      } else if (minfo) {
         module = ralloc_size(mem_ctx, sizeof(struct vk_shader_module) + minfo->codeSize);
         if (!module)
            return NULL;

         vk_shader_module_init(&device->vk, module, minfo);
      }

      if (module) {
         const VkSpecializationInfo *spec = new_stages[i].pSpecializationInfo;
         if (spec) {
            VkSpecializationInfo *new_spec = ralloc(mem_ctx, VkSpecializationInfo);
            if (!new_spec)
               return NULL;

            new_spec->mapEntryCount = spec->mapEntryCount;
            uint32_t map_entries_size = sizeof(VkSpecializationMapEntry) * spec->mapEntryCount;
            new_spec->pMapEntries = ralloc_size(mem_ctx, map_entries_size);
            if (!new_spec->pMapEntries)
               return NULL;
            memcpy((void *)new_spec->pMapEntries, spec->pMapEntries, map_entries_size);

            new_spec->dataSize = spec->dataSize;
            new_spec->pData = ralloc_size(mem_ctx, spec->dataSize);
            if (!new_spec->pData)
               return NULL;
            memcpy((void *)new_spec->pData, spec->pData, spec->dataSize);

            new_stages[i].pSpecializationInfo = new_spec;
         }

         new_stages[i].module = vk_shader_module_to_handle(module);
         new_stages[i].pName = ralloc_strdup(mem_ctx, new_stages[i].pName);
         if (!new_stages[i].pName)
            return NULL;
         new_stages[i].pNext = NULL;
      }
   }

   return new_stages;
}

void
radv_pipeline_hash(const struct radv_device *device, const struct radv_pipeline_layout *pipeline_layout,
                   struct mesa_sha1 *ctx)
{
   _mesa_sha1_update(ctx, device->cache_hash, sizeof(device->cache_hash));
   if (pipeline_layout)
      _mesa_sha1_update(ctx, pipeline_layout->hash, sizeof(pipeline_layout->hash));
}

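/* Hash a single shader stage (its SPIR-V identity plus the radv per-stage key) into the given
 * SHA-1 context.
 */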
void
radv_pipeline_hash_shader_stage(VkPipelineCreateFlags2KHR pipeline_flags,
                                const VkPipelineShaderStageCreateInfo *sinfo,
                                const struct radv_shader_stage_key *stage_key, struct mesa_sha1 *ctx)
{
   unsigned char shader_sha1[SHA1_DIGEST_LENGTH];

   vk_pipeline_hash_shader_stage(pipeline_flags, sinfo, NULL, shader_sha1);

   _mesa_sha1_update(ctx, shader_sha1, sizeof(shader_sha1));
   _mesa_sha1_update(ctx, stage_key, sizeof(*stage_key));
}