1 /*
2 * Copyright © 2016 Red Hat.
3 * Copyright © 2016 Bas Nieuwenhuizen
4 *
5 * based in part on anv driver which is:
6 * Copyright © 2015 Intel Corporation
7 *
8 * SPDX-License-Identifier: MIT
9 */
10
11 #include <fcntl.h>
12
13 #ifdef MAJOR_IN_SYSMACROS
14 #include <sys/sysmacros.h>
15 #endif
16
17 #include "vk_log.h"
18 #include "vk_shader_module.h"
19
20 #include "util/disk_cache.h"
21 #include "util/hex.h"
22 #include "util/u_debug.h"
23 #include "radv_android.h"
24 #include "radv_debug.h"
25 #include "radv_entrypoints.h"
26 #include "radv_instance.h"
27 #include "radv_physical_device.h"
28 #include "radv_pipeline_rt.h"
29 #include "radv_video.h"
30 #include "radv_wsi.h"
31
32 #ifdef _WIN32
33 typedef void *drmDevicePtr;
34 #include <io.h>
35 #else
36 #include <amdgpu.h>
37 #include <xf86drm.h>
38 #include "drm-uapi/amdgpu_drm.h"
39 #include "winsys/amdgpu/radv_amdgpu_winsys_public.h"
40 #endif
41 #include "winsys/null/radv_null_winsys_public.h"
42 #include "git_sha1.h"
43
44 #if AMD_LLVM_AVAILABLE
45 #include "ac_llvm_util.h"
46 #endif
47
48 #ifdef _WIN32
49 #define RADV_SUPPORT_CALIBRATED_TIMESTAMPS 0
50 #else
51 #define RADV_SUPPORT_CALIBRATED_TIMESTAMPS 1
52 #endif
53
54 static bool
radv_perf_query_supported(const struct radv_physical_device * pdev)55 radv_perf_query_supported(const struct radv_physical_device *pdev)
56 {
57 const struct radv_instance *instance = radv_physical_device_instance(pdev);
58
59 /* SQTT / SPM interfere with the register states for perf counters, and
60 * the code has only been tested on GFX10.3 */
61 return pdev->info.gfx_level == GFX10_3 && !(instance->vk.trace_mode & RADV_TRACE_MODE_RGP);
62 }
63
64 static bool
radv_taskmesh_enabled(const struct radv_physical_device * pdev)65 radv_taskmesh_enabled(const struct radv_physical_device *pdev)
66 {
67 const struct radv_instance *instance = radv_physical_device_instance(pdev);
68
69 if (instance->debug_flags & RADV_DEBUG_NO_MESH_SHADER)
70 return false;
71
72 return pdev->use_ngg && !pdev->use_llvm && pdev->info.gfx_level >= GFX10_3 &&
73 !(instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE) && pdev->info.has_gang_submit;
74 }
75
76 static bool
radv_transfer_queue_enabled(const struct radv_physical_device * pdev)77 radv_transfer_queue_enabled(const struct radv_physical_device *pdev)
78 {
79 const struct radv_instance *instance = radv_physical_device_instance(pdev);
80
81 /* Check if the GPU has SDMA support and transfer queues are allowed. */
82 if (pdev->info.sdma_ip_version == SDMA_UNKNOWN || !pdev->info.ip[AMD_IP_SDMA].num_queues ||
83 !(instance->perftest_flags & RADV_PERFTEST_TRANSFER_QUEUE))
84 return false;
85
86 return pdev->info.gfx_level >= GFX9;
87 }
88
89 static bool
radv_vrs_attachment_enabled(const struct radv_physical_device * pdev)90 radv_vrs_attachment_enabled(const struct radv_physical_device *pdev)
91 {
92 const struct radv_instance *instance = radv_physical_device_instance(pdev);
93 return pdev->info.gfx_level >= GFX11 || !(instance->debug_flags & RADV_DEBUG_NO_HIZ);
94 }
95
96 static bool
radv_calibrated_timestamps_enabled(const struct radv_physical_device * pdev)97 radv_calibrated_timestamps_enabled(const struct radv_physical_device *pdev)
98 {
99 return RADV_SUPPORT_CALIBRATED_TIMESTAMPS && !(pdev->info.family == CHIP_RAVEN || pdev->info.family == CHIP_RAVEN2);
100 }
101
102 static bool
radv_filter_minmax_enabled(const struct radv_physical_device * pdev)103 radv_filter_minmax_enabled(const struct radv_physical_device *pdev)
104 {
105 /* Tahiti and Verde only: reduction mode is unsupported due to a bug
106 * (it might work sometimes, but that's not enough)
107 */
108 return !(pdev->info.family == CHIP_TAHITI || pdev->info.family == CHIP_VERDE);
109 }
110
111 bool
radv_enable_rt(const struct radv_physical_device * pdev,bool rt_pipelines)112 radv_enable_rt(const struct radv_physical_device *pdev, bool rt_pipelines)
113 {
114 if (pdev->info.gfx_level < GFX10_3 && !radv_emulate_rt(pdev))
115 return false;
116
117 if (rt_pipelines && pdev->use_llvm)
118 return false;
119
120 return true;
121 }
122
123 bool
radv_emulate_rt(const struct radv_physical_device * pdev)124 radv_emulate_rt(const struct radv_physical_device *pdev)
125 {
126 const struct radv_instance *instance = radv_physical_device_instance(pdev);
127 return instance->perftest_flags & RADV_PERFTEST_EMULATE_RT;
128 }
129
130 static bool
radv_is_conformant(const struct radv_physical_device * pdev)131 radv_is_conformant(const struct radv_physical_device *pdev)
132 {
133 return pdev->info.gfx_level >= GFX8 && pdev->info.gfx_level <= GFX10_3;
134 }
135
/* Decode 2*length hexadecimal characters from 'in' into 'length' raw bytes
 * in 'out'.  Both upper- and lower-case digits are accepted; 'out' is
 * cleared first and nibbles are OR'ed in high-nibble-first. */
static void
parse_hex(char *out, const char *in, unsigned length)
{
   memset(out, 0, length);

   for (unsigned i = 0; i < 2 * length; ++i) {
      const char c = in[i];
      unsigned nibble;

      if (c <= '9')
         nibble = c - '0';
      else if (c >= 'a')
         nibble = c - 'a' + 10;
      else
         nibble = c - 'A' + 10;

      /* Even positions are the high nibble, odd positions the low nibble. */
      out[i / 2] |= nibble << ((i & 1) ? 0 : 4);
   }
}
147
148 static void
radv_physical_device_init_cache_key(struct radv_physical_device * pdev)149 radv_physical_device_init_cache_key(struct radv_physical_device *pdev)
150 {
151 const struct radv_instance *instance = radv_physical_device_instance(pdev);
152 struct radv_physical_device_cache_key *key = &pdev->cache_key;
153
154 key->family = pdev->info.family;
155 key->ptr_size = sizeof(void *);
156 key->conformant_trunc_coord = pdev->info.conformant_trunc_coord;
157
158 key->clear_lds = instance->drirc.clear_lds;
159 key->cs_wave32 = pdev->cs_wave_size == 32;
160 key->disable_aniso_single_level = instance->drirc.disable_aniso_single_level && pdev->info.gfx_level < GFX8;
161 key->disable_shrink_image_store = instance->drirc.disable_shrink_image_store;
162 key->disable_sinking_load_input_fs = instance->drirc.disable_sinking_load_input_fs;
163 key->dual_color_blend_by_location = instance->drirc.dual_color_blend_by_location;
164 key->emulate_rt = !!(instance->perftest_flags & RADV_PERFTEST_EMULATE_RT);
165 key->ge_wave32 = pdev->ge_wave_size == 32;
166 key->invariant_geom = !!(instance->debug_flags & RADV_DEBUG_INVARIANT_GEOM);
167 key->lower_discard_to_demote = !!(instance->debug_flags & RADV_DEBUG_DISCARD_TO_DEMOTE);
168 key->no_fmask = !!(instance->debug_flags & RADV_DEBUG_NO_FMASK);
169 key->no_ngg_gs = !!(instance->debug_flags & RADV_DEBUG_NO_NGG_GS);
170 key->no_rt = !!(instance->debug_flags & RADV_DEBUG_NO_RT);
171 key->ps_wave32 = pdev->ps_wave_size == 32;
172 key->rt_wave64 = pdev->rt_wave_size == 64;
173 key->split_fma = !!(instance->debug_flags & RADV_DEBUG_SPLIT_FMA);
174 key->ssbo_non_uniform = instance->drirc.ssbo_non_uniform;
175 key->tex_non_uniform = instance->drirc.tex_non_uniform;
176 key->use_llvm = pdev->use_llvm;
177 key->use_ngg = pdev->use_ngg;
178 key->use_ngg_culling = pdev->use_ngg_culling;
179 }
180
/* Compute the pipeline-cache UUID: a hash of the driver build (or the
 * RADV_BUILD_ID_OVERRIDE hex string), the LLVM build when the LLVM backend
 * is in use, and the device cache key.  Returns 0 on success, -1 when a
 * build identifier could not be obtained. */
static int
radv_device_get_cache_uuid(struct radv_physical_device *pdev, void *uuid)
{
   struct mesa_sha1 ctx;
   unsigned char sha1[20];

   memset(uuid, 0, VK_UUID_SIZE);
   _mesa_sha1_init(&ctx);

#ifdef RADV_BUILD_ID_OVERRIDE
   {
      /* The override is a hex string; hash its decoded raw bytes. */
      unsigned size = strlen(RADV_BUILD_ID_OVERRIDE) / 2;
      char *data = alloca(size);
      parse_hex(data, RADV_BUILD_ID_OVERRIDE, size);
      _mesa_sha1_update(&ctx, data, size);
   }
#else
   /* Hash the build id of the binary containing this function. */
   if (!disk_cache_get_function_identifier(radv_device_get_cache_uuid, &ctx))
      return -1;
#endif

#if AMD_LLVM_AVAILABLE
   /* With the LLVM backend, the LLVM library build also affects codegen. */
   if (pdev->use_llvm && !disk_cache_get_function_identifier(LLVMInitializeAMDGPUTargetInfo, &ctx))
      return -1;
#endif

   _mesa_sha1_update(&ctx, &pdev->cache_key, sizeof(pdev->cache_key));
   _mesa_sha1_final(&ctx, sha1);

   /* Truncate the 20-byte SHA-1 to VK_UUID_SIZE (16 bytes). */
   memcpy(uuid, sha1, VK_UUID_SIZE);
   return 0;
}
213
/* Fill 'uuid' (VK_UUID_SIZE bytes) with the common AMD driver UUID. */
static void
radv_get_driver_uuid(void *uuid)
{
   ac_compute_driver_uuid(uuid, VK_UUID_SIZE);
}
219
/* Fill 'uuid' (VK_UUID_SIZE bytes) with a UUID derived from the GPU info. */
static void
radv_get_device_uuid(const struct radeon_info *gpu_info, void *uuid)
{
   ac_compute_device_uuid(gpu_info, uuid, VK_UUID_SIZE);
}
225
/* Build the mapping from Vulkan queue-family index to RADV queue type.
 * NOTE: the order of the checks below fixes the queue family indices
 * reported to applications: GENERAL is always family 0, then COMPUTE,
 * VIDEO_DEC, TRANSFER, VIDEO_ENC and SPARSE follow for each queue type
 * that is actually available and enabled. */
static void
radv_physical_device_init_queue_table(struct radv_physical_device *pdev)
{
   const struct radv_instance *instance = radv_physical_device_instance(pdev);
   int idx = 0;

   /* The general (graphics) queue always exists and is family 0. */
   pdev->vk_queue_to_radv[idx] = RADV_QUEUE_GENERAL;
   idx++;

   /* Initialize the remaining slots to an out-of-range sentinel
    * (RADV_MAX_QUEUE_FAMILIES + 1 is never a valid queue type). */
   for (unsigned i = 1; i < RADV_MAX_QUEUE_FAMILIES; i++)
      pdev->vk_queue_to_radv[i] = RADV_MAX_QUEUE_FAMILIES + 1;

   if (pdev->info.ip[AMD_IP_COMPUTE].num_queues > 0 && !(instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE)) {
      pdev->vk_queue_to_radv[idx] = RADV_QUEUE_COMPUTE;
      idx++;
   }

   if (pdev->video_decode_enabled) {
      if (pdev->info.ip[pdev->vid_decode_ip].num_queues > 0) {
         pdev->vk_queue_to_radv[idx] = RADV_QUEUE_VIDEO_DEC;
         idx++;
      }
   }

   if (radv_transfer_queue_enabled(pdev)) {
      pdev->vk_queue_to_radv[idx] = RADV_QUEUE_TRANSFER;
      idx++;
   }

   if (pdev->video_encode_enabled) {
      if (pdev->info.ip[AMD_IP_VCN_ENC].num_queues > 0) {
         pdev->vk_queue_to_radv[idx] = RADV_QUEUE_VIDEO_ENC;
         idx++;
      }
   }

   if (radv_sparse_queue_enabled(pdev)) {
      pdev->vk_queue_to_radv[idx] = RADV_QUEUE_SPARSE;
      idx++;
   }

   pdev->num_queues = idx;
}
268
/* Bitmask of the Vulkan memory heaps exposed by this device; recorded in
 * pdev->heaps by radv_physical_device_init_mem_types(). */
enum radv_heap {
   RADV_HEAP_VRAM = 1 << 0,     /* device-local VRAM, not CPU-visible */
   RADV_HEAP_GTT = 1 << 1,      /* system memory (GART) */
   RADV_HEAP_VRAM_VIS = 1 << 2, /* CPU-visible portion of VRAM */
   RADV_HEAP_MAX = 1 << 3,      /* upper bound marker, not a real heap */
};
275
276 static uint64_t
radv_get_adjusted_vram_size(struct radv_physical_device * pdev)277 radv_get_adjusted_vram_size(struct radv_physical_device *pdev)
278 {
279 const struct radv_instance *instance = radv_physical_device_instance(pdev);
280 int ov = instance->drirc.override_vram_size;
281 if (ov >= 0)
282 return MIN2((uint64_t)pdev->info.vram_size_kb * 1024, (uint64_t)ov << 20);
283 return (uint64_t)pdev->info.vram_size_kb * 1024;
284 }
285
286 static uint64_t
radv_get_visible_vram_size(struct radv_physical_device * pdev)287 radv_get_visible_vram_size(struct radv_physical_device *pdev)
288 {
289 return MIN2(radv_get_adjusted_vram_size(pdev), (uint64_t)pdev->info.vram_vis_size_kb * 1024);
290 }
291
292 static uint64_t
radv_get_vram_size(struct radv_physical_device * pdev)293 radv_get_vram_size(struct radv_physical_device *pdev)
294 {
295 uint64_t total_size = radv_get_adjusted_vram_size(pdev);
296 return total_size - MIN2(total_size, (uint64_t)pdev->info.vram_vis_size_kb * 1024);
297 }
298
/* Build the VkPhysicalDeviceMemoryProperties heap and memory-type tables,
 * together with the parallel radv-side memory_domains[]/memory_flags[]
 * arrays (indexed by memory type).  NOTE: the order in which heaps and
 * types are appended below is visible to applications through their
 * indices, so it must not be changed carelessly. */
static void
radv_physical_device_init_mem_types(struct radv_physical_device *pdev)
{
   const struct radv_instance *instance = radv_physical_device_instance(pdev);
   uint64_t visible_vram_size = radv_get_visible_vram_size(pdev);
   uint64_t vram_size = radv_get_vram_size(pdev);
   uint64_t gtt_size = (uint64_t)pdev->info.gart_size_kb * 1024;
   int vram_index = -1, visible_vram_index = -1, gart_index = -1;

   pdev->memory_properties.memoryHeapCount = 0;
   pdev->heaps = 0;

   /* APUs share one physical memory pool; rebalance the reported split. */
   if (!pdev->info.has_dedicated_vram) {
      const uint64_t total_size = gtt_size + visible_vram_size;

      if (instance->drirc.enable_unified_heap_on_apu) {
         /* Some applications seem better when the driver exposes only one heap of VRAM on APUs. */
         visible_vram_size = total_size;
         gtt_size = 0;
      } else {
         /* On APUs, the carveout is usually too small for games that request a minimum VRAM size
          * greater than it. To workaround this, we compute the total available memory size (GTT +
          * visible VRAM size) and report 2/3 as VRAM and 1/3 as GTT.
          */
         visible_vram_size = align64((total_size * 2) / 3, pdev->info.gart_page_size);
         gtt_size = total_size - visible_vram_size;
      }

      /* APUs expose no CPU-invisible VRAM heap. */
      vram_size = 0;
   }

   /* Heap order: invisible VRAM, GTT, visible VRAM. */

   /* Only get a VRAM heap if it is significant, not if it is a 16 MiB
    * remainder above visible VRAM. */
   if (vram_size > 0 && vram_size * 9 >= visible_vram_size) {
      vram_index = pdev->memory_properties.memoryHeapCount++;
      pdev->heaps |= RADV_HEAP_VRAM;
      pdev->memory_properties.memoryHeaps[vram_index] = (VkMemoryHeap){
         .size = vram_size,
         .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
      };
   }

   if (gtt_size > 0) {
      gart_index = pdev->memory_properties.memoryHeapCount++;
      pdev->heaps |= RADV_HEAP_GTT;
      pdev->memory_properties.memoryHeaps[gart_index] = (VkMemoryHeap){
         .size = gtt_size,
         .flags = 0,
      };
   }

   if (visible_vram_size) {
      visible_vram_index = pdev->memory_properties.memoryHeapCount++;
      pdev->heaps |= RADV_HEAP_VRAM_VIS;
      pdev->memory_properties.memoryHeaps[visible_vram_index] = (VkMemoryHeap){
         .size = visible_vram_size,
         .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
      };
   }

   unsigned type_count = 0;

   /* DEVICE_LOCAL types (plus a RADEON_FLAG_32BIT-restricted variant). */
   if (vram_index >= 0 || visible_vram_index >= 0) {
      pdev->memory_domains[type_count] = RADEON_DOMAIN_VRAM;
      pdev->memory_flags[type_count] = RADEON_FLAG_NO_CPU_ACCESS;
      pdev->memory_properties.memoryTypes[type_count++] = (VkMemoryType){
         .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
         .heapIndex = vram_index >= 0 ? vram_index : visible_vram_index,
      };

      pdev->memory_domains[type_count] = RADEON_DOMAIN_VRAM;
      pdev->memory_flags[type_count] = RADEON_FLAG_NO_CPU_ACCESS | RADEON_FLAG_32BIT;
      pdev->memory_properties.memoryTypes[type_count++] = (VkMemoryType){
         .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
         .heapIndex = vram_index >= 0 ? vram_index : visible_vram_index,
      };
   }

   /* Write-combined GTT: HOST_VISIBLE | HOST_COHERENT but not cached. */
   if (gart_index >= 0) {
      pdev->memory_domains[type_count] = RADEON_DOMAIN_GTT;
      pdev->memory_flags[type_count] = RADEON_FLAG_GTT_WC | RADEON_FLAG_CPU_ACCESS;
      pdev->memory_properties.memoryTypes[type_count++] = (VkMemoryType){
         .propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
         .heapIndex = gart_index,
      };
   }

   /* CPU-mappable VRAM (plus a RADEON_FLAG_32BIT-restricted variant). */
   if (visible_vram_index >= 0) {
      pdev->memory_domains[type_count] = RADEON_DOMAIN_VRAM;
      pdev->memory_flags[type_count] = RADEON_FLAG_CPU_ACCESS;
      pdev->memory_properties.memoryTypes[type_count++] = (VkMemoryType){
         .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
                          VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
         .heapIndex = visible_vram_index,
      };

      pdev->memory_domains[type_count] = RADEON_DOMAIN_VRAM;
      pdev->memory_flags[type_count] = RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_32BIT;
      pdev->memory_properties.memoryTypes[type_count++] = (VkMemoryType){
         .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
                          VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
         .heapIndex = visible_vram_index,
      };
   }

   /* Cached GTT (plus a RADEON_FLAG_32BIT-restricted variant). */
   if (gart_index >= 0) {
      pdev->memory_domains[type_count] = RADEON_DOMAIN_GTT;
      pdev->memory_flags[type_count] = RADEON_FLAG_CPU_ACCESS;
      pdev->memory_properties.memoryTypes[type_count++] = (VkMemoryType){
         .propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
                          VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
         .heapIndex = gart_index,
      };

      pdev->memory_domains[type_count] = RADEON_DOMAIN_GTT;
      pdev->memory_flags[type_count] = RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_32BIT;
      pdev->memory_properties.memoryTypes[type_count++] = (VkMemoryType){
         .propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
                          VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
         .heapIndex = gart_index,
      };
   }
   pdev->memory_properties.memoryTypeCount = type_count;

   /* Duplicate suitable types with an L2-uncached (device-coherent) variant.
    * The first clause matches any type with HOST_COHERENT or HOST_VISIBLE
    * set; the second matches the pure DEVICE_LOCAL type.  32-bit types are
    * skipped. */
   if (pdev->info.has_l2_uncached) {
      for (int i = 0; i < pdev->memory_properties.memoryTypeCount; i++) {
         VkMemoryType mem_type = pdev->memory_properties.memoryTypes[i];

         if (((mem_type.propertyFlags & (VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)) ||
              mem_type.propertyFlags == VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) &&
             !(pdev->memory_flags[i] & RADEON_FLAG_32BIT)) {

            VkMemoryPropertyFlags property_flags = mem_type.propertyFlags | VK_MEMORY_PROPERTY_DEVICE_COHERENT_BIT_AMD |
                                                   VK_MEMORY_PROPERTY_DEVICE_UNCACHED_BIT_AMD;

            pdev->memory_domains[type_count] = pdev->memory_domains[i];
            pdev->memory_flags[type_count] = pdev->memory_flags[i] | RADEON_FLAG_VA_UNCACHED;
            pdev->memory_properties.memoryTypes[type_count++] = (VkMemoryType){
               .propertyFlags = property_flags,
               .heapIndex = mem_type.heapIndex,
            };
         }
      }
      pdev->memory_properties.memoryTypeCount = type_count;
   }

   /* Record which memory types are restricted to 32-bit addressing. */
   for (unsigned i = 0; i < type_count; ++i) {
      if (pdev->memory_flags[i] & RADEON_FLAG_32BIT)
         pdev->memory_types_32bit |= BITFIELD_BIT(i);
   }
}
449
450 uint32_t
radv_find_memory_index(const struct radv_physical_device * pdev,VkMemoryPropertyFlags flags)451 radv_find_memory_index(const struct radv_physical_device *pdev, VkMemoryPropertyFlags flags)
452 {
453 const VkPhysicalDeviceMemoryProperties *mem_properties = &pdev->memory_properties;
454 for (uint32_t i = 0; i < mem_properties->memoryTypeCount; ++i) {
455 if (mem_properties->memoryTypes[i].propertyFlags == flags) {
456 return i;
457 }
458 }
459 unreachable("invalid memory properties");
460 }
461
462 static void
radv_get_binning_settings(const struct radv_physical_device * pdev,struct radv_binning_settings * settings)463 radv_get_binning_settings(const struct radv_physical_device *pdev, struct radv_binning_settings *settings)
464 {
465 if ((pdev->info.has_dedicated_vram && pdev->info.max_render_backends > 4) || pdev->info.gfx_level >= GFX10) {
466 /* Using higher settings on GFX10+ can cause random GPU hangs. */
467 settings->context_states_per_bin = 1;
468 settings->persistent_states_per_bin = 1;
469 } else {
470 settings->context_states_per_bin = pdev->info.has_gfx9_scissor_bug ? 1 : 3;
471 settings->persistent_states_per_bin = 1;
472 }
473
474 settings->fpovs_per_batch = 63;
475 }
476
/* Fill 'out_ext' with the set of device extensions this physical device
 * supports.  Each entry is either unconditionally true or gated on the
 * hardware generation, the compiler backend (LLVM vs. ACO), debug/perftest
 * flags, or build-time options. */
static void
radv_physical_device_get_supported_extensions(const struct radv_physical_device *pdev,
                                              struct vk_device_extension_table *out_ext)
{
   const struct radv_instance *instance = radv_physical_device_instance(pdev);
   const struct vk_device_extension_table ext = {
      .KHR_8bit_storage = true,
      .KHR_16bit_storage = true,
      .KHR_acceleration_structure = radv_enable_rt(pdev, false),
      .KHR_calibrated_timestamps = radv_calibrated_timestamps_enabled(pdev),
      .KHR_compute_shader_derivatives = true,
      .KHR_cooperative_matrix = pdev->info.gfx_level >= GFX11 && !pdev->use_llvm,
      .KHR_bind_memory2 = true,
      .KHR_buffer_device_address = true,
      .KHR_copy_commands2 = true,
      .KHR_create_renderpass2 = true,
      .KHR_dedicated_allocation = true,
      .KHR_deferred_host_operations = true,
      .KHR_depth_stencil_resolve = true,
      .KHR_descriptor_update_template = true,
      .KHR_device_group = true,
      .KHR_draw_indirect_count = true,
      .KHR_driver_properties = true,
      .KHR_dynamic_rendering = true,
      .KHR_dynamic_rendering_local_read = true,
      .KHR_external_fence = true,
      .KHR_external_fence_fd = true,
      .KHR_external_memory = true,
      .KHR_external_memory_fd = true,
      .KHR_external_semaphore = true,
      .KHR_external_semaphore_fd = true,
      .KHR_format_feature_flags2 = true,
      .KHR_fragment_shader_barycentric = pdev->info.gfx_level >= GFX10_3,
      .KHR_fragment_shading_rate = pdev->info.gfx_level >= GFX10_3,
      .KHR_get_memory_requirements2 = true,
      .KHR_global_priority = true,
      .KHR_image_format_list = true,
      .KHR_imageless_framebuffer = true,
#ifdef RADV_USE_WSI_PLATFORM
      .KHR_incremental_present = true,
#endif
      .KHR_index_type_uint8 = pdev->info.gfx_level >= GFX8,
      .KHR_line_rasterization = true,
      .KHR_load_store_op_none = true,
      .KHR_maintenance1 = true,
      .KHR_maintenance2 = true,
      .KHR_maintenance3 = true,
      .KHR_maintenance4 = true,
      .KHR_maintenance5 = true,
      .KHR_maintenance6 = true,
      .KHR_maintenance7 = true,
      .KHR_map_memory2 = true,
      .KHR_multiview = true,
      .KHR_performance_query = radv_perf_query_supported(pdev),
      .KHR_pipeline_binary = true,
      .KHR_pipeline_executable_properties = true,
      .KHR_pipeline_library = !pdev->use_llvm,
      /* Hide these behind dri configs for now since we cannot implement it reliably on
       * all surfaces yet. There is no surface capability query for present wait/id,
       * but the feature is useful enough to hide behind an opt-in mechanism for now.
       * If the instance only enables surface extensions that unconditionally support present wait,
       * we can also expose the extension that way. */
      .KHR_present_id =
         instance->drirc.enable_khr_present_wait || wsi_common_vk_instance_supports_present_wait(&instance->vk),
      .KHR_present_wait =
         instance->drirc.enable_khr_present_wait || wsi_common_vk_instance_supports_present_wait(&instance->vk),
      .KHR_push_descriptor = true,
      .KHR_ray_query = radv_enable_rt(pdev, false),
      .KHR_ray_tracing_maintenance1 = radv_enable_rt(pdev, false),
      .KHR_ray_tracing_pipeline = radv_enable_rt(pdev, true),
      .KHR_ray_tracing_position_fetch = radv_enable_rt(pdev, false),
      .KHR_relaxed_block_layout = true,
      .KHR_sampler_mirror_clamp_to_edge = true,
      .KHR_sampler_ycbcr_conversion = true,
      .KHR_separate_depth_stencil_layouts = true,
      .KHR_shader_atomic_int64 = true,
      .KHR_shader_clock = true,
      .KHR_shader_draw_parameters = true,
      .KHR_shader_expect_assume = true,
      .KHR_shader_float16_int8 = true,
      .KHR_shader_float_controls = true,
      .KHR_shader_float_controls2 = true,
      .KHR_shader_integer_dot_product = true,
      .KHR_shader_maximal_reconvergence = true,
      .KHR_shader_non_semantic_info = true,
      .KHR_shader_quad_control = true,
      .KHR_shader_relaxed_extended_instruction = true,
      .KHR_shader_subgroup_extended_types = true,
      .KHR_shader_subgroup_rotate = true,
      .KHR_shader_subgroup_uniform_control_flow = true,
      .KHR_shader_terminate_invocation = true,
      .KHR_spirv_1_4 = true,
      .KHR_storage_buffer_storage_class = true,
#ifdef RADV_USE_WSI_PLATFORM
      .KHR_swapchain = true,
      .KHR_swapchain_mutable_format = true,
#endif
      .KHR_synchronization2 = true,
      .KHR_timeline_semaphore = true,
      .KHR_uniform_buffer_standard_layout = true,
      .KHR_variable_pointers = true,
      .KHR_vertex_attribute_divisor = true,
      .KHR_video_maintenance1 = true,
      .KHR_video_queue = pdev->video_decode_enabled || pdev->video_encode_enabled,
      /* AV1 decode needs VCN 3.0+, except VCN 3.0.33 which lacks it. */
      .KHR_video_decode_av1 = (pdev->info.vcn_ip_version >= VCN_3_0_0 && pdev->info.vcn_ip_version != VCN_3_0_33 &&
                               VIDEO_CODEC_AV1DEC && pdev->video_decode_enabled),
      .KHR_video_decode_queue = pdev->video_decode_enabled,
      .KHR_video_decode_h264 = VIDEO_CODEC_H264DEC && pdev->video_decode_enabled,
      .KHR_video_decode_h265 = VIDEO_CODEC_H265DEC && pdev->video_decode_enabled,
      .KHR_video_encode_h264 = VIDEO_CODEC_H264ENC && pdev->video_encode_enabled,
      .KHR_video_encode_h265 = VIDEO_CODEC_H265ENC && pdev->video_encode_enabled,
      .KHR_video_encode_queue = pdev->video_encode_enabled,
      .KHR_vulkan_memory_model = true,
      .KHR_workgroup_memory_explicit_layout = true,
      .KHR_zero_initialize_workgroup_memory = true,
      .EXT_4444_formats = true,
      .EXT_attachment_feedback_loop_dynamic_state = true,
      .EXT_attachment_feedback_loop_layout = true,
      .EXT_border_color_swizzle = pdev->info.gfx_level >= GFX10,
      .EXT_buffer_device_address = true,
      .EXT_calibrated_timestamps = radv_calibrated_timestamps_enabled(pdev),
      .EXT_color_write_enable = true,
      .EXT_conditional_rendering = true,
      .EXT_conservative_rasterization = pdev->info.gfx_level >= GFX9,
      .EXT_custom_border_color = true,
      .EXT_debug_marker = instance->vk.trace_mode & RADV_TRACE_MODE_RGP,
      .EXT_depth_bias_control = true,
      .EXT_depth_clamp_zero_one = true,
      .EXT_depth_clip_control = true,
      .EXT_depth_clip_enable = true,
      .EXT_depth_range_unrestricted = true,
      .EXT_descriptor_buffer = true,
      .EXT_descriptor_indexing = true,
      .EXT_device_address_binding_report = true,
      .EXT_device_fault = pdev->info.has_gpuvm_fault_query,
      .EXT_discard_rectangles = true,
#ifdef VK_USE_PLATFORM_DISPLAY_KHR
      .EXT_display_control = true,
#endif
      .EXT_dynamic_rendering_unused_attachments = true,
      .EXT_extended_dynamic_state = true,
      .EXT_extended_dynamic_state2 = true,
      .EXT_extended_dynamic_state3 = true,
      .EXT_external_memory_acquire_unmodified = true,
      .EXT_external_memory_dma_buf = true,
      .EXT_external_memory_host = pdev->info.has_userptr,
      .EXT_fragment_shader_interlock = radv_has_pops(pdev),
      .EXT_global_priority = true,
      .EXT_global_priority_query = true,
      .EXT_graphics_pipeline_library = !pdev->use_llvm && !(instance->debug_flags & RADV_DEBUG_NO_GPL),
      .EXT_host_query_reset = true,
      .EXT_image_2d_view_of_3d = true,
      .EXT_image_compression_control = true,
      .EXT_image_drm_format_modifier = pdev->info.gfx_level >= GFX9,
      .EXT_image_robustness = true,
      .EXT_image_sliced_view_of_3d = pdev->info.gfx_level >= GFX10,
      .EXT_image_view_min_lod = true,
      .EXT_index_type_uint8 = pdev->info.gfx_level >= GFX8,
      .EXT_inline_uniform_block = true,
      .EXT_legacy_vertex_attributes = !pdev->use_llvm,
      .EXT_line_rasterization = true,
      .EXT_load_store_op_none = true,
      .EXT_map_memory_placed = true,
      .EXT_memory_budget = true,
      .EXT_memory_priority = true,
      .EXT_mesh_shader = radv_taskmesh_enabled(pdev),
      .EXT_multi_draw = true,
      .EXT_mutable_descriptor_type = true, /* Trivial promotion from VALVE. */
      .EXT_nested_command_buffer = true,
      .EXT_non_seamless_cube_map = true,
      .EXT_pci_bus_info = true,
#ifndef _WIN32
      .EXT_physical_device_drm = true,
#endif
      .EXT_pipeline_creation_cache_control = true,
      .EXT_pipeline_creation_feedback = true,
      .EXT_pipeline_library_group_handles = radv_enable_rt(pdev, true),
      .EXT_pipeline_robustness = !pdev->use_llvm,
      .EXT_post_depth_coverage = pdev->info.gfx_level >= GFX10,
      .EXT_primitive_topology_list_restart = true,
      .EXT_primitives_generated_query = true,
      .EXT_private_data = true,
      .EXT_provoking_vertex = true,
      .EXT_queue_family_foreign = true,
      .EXT_robustness2 = true,
      .EXT_sample_locations = pdev->info.gfx_level < GFX10,
      .EXT_sampler_filter_minmax = radv_filter_minmax_enabled(pdev),
      .EXT_scalar_block_layout = pdev->info.gfx_level >= GFX7,
      .EXT_separate_stencil_usage = true,
      .EXT_shader_atomic_float = true,
      .EXT_shader_atomic_float2 = true,
      .EXT_shader_demote_to_helper_invocation = true,
      .EXT_shader_image_atomic_int64 = true,
      .EXT_shader_module_identifier = true,
      .EXT_shader_object = !pdev->use_llvm && !(instance->debug_flags & RADV_DEBUG_NO_ESO),
      .EXT_shader_replicated_composites = true,
      .EXT_shader_stencil_export = true,
      .EXT_shader_subgroup_ballot = true,
      .EXT_shader_subgroup_vote = true,
      .EXT_shader_viewport_index_layer = true,
      .EXT_subgroup_size_control = true,
#ifdef RADV_USE_WSI_PLATFORM
      .EXT_swapchain_maintenance1 = true,
#endif
      .EXT_texel_buffer_alignment = true,
      .EXT_tooling_info = true,
      .EXT_transform_feedback = true,
      .EXT_vertex_attribute_divisor = true,
      .EXT_vertex_input_dynamic_state = !pdev->use_llvm,
      .EXT_ycbcr_image_arrays = true,
      .AMD_buffer_marker = true,
      .AMD_device_coherent_memory = true,
      .AMD_draw_indirect_count = true,
      .AMD_gcn_shader = true,
      .AMD_gpu_shader_half_float = pdev->info.has_packed_math_16bit,
      .AMD_gpu_shader_int16 = pdev->info.has_packed_math_16bit,
      .AMD_memory_overallocation_behavior = true,
      .AMD_mixed_attachment_samples = true,
      .AMD_rasterization_order = pdev->info.has_out_of_order_rast,
      .AMD_shader_ballot = true,
      .AMD_shader_core_properties = true,
      .AMD_shader_core_properties2 = true,
      .AMD_shader_early_and_late_fragment_tests = true,
      .AMD_shader_explicit_vertex_parameter = true,
      .AMD_shader_fragment_mask = pdev->use_fmask,
      .AMD_shader_image_load_store_lod = true,
      .AMD_shader_trinary_minmax = true,
      .AMD_texture_gather_bias_lod = pdev->info.gfx_level < GFX11,
#if DETECT_OS_ANDROID
      .ANDROID_external_memory_android_hardware_buffer = RADV_SUPPORT_ANDROID_HARDWARE_BUFFER,
      .ANDROID_native_buffer = true,
#endif
      .GOOGLE_decorate_string = true,
      .GOOGLE_hlsl_functionality1 = true,
      .GOOGLE_user_type = true,
      .INTEL_shader_integer_functions2 = true,
      .MESA_image_alignment_control = pdev->info.gfx_level >= GFX9 && pdev->info.gfx_level <= GFX11_5,
      .NV_compute_shader_derivatives = true,
      .NV_device_generated_commands = instance->drirc.enable_dgc,
      .NV_device_generated_commands_compute = instance->drirc.enable_dgc,
      /* Undocumented extension purely for vkd3d-proton. This check is to prevent anyone else from
       * using it.
       */
      .VALVE_descriptor_set_host_mapping =
         pdev->vk.instance->app_info.engine_name && strcmp(pdev->vk.instance->app_info.engine_name, "vkd3d") == 0,
      .VALVE_mutable_descriptor_type = true,
   };
   *out_ext = ext;
}
726
727 static void
radv_physical_device_get_features(const struct radv_physical_device * pdev,struct vk_features * features)728 radv_physical_device_get_features(const struct radv_physical_device *pdev, struct vk_features *features)
729 {
730 const struct radv_instance *instance = radv_physical_device_instance(pdev);
731 bool taskmesh_en = radv_taskmesh_enabled(pdev);
732 bool has_perf_query = radv_perf_query_supported(pdev);
733 bool has_shader_image_float_minmax = pdev->info.gfx_level != GFX8 && pdev->info.gfx_level != GFX9 &&
734 pdev->info.gfx_level != GFX11 && pdev->info.gfx_level != GFX11_5;
735 bool has_fragment_shader_interlock = radv_has_pops(pdev);
736
737 *features = (struct vk_features){
738 /* Vulkan 1.0 */
739 .robustBufferAccess = true,
740 .fullDrawIndexUint32 = true,
741 .imageCubeArray = true,
742 .independentBlend = true,
743 .geometryShader = true,
744 .tessellationShader = true,
745 .sampleRateShading = true,
746 .dualSrcBlend = true,
747 .logicOp = true,
748 .multiDrawIndirect = true,
749 .drawIndirectFirstInstance = true,
750 .depthClamp = true,
751 .depthBiasClamp = true,
752 .fillModeNonSolid = true,
753 .depthBounds = true,
754 .wideLines = true,
755 .largePoints = true,
756 .alphaToOne = true,
757 .multiViewport = true,
758 .samplerAnisotropy = true,
759 .textureCompressionETC2 = pdev->info.has_etc_support || pdev->emulate_etc2,
760 .textureCompressionASTC_LDR = pdev->emulate_astc,
761 .textureCompressionBC = true,
762 .occlusionQueryPrecise = true,
763 .pipelineStatisticsQuery = true,
764 .vertexPipelineStoresAndAtomics = true,
765 .fragmentStoresAndAtomics = true,
766 .shaderTessellationAndGeometryPointSize = true,
767 .shaderImageGatherExtended = true,
768 .shaderStorageImageExtendedFormats = true,
769 .shaderStorageImageMultisample = true,
770 .shaderUniformBufferArrayDynamicIndexing = true,
771 .shaderSampledImageArrayDynamicIndexing = true,
772 .shaderStorageBufferArrayDynamicIndexing = true,
773 .shaderStorageImageArrayDynamicIndexing = true,
774 .shaderStorageImageReadWithoutFormat = true,
775 .shaderStorageImageWriteWithoutFormat = true,
776 .shaderClipDistance = true,
777 .shaderCullDistance = true,
778 .shaderFloat64 = true,
779 .shaderInt64 = true,
780 .shaderInt16 = true,
781 .sparseBinding = true,
782 .sparseResidencyBuffer = pdev->info.family >= CHIP_POLARIS10,
783 .sparseResidencyImage2D = pdev->info.family >= CHIP_POLARIS10,
784 .sparseResidencyImage3D = pdev->info.family >= CHIP_POLARIS10,
785 .sparseResidencyAliased = pdev->info.family >= CHIP_POLARIS10,
786 .variableMultisampleRate = true,
787 .shaderResourceMinLod = true,
788 .shaderResourceResidency = true,
789 .inheritedQueries = true,
790
791 /* Vulkan 1.1 */
792 .storageBuffer16BitAccess = true,
793 .uniformAndStorageBuffer16BitAccess = true,
794 .storagePushConstant16 = true,
795 .storageInputOutput16 = pdev->info.has_packed_math_16bit,
796 .multiview = true,
797 .multiviewGeometryShader = true,
798 .multiviewTessellationShader = true,
799 .variablePointersStorageBuffer = true,
800 .variablePointers = true,
801 .protectedMemory = false,
802 .samplerYcbcrConversion = true,
803 .shaderDrawParameters = true,
804
805 /* Vulkan 1.2 */
806 .samplerMirrorClampToEdge = true,
807 .drawIndirectCount = true,
808 .storageBuffer8BitAccess = true,
809 .uniformAndStorageBuffer8BitAccess = true,
810 .storagePushConstant8 = true,
811 .shaderBufferInt64Atomics = true,
812 .shaderSharedInt64Atomics = true,
813 .shaderFloat16 = pdev->info.has_packed_math_16bit,
814 .shaderInt8 = true,
815
816 .descriptorIndexing = true,
817 .shaderInputAttachmentArrayDynamicIndexing = true,
818 .shaderUniformTexelBufferArrayDynamicIndexing = true,
819 .shaderStorageTexelBufferArrayDynamicIndexing = true,
820 .shaderUniformBufferArrayNonUniformIndexing = true,
821 .shaderSampledImageArrayNonUniformIndexing = true,
822 .shaderStorageBufferArrayNonUniformIndexing = true,
823 .shaderStorageImageArrayNonUniformIndexing = true,
824 .shaderInputAttachmentArrayNonUniformIndexing = true,
825 .shaderUniformTexelBufferArrayNonUniformIndexing = true,
826 .shaderStorageTexelBufferArrayNonUniformIndexing = true,
827 .descriptorBindingUniformBufferUpdateAfterBind = true,
828 .descriptorBindingSampledImageUpdateAfterBind = true,
829 .descriptorBindingStorageImageUpdateAfterBind = true,
830 .descriptorBindingStorageBufferUpdateAfterBind = true,
831 .descriptorBindingUniformTexelBufferUpdateAfterBind = true,
832 .descriptorBindingStorageTexelBufferUpdateAfterBind = true,
833 .descriptorBindingUpdateUnusedWhilePending = true,
834 .descriptorBindingPartiallyBound = true,
835 .descriptorBindingVariableDescriptorCount = true,
836 .runtimeDescriptorArray = true,
837
838 .samplerFilterMinmax = true,
839 .scalarBlockLayout = pdev->info.gfx_level >= GFX7,
840 .imagelessFramebuffer = true,
841 .uniformBufferStandardLayout = true,
842 .shaderSubgroupExtendedTypes = true,
843 .separateDepthStencilLayouts = true,
844 .hostQueryReset = true,
845 .timelineSemaphore = true,
846 .bufferDeviceAddress = true,
847 .bufferDeviceAddressCaptureReplay = true,
848 .bufferDeviceAddressMultiDevice = false,
849 .vulkanMemoryModel = true,
850 .vulkanMemoryModelDeviceScope = true,
851 .vulkanMemoryModelAvailabilityVisibilityChains = false,
852 .shaderOutputViewportIndex = true,
853 .shaderOutputLayer = true,
854 .subgroupBroadcastDynamicId = true,
855
856 /* Vulkan 1.3 */
857 .robustImageAccess = true,
858 .inlineUniformBlock = true,
859 .descriptorBindingInlineUniformBlockUpdateAfterBind = true,
860 .pipelineCreationCacheControl = true,
861 .privateData = true,
862 .shaderDemoteToHelperInvocation = true,
863 .shaderTerminateInvocation = true,
864 .subgroupSizeControl = true,
865 .computeFullSubgroups = true,
866 .synchronization2 = true,
867 .textureCompressionASTC_HDR = false,
868 .shaderZeroInitializeWorkgroupMemory = true,
869 .dynamicRendering = true,
870 .shaderIntegerDotProduct = true,
871 .maintenance4 = true,
872
873 /* VK_EXT_conditional_rendering */
874 .conditionalRendering = true,
875 .inheritedConditionalRendering = false,
876
877 /* VK_KHR_vertex_attribute_divisor */
878 .vertexAttributeInstanceRateDivisor = true,
879 .vertexAttributeInstanceRateZeroDivisor = true,
880
881 /* VK_EXT_transform_feedback */
882 .transformFeedback = true,
883 .geometryStreams = true,
884
885 /* VK_EXT_memory_priority */
886 .memoryPriority = true,
887
888 /* VK_EXT_depth_clip_enable */
889 .depthClipEnable = true,
890
891 /* VK_KHR_compute_shader_derivatives */
892 .computeDerivativeGroupQuads = false,
893 .computeDerivativeGroupLinear = true,
894
895 /* VK_EXT_ycbcr_image_arrays */
896 .ycbcrImageArrays = true,
897
898 /* VK_KHR_index_type_uint8 */
899 .indexTypeUint8 = pdev->info.gfx_level >= GFX8,
900
901 /* VK_KHR_pipeline_executable_properties */
902 .pipelineExecutableInfo = true,
903
904 /* VK_KHR_shader_clock */
905 .shaderSubgroupClock = true,
906 .shaderDeviceClock = pdev->info.gfx_level >= GFX8,
907
908 /* VK_EXT_texel_buffer_alignment */
909 .texelBufferAlignment = true,
910
911 /* VK_AMD_device_coherent_memory */
912 .deviceCoherentMemory = pdev->info.has_l2_uncached,
913
914 /* VK_KHR_line_rasterization */
915 .rectangularLines = true,
916 .bresenhamLines = true,
917 .smoothLines = true,
918 .stippledRectangularLines = false,
919 .stippledBresenhamLines = true,
920 .stippledSmoothLines = false,
921
922 /* VK_EXT_robustness2 */
923 .robustBufferAccess2 = true,
924 .robustImageAccess2 = true,
925 .nullDescriptor = true,
926
927 /* VK_EXT_custom_border_color */
928 .customBorderColors = true,
929 .customBorderColorWithoutFormat = true,
930
931 /* VK_EXT_extended_dynamic_state */
932 .extendedDynamicState = true,
933
934 /* VK_EXT_shader_atomic_float */
935 .shaderBufferFloat32Atomics = true,
936 .shaderBufferFloat32AtomicAdd = pdev->info.gfx_level >= GFX11,
937 .shaderBufferFloat64Atomics = true,
938 .shaderBufferFloat64AtomicAdd = false,
939 .shaderSharedFloat32Atomics = true,
940 .shaderSharedFloat32AtomicAdd = pdev->info.gfx_level >= GFX8,
941 .shaderSharedFloat64Atomics = true,
942 .shaderSharedFloat64AtomicAdd = false,
943 .shaderImageFloat32Atomics = true,
944 .shaderImageFloat32AtomicAdd = false,
945 .sparseImageFloat32Atomics = true,
946 .sparseImageFloat32AtomicAdd = false,
947
948 /* VK_EXT_4444_formats */
949 .formatA4R4G4B4 = true,
950 .formatA4B4G4R4 = true,
951
952 /* VK_EXT_shader_image_atomic_int64 */
953 .shaderImageInt64Atomics = true,
954 .sparseImageInt64Atomics = true,
955
956 /* VK_EXT_mutable_descriptor_type */
957 .mutableDescriptorType = true,
958
959 /* VK_KHR_fragment_shading_rate */
960 .pipelineFragmentShadingRate = true,
961 .primitiveFragmentShadingRate = true,
962 .attachmentFragmentShadingRate = radv_vrs_attachment_enabled(pdev),
963
964 /* VK_KHR_workgroup_memory_explicit_layout */
965 .workgroupMemoryExplicitLayout = true,
966 .workgroupMemoryExplicitLayoutScalarBlockLayout = true,
967 .workgroupMemoryExplicitLayout8BitAccess = true,
968 .workgroupMemoryExplicitLayout16BitAccess = true,
969
970 /* VK_EXT_provoking_vertex */
971 .provokingVertexLast = true,
972 .transformFeedbackPreservesProvokingVertex = true,
973
974 /* VK_EXT_extended_dynamic_state2 */
975 .extendedDynamicState2 = true,
976 .extendedDynamicState2LogicOp = true,
977 .extendedDynamicState2PatchControlPoints = true,
978
979 /* VK_EXT_global_priority_query */
980 .globalPriorityQuery = true,
981
982 /* VK_KHR_acceleration_structure */
983 .accelerationStructure = true,
984 .accelerationStructureCaptureReplay = true,
985 .accelerationStructureIndirectBuild = false,
986 .accelerationStructureHostCommands = false,
987 .descriptorBindingAccelerationStructureUpdateAfterBind = true,
988
989 /* VK_EXT_buffer_device_address */
990 .bufferDeviceAddressCaptureReplayEXT = true,
991
992 /* VK_KHR_shader_subgroup_uniform_control_flow */
993 .shaderSubgroupUniformControlFlow = true,
994
995 /* VK_EXT_map_memory_placed */
996 .memoryMapPlaced = true,
997 .memoryMapRangePlaced = false,
998 .memoryUnmapReserve = true,
999
1000 /* VK_EXT_multi_draw */
1001 .multiDraw = true,
1002
1003 /* VK_EXT_color_write_enable */
1004 .colorWriteEnable = true,
1005
1006 /* VK_EXT_shader_atomic_float2 */
1007 .shaderBufferFloat16Atomics = false,
1008 .shaderBufferFloat16AtomicAdd = false,
1009 .shaderBufferFloat16AtomicMinMax = false,
1010 .shaderBufferFloat32AtomicMinMax = radv_has_shader_buffer_float_minmax(pdev, 32),
1011 .shaderBufferFloat64AtomicMinMax = radv_has_shader_buffer_float_minmax(pdev, 64),
1012 .shaderSharedFloat16Atomics = false,
1013 .shaderSharedFloat16AtomicAdd = false,
1014 .shaderSharedFloat16AtomicMinMax = false,
1015 .shaderSharedFloat32AtomicMinMax = true,
1016 .shaderSharedFloat64AtomicMinMax = true,
1017 .shaderImageFloat32AtomicMinMax = has_shader_image_float_minmax,
1018 .sparseImageFloat32AtomicMinMax = has_shader_image_float_minmax,
1019
1020 /* VK_KHR_present_id */
1021 .presentId = pdev->vk.supported_extensions.KHR_present_id,
1022
1023 /* VK_KHR_present_wait */
1024 .presentWait = pdev->vk.supported_extensions.KHR_present_wait,
1025
1026 /* VK_EXT_primitive_topology_list_restart */
1027 .primitiveTopologyListRestart = true,
1028 .primitiveTopologyPatchListRestart = false,
1029
1030 /* VK_KHR_ray_query */
1031 .rayQuery = true,
1032
1033 /* VK_EXT_pipeline_library_group_handles */
1034 .pipelineLibraryGroupHandles = true,
1035
1036 /* VK_KHR_ray_tracing_pipeline */
1037 .rayTracingPipeline = true,
1038 .rayTracingPipelineShaderGroupHandleCaptureReplay = true,
1039 .rayTracingPipelineShaderGroupHandleCaptureReplayMixed = false,
1040 .rayTracingPipelineTraceRaysIndirect = true,
1041 .rayTraversalPrimitiveCulling = true,
1042
1043 /* VK_KHR_ray_tracing_maintenance1 */
1044 .rayTracingMaintenance1 = true,
1045 .rayTracingPipelineTraceRaysIndirect2 = radv_enable_rt(pdev, true),
1046
1047 /* VK_KHR_ray_tracing_position_fetch */
1048 .rayTracingPositionFetch = true,
1049
1050 /* VK_EXT_vertex_input_dynamic_state */
1051 .vertexInputDynamicState = true,
1052
1053 /* VK_EXT_image_view_min_lod */
1054 .minLod = true,
1055
1056 /* VK_EXT_mesh_shader */
1057 .meshShader = taskmesh_en,
1058 .taskShader = taskmesh_en,
1059 .multiviewMeshShader = taskmesh_en,
1060 .primitiveFragmentShadingRateMeshShader = taskmesh_en,
1061 .meshShaderQueries = false,
1062
1063 /* VK_VALVE_descriptor_set_host_mapping */
1064 .descriptorSetHostMapping = true,
1065
1066 /* VK_EXT_depth_clip_control */
1067 .depthClipControl = true,
1068
1069 /* VK_EXT_image_2d_view_of_3d */
1070 .image2DViewOf3D = true,
1071 .sampler2DViewOf3D = false,
1072
1073 /* VK_INTEL_shader_integer_functions2 */
1074 .shaderIntegerFunctions2 = true,
1075
1076 /* VK_EXT_primitives_generated_query */
1077 .primitivesGeneratedQuery = true,
1078 .primitivesGeneratedQueryWithRasterizerDiscard = true,
1079 .primitivesGeneratedQueryWithNonZeroStreams = true,
1080
1081 /* VK_EXT_non_seamless_cube_map */
1082 .nonSeamlessCubeMap = true,
1083
1084 /* VK_EXT_border_color_swizzle */
1085 .borderColorSwizzle = true,
1086 .borderColorSwizzleFromImage = true,
1087
1088 /* VK_EXT_shader_module_identifier */
1089 .shaderModuleIdentifier = true,
1090
1091 /* VK_KHR_performance_query */
1092 .performanceCounterQueryPools = has_perf_query,
1093 .performanceCounterMultipleQueryPools = has_perf_query,
1094
1095 /* VK_NV_device_generated_commands */
1096 .deviceGeneratedCommandsNV = true,
1097
1098 /* VK_EXT_attachment_feedback_loop_layout */
1099 .attachmentFeedbackLoopLayout = true,
1100
1101 /* VK_EXT_graphics_pipeline_library */
1102 .graphicsPipelineLibrary = true,
1103
1104 /* VK_EXT_extended_dynamic_state3 */
1105 .extendedDynamicState3TessellationDomainOrigin = true,
1106 .extendedDynamicState3PolygonMode = true,
1107 .extendedDynamicState3SampleMask = true,
1108 .extendedDynamicState3AlphaToCoverageEnable = !pdev->use_llvm,
1109 .extendedDynamicState3LogicOpEnable = true,
1110 .extendedDynamicState3LineStippleEnable = true,
1111 .extendedDynamicState3ColorBlendEnable = !pdev->use_llvm,
1112 .extendedDynamicState3DepthClipEnable = true,
1113 .extendedDynamicState3ConservativeRasterizationMode = pdev->info.gfx_level >= GFX9,
1114 .extendedDynamicState3DepthClipNegativeOneToOne = true,
1115 .extendedDynamicState3ProvokingVertexMode = true,
1116 .extendedDynamicState3DepthClampEnable = true,
1117 .extendedDynamicState3ColorWriteMask = !pdev->use_llvm,
1118 .extendedDynamicState3RasterizationSamples = true,
1119 .extendedDynamicState3ColorBlendEquation = !pdev->use_llvm,
1120 .extendedDynamicState3SampleLocationsEnable = pdev->info.gfx_level < GFX10,
1121 .extendedDynamicState3LineRasterizationMode = true,
1122 .extendedDynamicState3ExtraPrimitiveOverestimationSize = false,
1123 .extendedDynamicState3AlphaToOneEnable = !pdev->use_llvm,
1124 .extendedDynamicState3RasterizationStream = false,
1125 .extendedDynamicState3ColorBlendAdvanced = false,
1126 .extendedDynamicState3ViewportWScalingEnable = false,
1127 .extendedDynamicState3ViewportSwizzle = false,
1128 .extendedDynamicState3CoverageToColorEnable = false,
1129 .extendedDynamicState3CoverageToColorLocation = false,
1130 .extendedDynamicState3CoverageModulationMode = false,
1131 .extendedDynamicState3CoverageModulationTableEnable = false,
1132 .extendedDynamicState3CoverageModulationTable = false,
1133 .extendedDynamicState3CoverageReductionMode = false,
1134 .extendedDynamicState3RepresentativeFragmentTestEnable = false,
1135 .extendedDynamicState3ShadingRateImageEnable = false,
1136
1137 /* VK_EXT_descriptor_buffer */
1138 .descriptorBuffer = true,
1139 .descriptorBufferCaptureReplay = false,
1140 .descriptorBufferImageLayoutIgnored = true,
1141 .descriptorBufferPushDescriptors = true,
1142
1143 /* VK_AMD_shader_early_and_late_fragment_tests */
1144 .shaderEarlyAndLateFragmentTests = true,
1145
1146 /* VK_EXT_image_sliced_view_of_3d */
1147 .imageSlicedViewOf3D = true,
1148
1149 #ifdef RADV_USE_WSI_PLATFORM
1150 /* VK_EXT_swapchain_maintenance1 */
1151 .swapchainMaintenance1 = true,
1152 #endif
1153
1154 /* VK_EXT_attachment_feedback_loop_dynamic_state */
1155 .attachmentFeedbackLoopDynamicState = true,
1156
1157 /* VK_EXT_dynamic_rendering_unused_attachments */
1158 .dynamicRenderingUnusedAttachments = true,
1159
1160 /* VK_KHR_fragment_shader_barycentric */
1161 .fragmentShaderBarycentric = true,
1162
1163 /* VK_EXT_depth_bias_control */
1164 .depthBiasControl = true,
1165 .leastRepresentableValueForceUnormRepresentation = true,
1166 .floatRepresentation = true,
1167 .depthBiasExact = true,
1168
1169 /* VK_EXT_fragment_shader_interlock */
1170 .fragmentShaderSampleInterlock = has_fragment_shader_interlock,
1171 .fragmentShaderPixelInterlock = has_fragment_shader_interlock,
1172 .fragmentShaderShadingRateInterlock = false,
1173
1174 /* VK_EXT_pipeline_robustness */
1175 .pipelineRobustness = true,
1176
1177 /* VK_KHR_maintenance5 */
1178 .maintenance5 = true,
1179
1180 /* VK_NV_device_generated_commands_compute */
1181 .deviceGeneratedCompute = true,
1182 .deviceGeneratedComputePipelines = true,
1183 .deviceGeneratedComputeCaptureReplay = false,
1184
1185 /* VK_KHR_cooperative_matrix */
1186 .cooperativeMatrix = pdev->info.gfx_level >= GFX11 && !pdev->use_llvm,
1187 .cooperativeMatrixRobustBufferAccess = pdev->info.gfx_level >= GFX11 && !pdev->use_llvm,
1188
1189 /* VK_EXT_image_compression_control */
1190 .imageCompressionControl = true,
1191
1192 /* VK_EXT_device_fault */
1193 .deviceFault = true,
1194 .deviceFaultVendorBinary = instance->debug_flags & RADV_DEBUG_HANG,
1195
1196 /* VK_EXT_depth_clamp_zero_one */
1197 .depthClampZeroOne = true,
1198
1199 /* VK_KHR_maintenance6 */
1200 .maintenance6 = true,
1201
1202 /* VK_KHR_shader_subgroup_rotate */
1203 .shaderSubgroupRotate = true,
1204 .shaderSubgroupRotateClustered = true,
1205
1206 /* VK_EXT_shader_object */
1207 .shaderObject = true,
1208
1209 /* VK_KHR_shader_expect_assume */
1210 .shaderExpectAssume = true,
1211
1212 /* VK_KHR_shader_maximal_reconvergence */
1213 .shaderMaximalReconvergence = true,
1214
1215 /* VK_KHR_shader_quad_control */
1216 .shaderQuadControl = true,
1217
1218 /* VK_EXT_address_binding_report */
1219 .reportAddressBinding = true,
1220
1221 /* VK_EXT_nested_command_buffer */
1222 .nestedCommandBuffer = true,
1223 .nestedCommandBufferRendering = true,
1224 .nestedCommandBufferSimultaneousUse = true,
1225
1226 /* VK_KHR_dynamic_rendering_local_read */
1227 .dynamicRenderingLocalRead = true,
1228
1229 /* VK_EXT_legacy_vertex_attributes */
1230 .legacyVertexAttributes = true,
1231
1232 /* VK_MESA_image_alignment_control */
1233 .imageAlignmentControl = true,
1234
1235 /* VK_EXT_shader_replicated_composites */
1236 .shaderReplicatedComposites = true,
1237
1238 /* VK_KHR_maintenance7 */
1239 .maintenance7 = true,
1240
1241 /* VK_KHR_video_maintenance1 */
1242 .videoMaintenance1 = true,
1243
1244 /* VK_KHR_pipeline_binary */
1245 .pipelineBinaries = true,
1246
1247 /* VK_KHR_shader_relaxed_extended_instruction */
1248 .shaderRelaxedExtendedInstruction = true,
1249
1250 /* VK_KHR_shader_float_controls2 */
1251 .shaderFloatControls2 = true,
1252 };
1253 }
1254
1255 static size_t
radv_max_descriptor_set_size()1256 radv_max_descriptor_set_size()
1257 {
1258 /* make sure that the entire descriptor set is addressable with a signed
1259 * 32-bit int. So the sum of all limits scaled by descriptor size has to
1260 * be at most 2 GiB. the combined image & samples object count as one of
1261 * both. This limit is for the pipeline layout, not for the set layout, but
1262 * there is no set limit, so we just set a pipeline limit. I don't think
1263 * any app is going to hit this soon. */
1264 return ((1ull << 31) - 16 * MAX_DYNAMIC_BUFFERS - MAX_INLINE_UNIFORM_BLOCK_SIZE * MAX_INLINE_UNIFORM_BLOCK_COUNT) /
1265 (32 /* uniform buffer, 32 due to potential space wasted on alignment */ +
1266 32 /* storage buffer, 32 due to potential space wasted on alignment */ +
1267 32 /* sampler, largest when combined with image */ + 64 /* sampled image */ + 64 /* storage image */);
1268 }
1269
1270 static uint32_t
radv_uniform_buffer_offset_alignment(const struct radv_physical_device * pdev)1271 radv_uniform_buffer_offset_alignment(const struct radv_physical_device *pdev)
1272 {
1273 const struct radv_instance *instance = radv_physical_device_instance(pdev);
1274 uint32_t uniform_offset_alignment = instance->drirc.override_uniform_offset_alignment;
1275 if (!util_is_power_of_two_or_zero(uniform_offset_alignment)) {
1276 fprintf(stderr,
1277 "ERROR: invalid radv_override_uniform_offset_alignment setting %d:"
1278 "not a power of two\n",
1279 uniform_offset_alignment);
1280 uniform_offset_alignment = 0;
1281 }
1282
1283 /* Take at least the hardware limit. */
1284 return MAX2(uniform_offset_alignment, 4);
1285 }
1286
1287 static const char *
radv_get_compiler_string(struct radv_physical_device * pdev)1288 radv_get_compiler_string(struct radv_physical_device *pdev)
1289 {
1290 const struct radv_instance *instance = radv_physical_device_instance(pdev);
1291
1292 if (!pdev->use_llvm) {
1293 /* Some games like SotTR apply shader workarounds if the LLVM
1294 * version is too old or if the LLVM version string is
1295 * missing. This gives 2-5% performance with SotTR and ACO.
1296 */
1297 if (instance->drirc.report_llvm9_version_string) {
1298 return " (LLVM 9.0.1)";
1299 }
1300
1301 return "";
1302 }
1303
1304 #if AMD_LLVM_AVAILABLE
1305 return " (LLVM " MESA_LLVM_VERSION_STRING ")";
1306 #else
1307 unreachable("LLVM is not available");
1308 #endif
1309 }
1310
1311 static void
radv_get_physical_device_properties(struct radv_physical_device * pdev)1312 radv_get_physical_device_properties(struct radv_physical_device *pdev)
1313 {
1314 VkSampleCountFlags sample_counts = 0xf;
1315
1316 size_t max_descriptor_set_size = radv_max_descriptor_set_size();
1317
1318 VkPhysicalDeviceType device_type;
1319 if (pdev->info.has_dedicated_vram) {
1320 device_type = VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU;
1321 } else {
1322 device_type = VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU;
1323 }
1324
1325 pdev->vk.properties = (struct vk_properties){
1326 .apiVersion = RADV_API_VERSION,
1327 .driverVersion = vk_get_driver_version(),
1328 .vendorID = ATI_VENDOR_ID,
1329 .deviceID = pdev->info.pci_id,
1330 .deviceType = device_type,
1331 .maxImageDimension1D = (1 << 14),
1332 .maxImageDimension2D = (1 << 14),
1333 .maxImageDimension3D = (1 << 11),
1334 .maxImageDimensionCube = (1 << 14),
1335 .maxImageArrayLayers = (1 << 11),
1336 .maxTexelBufferElements = UINT32_MAX,
1337 .maxUniformBufferRange = UINT32_MAX,
1338 .maxStorageBufferRange = UINT32_MAX,
1339 .maxPushConstantsSize = MAX_PUSH_CONSTANTS_SIZE,
1340 .maxMemoryAllocationCount = UINT32_MAX,
1341 .maxSamplerAllocationCount = 64 * 1024,
1342 .bufferImageGranularity = 1,
1343 .sparseAddressSpaceSize = RADV_MAX_MEMORY_ALLOCATION_SIZE, /* buffer max size */
1344 .maxBoundDescriptorSets = MAX_SETS,
1345 .maxPerStageDescriptorSamplers = max_descriptor_set_size,
1346 .maxPerStageDescriptorUniformBuffers = max_descriptor_set_size,
1347 .maxPerStageDescriptorStorageBuffers = max_descriptor_set_size,
1348 .maxPerStageDescriptorSampledImages = max_descriptor_set_size,
1349 .maxPerStageDescriptorStorageImages = max_descriptor_set_size,
1350 .maxPerStageDescriptorInputAttachments = max_descriptor_set_size,
1351 .maxPerStageResources = max_descriptor_set_size,
1352 .maxDescriptorSetSamplers = max_descriptor_set_size,
1353 .maxDescriptorSetUniformBuffers = max_descriptor_set_size,
1354 .maxDescriptorSetUniformBuffersDynamic = MAX_DYNAMIC_UNIFORM_BUFFERS,
1355 .maxDescriptorSetStorageBuffers = max_descriptor_set_size,
1356 .maxDescriptorSetStorageBuffersDynamic = MAX_DYNAMIC_STORAGE_BUFFERS,
1357 .maxDescriptorSetSampledImages = max_descriptor_set_size,
1358 .maxDescriptorSetStorageImages = max_descriptor_set_size,
1359 .maxDescriptorSetInputAttachments = max_descriptor_set_size,
1360 .maxVertexInputAttributes = MAX_VERTEX_ATTRIBS,
1361 .maxVertexInputBindings = MAX_VBS,
1362 .maxVertexInputAttributeOffset = UINT32_MAX,
1363 .maxVertexInputBindingStride = 2048,
1364 .maxVertexOutputComponents = 128,
1365 .maxTessellationGenerationLevel = 64,
1366 .maxTessellationPatchSize = 32,
1367 .maxTessellationControlPerVertexInputComponents = 128,
1368 .maxTessellationControlPerVertexOutputComponents = 128,
1369 .maxTessellationControlPerPatchOutputComponents = 120,
1370 .maxTessellationControlTotalOutputComponents = 4096,
1371 .maxTessellationEvaluationInputComponents = 128,
1372 .maxTessellationEvaluationOutputComponents = 128,
1373 .maxGeometryShaderInvocations = 127,
1374 .maxGeometryInputComponents = 64,
1375 .maxGeometryOutputComponents = 128,
1376 .maxGeometryOutputVertices = 256,
1377 .maxGeometryTotalOutputComponents = 1024,
1378 .maxFragmentInputComponents = 128,
1379 .maxFragmentOutputAttachments = 8,
1380 .maxFragmentDualSrcAttachments = 1,
1381 .maxFragmentCombinedOutputResources = max_descriptor_set_size,
1382 .maxComputeSharedMemorySize = pdev->max_shared_size,
1383 .maxComputeWorkGroupCount = {65535, 65535, 65535},
1384 .maxComputeWorkGroupInvocations = 1024,
1385 .maxComputeWorkGroupSize = {1024, 1024, 1024},
1386 .subPixelPrecisionBits = 8,
1387 .subTexelPrecisionBits = 8,
1388 .mipmapPrecisionBits = 8,
1389 .maxDrawIndexedIndexValue = UINT32_MAX,
1390 .maxDrawIndirectCount = UINT32_MAX,
1391 .maxSamplerLodBias = 16,
1392 .maxSamplerAnisotropy = 16,
1393 .maxViewports = MAX_VIEWPORTS,
1394 .maxViewportDimensions = {(1 << 14), (1 << 14)},
1395 .viewportBoundsRange = {INT16_MIN, INT16_MAX},
1396 .viewportSubPixelBits = 8,
1397 .minMemoryMapAlignment = 4096, /* A page */
1398 .minTexelBufferOffsetAlignment = 4,
1399 .minUniformBufferOffsetAlignment = radv_uniform_buffer_offset_alignment(pdev),
1400 .minStorageBufferOffsetAlignment = 4,
1401 .minTexelOffset = -32,
1402 .maxTexelOffset = 31,
1403 .minTexelGatherOffset = -32,
1404 .maxTexelGatherOffset = 31,
1405 .minInterpolationOffset = -2,
1406 .maxInterpolationOffset = 2,
1407 .subPixelInterpolationOffsetBits = 8,
1408 .maxFramebufferWidth = MAX_FRAMEBUFFER_WIDTH,
1409 .maxFramebufferHeight = MAX_FRAMEBUFFER_HEIGHT,
1410 .maxFramebufferLayers = (1 << 10),
1411 .framebufferColorSampleCounts = sample_counts,
1412 .framebufferDepthSampleCounts = sample_counts,
1413 .framebufferStencilSampleCounts = sample_counts,
1414 .framebufferNoAttachmentsSampleCounts = sample_counts,
1415 .maxColorAttachments = MAX_RTS,
1416 .sampledImageColorSampleCounts = sample_counts,
1417 .sampledImageIntegerSampleCounts = sample_counts,
1418 .sampledImageDepthSampleCounts = sample_counts,
1419 .sampledImageStencilSampleCounts = sample_counts,
1420 .storageImageSampleCounts = sample_counts,
1421 .maxSampleMaskWords = 1,
1422 .timestampComputeAndGraphics = true,
1423 .timestampPeriod = 1000000.0 / pdev->info.clock_crystal_freq,
1424 .maxClipDistances = 8,
1425 .maxCullDistances = 8,
1426 .maxCombinedClipAndCullDistances = 8,
1427 .discreteQueuePriorities = 2,
1428 .pointSizeRange = {0.0, 8191.875},
1429 .lineWidthRange = {0.0, 8.0},
1430 .pointSizeGranularity = (1.0 / 8.0),
1431 .lineWidthGranularity = (1.0 / 8.0),
1432 .strictLines = false, /* FINISHME */
1433 .standardSampleLocations = true,
1434 .optimalBufferCopyOffsetAlignment = 1,
1435 .optimalBufferCopyRowPitchAlignment = 1,
1436 .nonCoherentAtomSize = 64,
1437 .sparseResidencyNonResidentStrict = pdev->info.family >= CHIP_POLARIS10,
1438 .sparseResidencyStandard2DBlockShape = pdev->info.family >= CHIP_POLARIS10,
1439 .sparseResidencyStandard3DBlockShape = pdev->info.gfx_level >= GFX9,
1440 };
1441
1442 struct vk_properties *p = &pdev->vk.properties;
1443
1444 /* Vulkan 1.1 */
1445 strcpy(p->deviceName, pdev->marketing_name);
1446 memcpy(p->pipelineCacheUUID, pdev->cache_uuid, VK_UUID_SIZE);
1447
1448 memcpy(p->deviceUUID, pdev->device_uuid, VK_UUID_SIZE);
1449 memcpy(p->driverUUID, pdev->driver_uuid, VK_UUID_SIZE);
1450 memset(p->deviceLUID, 0, VK_LUID_SIZE);
1451 /* The LUID is for Windows. */
1452 p->deviceLUIDValid = false;
1453 p->deviceNodeMask = 0;
1454
1455 p->subgroupSize = RADV_SUBGROUP_SIZE;
1456 p->subgroupSupportedStages = VK_SHADER_STAGE_ALL_GRAPHICS | VK_SHADER_STAGE_COMPUTE_BIT;
1457 if (radv_taskmesh_enabled(pdev))
1458 p->subgroupSupportedStages |= VK_SHADER_STAGE_MESH_BIT_EXT | VK_SHADER_STAGE_TASK_BIT_EXT;
1459
1460 if (radv_enable_rt(pdev, true))
1461 p->subgroupSupportedStages |= RADV_RT_STAGE_BITS;
1462 p->subgroupSupportedOperations = VK_SUBGROUP_FEATURE_BASIC_BIT | VK_SUBGROUP_FEATURE_VOTE_BIT |
1463 VK_SUBGROUP_FEATURE_ARITHMETIC_BIT | VK_SUBGROUP_FEATURE_BALLOT_BIT |
1464 VK_SUBGROUP_FEATURE_CLUSTERED_BIT | VK_SUBGROUP_FEATURE_QUAD_BIT |
1465 VK_SUBGROUP_FEATURE_SHUFFLE_BIT | VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT |
1466 VK_SUBGROUP_FEATURE_ROTATE_BIT_KHR | VK_SUBGROUP_FEATURE_ROTATE_CLUSTERED_BIT_KHR;
1467 p->subgroupQuadOperationsInAllStages = true;
1468
1469 p->pointClippingBehavior = VK_POINT_CLIPPING_BEHAVIOR_ALL_CLIP_PLANES;
1470 p->maxMultiviewViewCount = MAX_VIEWS;
1471 p->maxMultiviewInstanceIndex = INT_MAX;
1472 p->protectedNoFault = false;
1473 p->maxPerSetDescriptors = RADV_MAX_PER_SET_DESCRIPTORS;
1474 p->maxMemoryAllocationSize = RADV_MAX_MEMORY_ALLOCATION_SIZE;
1475
1476 /* Vulkan 1.2 */
1477 p->driverID = VK_DRIVER_ID_MESA_RADV;
1478 snprintf(p->driverName, VK_MAX_DRIVER_NAME_SIZE, "radv");
1479 snprintf(p->driverInfo, VK_MAX_DRIVER_INFO_SIZE, "Mesa " PACKAGE_VERSION MESA_GIT_SHA1 "%s",
1480 radv_get_compiler_string(pdev));
1481
1482 if (radv_is_conformant(pdev)) {
1483 if (pdev->info.gfx_level >= GFX10_3) {
1484 p->conformanceVersion = (VkConformanceVersion){
1485 .major = 1,
1486 .minor = 3,
1487 .subminor = 0,
1488 .patch = 0,
1489 };
1490 } else {
1491 p->conformanceVersion = (VkConformanceVersion){
1492 .major = 1,
1493 .minor = 2,
1494 .subminor = 7,
1495 .patch = 1,
1496 };
1497 }
1498 } else {
1499 p->conformanceVersion = (VkConformanceVersion){
1500 .major = 0,
1501 .minor = 0,
1502 .subminor = 0,
1503 .patch = 0,
1504 };
1505 }
1506
1507 /* On AMD hardware, denormals and rounding modes for fp16/fp64 are
1508 * controlled by the same config register.
1509 */
1510 if (pdev->info.has_packed_math_16bit) {
1511 p->denormBehaviorIndependence = VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_32_BIT_ONLY;
1512 p->roundingModeIndependence = VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_32_BIT_ONLY;
1513 } else {
1514 p->denormBehaviorIndependence = VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL;
1515 p->roundingModeIndependence = VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL;
1516 }
1517
1518 /* With LLVM, do not allow both preserving and flushing denorms because
1519 * different shaders in the same pipeline can have different settings and
1520 * this won't work for merged shaders. To make it work, this requires LLVM
1521 * support for changing the register. The same logic applies for the
1522 * rounding modes because they are configured with the same config
1523 * register.
1524 */
1525 p->shaderDenormFlushToZeroFloat32 = true;
1526 p->shaderDenormPreserveFloat32 = !pdev->use_llvm;
1527 p->shaderRoundingModeRTEFloat32 = true;
1528 p->shaderRoundingModeRTZFloat32 = !pdev->use_llvm;
1529 p->shaderSignedZeroInfNanPreserveFloat32 = true;
1530
1531 p->shaderDenormFlushToZeroFloat16 = pdev->info.has_packed_math_16bit && !pdev->use_llvm;
1532 p->shaderDenormPreserveFloat16 = pdev->info.has_packed_math_16bit;
1533 p->shaderRoundingModeRTEFloat16 = pdev->info.has_packed_math_16bit;
1534 p->shaderRoundingModeRTZFloat16 = pdev->info.has_packed_math_16bit && !pdev->use_llvm;
1535 p->shaderSignedZeroInfNanPreserveFloat16 = pdev->info.has_packed_math_16bit;
1536
1537 p->shaderDenormFlushToZeroFloat64 = pdev->info.gfx_level >= GFX8 && !pdev->use_llvm;
1538 p->shaderDenormPreserveFloat64 = pdev->info.gfx_level >= GFX8;
1539 p->shaderRoundingModeRTEFloat64 = pdev->info.gfx_level >= GFX8;
1540 p->shaderRoundingModeRTZFloat64 = pdev->info.gfx_level >= GFX8 && !pdev->use_llvm;
1541 p->shaderSignedZeroInfNanPreserveFloat64 = pdev->info.gfx_level >= GFX8;
1542
1543 p->maxUpdateAfterBindDescriptorsInAllPools = UINT32_MAX / 64;
1544 p->shaderUniformBufferArrayNonUniformIndexingNative = false;
1545 p->shaderSampledImageArrayNonUniformIndexingNative = false;
1546 p->shaderStorageBufferArrayNonUniformIndexingNative = false;
1547 p->shaderStorageImageArrayNonUniformIndexingNative = false;
1548 p->shaderInputAttachmentArrayNonUniformIndexingNative = false;
1549 p->robustBufferAccessUpdateAfterBind = true;
1550 p->quadDivergentImplicitLod = false;
1551
1552 p->maxPerStageDescriptorUpdateAfterBindSamplers = max_descriptor_set_size;
1553 p->maxPerStageDescriptorUpdateAfterBindUniformBuffers = max_descriptor_set_size;
1554 p->maxPerStageDescriptorUpdateAfterBindStorageBuffers = max_descriptor_set_size;
1555 p->maxPerStageDescriptorUpdateAfterBindSampledImages = max_descriptor_set_size;
1556 p->maxPerStageDescriptorUpdateAfterBindStorageImages = max_descriptor_set_size;
1557 p->maxPerStageDescriptorUpdateAfterBindInputAttachments = max_descriptor_set_size;
1558 p->maxPerStageUpdateAfterBindResources = max_descriptor_set_size;
1559 p->maxDescriptorSetUpdateAfterBindSamplers = max_descriptor_set_size;
1560 p->maxDescriptorSetUpdateAfterBindUniformBuffers = max_descriptor_set_size;
1561 p->maxDescriptorSetUpdateAfterBindUniformBuffersDynamic = MAX_DYNAMIC_UNIFORM_BUFFERS;
1562 p->maxDescriptorSetUpdateAfterBindStorageBuffers = max_descriptor_set_size;
1563 p->maxDescriptorSetUpdateAfterBindStorageBuffersDynamic = MAX_DYNAMIC_STORAGE_BUFFERS;
1564 p->maxDescriptorSetUpdateAfterBindSampledImages = max_descriptor_set_size;
1565 p->maxDescriptorSetUpdateAfterBindStorageImages = max_descriptor_set_size;
1566 p->maxDescriptorSetUpdateAfterBindInputAttachments = max_descriptor_set_size;
1567
1568 /* We support all of the depth resolve modes */
1569 p->supportedDepthResolveModes =
1570 VK_RESOLVE_MODE_SAMPLE_ZERO_BIT | VK_RESOLVE_MODE_AVERAGE_BIT | VK_RESOLVE_MODE_MIN_BIT | VK_RESOLVE_MODE_MAX_BIT;
1571
1572 /* Average doesn't make sense for stencil so we don't support that */
1573 p->supportedStencilResolveModes =
1574 VK_RESOLVE_MODE_SAMPLE_ZERO_BIT | VK_RESOLVE_MODE_MIN_BIT | VK_RESOLVE_MODE_MAX_BIT;
1575
1576 p->independentResolveNone = true;
1577 p->independentResolve = true;
1578
1579 /* GFX6-8 only support single channel min/max filter. */
1580 p->filterMinmaxImageComponentMapping = pdev->info.gfx_level >= GFX9;
1581 p->filterMinmaxSingleComponentFormats = true;
1582
1583 p->maxTimelineSemaphoreValueDifference = UINT64_MAX;
1584
1585 p->framebufferIntegerColorSampleCounts = VK_SAMPLE_COUNT_1_BIT;
1586
1587 /* Vulkan 1.3 */
1588 p->minSubgroupSize = 64;
1589 p->maxSubgroupSize = 64;
1590 p->maxComputeWorkgroupSubgroups = UINT32_MAX;
1591 p->requiredSubgroupSizeStages = 0;
1592 if (pdev->info.gfx_level >= GFX10) {
1593 /* Only GFX10+ supports wave32. */
1594 p->minSubgroupSize = 32;
1595 p->requiredSubgroupSizeStages = VK_SHADER_STAGE_COMPUTE_BIT;
1596
1597 if (radv_taskmesh_enabled(pdev)) {
1598 p->requiredSubgroupSizeStages |= VK_SHADER_STAGE_MESH_BIT_EXT | VK_SHADER_STAGE_TASK_BIT_EXT;
1599 }
1600 }
1601
1602 p->maxInlineUniformBlockSize = MAX_INLINE_UNIFORM_BLOCK_SIZE;
1603 p->maxPerStageDescriptorInlineUniformBlocks = MAX_INLINE_UNIFORM_BLOCK_SIZE * MAX_SETS;
1604 p->maxPerStageDescriptorUpdateAfterBindInlineUniformBlocks = MAX_INLINE_UNIFORM_BLOCK_SIZE * MAX_SETS;
1605 p->maxDescriptorSetInlineUniformBlocks = MAX_INLINE_UNIFORM_BLOCK_COUNT;
1606 p->maxDescriptorSetUpdateAfterBindInlineUniformBlocks = MAX_INLINE_UNIFORM_BLOCK_COUNT;
1607 p->maxInlineUniformTotalSize = UINT16_MAX;
1608
1609 bool accel_dot = pdev->info.has_accelerated_dot_product;
1610 bool gfx11plus = pdev->info.gfx_level >= GFX11;
1611 p->integerDotProduct8BitUnsignedAccelerated = accel_dot;
1612 p->integerDotProduct8BitSignedAccelerated = accel_dot;
1613 p->integerDotProduct8BitMixedSignednessAccelerated = accel_dot && gfx11plus;
1614 p->integerDotProduct4x8BitPackedUnsignedAccelerated = accel_dot;
1615 p->integerDotProduct4x8BitPackedSignedAccelerated = accel_dot;
1616 p->integerDotProduct4x8BitPackedMixedSignednessAccelerated = accel_dot && gfx11plus;
1617 p->integerDotProduct16BitUnsignedAccelerated = accel_dot && !gfx11plus;
1618 p->integerDotProduct16BitSignedAccelerated = accel_dot && !gfx11plus;
1619 p->integerDotProduct16BitMixedSignednessAccelerated = false;
1620 p->integerDotProduct32BitUnsignedAccelerated = false;
1621 p->integerDotProduct32BitSignedAccelerated = false;
1622 p->integerDotProduct32BitMixedSignednessAccelerated = false;
1623 p->integerDotProduct64BitUnsignedAccelerated = false;
1624 p->integerDotProduct64BitSignedAccelerated = false;
1625 p->integerDotProduct64BitMixedSignednessAccelerated = false;
1626 p->integerDotProductAccumulatingSaturating8BitUnsignedAccelerated = accel_dot;
1627 p->integerDotProductAccumulatingSaturating8BitSignedAccelerated = accel_dot;
1628 p->integerDotProductAccumulatingSaturating8BitMixedSignednessAccelerated = accel_dot && gfx11plus;
1629 p->integerDotProductAccumulatingSaturating4x8BitPackedUnsignedAccelerated = accel_dot;
1630 p->integerDotProductAccumulatingSaturating4x8BitPackedSignedAccelerated = accel_dot;
1631 p->integerDotProductAccumulatingSaturating4x8BitPackedMixedSignednessAccelerated = accel_dot && gfx11plus;
1632 p->integerDotProductAccumulatingSaturating16BitUnsignedAccelerated = accel_dot && !gfx11plus;
1633 p->integerDotProductAccumulatingSaturating16BitSignedAccelerated = accel_dot && !gfx11plus;
1634 p->integerDotProductAccumulatingSaturating16BitMixedSignednessAccelerated = false;
1635 p->integerDotProductAccumulatingSaturating32BitUnsignedAccelerated = false;
1636 p->integerDotProductAccumulatingSaturating32BitSignedAccelerated = false;
1637 p->integerDotProductAccumulatingSaturating32BitMixedSignednessAccelerated = false;
1638 p->integerDotProductAccumulatingSaturating64BitUnsignedAccelerated = false;
1639 p->integerDotProductAccumulatingSaturating64BitSignedAccelerated = false;
1640 p->integerDotProductAccumulatingSaturating64BitMixedSignednessAccelerated = false;
1641
1642 p->storageTexelBufferOffsetAlignmentBytes = 4;
1643 p->storageTexelBufferOffsetSingleTexelAlignment = true;
1644 p->uniformTexelBufferOffsetAlignmentBytes = 4;
1645 p->uniformTexelBufferOffsetSingleTexelAlignment = true;
1646
1647 p->maxBufferSize = RADV_MAX_MEMORY_ALLOCATION_SIZE;
1648
1649 /* VK_KHR_push_descriptor */
1650 p->maxPushDescriptors = MAX_PUSH_DESCRIPTORS;
1651
1652 /* VK_EXT_discard_rectangles */
1653 p->maxDiscardRectangles = MAX_DISCARD_RECTANGLES;
1654
1655 /* VK_EXT_external_memory_host */
1656 p->minImportedHostPointerAlignment = 4096;
1657
1658 /* VK_AMD_shader_core_properties */
1659 /* Shader engines. */
1660 p->shaderEngineCount = pdev->info.max_se;
1661 p->shaderArraysPerEngineCount = pdev->info.max_sa_per_se;
1662 p->computeUnitsPerShaderArray = pdev->info.min_good_cu_per_sa;
1663 p->simdPerComputeUnit = pdev->info.num_simd_per_compute_unit;
1664 p->wavefrontsPerSimd = pdev->info.max_waves_per_simd;
1665 p->wavefrontSize = 64;
1666
1667 /* SGPR. */
1668 p->sgprsPerSimd = pdev->info.num_physical_sgprs_per_simd;
1669 p->minSgprAllocation = pdev->info.min_sgpr_alloc;
1670 p->maxSgprAllocation = pdev->info.max_sgpr_alloc;
1671 p->sgprAllocationGranularity = pdev->info.sgpr_alloc_granularity;
1672
1673 /* VGPR. */
1674 p->vgprsPerSimd = pdev->info.num_physical_wave64_vgprs_per_simd;
1675 p->minVgprAllocation = pdev->info.min_wave64_vgpr_alloc;
1676 p->maxVgprAllocation = pdev->info.max_vgpr_alloc;
1677 p->vgprAllocationGranularity = pdev->info.wave64_vgpr_alloc_granularity;
1678
1679 /* VK_AMD_shader_core_properties2 */
1680 p->shaderCoreFeatures = 0;
1681 p->activeComputeUnitCount = pdev->info.num_cu;
1682
1683 /* VK_KHR_vertex_attribute_divisor */
1684 p->maxVertexAttribDivisor = UINT32_MAX;
1685 p->supportsNonZeroFirstInstance = true;
1686
1687 /* VK_EXT_conservative_rasterization */
1688 p->primitiveOverestimationSize = 0;
1689 p->maxExtraPrimitiveOverestimationSize = 0;
1690 p->extraPrimitiveOverestimationSizeGranularity = 0;
1691 p->primitiveUnderestimation = true;
1692 p->conservativePointAndLineRasterization = false;
1693 p->degenerateTrianglesRasterized = true;
1694 p->degenerateLinesRasterized = false;
1695 p->fullyCoveredFragmentShaderInputVariable = true;
1696 p->conservativeRasterizationPostDepthCoverage = false;
1697
1698 /* VK_EXT_pci_bus_info */
1699 #ifndef _WIN32
1700 p->pciDomain = pdev->bus_info.domain;
1701 p->pciBus = pdev->bus_info.bus;
1702 p->pciDevice = pdev->bus_info.dev;
1703 p->pciFunction = pdev->bus_info.func;
1704 #endif
1705
1706 /* VK_EXT_transform_feedback */
1707 p->maxTransformFeedbackStreams = MAX_SO_STREAMS;
1708 p->maxTransformFeedbackBuffers = MAX_SO_BUFFERS;
1709 p->maxTransformFeedbackBufferSize = UINT32_MAX;
1710 p->maxTransformFeedbackStreamDataSize = 512;
1711 p->maxTransformFeedbackBufferDataSize = 512;
1712 p->maxTransformFeedbackBufferDataStride = 512;
1713 p->transformFeedbackQueries = true;
1714 p->transformFeedbackStreamsLinesTriangles = true;
1715 p->transformFeedbackRasterizationStreamSelect = false;
1716 p->transformFeedbackDraw = true;
1717
1718 /* VK_EXT_sample_locations */
1719 p->sampleLocationSampleCounts = VK_SAMPLE_COUNT_2_BIT | VK_SAMPLE_COUNT_4_BIT | VK_SAMPLE_COUNT_8_BIT;
1720 p->maxSampleLocationGridSize = (VkExtent2D){2, 2};
1721 p->sampleLocationCoordinateRange[0] = 0.0f;
1722 p->sampleLocationCoordinateRange[1] = 0.9375f;
1723 p->sampleLocationSubPixelBits = 4;
1724 p->variableSampleLocations = false;
1725
1726 /* VK_KHR_line_rasterization */
1727 p->lineSubPixelPrecisionBits = 4;
1728
1729 /* VK_EXT_robustness2 */
1730 p->robustStorageBufferAccessSizeAlignment = 4;
1731 p->robustUniformBufferAccessSizeAlignment = 4;
1732
1733 /* VK_EXT_custom_border_color */
1734 p->maxCustomBorderColorSamplers = RADV_BORDER_COLOR_COUNT;
1735
1736 /* VK_KHR_fragment_shading_rate */
1737 if (radv_vrs_attachment_enabled(pdev)) {
1738 p->minFragmentShadingRateAttachmentTexelSize = (VkExtent2D){8, 8};
1739 p->maxFragmentShadingRateAttachmentTexelSize = (VkExtent2D){8, 8};
1740 } else {
1741 p->minFragmentShadingRateAttachmentTexelSize = (VkExtent2D){0, 0};
1742 p->maxFragmentShadingRateAttachmentTexelSize = (VkExtent2D){0, 0};
1743 }
1744 p->maxFragmentShadingRateAttachmentTexelSizeAspectRatio = 1;
1745 p->primitiveFragmentShadingRateWithMultipleViewports = true;
1746 p->layeredShadingRateAttachments = false; /* TODO */
1747 p->fragmentShadingRateNonTrivialCombinerOps = true;
1748 p->maxFragmentSize = (VkExtent2D){2, 2};
1749 p->maxFragmentSizeAspectRatio = 2;
1750 p->maxFragmentShadingRateCoverageSamples = 32;
1751 p->maxFragmentShadingRateRasterizationSamples = VK_SAMPLE_COUNT_8_BIT;
1752 p->fragmentShadingRateWithShaderDepthStencilWrites = !pdev->info.has_vrs_ds_export_bug;
1753 p->fragmentShadingRateWithSampleMask = true;
1754 p->fragmentShadingRateWithShaderSampleMask = false;
1755 p->fragmentShadingRateWithConservativeRasterization = true;
1756 p->fragmentShadingRateWithFragmentShaderInterlock = pdev->info.gfx_level >= GFX11 && radv_has_pops(pdev);
1757 p->fragmentShadingRateWithCustomSampleLocations = false;
1758 p->fragmentShadingRateStrictMultiplyCombiner = true;
1759
1760 /* VK_EXT_provoking_vertex */
1761 p->provokingVertexModePerPipeline = true;
1762 p->transformFeedbackPreservesTriangleFanProvokingVertex = true;
1763
1764 /* VK_KHR_acceleration_structure */
1765 p->maxGeometryCount = (1 << 24) - 1;
1766 p->maxInstanceCount = (1 << 24) - 1;
1767 p->maxPrimitiveCount = (1 << 29) - 1;
1768 p->maxPerStageDescriptorAccelerationStructures = p->maxPerStageDescriptorStorageBuffers;
1769 p->maxPerStageDescriptorUpdateAfterBindAccelerationStructures = p->maxPerStageDescriptorStorageBuffers;
1770 p->maxDescriptorSetAccelerationStructures = p->maxDescriptorSetStorageBuffers;
1771 p->maxDescriptorSetUpdateAfterBindAccelerationStructures = p->maxDescriptorSetStorageBuffers;
1772 p->minAccelerationStructureScratchOffsetAlignment = 128;
1773
1774 /* VK_EXT_physical_device_drm */
1775 #ifndef _WIN32
1776 if (pdev->available_nodes & (1 << DRM_NODE_PRIMARY)) {
1777 p->drmHasPrimary = true;
1778 p->drmPrimaryMajor = (int64_t)major(pdev->primary_devid);
1779 p->drmPrimaryMinor = (int64_t)minor(pdev->primary_devid);
1780 } else {
1781 p->drmHasPrimary = false;
1782 }
1783 if (pdev->available_nodes & (1 << DRM_NODE_RENDER)) {
1784 p->drmHasRender = true;
1785 p->drmRenderMajor = (int64_t)major(pdev->render_devid);
1786 p->drmRenderMinor = (int64_t)minor(pdev->render_devid);
1787 } else {
1788 p->drmHasRender = false;
1789 }
1790 #endif
1791
1792 /* VK_EXT_multi_draw */
1793 p->maxMultiDrawCount = 2048;
1794
1795 /* VK_KHR_ray_tracing_pipeline */
1796
1797 p->shaderGroupHandleSize = RADV_RT_HANDLE_SIZE;
1798 p->maxRayRecursionDepth = 31; /* Minimum allowed for DXR. */
1799 p->maxShaderGroupStride = 16384; /* dummy */
1800 /* This isn't strictly necessary, but Doom Eternal breaks if the
1801 * alignment is any lower. */
1802 p->shaderGroupBaseAlignment = RADV_RT_HANDLE_SIZE;
1803 p->shaderGroupHandleCaptureReplaySize = sizeof(struct radv_rt_capture_replay_handle);
1804 p->maxRayDispatchInvocationCount = 1024 * 1024 * 64;
1805 p->shaderGroupHandleAlignment = 16;
1806 p->maxRayHitAttributeSize = RADV_MAX_HIT_ATTRIB_SIZE;
1807
1808 /* VK_EXT_shader_module_identifier */
1809 STATIC_ASSERT(sizeof(vk_shaderModuleIdentifierAlgorithmUUID) == sizeof(p->shaderModuleIdentifierAlgorithmUUID));
1810 memcpy(p->shaderModuleIdentifierAlgorithmUUID, vk_shaderModuleIdentifierAlgorithmUUID,
1811 sizeof(p->shaderModuleIdentifierAlgorithmUUID));
1812
1813 /* VK_KHR_performance_query */
1814 p->allowCommandBufferQueryCopies = false;
1815
1816 /* VK_NV_device_generated_commands */
1817 p->maxIndirectCommandsStreamCount = 1;
1818 p->maxIndirectCommandsStreamStride = UINT32_MAX;
1819 p->maxIndirectCommandsTokenCount = 512;
1820 p->maxIndirectCommandsTokenOffset = UINT16_MAX;
1821 p->minIndirectCommandsBufferOffsetAlignment = 4;
1822 p->minSequencesCountBufferOffsetAlignment = 4;
1823 p->minSequencesIndexBufferOffsetAlignment = 4;
1824 /* Don't support even a shader group count = 1 until we support shader
1825 * overrides during pipeline creation. */
1826 p->maxGraphicsShaderGroupCount = 0;
1827 /* MSB reserved for signalling indirect count enablement. */
1828 p->maxIndirectSequenceCount = UINT32_MAX >> 1;
1829
1830 /* VK_EXT_graphics_pipeline_library */
1831 p->graphicsPipelineLibraryFastLinking = true;
1832 p->graphicsPipelineLibraryIndependentInterpolationDecoration = true;
1833
1834 /* VK_EXT_mesh_shader */
1835 p->maxTaskWorkGroupTotalCount = 4194304; /* 2^22 min required */
1836 p->maxTaskWorkGroupCount[0] = 65535;
1837 p->maxTaskWorkGroupCount[1] = 65535;
1838 p->maxTaskWorkGroupCount[2] = 65535;
1839 p->maxTaskWorkGroupInvocations = 1024;
1840 p->maxTaskWorkGroupSize[0] = 1024;
1841 p->maxTaskWorkGroupSize[1] = 1024;
1842 p->maxTaskWorkGroupSize[2] = 1024;
1843 p->maxTaskPayloadSize = 16384; /* 16K min required */
1844 p->maxTaskSharedMemorySize = 65536;
1845 p->maxTaskPayloadAndSharedMemorySize = 65536;
1846
1847 p->maxMeshWorkGroupTotalCount = 4194304; /* 2^22 min required */
1848 p->maxMeshWorkGroupCount[0] = 65535;
1849 p->maxMeshWorkGroupCount[1] = 65535;
1850 p->maxMeshWorkGroupCount[2] = 65535;
1851 p->maxMeshWorkGroupInvocations = 256; /* Max NGG HW limit */
1852 p->maxMeshWorkGroupSize[0] = 256;
1853 p->maxMeshWorkGroupSize[1] = 256;
1854 p->maxMeshWorkGroupSize[2] = 256;
1855 p->maxMeshOutputMemorySize = 32 * 1024; /* 32K min required */
1856 p->maxMeshSharedMemorySize = 28672; /* 28K min required */
1857 p->maxMeshPayloadAndSharedMemorySize = p->maxTaskPayloadSize + p->maxMeshSharedMemorySize; /* 28K min required */
1858 p->maxMeshPayloadAndOutputMemorySize = p->maxTaskPayloadSize + p->maxMeshOutputMemorySize; /* 47K min required */
1859 p->maxMeshOutputComponents = 128; /* 32x vec4 min required */
1860 p->maxMeshOutputVertices = 256;
1861 p->maxMeshOutputPrimitives = 256;
1862 p->maxMeshOutputLayers = 8;
1863 p->maxMeshMultiviewViewCount = MAX_VIEWS;
1864 p->meshOutputPerVertexGranularity = 1;
1865 p->meshOutputPerPrimitiveGranularity = 1;
1866
1867 p->maxPreferredTaskWorkGroupInvocations = 64;
1868 p->maxPreferredMeshWorkGroupInvocations = 128;
1869 p->prefersLocalInvocationVertexOutput = true;
1870 p->prefersLocalInvocationPrimitiveOutput = true;
1871 p->prefersCompactVertexOutput = true;
1872 p->prefersCompactPrimitiveOutput = false;
1873
1874 /* VK_EXT_extended_dynamic_state3 */
1875 p->dynamicPrimitiveTopologyUnrestricted = false;
1876
1877 /* VK_EXT_descriptor_buffer */
1878 p->combinedImageSamplerDescriptorSingleArray = true;
1879 p->bufferlessPushDescriptors = true;
1880 p->allowSamplerImageViewPostSubmitCreation = false;
1881 p->descriptorBufferOffsetAlignment = 4;
1882 p->maxDescriptorBufferBindings = MAX_SETS;
1883 p->maxResourceDescriptorBufferBindings = MAX_SETS;
1884 p->maxSamplerDescriptorBufferBindings = MAX_SETS;
1885 p->maxEmbeddedImmutableSamplerBindings = MAX_SETS;
1886 p->maxEmbeddedImmutableSamplers = radv_max_descriptor_set_size();
1887 p->bufferCaptureReplayDescriptorDataSize = 0;
1888 p->imageCaptureReplayDescriptorDataSize = 0;
1889 p->imageViewCaptureReplayDescriptorDataSize = 0;
1890 p->samplerCaptureReplayDescriptorDataSize = 0;
1891 p->accelerationStructureCaptureReplayDescriptorDataSize = 0;
1892 p->samplerDescriptorSize = 16;
1893 p->combinedImageSamplerDescriptorSize = 96;
1894 p->sampledImageDescriptorSize = 64;
1895 p->storageImageDescriptorSize = 32;
1896 p->uniformTexelBufferDescriptorSize = 16;
1897 p->robustUniformTexelBufferDescriptorSize = 16;
1898 p->storageTexelBufferDescriptorSize = 16;
1899 p->robustStorageTexelBufferDescriptorSize = 16;
1900 p->uniformBufferDescriptorSize = 16;
1901 p->robustUniformBufferDescriptorSize = 16;
1902 p->storageBufferDescriptorSize = 16;
1903 p->robustStorageBufferDescriptorSize = 16;
1904 p->inputAttachmentDescriptorSize = 64;
1905 p->accelerationStructureDescriptorSize = 16;
1906 p->maxSamplerDescriptorBufferRange = UINT32_MAX;
1907 p->maxResourceDescriptorBufferRange = UINT32_MAX;
1908 p->samplerDescriptorBufferAddressSpaceSize = RADV_MAX_MEMORY_ALLOCATION_SIZE;
1909 p->resourceDescriptorBufferAddressSpaceSize = RADV_MAX_MEMORY_ALLOCATION_SIZE;
1910 p->descriptorBufferAddressSpaceSize = RADV_MAX_MEMORY_ALLOCATION_SIZE;
1911
1912 /* VK_KHR_fragment_shader_barycentric */
1913 p->triStripVertexOrderIndependentOfProvokingVertex = false;
1914
1915 /* VK_EXT_pipeline_robustness */
1916 p->defaultRobustnessStorageBuffers = VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_ROBUST_BUFFER_ACCESS_EXT;
1917 p->defaultRobustnessUniformBuffers = VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_ROBUST_BUFFER_ACCESS_EXT;
1918 p->defaultRobustnessVertexInputs = VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_DISABLED_EXT;
1919 p->defaultRobustnessImages = VK_PIPELINE_ROBUSTNESS_IMAGE_BEHAVIOR_ROBUST_IMAGE_ACCESS_2_EXT;
1920
1921 /* VK_KHR_maintenance5 */
1922 p->earlyFragmentMultisampleCoverageAfterSampleCounting = true;
1923 p->earlyFragmentSampleMaskTestBeforeSampleCounting = true;
1924 p->depthStencilSwizzleOneSupport = true;
1925 p->polygonModePointSize = true;
1926 p->nonStrictSinglePixelWideLinesUseParallelogram = true;
1927 p->nonStrictWideLinesUseParallelogram = true;
1928
1929 /* VK_KHR_cooperative_matrix */
1930 p->cooperativeMatrixSupportedStages = VK_SHADER_STAGE_COMPUTE_BIT;
1931
1932 /* VK_KHR_maintenance6 */
1933 p->blockTexelViewCompatibleMultipleLayers = true;
1934 p->maxCombinedImageSamplerDescriptorCount = 1;
1935 p->fragmentShadingRateClampCombinerInputs = true;
1936
1937 /* VK_EXT_shader_object */
1938 radv_device_get_cache_uuid(pdev, p->shaderBinaryUUID);
1939 p->shaderBinaryVersion = 1;
1940
1941 /* VK_EXT_map_memory_placed */
1942 uint64_t os_page_size = 4096;
1943 os_get_page_size(&os_page_size);
1944 p->minPlacedMemoryMapAlignment = os_page_size;
1945
1946 /* VK_EXT_nested_command_buffer */
1947 p->maxCommandBufferNestingLevel = UINT32_MAX;
1948
1949 /* VK_EXT_legacy_vertex_attributes */
1950 p->nativeUnalignedPerformance = false;
1951
1952 /* VK_MESA_image_alignment_control */
1953 p->supportedImageAlignmentMask = (4 * 1024) | (64 * 1024);
1954 if (gfx11plus)
1955 p->supportedImageAlignmentMask |= 256 * 1024;
1956
1957 /* VK_KHR_maintenance7 */
1958 p->robustFragmentShadingRateAttachmentAccess = true;
1959 p->separateDepthStencilAttachmentAccess = true;
1960 p->maxDescriptorSetTotalUniformBuffersDynamic = MAX_DYNAMIC_UNIFORM_BUFFERS;
1961 p->maxDescriptorSetTotalStorageBuffersDynamic = MAX_DYNAMIC_STORAGE_BUFFERS;
1962 p->maxDescriptorSetTotalBuffersDynamic = MAX_DYNAMIC_BUFFERS;
1963 p->maxDescriptorSetUpdateAfterBindTotalUniformBuffersDynamic = MAX_DYNAMIC_UNIFORM_BUFFERS;
1964 p->maxDescriptorSetUpdateAfterBindTotalStorageBuffersDynamic = MAX_DYNAMIC_STORAGE_BUFFERS;
1965 p->maxDescriptorSetUpdateAfterBindTotalBuffersDynamic = MAX_DYNAMIC_BUFFERS;
1966
1967 /* VK_KHR_pipeline_binary */
1968 p->pipelineBinaryInternalCache = true;
1969 p->pipelineBinaryInternalCacheControl = true;
1970 p->pipelineBinaryPrefersInternalCache = false;
1971 p->pipelineBinaryPrecompiledInternalCache = false;
1972 p->pipelineBinaryCompressedData = false;
1973
1974 /* VK_KHR_compute_shader_derivatives */
1975 p->meshAndTaskShaderDerivatives = radv_taskmesh_enabled(pdev);
1976 }
1977
1978 static VkResult
radv_physical_device_try_create(struct radv_instance * instance,drmDevicePtr drm_device,struct radv_physical_device ** pdev_out)1979 radv_physical_device_try_create(struct radv_instance *instance, drmDevicePtr drm_device,
1980 struct radv_physical_device **pdev_out)
1981 {
1982 VkResult result;
1983 int fd = -1;
1984 int master_fd = -1;
1985
1986 #ifdef _WIN32
1987 assert(drm_device == NULL);
1988 #else
1989 if (drm_device) {
1990 const char *path = drm_device->nodes[DRM_NODE_RENDER];
1991 drmVersionPtr version;
1992
1993 fd = open(path, O_RDWR | O_CLOEXEC);
1994 if (fd < 0) {
1995 return vk_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER, "Could not open device %s: %m", path);
1996 }
1997
1998 version = drmGetVersion(fd);
1999 if (!version) {
2000 close(fd);
2001
2002 return vk_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER,
2003 "Could not get the kernel driver version for device %s: %m", path);
2004 }
2005
2006 if (strcmp(version->name, "amdgpu")) {
2007 drmFreeVersion(version);
2008 close(fd);
2009
2010 return vk_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER,
2011 "Device '%s' is not using the AMDGPU kernel driver: %m", path);
2012 }
2013 drmFreeVersion(version);
2014
2015 if (instance->debug_flags & RADV_DEBUG_STARTUP)
2016 fprintf(stderr, "radv: info: Found compatible device '%s'.\n", path);
2017 }
2018 #endif
2019
2020 struct radv_physical_device *pdev =
2021 vk_zalloc2(&instance->vk.alloc, NULL, sizeof(*pdev), 8, VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
2022 if (!pdev) {
2023 result = vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY);
2024 goto fail_fd;
2025 }
2026
2027 struct vk_physical_device_dispatch_table dispatch_table;
2028 vk_physical_device_dispatch_table_from_entrypoints(&dispatch_table, &radv_physical_device_entrypoints, true);
2029 vk_physical_device_dispatch_table_from_entrypoints(&dispatch_table, &wsi_physical_device_entrypoints, false);
2030
2031 result = vk_physical_device_init(&pdev->vk, &instance->vk, NULL, NULL, NULL, &dispatch_table);
2032 if (result != VK_SUCCESS) {
2033 goto fail_alloc;
2034 }
2035
2036 #ifdef _WIN32
2037 pdev->ws = radv_null_winsys_create();
2038 #else
2039 if (drm_device) {
2040 bool reserve_vmid = instance->vk.trace_mode & RADV_TRACE_MODE_RGP;
2041
2042 pdev->ws = radv_amdgpu_winsys_create(fd, instance->debug_flags, instance->perftest_flags, reserve_vmid);
2043 } else {
2044 pdev->ws = radv_null_winsys_create();
2045 }
2046 #endif
2047
2048 if (!pdev->ws) {
2049 result = vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED, "failed to initialize winsys");
2050 goto fail_base;
2051 }
2052
2053 pdev->vk.supported_sync_types = pdev->ws->get_sync_types(pdev->ws);
2054
2055 #ifndef _WIN32
2056 if (drm_device && instance->vk.enabled_extensions.KHR_display) {
2057 master_fd = open(drm_device->nodes[DRM_NODE_PRIMARY], O_RDWR | O_CLOEXEC);
2058 if (master_fd >= 0) {
2059 uint32_t accel_working = 0;
2060 struct drm_amdgpu_info request = {.return_pointer = (uintptr_t)&accel_working,
2061 .return_size = sizeof(accel_working),
2062 .query = AMDGPU_INFO_ACCEL_WORKING};
2063
2064 if (drmCommandWrite(master_fd, DRM_AMDGPU_INFO, &request, sizeof(struct drm_amdgpu_info)) < 0 ||
2065 !accel_working) {
2066 close(master_fd);
2067 master_fd = -1;
2068 }
2069 }
2070 }
2071 #endif
2072
2073 pdev->master_fd = master_fd;
2074 pdev->local_fd = fd;
2075 pdev->ws->query_info(pdev->ws, &pdev->info);
2076
2077 pdev->use_llvm = instance->debug_flags & RADV_DEBUG_LLVM;
2078 #if !AMD_LLVM_AVAILABLE
2079 if (pdev->use_llvm) {
2080 fprintf(stderr, "ERROR: LLVM compiler backend selected for radv, but LLVM support was not "
2081 "enabled at build time.\n");
2082 abort();
2083 }
2084 #endif
2085
2086 #if DETECT_OS_ANDROID
2087 pdev->emulate_etc2 = !pdev->info.has_etc_support;
2088 pdev->emulate_astc = true;
2089 #else
2090 pdev->emulate_etc2 = !pdev->info.has_etc_support && instance->drirc.vk_require_etc2;
2091 pdev->emulate_astc = instance->drirc.vk_require_astc;
2092 #endif
2093
2094 snprintf(pdev->name, sizeof(pdev->name), "AMD RADV %s%s", pdev->info.name, radv_get_compiler_string(pdev));
2095
2096 const char *marketing_name = pdev->ws->get_chip_name(pdev->ws);
2097 snprintf(pdev->marketing_name, sizeof(pdev->name), "%s (RADV %s%s)", marketing_name ? marketing_name : "AMD Unknown",
2098 pdev->info.name, radv_get_compiler_string(pdev));
2099
2100 if (!radv_is_conformant(pdev))
2101 vk_warn_non_conformant_implementation("radv");
2102
2103 radv_get_driver_uuid(&pdev->driver_uuid);
2104 radv_get_device_uuid(&pdev->info, &pdev->device_uuid);
2105
2106 pdev->dcc_msaa_allowed = (instance->perftest_flags & RADV_PERFTEST_DCC_MSAA);
2107
2108 pdev->use_fmask = pdev->info.gfx_level < GFX11 && !(instance->debug_flags & RADV_DEBUG_NO_FMASK);
2109
2110 pdev->use_ngg = (pdev->info.gfx_level >= GFX10 && pdev->info.family != CHIP_NAVI14 &&
2111 !(instance->debug_flags & RADV_DEBUG_NO_NGG)) ||
2112 pdev->info.gfx_level >= GFX11;
2113
2114 /* TODO: Investigate if NGG culling helps on GFX11. */
2115 pdev->use_ngg_culling = pdev->use_ngg && pdev->info.max_render_backends > 1 &&
2116 (pdev->info.gfx_level == GFX10_3 || (instance->perftest_flags & RADV_PERFTEST_NGGC)) &&
2117 !(instance->debug_flags & RADV_DEBUG_NO_NGGC);
2118
2119 pdev->use_ngg_streamout = pdev->info.gfx_level >= GFX11;
2120
2121 pdev->emulate_ngg_gs_query_pipeline_stat = pdev->use_ngg && pdev->info.gfx_level < GFX11;
2122
2123 pdev->mesh_fast_launch_2 = pdev->info.gfx_level >= GFX11;
2124
2125 pdev->emulate_mesh_shader_queries = pdev->info.gfx_level == GFX10_3;
2126
2127 /* Determine the number of threads per wave for all stages. */
2128 pdev->cs_wave_size = 64;
2129 pdev->ps_wave_size = 64;
2130 pdev->ge_wave_size = 64;
2131 pdev->rt_wave_size = 64;
2132
2133 if (pdev->info.gfx_level >= GFX10) {
2134 if (instance->perftest_flags & RADV_PERFTEST_CS_WAVE_32)
2135 pdev->cs_wave_size = 32;
2136
2137 /* For pixel shaders, wave64 is recommended. */
2138 if (instance->perftest_flags & RADV_PERFTEST_PS_WAVE_32)
2139 pdev->ps_wave_size = 32;
2140
2141 if (instance->perftest_flags & RADV_PERFTEST_GE_WAVE_32)
2142 pdev->ge_wave_size = 32;
2143
2144 /* Default to 32 on RDNA1-2 as that gives better perf due to less issues with divergence.
2145 * However, on RDNA3+ default to wave64 as implicit dual issuing is likely better than
2146 * wave32 VOPD for VALU dependent code.
2147 * (as well as the SALU count becoming more problematic with wave32)
2148 */
2149 if (instance->perftest_flags & RADV_PERFTEST_RT_WAVE_32 || pdev->info.gfx_level < GFX11)
2150 pdev->rt_wave_size = 32;
2151
2152 if (instance->perftest_flags & RADV_PERFTEST_RT_WAVE_64 || instance->drirc.force_rt_wave64)
2153 pdev->rt_wave_size = 64;
2154 }
2155
2156 radv_probe_video_decode(pdev);
2157 radv_probe_video_encode(pdev);
2158
2159 pdev->max_shared_size = pdev->info.gfx_level >= GFX7 ? 65536 : 32768;
2160
2161 radv_physical_device_init_mem_types(pdev);
2162
2163 radv_physical_device_get_supported_extensions(pdev, &pdev->vk.supported_extensions);
2164 radv_physical_device_get_features(pdev, &pdev->vk.supported_features);
2165
2166 radv_get_nir_options(pdev);
2167
2168 #ifndef _WIN32
2169 if (drm_device) {
2170 struct stat primary_stat = {0}, render_stat = {0};
2171
2172 pdev->available_nodes = drm_device->available_nodes;
2173 pdev->bus_info = *drm_device->businfo.pci;
2174
2175 if ((drm_device->available_nodes & (1 << DRM_NODE_PRIMARY)) &&
2176 stat(drm_device->nodes[DRM_NODE_PRIMARY], &primary_stat) != 0) {
2177 result = vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED, "failed to stat DRM primary node %s",
2178 drm_device->nodes[DRM_NODE_PRIMARY]);
2179 goto fail_perfcounters;
2180 }
2181 pdev->primary_devid = primary_stat.st_rdev;
2182
2183 if ((drm_device->available_nodes & (1 << DRM_NODE_RENDER)) &&
2184 stat(drm_device->nodes[DRM_NODE_RENDER], &render_stat) != 0) {
2185 result = vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED, "failed to stat DRM render node %s",
2186 drm_device->nodes[DRM_NODE_RENDER]);
2187 goto fail_perfcounters;
2188 }
2189 pdev->render_devid = render_stat.st_rdev;
2190 }
2191 #endif
2192
2193 radv_physical_device_init_cache_key(pdev);
2194
2195 if (radv_device_get_cache_uuid(pdev, pdev->cache_uuid)) {
2196 result = vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED, "cannot generate UUID");
2197 goto fail_wsi;
2198 }
2199
2200 /* The gpu id is already embedded in the uuid so we just pass "radv"
2201 * when creating the cache.
2202 */
2203 char buf[VK_UUID_SIZE * 2 + 1];
2204 mesa_bytes_to_hex(buf, pdev->cache_uuid, VK_UUID_SIZE);
2205 pdev->vk.disk_cache = disk_cache_create(pdev->name, buf, 0);
2206
2207 radv_get_physical_device_properties(pdev);
2208
2209 if ((instance->debug_flags & RADV_DEBUG_INFO))
2210 ac_print_gpu_info(&pdev->info, stdout);
2211
2212 radv_init_physical_device_decoder(pdev);
2213 radv_init_physical_device_encoder(pdev);
2214
2215 radv_physical_device_init_queue_table(pdev);
2216
2217 /* We don't check the error code, but later check if it is initialized. */
2218 ac_init_perfcounters(&pdev->info, false, false, &pdev->ac_perfcounters);
2219
2220 /* The WSI is structured as a layer on top of the driver, so this has
2221 * to be the last part of initialization (at least until we get other
2222 * semi-layers).
2223 */
2224 result = radv_init_wsi(pdev);
2225 if (result != VK_SUCCESS) {
2226 vk_error(instance, result);
2227 goto fail_perfcounters;
2228 }
2229
2230 pdev->gs_table_depth = ac_get_gs_table_depth(pdev->info.gfx_level, pdev->info.family);
2231
2232 ac_get_hs_info(&pdev->info, &pdev->hs);
2233 ac_get_task_info(&pdev->info, &pdev->task_info);
2234 radv_get_binning_settings(pdev, &pdev->binning_settings);
2235
2236 if (pdev->info.has_distributed_tess) {
2237 if (pdev->info.family == CHIP_FIJI || pdev->info.family >= CHIP_POLARIS10)
2238 pdev->tess_distribution_mode = V_028B6C_TRAPEZOIDS;
2239 else
2240 pdev->tess_distribution_mode = V_028B6C_DONUTS;
2241 } else {
2242 pdev->tess_distribution_mode = V_028B6C_NO_DIST;
2243 }
2244
2245 *pdev_out = pdev;
2246
2247 return VK_SUCCESS;
2248
2249 fail_perfcounters:
2250 ac_destroy_perfcounters(&pdev->ac_perfcounters);
2251 disk_cache_destroy(pdev->vk.disk_cache);
2252 fail_wsi:
2253 pdev->ws->destroy(pdev->ws);
2254 fail_base:
2255 vk_physical_device_finish(&pdev->vk);
2256 fail_alloc:
2257 vk_free(&instance->vk.alloc, pdev);
2258 fail_fd:
2259 if (fd != -1)
2260 close(fd);
2261 if (master_fd != -1)
2262 close(master_fd);
2263 return result;
2264 }
2265
2266 VkResult
create_null_physical_device(struct vk_instance * vk_instance)2267 create_null_physical_device(struct vk_instance *vk_instance)
2268 {
2269 struct radv_instance *instance = container_of(vk_instance, struct radv_instance, vk);
2270 struct radv_physical_device *pdev;
2271
2272 VkResult result = radv_physical_device_try_create(instance, NULL, &pdev);
2273 if (result != VK_SUCCESS)
2274 return result;
2275
2276 list_addtail(&pdev->vk.link, &instance->vk.physical_devices.list);
2277 return VK_SUCCESS;
2278 }
2279
2280 VkResult
create_drm_physical_device(struct vk_instance * vk_instance,struct _drmDevice * device,struct vk_physical_device ** out)2281 create_drm_physical_device(struct vk_instance *vk_instance, struct _drmDevice *device, struct vk_physical_device **out)
2282 {
2283 #ifndef _WIN32
2284 if (!(device->available_nodes & (1 << DRM_NODE_RENDER)) || device->bustype != DRM_BUS_PCI ||
2285 device->deviceinfo.pci->vendor_id != ATI_VENDOR_ID)
2286 return VK_ERROR_INCOMPATIBLE_DRIVER;
2287
2288 return radv_physical_device_try_create((struct radv_instance *)vk_instance, device,
2289 (struct radv_physical_device **)out);
2290 #else
2291 return VK_SUCCESS;
2292 #endif
2293 }
2294
2295 void
radv_physical_device_destroy(struct vk_physical_device * vk_device)2296 radv_physical_device_destroy(struct vk_physical_device *vk_device)
2297 {
2298 struct radv_physical_device *pdev = container_of(vk_device, struct radv_physical_device, vk);
2299 const struct radv_instance *instance = radv_physical_device_instance(pdev);
2300
2301 radv_finish_wsi(pdev);
2302 ac_destroy_perfcounters(&pdev->ac_perfcounters);
2303 pdev->ws->destroy(pdev->ws);
2304 disk_cache_destroy(pdev->vk.disk_cache);
2305 if (pdev->local_fd != -1)
2306 close(pdev->local_fd);
2307 if (pdev->master_fd != -1)
2308 close(pdev->master_fd);
2309 vk_physical_device_finish(&pdev->vk);
2310 vk_free(&instance->vk.alloc, pdev);
2311 }
2312
/* Reports the queue families exposed by the device.
 *
 * When pQueueFamilyProperties is NULL, only the number of families is
 * returned through *pCount. Otherwise pQueueFamilyProperties is an array of
 * pointers to properties structs; at most *pCount of them are filled, in
 * family order: graphics first, then (when present) compute, video decode,
 * transfer (SDMA), video encode and sparse binding. *pCount is updated to
 * the number of entries actually written.
 */
static void
radv_get_physical_device_queue_family_properties(struct radv_physical_device *pdev, uint32_t *pCount,
                                                 VkQueueFamilyProperties **pQueueFamilyProperties)
{
   const struct radv_instance *instance = radv_physical_device_instance(pdev);
   int num_queue_families = 1;
   int idx;
   /* First pass: count the optional families that this device exposes. */
   if (pdev->info.ip[AMD_IP_COMPUTE].num_queues > 0 && !(instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE))
      num_queue_families++;

   if (pdev->video_decode_enabled) {
      if (pdev->info.ip[pdev->vid_decode_ip].num_queues > 0)
         num_queue_families++;
   }

   if (radv_transfer_queue_enabled(pdev)) {
      num_queue_families++;
   }

   if (pdev->video_encode_enabled) {
      if (pdev->info.ip[AMD_IP_VCN_ENC].num_queues > 0)
         num_queue_families++;
   }

   if (radv_sparse_queue_enabled(pdev)) {
      num_queue_families++;
   }

   /* Count-query only. */
   if (pQueueFamilyProperties == NULL) {
      *pCount = num_queue_families;
      return;
   }

   if (!*pCount)
      return;

   idx = 0;
   /* Graphics family: a single GFX queue that also supports compute and
    * transfer.
    */
   if (*pCount >= 1) {
      VkQueueFlags gfx_flags = VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT | VK_QUEUE_TRANSFER_BIT;
      /* Sparse binding lives on the main families unless a dedicated sparse
       * family is exposed.
       */
      if (!radv_sparse_queue_enabled(pdev))
         gfx_flags |= VK_QUEUE_SPARSE_BINDING_BIT;
      *pQueueFamilyProperties[idx] = (VkQueueFamilyProperties){
         .queueFlags = gfx_flags,
         .queueCount = 1,
         .timestampValidBits = 64,
         .minImageTransferGranularity = (VkExtent3D){1, 1, 1},
      };
      idx++;
   }

   /* Dedicated compute family, one Vulkan queue per HW compute queue. */
   if (pdev->info.ip[AMD_IP_COMPUTE].num_queues > 0 && !(instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE)) {
      VkQueueFlags compute_flags = VK_QUEUE_COMPUTE_BIT | VK_QUEUE_TRANSFER_BIT;
      if (!radv_sparse_queue_enabled(pdev))
         compute_flags |= VK_QUEUE_SPARSE_BINDING_BIT;
      if (*pCount > idx) {
         *pQueueFamilyProperties[idx] = (VkQueueFamilyProperties){
            .queueFlags = compute_flags,
            .queueCount = pdev->info.ip[AMD_IP_COMPUTE].num_queues,
            .timestampValidBits = 64,
            .minImageTransferGranularity = (VkExtent3D){1, 1, 1},
         };
         idx++;
      }
   }

   /* Video decode family: no timestamp support (0 valid bits). */
   if (pdev->video_decode_enabled) {
      if (pdev->info.ip[pdev->vid_decode_ip].num_queues > 0) {
         if (*pCount > idx) {
            *pQueueFamilyProperties[idx] = (VkQueueFamilyProperties){
               .queueFlags = VK_QUEUE_VIDEO_DECODE_BIT_KHR,
               .queueCount = pdev->info.ip[pdev->vid_decode_ip].num_queues,
               .timestampValidBits = 0,
               .minImageTransferGranularity = (VkExtent3D){1, 1, 1},
            };
            idx++;
         }
      }
   }

   /* Transfer-only family backed by SDMA; note the coarser minimum image
    * transfer granularity (16x16x8).
    */
   if (radv_transfer_queue_enabled(pdev)) {
      if (*pCount > idx) {
         *pQueueFamilyProperties[idx] = (VkQueueFamilyProperties){
            .queueFlags = VK_QUEUE_TRANSFER_BIT,
            .queueCount = pdev->info.ip[AMD_IP_SDMA].num_queues,
            .timestampValidBits = 64,
            .minImageTransferGranularity = (VkExtent3D){16, 16, 8},
         };
         idx++;
      }
   }

   /* Video encode family: no timestamp support (0 valid bits). */
   if (pdev->video_encode_enabled) {
      if (pdev->info.ip[AMD_IP_VCN_ENC].num_queues > 0) {
         if (*pCount > idx) {
            *pQueueFamilyProperties[idx] = (VkQueueFamilyProperties){
               .queueFlags = VK_QUEUE_VIDEO_ENCODE_BIT_KHR,
               .queueCount = pdev->info.ip[AMD_IP_VCN_ENC].num_queues,
               .timestampValidBits = 0,
               .minImageTransferGranularity = (VkExtent3D){1, 1, 1},
            };
            idx++;
         }
      }
   }

   /* Dedicated sparse-binding family with a single queue. */
   if (radv_sparse_queue_enabled(pdev)) {
      if (*pCount > idx) {
         *pQueueFamilyProperties[idx] = (VkQueueFamilyProperties){
            .queueFlags = VK_QUEUE_SPARSE_BINDING_BIT,
            .queueCount = 1,
            .timestampValidBits = 64,
            .minImageTransferGranularity = (VkExtent3D){1, 1, 1},
         };
         idx++;
      }
   }

   /* Report the number of families actually written. */
   *pCount = idx;
}
2432
/* Global queue priorities reported for VK_KHR_global_priority, listed from
 * lowest to highest.
 */
static const VkQueueGlobalPriorityKHR radv_global_queue_priorities[] = {
   VK_QUEUE_GLOBAL_PRIORITY_LOW_KHR,
   VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR,
   VK_QUEUE_GLOBAL_PRIORITY_HIGH_KHR,
   VK_QUEUE_GLOBAL_PRIORITY_REALTIME_KHR,
};
2439
VKAPI_ATTR void VKAPI_CALL
radv_GetPhysicalDeviceQueueFamilyProperties2(VkPhysicalDevice physicalDevice, uint32_t *pCount,
                                             VkQueueFamilyProperties2 *pQueueFamilyProperties)
{
   VK_FROM_HANDLE(radv_physical_device, pdev, physicalDevice);
   /* Count-query only. */
   if (!pQueueFamilyProperties) {
      radv_get_physical_device_queue_family_properties(pdev, pCount, NULL);
      return;
   }
   /* At most 6 families can be exposed (gfx, compute, decode, transfer,
    * encode, sparse). Collect pointers to the core structs for the helper;
    * the helper only dereferences the first *pCount of them.
    */
   VkQueueFamilyProperties *properties[] = {
      &pQueueFamilyProperties[0].queueFamilyProperties, &pQueueFamilyProperties[1].queueFamilyProperties,
      &pQueueFamilyProperties[2].queueFamilyProperties, &pQueueFamilyProperties[3].queueFamilyProperties,
      &pQueueFamilyProperties[4].queueFamilyProperties, &pQueueFamilyProperties[5].queueFamilyProperties,
   };
   radv_get_physical_device_queue_family_properties(pdev, pCount, properties);
   assert(*pCount <= 6);

   /* Fill the supported pNext extension structs for each returned family. */
   for (uint32_t i = 0; i < *pCount; i++) {
      vk_foreach_struct (ext, pQueueFamilyProperties[i].pNext) {
         switch (ext->sType) {
         case VK_STRUCTURE_TYPE_QUEUE_FAMILY_GLOBAL_PRIORITY_PROPERTIES_KHR: {
            /* All families report the same set of global priorities. */
            VkQueueFamilyGlobalPriorityPropertiesKHR *prop = (VkQueueFamilyGlobalPriorityPropertiesKHR *)ext;
            STATIC_ASSERT(ARRAY_SIZE(radv_global_queue_priorities) <= VK_MAX_GLOBAL_PRIORITY_SIZE_KHR);
            prop->priorityCount = ARRAY_SIZE(radv_global_queue_priorities);
            memcpy(&prop->priorities, radv_global_queue_priorities, sizeof(radv_global_queue_priorities));
            break;
         }
         case VK_STRUCTURE_TYPE_QUEUE_FAMILY_QUERY_RESULT_STATUS_PROPERTIES_KHR: {
            VkQueueFamilyQueryResultStatusPropertiesKHR *prop = (VkQueueFamilyQueryResultStatusPropertiesKHR *)ext;
            prop->queryResultStatusSupport = VK_FALSE;
            break;
         }
         case VK_STRUCTURE_TYPE_QUEUE_FAMILY_VIDEO_PROPERTIES_KHR: {
            /* Advertise codec operations based on build-time codec support
             * and, for AV1, on the VCN IP version (>= 3.0.0, except 3.0.33).
             */
            VkQueueFamilyVideoPropertiesKHR *prop = (VkQueueFamilyVideoPropertiesKHR *)ext;
            prop->videoCodecOperations = 0;
            if (pQueueFamilyProperties[i].queueFamilyProperties.queueFlags & VK_QUEUE_VIDEO_DECODE_BIT_KHR) {
               if (VIDEO_CODEC_H264DEC)
                  prop->videoCodecOperations |= VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR;
               if (VIDEO_CODEC_H265DEC)
                  prop->videoCodecOperations |= VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR;
               if (VIDEO_CODEC_AV1DEC && pdev->info.vcn_ip_version >= VCN_3_0_0 &&
                   pdev->info.vcn_ip_version != VCN_3_0_33)
                  prop->videoCodecOperations |= VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR;
            }
            if (pQueueFamilyProperties[i].queueFamilyProperties.queueFlags & VK_QUEUE_VIDEO_ENCODE_BIT_KHR) {
               if (VIDEO_CODEC_H264ENC)
                  prop->videoCodecOperations |= VK_VIDEO_CODEC_OPERATION_ENCODE_H264_BIT_KHR;
               if (VIDEO_CODEC_H265ENC)
                  prop->videoCodecOperations |= VK_VIDEO_CODEC_OPERATION_ENCODE_H265_BIT_KHR;
            }
            break;
         }
         default:
            break;
         }
      }
   }
}
2498
/* Fills the VK_EXT_memory_budget output (per-heap budget and usage) for the
 * device. Three accounting strategies are used: unified APU heap, split
 * fake APU heaps (GTT + visible VRAM), and dedicated-VRAM per-heap
 * accounting.
 */
static void
radv_get_memory_budget_properties(VkPhysicalDevice physicalDevice,
                                  VkPhysicalDeviceMemoryBudgetPropertiesEXT *memoryBudget)
{
   VK_FROM_HANDLE(radv_physical_device, pdev, physicalDevice);
   const struct radv_instance *instance = radv_physical_device_instance(pdev);
   VkPhysicalDeviceMemoryProperties *memory_properties = &pdev->memory_properties;

   /* For all memory heaps, the computation of budget is as follow:
    * heap_budget = heap_size - global_heap_usage + app_heap_usage
    *
    * The Vulkan spec 1.1.97 says that the budget should include any
    * currently allocated device memory.
    *
    * Note that the application heap usages are not really accurate (eg.
    * in presence of shared buffers).
    */
   if (!pdev->info.has_dedicated_vram) {
      if (instance->drirc.enable_unified_heap_on_apu) {
         /* When the heaps are unified, only the visible VRAM heap is exposed on APUs. */
         assert(pdev->heaps == RADV_HEAP_VRAM_VIS);
         assert(pdev->memory_properties.memoryHeaps[0].flags == VK_MEMORY_HEAP_DEVICE_LOCAL_BIT);
         const uint8_t vram_vis_heap_idx = 0;

         /* Get the total heap size which is the visible VRAM heap size. */
         uint64_t total_heap_size = pdev->memory_properties.memoryHeaps[vram_vis_heap_idx].size;

         /* Get the different memory usages. */
         uint64_t vram_vis_internal_usage = pdev->ws->query_value(pdev->ws, RADEON_ALLOCATED_VRAM_VIS) +
                                            pdev->ws->query_value(pdev->ws, RADEON_ALLOCATED_VRAM);
         uint64_t gtt_internal_usage = pdev->ws->query_value(pdev->ws, RADEON_ALLOCATED_GTT);
         uint64_t total_internal_usage = vram_vis_internal_usage + gtt_internal_usage;
         uint64_t total_system_usage =
            pdev->ws->query_value(pdev->ws, RADEON_VRAM_VIS_USAGE) + pdev->ws->query_value(pdev->ws, RADEON_GTT_USAGE);
         uint64_t total_usage = MAX2(total_internal_usage, total_system_usage);

         /* Compute the total free space that can be allocated for this process across all heaps. */
         uint64_t total_free_space = total_heap_size - MIN2(total_heap_size, total_usage);

         memoryBudget->heapBudget[vram_vis_heap_idx] = total_free_space + total_internal_usage;
         memoryBudget->heapUsage[vram_vis_heap_idx] = total_internal_usage;
      } else {
         /* On APUs, the driver exposes fake heaps to the application because usually the carveout
          * is too small for games but the budgets need to be redistributed accordingly.
          */
         assert(pdev->heaps == (RADV_HEAP_GTT | RADV_HEAP_VRAM_VIS));
         assert(pdev->memory_properties.memoryHeaps[0].flags == 0); /* GTT */
         assert(pdev->memory_properties.memoryHeaps[1].flags == VK_MEMORY_HEAP_DEVICE_LOCAL_BIT);
         const uint8_t gtt_heap_idx = 0, vram_vis_heap_idx = 1;

         /* Get the visible VRAM/GTT heap sizes and internal usages. */
         uint64_t gtt_heap_size = pdev->memory_properties.memoryHeaps[gtt_heap_idx].size;
         uint64_t vram_vis_heap_size = pdev->memory_properties.memoryHeaps[vram_vis_heap_idx].size;

         uint64_t vram_vis_internal_usage = pdev->ws->query_value(pdev->ws, RADEON_ALLOCATED_VRAM_VIS) +
                                            pdev->ws->query_value(pdev->ws, RADEON_ALLOCATED_VRAM);
         uint64_t gtt_internal_usage = pdev->ws->query_value(pdev->ws, RADEON_ALLOCATED_GTT);

         /* Compute the total heap size, internal and system usage. */
         uint64_t total_heap_size = vram_vis_heap_size + gtt_heap_size;
         uint64_t total_internal_usage = vram_vis_internal_usage + gtt_internal_usage;
         uint64_t total_system_usage =
            pdev->ws->query_value(pdev->ws, RADEON_VRAM_VIS_USAGE) + pdev->ws->query_value(pdev->ws, RADEON_GTT_USAGE);

         uint64_t total_usage = MAX2(total_internal_usage, total_system_usage);

         /* Compute the total free space that can be allocated for this process across all heaps. */
         uint64_t total_free_space = total_heap_size - MIN2(total_heap_size, total_usage);

         /* Compute the remaining visible VRAM size for this process. */
         uint64_t vram_vis_free_space = vram_vis_heap_size - MIN2(vram_vis_heap_size, vram_vis_internal_usage);

         /* Distribute the total free space (2/3rd as VRAM and 1/3rd as GTT) to match the heap
          * sizes, and align down to the page size to be conservative.
          */
         vram_vis_free_space =
            ROUND_DOWN_TO(MIN2((total_free_space * 2) / 3, vram_vis_free_space), pdev->info.gart_page_size);
         uint64_t gtt_free_space = total_free_space - vram_vis_free_space;

         memoryBudget->heapBudget[vram_vis_heap_idx] = vram_vis_free_space + vram_vis_internal_usage;
         memoryBudget->heapUsage[vram_vis_heap_idx] = vram_vis_internal_usage;
         memoryBudget->heapBudget[gtt_heap_idx] = gtt_free_space + gtt_internal_usage;
         memoryBudget->heapUsage[gtt_heap_idx] = gtt_internal_usage;
      }
   } else {
      /* Dedicated VRAM: account each exposed heap independently from the
       * matching winsys counters.
       */
      unsigned mask = pdev->heaps;
      unsigned heap = 0;
      while (mask) {
         uint64_t internal_usage = 0, system_usage = 0;
         unsigned type = 1u << u_bit_scan(&mask);

         switch (type) {
         case RADV_HEAP_VRAM:
            internal_usage = pdev->ws->query_value(pdev->ws, RADEON_ALLOCATED_VRAM);
            system_usage = pdev->ws->query_value(pdev->ws, RADEON_VRAM_USAGE);
            break;
         case RADV_HEAP_VRAM_VIS:
            internal_usage = pdev->ws->query_value(pdev->ws, RADEON_ALLOCATED_VRAM_VIS);
            /* Without a separate VRAM heap, invisible VRAM allocations are
             * counted against the visible heap too.
             */
            if (!(pdev->heaps & RADV_HEAP_VRAM))
               internal_usage += pdev->ws->query_value(pdev->ws, RADEON_ALLOCATED_VRAM);
            system_usage = pdev->ws->query_value(pdev->ws, RADEON_VRAM_VIS_USAGE);
            break;
         case RADV_HEAP_GTT:
            internal_usage = pdev->ws->query_value(pdev->ws, RADEON_ALLOCATED_GTT);
            system_usage = pdev->ws->query_value(pdev->ws, RADEON_GTT_USAGE);
            break;
         }

         uint64_t total_usage = MAX2(internal_usage, system_usage);

         uint64_t free_space = pdev->memory_properties.memoryHeaps[heap].size -
                               MIN2(pdev->memory_properties.memoryHeaps[heap].size, total_usage);
         memoryBudget->heapBudget[heap] = free_space + internal_usage;
         memoryBudget->heapUsage[heap] = internal_usage;
         ++heap;
      }

      assert(heap == memory_properties->memoryHeapCount);
   }

   /* The heapBudget value must be less than or equal to VkMemoryHeap::size for each heap. */
   for (uint32_t i = 0; i < memory_properties->memoryHeapCount; i++) {
      memoryBudget->heapBudget[i] = MIN2(memory_properties->memoryHeaps[i].size, memoryBudget->heapBudget[i]);
   }

   /* The heapBudget and heapUsage values must be zero for array elements
    * greater than or equal to
    * VkPhysicalDeviceMemoryProperties::memoryHeapCount.
    */
   for (uint32_t i = memory_properties->memoryHeapCount; i < VK_MAX_MEMORY_HEAPS; i++) {
      memoryBudget->heapBudget[i] = 0;
      memoryBudget->heapUsage[i] = 0;
   }
}
2633
2634 VKAPI_ATTR void VKAPI_CALL
radv_GetPhysicalDeviceMemoryProperties2(VkPhysicalDevice physicalDevice,VkPhysicalDeviceMemoryProperties2 * pMemoryProperties)2635 radv_GetPhysicalDeviceMemoryProperties2(VkPhysicalDevice physicalDevice,
2636 VkPhysicalDeviceMemoryProperties2 *pMemoryProperties)
2637 {
2638 VK_FROM_HANDLE(radv_physical_device, pdev, physicalDevice);
2639
2640 pMemoryProperties->memoryProperties = pdev->memory_properties;
2641
2642 VkPhysicalDeviceMemoryBudgetPropertiesEXT *memory_budget =
2643 vk_find_struct(pMemoryProperties->pNext, PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT);
2644 if (memory_budget)
2645 radv_get_memory_budget_properties(physicalDevice, memory_budget);
2646 }
2647
/* Time domains reported by vkGetPhysicalDeviceCalibrateableTimeDomainsKHR;
 * CLOCK_MONOTONIC_RAW is only advertised where the platform defines it.
 */
static const VkTimeDomainKHR radv_time_domains[] = {
   VK_TIME_DOMAIN_DEVICE_KHR,
   VK_TIME_DOMAIN_CLOCK_MONOTONIC_KHR,
#ifdef CLOCK_MONOTONIC_RAW
   VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_KHR,
#endif
};
2655
2656 VKAPI_ATTR VkResult VKAPI_CALL
radv_GetPhysicalDeviceCalibrateableTimeDomainsKHR(VkPhysicalDevice physicalDevice,uint32_t * pTimeDomainCount,VkTimeDomainKHR * pTimeDomains)2657 radv_GetPhysicalDeviceCalibrateableTimeDomainsKHR(VkPhysicalDevice physicalDevice, uint32_t *pTimeDomainCount,
2658 VkTimeDomainKHR *pTimeDomains)
2659 {
2660 int d;
2661 VK_OUTARRAY_MAKE_TYPED(VkTimeDomainKHR, out, pTimeDomains, pTimeDomainCount);
2662
2663 for (d = 0; d < ARRAY_SIZE(radv_time_domains); d++) {
2664 vk_outarray_append_typed(VkTimeDomainKHR, &out, i)
2665 {
2666 *i = radv_time_domains[d];
2667 }
2668 }
2669
2670 return vk_outarray_status(&out);
2671 }
2672
2673 VKAPI_ATTR void VKAPI_CALL
radv_GetPhysicalDeviceMultisamplePropertiesEXT(VkPhysicalDevice physicalDevice,VkSampleCountFlagBits samples,VkMultisamplePropertiesEXT * pMultisampleProperties)2674 radv_GetPhysicalDeviceMultisamplePropertiesEXT(VkPhysicalDevice physicalDevice, VkSampleCountFlagBits samples,
2675 VkMultisamplePropertiesEXT *pMultisampleProperties)
2676 {
2677 VkSampleCountFlagBits supported_samples = VK_SAMPLE_COUNT_2_BIT | VK_SAMPLE_COUNT_4_BIT | VK_SAMPLE_COUNT_8_BIT;
2678
2679 if (samples & supported_samples) {
2680 pMultisampleProperties->maxSampleLocationGridSize = (VkExtent2D){2, 2};
2681 } else {
2682 pMultisampleProperties->maxSampleLocationGridSize = (VkExtent2D){0, 0};
2683 }
2684 }
2685
2686 VKAPI_ATTR VkResult VKAPI_CALL
radv_GetPhysicalDeviceFragmentShadingRatesKHR(VkPhysicalDevice physicalDevice,uint32_t * pFragmentShadingRateCount,VkPhysicalDeviceFragmentShadingRateKHR * pFragmentShadingRates)2687 radv_GetPhysicalDeviceFragmentShadingRatesKHR(VkPhysicalDevice physicalDevice, uint32_t *pFragmentShadingRateCount,
2688 VkPhysicalDeviceFragmentShadingRateKHR *pFragmentShadingRates)
2689 {
2690 VK_OUTARRAY_MAKE_TYPED(VkPhysicalDeviceFragmentShadingRateKHR, out, pFragmentShadingRates,
2691 pFragmentShadingRateCount);
2692
2693 #define append_rate(w, h, s) \
2694 { \
2695 VkPhysicalDeviceFragmentShadingRateKHR rate = { \
2696 .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADING_RATE_PROPERTIES_KHR, \
2697 .sampleCounts = s, \
2698 .fragmentSize = {.width = w, .height = h}, \
2699 }; \
2700 vk_outarray_append_typed(VkPhysicalDeviceFragmentShadingRateKHR, &out, r) *r = rate; \
2701 }
2702
2703 for (uint32_t x = 2; x >= 1; x--) {
2704 for (uint32_t y = 2; y >= 1; y--) {
2705 VkSampleCountFlagBits samples;
2706
2707 if (x == 1 && y == 1) {
2708 samples = ~0;
2709 } else {
2710 samples = VK_SAMPLE_COUNT_1_BIT | VK_SAMPLE_COUNT_2_BIT | VK_SAMPLE_COUNT_4_BIT | VK_SAMPLE_COUNT_8_BIT;
2711 }
2712
2713 append_rate(x, y, samples);
2714 }
2715 }
2716 #undef append_rate
2717
2718 return vk_outarray_status(&out);
2719 }
2720
2721 /* VK_EXT_tooling_info */
2722 VKAPI_ATTR VkResult VKAPI_CALL
radv_GetPhysicalDeviceToolProperties(VkPhysicalDevice physicalDevice,uint32_t * pToolCount,VkPhysicalDeviceToolProperties * pToolProperties)2723 radv_GetPhysicalDeviceToolProperties(VkPhysicalDevice physicalDevice, uint32_t *pToolCount,
2724 VkPhysicalDeviceToolProperties *pToolProperties)
2725 {
2726 VK_FROM_HANDLE(radv_physical_device, pdev, physicalDevice);
2727 const struct radv_instance *instance = radv_physical_device_instance(pdev);
2728 VK_OUTARRAY_MAKE_TYPED(VkPhysicalDeviceToolProperties, out, pToolProperties, pToolCount);
2729 bool rgp_enabled, rmv_enabled, rra_enabled;
2730 uint32_t tool_count = 0;
2731
2732 /* RGP */
2733 rgp_enabled = instance->vk.trace_mode & RADV_TRACE_MODE_RGP;
2734 if (rgp_enabled)
2735 tool_count++;
2736
2737 /* RMV */
2738 rmv_enabled = instance->vk.trace_mode & VK_TRACE_MODE_RMV;
2739 if (rmv_enabled)
2740 tool_count++;
2741
2742 /* RRA */
2743 rra_enabled = instance->vk.trace_mode & RADV_TRACE_MODE_RRA;
2744 if (rra_enabled)
2745 tool_count++;
2746
2747 if (!pToolProperties) {
2748 *pToolCount = tool_count;
2749 return VK_SUCCESS;
2750 }
2751
2752 if (rgp_enabled) {
2753 VkPhysicalDeviceToolProperties tool = {
2754 .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TOOL_PROPERTIES,
2755 .name = "Radeon GPU Profiler",
2756 .version = "1.15",
2757 .description = "A ground-breaking low-level optimization tool that provides detailed "
2758 "information on Radeon GPUs.",
2759 .purposes = VK_TOOL_PURPOSE_PROFILING_BIT | VK_TOOL_PURPOSE_TRACING_BIT |
2760 /* VK_EXT_debug_marker is only exposed if SQTT is enabled. */
2761 VK_TOOL_PURPOSE_ADDITIONAL_FEATURES_BIT | VK_TOOL_PURPOSE_DEBUG_MARKERS_BIT_EXT,
2762 };
2763 vk_outarray_append_typed(VkPhysicalDeviceToolProperties, &out, t) *t = tool;
2764 }
2765
2766 if (rmv_enabled) {
2767 VkPhysicalDeviceToolProperties tool = {
2768 .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TOOL_PROPERTIES,
2769 .name = "Radeon Memory Visualizer",
2770 .version = "1.6",
2771 .description = "A tool to allow you to gain a deep understanding of how your application "
2772 "uses memory for graphics resources.",
2773 .purposes = VK_TOOL_PURPOSE_PROFILING_BIT | VK_TOOL_PURPOSE_TRACING_BIT,
2774 };
2775 vk_outarray_append_typed(VkPhysicalDeviceToolProperties, &out, t) *t = tool;
2776 }
2777
2778 if (rra_enabled) {
2779 VkPhysicalDeviceToolProperties tool = {
2780 .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TOOL_PROPERTIES,
2781 .name = "Radeon Raytracing Analyzer",
2782 .version = "1.2",
2783 .description = "A tool to investigate the performance of your ray tracing applications and "
2784 "highlight potential bottlenecks.",
2785 .purposes = VK_TOOL_PURPOSE_PROFILING_BIT | VK_TOOL_PURPOSE_TRACING_BIT,
2786 };
2787 vk_outarray_append_typed(VkPhysicalDeviceToolProperties, &out, t) *t = tool;
2788 }
2789
2790 return vk_outarray_status(&out);
2791 }
2792
2793 VKAPI_ATTR VkResult VKAPI_CALL
radv_GetPhysicalDeviceCooperativeMatrixPropertiesKHR(VkPhysicalDevice physicalDevice,uint32_t * pPropertyCount,VkCooperativeMatrixPropertiesKHR * pProperties)2794 radv_GetPhysicalDeviceCooperativeMatrixPropertiesKHR(VkPhysicalDevice physicalDevice, uint32_t *pPropertyCount,
2795 VkCooperativeMatrixPropertiesKHR *pProperties)
2796 {
2797 VK_OUTARRAY_MAKE_TYPED(VkCooperativeMatrixPropertiesKHR, out, pProperties, pPropertyCount);
2798
2799 vk_outarray_append_typed(VkCooperativeMatrixPropertiesKHR, &out, p)
2800 {
2801 *p = (struct VkCooperativeMatrixPropertiesKHR){.sType = VK_STRUCTURE_TYPE_COOPERATIVE_MATRIX_PROPERTIES_KHR,
2802 .MSize = 16,
2803 .NSize = 16,
2804 .KSize = 16,
2805 .AType = VK_COMPONENT_TYPE_FLOAT16_KHR,
2806 .BType = VK_COMPONENT_TYPE_FLOAT16_KHR,
2807 .CType = VK_COMPONENT_TYPE_FLOAT16_KHR,
2808 .ResultType = VK_COMPONENT_TYPE_FLOAT16_KHR,
2809 .saturatingAccumulation = false,
2810 .scope = VK_SCOPE_SUBGROUP_KHR};
2811 }
2812
2813 vk_outarray_append_typed(VkCooperativeMatrixPropertiesKHR, &out, p)
2814 {
2815 *p = (struct VkCooperativeMatrixPropertiesKHR){.sType = VK_STRUCTURE_TYPE_COOPERATIVE_MATRIX_PROPERTIES_KHR,
2816 .MSize = 16,
2817 .NSize = 16,
2818 .KSize = 16,
2819 .AType = VK_COMPONENT_TYPE_FLOAT16_KHR,
2820 .BType = VK_COMPONENT_TYPE_FLOAT16_KHR,
2821 .CType = VK_COMPONENT_TYPE_FLOAT32_KHR,
2822 .ResultType = VK_COMPONENT_TYPE_FLOAT32_KHR,
2823 .saturatingAccumulation = false,
2824 .scope = VK_SCOPE_SUBGROUP_KHR};
2825 }
2826
2827 for (unsigned asigned = 0; asigned < 2; asigned++) {
2828 for (unsigned bsigned = 0; bsigned < 2; bsigned++) {
2829 for (unsigned csigned = 0; csigned < 2; csigned++) {
2830 for (unsigned saturate = 0; saturate < 2; saturate++) {
2831 if (!csigned && saturate)
2832 continue; /* The HW only supports signed acc. */
2833 vk_outarray_append_typed(VkCooperativeMatrixPropertiesKHR, &out, p)
2834 {
2835 *p = (struct VkCooperativeMatrixPropertiesKHR){
2836 .sType = VK_STRUCTURE_TYPE_COOPERATIVE_MATRIX_PROPERTIES_KHR,
2837 .MSize = 16,
2838 .NSize = 16,
2839 .KSize = 16,
2840 .AType = asigned ? VK_COMPONENT_TYPE_SINT8_KHR : VK_COMPONENT_TYPE_UINT8_KHR,
2841 .BType = bsigned ? VK_COMPONENT_TYPE_SINT8_KHR : VK_COMPONENT_TYPE_UINT8_KHR,
2842 .CType = csigned ? VK_COMPONENT_TYPE_SINT32_KHR : VK_COMPONENT_TYPE_UINT32_KHR,
2843 .ResultType = csigned ? VK_COMPONENT_TYPE_SINT32_KHR : VK_COMPONENT_TYPE_UINT32_KHR,
2844 .saturatingAccumulation = saturate,
2845 .scope = VK_SCOPE_SUBGROUP_KHR};
2846 }
2847 }
2848 }
2849 }
2850 }
2851
2852 return vk_outarray_status(&out);
2853 }
2854