1 /*
2  * Copyright © 2016 Red Hat.
3  * Copyright © 2016 Bas Nieuwenhuizen
4  *
5  * based in part on anv driver which is:
6  * Copyright © 2015 Intel Corporation
7  *
8  * SPDX-License-Identifier: MIT
9  */
10 
11 #include <fcntl.h>
12 
13 #ifdef MAJOR_IN_SYSMACROS
14 #include <sys/sysmacros.h>
15 #endif
16 
17 #include "vk_log.h"
18 #include "vk_shader_module.h"
19 
20 #include "util/disk_cache.h"
21 #include "util/hex.h"
22 #include "util/u_debug.h"
23 #include "radv_android.h"
24 #include "radv_debug.h"
25 #include "radv_entrypoints.h"
26 #include "radv_instance.h"
27 #include "radv_physical_device.h"
28 #include "radv_pipeline_rt.h"
29 #include "radv_video.h"
30 #include "radv_wsi.h"
31 
32 #ifdef _WIN32
33 typedef void *drmDevicePtr;
34 #include <io.h>
35 #else
36 #include <amdgpu.h>
37 #include <xf86drm.h>
38 #include "drm-uapi/amdgpu_drm.h"
39 #include "winsys/amdgpu/radv_amdgpu_winsys_public.h"
40 #endif
41 #include "winsys/null/radv_null_winsys_public.h"
42 #include "git_sha1.h"
43 
44 #if AMD_LLVM_AVAILABLE
45 #include "ac_llvm_util.h"
46 #endif
47 
48 #ifdef _WIN32
49 #define RADV_SUPPORT_CALIBRATED_TIMESTAMPS 0
50 #else
51 #define RADV_SUPPORT_CALIBRATED_TIMESTAMPS 1
52 #endif
53 
54 static bool
55 radv_perf_query_supported(const struct radv_physical_device *pdev)
56 {
57    const struct radv_instance *instance = radv_physical_device_instance(pdev);
58 
59    /* SQTT / SPM interfere with the register states for perf counters, and
60     * the code has only been tested on GFX10.3 */
61    return pdev->info.gfx_level == GFX10_3 && !(instance->vk.trace_mode & RADV_TRACE_MODE_RGP);
62 }
63 
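/* Task/mesh shaders require NGG without LLVM on GFX10.3+, plus an available compute queue and
 * gang submit support. */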
64 static bool
65 radv_taskmesh_enabled(const struct radv_physical_device *pdev)
66 {
67    const struct radv_instance *instance = radv_physical_device_instance(pdev);
68 
69    if (instance->debug_flags & RADV_DEBUG_NO_MESH_SHADER)
70       return false;
71 
72    return pdev->use_ngg && !pdev->use_llvm && pdev->info.gfx_level >= GFX10_3 &&
73           !(instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE) && pdev->info.has_gang_submit;
74 }
75 
76 static bool
77 radv_transfer_queue_enabled(const struct radv_physical_device *pdev)
78 {
79    const struct radv_instance *instance = radv_physical_device_instance(pdev);
80 
81    /* Check if the GPU has SDMA support and transfer queues are allowed. */
82    if (pdev->info.sdma_ip_version == SDMA_UNKNOWN || !pdev->info.ip[AMD_IP_SDMA].num_queues ||
83        !(instance->perftest_flags & RADV_PERFTEST_TRANSFER_QUEUE))
84       return false;
85 
86    return pdev->info.gfx_level >= GFX9;
87 }
88 
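/* VRS attachments are exposed on GFX11+, or on older GPUs only when HiZ is not disabled. */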
89 static bool
90 radv_vrs_attachment_enabled(const struct radv_physical_device *pdev)
91 {
92    const struct radv_instance *instance = radv_physical_device_instance(pdev);
93    return pdev->info.gfx_level >= GFX11 || !(instance->debug_flags & RADV_DEBUG_NO_HIZ);
94 }
95 
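/* Calibrated timestamps are not exposed on Raven/Raven2. */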
96 static bool
97 radv_calibrated_timestamps_enabled(const struct radv_physical_device *pdev)
98 {
99    return RADV_SUPPORT_CALIBRATED_TIMESTAMPS && !(pdev->info.family == CHIP_RAVEN || pdev->info.family == CHIP_RAVEN2);
100 }
101 
102 static bool
103 radv_filter_minmax_enabled(const struct radv_physical_device *pdev)
104 {
105    /* Tahiti and Verde only: reduction mode is unsupported due to a bug
106     * (it might work sometimes, but that's not enough)
107     */
108    return !(pdev->info.family == CHIP_TAHITI || pdev->info.family == CHIP_VERDE);
109 }
110 
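/* Ray tracing requires GFX10.3+ hardware or software emulation; RT pipelines are additionally
 * unsupported with LLVM. */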
111 bool
112 radv_enable_rt(const struct radv_physical_device *pdev, bool rt_pipelines)
113 {
114    if (pdev->info.gfx_level < GFX10_3 && !radv_emulate_rt(pdev))
115       return false;
116 
117    if (rt_pipelines && pdev->use_llvm)
118       return false;
119 
120    return true;
121 }
122 
123 bool
124 radv_emulate_rt(const struct radv_physical_device *pdev)
125 {
126    const struct radv_instance *instance = radv_physical_device_instance(pdev);
127    return instance->perftest_flags & RADV_PERFTEST_EMULATE_RT;
128 }
129 
130 static bool
131 radv_is_conformant(const struct radv_physical_device *pdev)
132 {
133    return pdev->info.gfx_level >= GFX8 && pdev->info.gfx_level <= GFX10_3;
134 }
135 
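/* Decode a hex string of 2 * length characters into length bytes. */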
136 static void
137 parse_hex(char *out, const char *in, unsigned length)
138 {
139    for (unsigned i = 0; i < length; ++i)
140       out[i] = 0;
141 
142    for (unsigned i = 0; i < 2 * length; ++i) {
143       unsigned v = in[i] <= '9' ? in[i] - '0' : (in[i] >= 'a' ? (in[i] - 'a' + 10) : (in[i] - 'A' + 10));
144       out[i / 2] |= v << (4 * (1 - i % 2));
145    }
146 }
147 
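/* Gather the per-device and per-instance options that affect shader compilation; this key is
 * hashed into the cache UUID. */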
148 static void
149 radv_physical_device_init_cache_key(struct radv_physical_device *pdev)
150 {
151    const struct radv_instance *instance = radv_physical_device_instance(pdev);
152    struct radv_physical_device_cache_key *key = &pdev->cache_key;
153 
154    key->family = pdev->info.family;
155    key->ptr_size = sizeof(void *);
156    key->conformant_trunc_coord = pdev->info.conformant_trunc_coord;
157 
158    key->clear_lds = instance->drirc.clear_lds;
159    key->cs_wave32 = pdev->cs_wave_size == 32;
160    key->disable_aniso_single_level = instance->drirc.disable_aniso_single_level && pdev->info.gfx_level < GFX8;
161    key->disable_shrink_image_store = instance->drirc.disable_shrink_image_store;
162    key->disable_sinking_load_input_fs = instance->drirc.disable_sinking_load_input_fs;
163    key->dual_color_blend_by_location = instance->drirc.dual_color_blend_by_location;
164    key->emulate_rt = !!(instance->perftest_flags & RADV_PERFTEST_EMULATE_RT);
165    key->ge_wave32 = pdev->ge_wave_size == 32;
166    key->invariant_geom = !!(instance->debug_flags & RADV_DEBUG_INVARIANT_GEOM);
167    key->lower_discard_to_demote = !!(instance->debug_flags & RADV_DEBUG_DISCARD_TO_DEMOTE);
168    key->no_fmask = !!(instance->debug_flags & RADV_DEBUG_NO_FMASK);
169    key->no_ngg_gs = !!(instance->debug_flags & RADV_DEBUG_NO_NGG_GS);
170    key->no_rt = !!(instance->debug_flags & RADV_DEBUG_NO_RT);
171    key->ps_wave32 = pdev->ps_wave_size == 32;
172    key->rt_wave64 = pdev->rt_wave_size == 64;
173    key->split_fma = !!(instance->debug_flags & RADV_DEBUG_SPLIT_FMA);
174    key->ssbo_non_uniform = instance->drirc.ssbo_non_uniform;
175    key->tex_non_uniform = instance->drirc.tex_non_uniform;
176    key->use_llvm = pdev->use_llvm;
177    key->use_ngg = pdev->use_ngg;
178    key->use_ngg_culling = pdev->use_ngg_culling;
179 }
180 
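/* Derive the pipeline/disk cache UUID from the build identifier (and LLVM, if used) plus the
 * device cache key. */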
181 static int
182 radv_device_get_cache_uuid(struct radv_physical_device *pdev, void *uuid)
183 {
184    struct mesa_sha1 ctx;
185    unsigned char sha1[20];
186 
187    memset(uuid, 0, VK_UUID_SIZE);
188    _mesa_sha1_init(&ctx);
189 
190 #ifdef RADV_BUILD_ID_OVERRIDE
191    {
192       unsigned size = strlen(RADV_BUILD_ID_OVERRIDE) / 2;
193       char *data = alloca(size);
194       parse_hex(data, RADV_BUILD_ID_OVERRIDE, size);
195       _mesa_sha1_update(&ctx, data, size);
196    }
197 #else
198    if (!disk_cache_get_function_identifier(radv_device_get_cache_uuid, &ctx))
199       return -1;
200 #endif
201 
202 #if AMD_LLVM_AVAILABLE
203    if (pdev->use_llvm && !disk_cache_get_function_identifier(LLVMInitializeAMDGPUTargetInfo, &ctx))
204       return -1;
205 #endif
206 
207    _mesa_sha1_update(&ctx, &pdev->cache_key, sizeof(pdev->cache_key));
208    _mesa_sha1_final(&ctx, sha1);
209 
210    memcpy(uuid, sha1, VK_UUID_SIZE);
211    return 0;
212 }
213 
214 static void
215 radv_get_driver_uuid(void *uuid)
216 {
217    ac_compute_driver_uuid(uuid, VK_UUID_SIZE);
218 }
219 
220 static void
221 radv_get_device_uuid(const struct radeon_info *gpu_info, void *uuid)
222 {
223    ac_compute_device_uuid(gpu_info, uuid, VK_UUID_SIZE);
224 }
225 
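/* Map Vulkan queue family indices to RADV queue types, in the order the families are exposed. */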
226 static void
227 radv_physical_device_init_queue_table(struct radv_physical_device *pdev)
228 {
229    const struct radv_instance *instance = radv_physical_device_instance(pdev);
230    int idx = 0;
231    pdev->vk_queue_to_radv[idx] = RADV_QUEUE_GENERAL;
232    idx++;
233 
234    for (unsigned i = 1; i < RADV_MAX_QUEUE_FAMILIES; i++)
235       pdev->vk_queue_to_radv[i] = RADV_MAX_QUEUE_FAMILIES + 1;
236 
237    if (pdev->info.ip[AMD_IP_COMPUTE].num_queues > 0 && !(instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE)) {
238       pdev->vk_queue_to_radv[idx] = RADV_QUEUE_COMPUTE;
239       idx++;
240    }
241 
242    if (pdev->video_decode_enabled) {
243       if (pdev->info.ip[pdev->vid_decode_ip].num_queues > 0) {
244          pdev->vk_queue_to_radv[idx] = RADV_QUEUE_VIDEO_DEC;
245          idx++;
246       }
247    }
248 
249    if (radv_transfer_queue_enabled(pdev)) {
250       pdev->vk_queue_to_radv[idx] = RADV_QUEUE_TRANSFER;
251       idx++;
252    }
253 
254    if (pdev->video_encode_enabled) {
255       if (pdev->info.ip[AMD_IP_VCN_ENC].num_queues > 0) {
256          pdev->vk_queue_to_radv[idx] = RADV_QUEUE_VIDEO_ENC;
257          idx++;
258       }
259    }
260 
261    if (radv_sparse_queue_enabled(pdev)) {
262       pdev->vk_queue_to_radv[idx] = RADV_QUEUE_SPARSE;
263       idx++;
264    }
265 
266    pdev->num_queues = idx;
267 }
268 
269 enum radv_heap {
270    RADV_HEAP_VRAM = 1 << 0,
271    RADV_HEAP_GTT = 1 << 1,
272    RADV_HEAP_VRAM_VIS = 1 << 2,
273    RADV_HEAP_MAX = 1 << 3,
274 };
275 
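/* VRAM size, optionally clamped by the override_vram_size drirc option. */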
276 static uint64_t
277 radv_get_adjusted_vram_size(struct radv_physical_device *pdev)
278 {
279    const struct radv_instance *instance = radv_physical_device_instance(pdev);
280    int ov = instance->drirc.override_vram_size;
281    if (ov >= 0)
282       return MIN2((uint64_t)pdev->info.vram_size_kb * 1024, (uint64_t)ov << 20);
283    return (uint64_t)pdev->info.vram_size_kb * 1024;
284 }
285 
286 static uint64_t
287 radv_get_visible_vram_size(struct radv_physical_device *pdev)
288 {
289    return MIN2(radv_get_adjusted_vram_size(pdev), (uint64_t)pdev->info.vram_vis_size_kb * 1024);
290 }
291 
292 static uint64_t
293 radv_get_vram_size(struct radv_physical_device *pdev)
294 {
295    uint64_t total_size = radv_get_adjusted_vram_size(pdev);
296    return total_size - MIN2(total_size, (uint64_t)pdev->info.vram_vis_size_kb * 1024);
297 }
298 
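/* Build the Vulkan memory heaps and types from the VRAM/GTT split, including 32-bit and
 * (when supported) L2-uncached variants. */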
299 static void
300 radv_physical_device_init_mem_types(struct radv_physical_device *pdev)
301 {
302    const struct radv_instance *instance = radv_physical_device_instance(pdev);
303    uint64_t visible_vram_size = radv_get_visible_vram_size(pdev);
304    uint64_t vram_size = radv_get_vram_size(pdev);
305    uint64_t gtt_size = (uint64_t)pdev->info.gart_size_kb * 1024;
306    int vram_index = -1, visible_vram_index = -1, gart_index = -1;
307 
308    pdev->memory_properties.memoryHeapCount = 0;
309    pdev->heaps = 0;
310 
311    if (!pdev->info.has_dedicated_vram) {
312       const uint64_t total_size = gtt_size + visible_vram_size;
313 
314       if (instance->drirc.enable_unified_heap_on_apu) {
315          /* Some applications behave better when the driver exposes only one heap of VRAM on APUs. */
316          visible_vram_size = total_size;
317          gtt_size = 0;
318       } else {
319          /* On APUs, the carveout is usually too small for games that request a minimum VRAM size
320           * greater than it. To work around this, we compute the total available memory size (GTT +
321           * visible VRAM size) and report 2/3 as VRAM and 1/3 as GTT.
322           */
323          visible_vram_size = align64((total_size * 2) / 3, pdev->info.gart_page_size);
324          gtt_size = total_size - visible_vram_size;
325       }
326 
327       vram_size = 0;
328    }
329 
330    /* Only get a VRAM heap if it is significant, not if it is a 16 MiB
331     * remainder above visible VRAM. */
332    if (vram_size > 0 && vram_size * 9 >= visible_vram_size) {
333       vram_index = pdev->memory_properties.memoryHeapCount++;
334       pdev->heaps |= RADV_HEAP_VRAM;
335       pdev->memory_properties.memoryHeaps[vram_index] = (VkMemoryHeap){
336          .size = vram_size,
337          .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
338       };
339    }
340 
341    if (gtt_size > 0) {
342       gart_index = pdev->memory_properties.memoryHeapCount++;
343       pdev->heaps |= RADV_HEAP_GTT;
344       pdev->memory_properties.memoryHeaps[gart_index] = (VkMemoryHeap){
345          .size = gtt_size,
346          .flags = 0,
347       };
348    }
349 
350    if (visible_vram_size) {
351       visible_vram_index = pdev->memory_properties.memoryHeapCount++;
352       pdev->heaps |= RADV_HEAP_VRAM_VIS;
353       pdev->memory_properties.memoryHeaps[visible_vram_index] = (VkMemoryHeap){
354          .size = visible_vram_size,
355          .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
356       };
357    }
358 
359    unsigned type_count = 0;
360 
361    if (vram_index >= 0 || visible_vram_index >= 0) {
362       pdev->memory_domains[type_count] = RADEON_DOMAIN_VRAM;
363       pdev->memory_flags[type_count] = RADEON_FLAG_NO_CPU_ACCESS;
364       pdev->memory_properties.memoryTypes[type_count++] = (VkMemoryType){
365          .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
366          .heapIndex = vram_index >= 0 ? vram_index : visible_vram_index,
367       };
368 
369       pdev->memory_domains[type_count] = RADEON_DOMAIN_VRAM;
370       pdev->memory_flags[type_count] = RADEON_FLAG_NO_CPU_ACCESS | RADEON_FLAG_32BIT;
371       pdev->memory_properties.memoryTypes[type_count++] = (VkMemoryType){
372          .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
373          .heapIndex = vram_index >= 0 ? vram_index : visible_vram_index,
374       };
375    }
376 
377    if (gart_index >= 0) {
378       pdev->memory_domains[type_count] = RADEON_DOMAIN_GTT;
379       pdev->memory_flags[type_count] = RADEON_FLAG_GTT_WC | RADEON_FLAG_CPU_ACCESS;
380       pdev->memory_properties.memoryTypes[type_count++] = (VkMemoryType){
381          .propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
382          .heapIndex = gart_index,
383       };
384    }
385    if (visible_vram_index >= 0) {
386       pdev->memory_domains[type_count] = RADEON_DOMAIN_VRAM;
387       pdev->memory_flags[type_count] = RADEON_FLAG_CPU_ACCESS;
388       pdev->memory_properties.memoryTypes[type_count++] = (VkMemoryType){
389          .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
390                           VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
391          .heapIndex = visible_vram_index,
392       };
393 
394       pdev->memory_domains[type_count] = RADEON_DOMAIN_VRAM;
395       pdev->memory_flags[type_count] = RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_32BIT;
396       pdev->memory_properties.memoryTypes[type_count++] = (VkMemoryType){
397          .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
398                           VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
399          .heapIndex = visible_vram_index,
400       };
401    }
402 
403    if (gart_index >= 0) {
404       pdev->memory_domains[type_count] = RADEON_DOMAIN_GTT;
405       pdev->memory_flags[type_count] = RADEON_FLAG_CPU_ACCESS;
406       pdev->memory_properties.memoryTypes[type_count++] = (VkMemoryType){
407          .propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
408                           VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
409          .heapIndex = gart_index,
410       };
411 
412       pdev->memory_domains[type_count] = RADEON_DOMAIN_GTT;
413       pdev->memory_flags[type_count] = RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_32BIT;
414       pdev->memory_properties.memoryTypes[type_count++] = (VkMemoryType){
415          .propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
416                           VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
417          .heapIndex = gart_index,
418       };
419    }
420    pdev->memory_properties.memoryTypeCount = type_count;
421 
422    if (pdev->info.has_l2_uncached) {
423       for (int i = 0; i < pdev->memory_properties.memoryTypeCount; i++) {
424          VkMemoryType mem_type = pdev->memory_properties.memoryTypes[i];
425 
426          if (((mem_type.propertyFlags & (VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)) ||
427               mem_type.propertyFlags == VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) &&
428              !(pdev->memory_flags[i] & RADEON_FLAG_32BIT)) {
429 
430             VkMemoryPropertyFlags property_flags = mem_type.propertyFlags | VK_MEMORY_PROPERTY_DEVICE_COHERENT_BIT_AMD |
431                                                    VK_MEMORY_PROPERTY_DEVICE_UNCACHED_BIT_AMD;
432 
433             pdev->memory_domains[type_count] = pdev->memory_domains[i];
434             pdev->memory_flags[type_count] = pdev->memory_flags[i] | RADEON_FLAG_VA_UNCACHED;
435             pdev->memory_properties.memoryTypes[type_count++] = (VkMemoryType){
436                .propertyFlags = property_flags,
437                .heapIndex = mem_type.heapIndex,
438             };
439          }
440       }
441       pdev->memory_properties.memoryTypeCount = type_count;
442    }
443 
444    for (unsigned i = 0; i < type_count; ++i) {
445       if (pdev->memory_flags[i] & RADEON_FLAG_32BIT)
446          pdev->memory_types_32bit |= BITFIELD_BIT(i);
447    }
448 }
449 
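/* Return the index of the memory type whose property flags match exactly. */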
450 uint32_t
451 radv_find_memory_index(const struct radv_physical_device *pdev, VkMemoryPropertyFlags flags)
452 {
453    const VkPhysicalDeviceMemoryProperties *mem_properties = &pdev->memory_properties;
454    for (uint32_t i = 0; i < mem_properties->memoryTypeCount; ++i) {
455       if (mem_properties->memoryTypes[i].propertyFlags == flags) {
456          return i;
457       }
458    }
459    unreachable("invalid memory properties");
460 }
461 
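/* Select primitive binning settings based on GPU generation and render backend count. */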
462 static void
463 radv_get_binning_settings(const struct radv_physical_device *pdev, struct radv_binning_settings *settings)
464 {
465    if ((pdev->info.has_dedicated_vram && pdev->info.max_render_backends > 4) || pdev->info.gfx_level >= GFX10) {
466       /* Using higher settings on GFX10+ can cause random GPU hangs. */
467       settings->context_states_per_bin = 1;
468       settings->persistent_states_per_bin = 1;
469    } else {
470       settings->context_states_per_bin = pdev->info.has_gfx9_scissor_bug ? 1 : 3;
471       settings->persistent_states_per_bin = 1;
472    }
473 
474    settings->fpovs_per_batch = 63;
475 }
476 
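/* Report which device extensions are supported for this GPU/instance combination. */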
477 static void
478 radv_physical_device_get_supported_extensions(const struct radv_physical_device *pdev,
479                                               struct vk_device_extension_table *out_ext)
480 {
481    const struct radv_instance *instance = radv_physical_device_instance(pdev);
482    const struct vk_device_extension_table ext = {
483       .KHR_8bit_storage = true,
484       .KHR_16bit_storage = true,
485       .KHR_acceleration_structure = radv_enable_rt(pdev, false),
486       .KHR_calibrated_timestamps = radv_calibrated_timestamps_enabled(pdev),
487       .KHR_compute_shader_derivatives = true,
488       .KHR_cooperative_matrix = pdev->info.gfx_level >= GFX11 && !pdev->use_llvm,
489       .KHR_bind_memory2 = true,
490       .KHR_buffer_device_address = true,
491       .KHR_copy_commands2 = true,
492       .KHR_create_renderpass2 = true,
493       .KHR_dedicated_allocation = true,
494       .KHR_deferred_host_operations = true,
495       .KHR_depth_stencil_resolve = true,
496       .KHR_descriptor_update_template = true,
497       .KHR_device_group = true,
498       .KHR_draw_indirect_count = true,
499       .KHR_driver_properties = true,
500       .KHR_dynamic_rendering = true,
501       .KHR_dynamic_rendering_local_read = true,
502       .KHR_external_fence = true,
503       .KHR_external_fence_fd = true,
504       .KHR_external_memory = true,
505       .KHR_external_memory_fd = true,
506       .KHR_external_semaphore = true,
507       .KHR_external_semaphore_fd = true,
508       .KHR_format_feature_flags2 = true,
509       .KHR_fragment_shader_barycentric = pdev->info.gfx_level >= GFX10_3,
510       .KHR_fragment_shading_rate = pdev->info.gfx_level >= GFX10_3,
511       .KHR_get_memory_requirements2 = true,
512       .KHR_global_priority = true,
513       .KHR_image_format_list = true,
514       .KHR_imageless_framebuffer = true,
515 #ifdef RADV_USE_WSI_PLATFORM
516       .KHR_incremental_present = true,
517 #endif
518       .KHR_index_type_uint8 = pdev->info.gfx_level >= GFX8,
519       .KHR_line_rasterization = true,
520       .KHR_load_store_op_none = true,
521       .KHR_maintenance1 = true,
522       .KHR_maintenance2 = true,
523       .KHR_maintenance3 = true,
524       .KHR_maintenance4 = true,
525       .KHR_maintenance5 = true,
526       .KHR_maintenance6 = true,
527       .KHR_maintenance7 = true,
528       .KHR_map_memory2 = true,
529       .KHR_multiview = true,
530       .KHR_performance_query = radv_perf_query_supported(pdev),
531       .KHR_pipeline_binary = true,
532       .KHR_pipeline_executable_properties = true,
533       .KHR_pipeline_library = !pdev->use_llvm,
534       /* Hide these behind dri configs for now since we cannot implement it reliably on
535        * all surfaces yet. There is no surface capability query for present wait/id,
536        * but the feature is useful enough to hide behind an opt-in mechanism for now.
537        * If the instance only enables surface extensions that unconditionally support present wait,
538        * we can also expose the extension that way. */
539       .KHR_present_id =
540          instance->drirc.enable_khr_present_wait || wsi_common_vk_instance_supports_present_wait(&instance->vk),
541       .KHR_present_wait =
542          instance->drirc.enable_khr_present_wait || wsi_common_vk_instance_supports_present_wait(&instance->vk),
543       .KHR_push_descriptor = true,
544       .KHR_ray_query = radv_enable_rt(pdev, false),
545       .KHR_ray_tracing_maintenance1 = radv_enable_rt(pdev, false),
546       .KHR_ray_tracing_pipeline = radv_enable_rt(pdev, true),
547       .KHR_ray_tracing_position_fetch = radv_enable_rt(pdev, false),
548       .KHR_relaxed_block_layout = true,
549       .KHR_sampler_mirror_clamp_to_edge = true,
550       .KHR_sampler_ycbcr_conversion = true,
551       .KHR_separate_depth_stencil_layouts = true,
552       .KHR_shader_atomic_int64 = true,
553       .KHR_shader_clock = true,
554       .KHR_shader_draw_parameters = true,
555       .KHR_shader_expect_assume = true,
556       .KHR_shader_float16_int8 = true,
557       .KHR_shader_float_controls = true,
558       .KHR_shader_float_controls2 = true,
559       .KHR_shader_integer_dot_product = true,
560       .KHR_shader_maximal_reconvergence = true,
561       .KHR_shader_non_semantic_info = true,
562       .KHR_shader_quad_control = true,
563       .KHR_shader_relaxed_extended_instruction = true,
564       .KHR_shader_subgroup_extended_types = true,
565       .KHR_shader_subgroup_rotate = true,
566       .KHR_shader_subgroup_uniform_control_flow = true,
567       .KHR_shader_terminate_invocation = true,
568       .KHR_spirv_1_4 = true,
569       .KHR_storage_buffer_storage_class = true,
570 #ifdef RADV_USE_WSI_PLATFORM
571       .KHR_swapchain = true,
572       .KHR_swapchain_mutable_format = true,
573 #endif
574       .KHR_synchronization2 = true,
575       .KHR_timeline_semaphore = true,
576       .KHR_uniform_buffer_standard_layout = true,
577       .KHR_variable_pointers = true,
578       .KHR_vertex_attribute_divisor = true,
579       .KHR_video_maintenance1 = true,
580       .KHR_video_queue = pdev->video_decode_enabled || pdev->video_encode_enabled,
581       .KHR_video_decode_av1 = (pdev->info.vcn_ip_version >= VCN_3_0_0 && pdev->info.vcn_ip_version != VCN_3_0_33 &&
582                                VIDEO_CODEC_AV1DEC && pdev->video_decode_enabled),
583       .KHR_video_decode_queue = pdev->video_decode_enabled,
584       .KHR_video_decode_h264 = VIDEO_CODEC_H264DEC && pdev->video_decode_enabled,
585       .KHR_video_decode_h265 = VIDEO_CODEC_H265DEC && pdev->video_decode_enabled,
586       .KHR_video_encode_h264 = VIDEO_CODEC_H264ENC && pdev->video_encode_enabled,
587       .KHR_video_encode_h265 = VIDEO_CODEC_H265ENC && pdev->video_encode_enabled,
588       .KHR_video_encode_queue = pdev->video_encode_enabled,
589       .KHR_vulkan_memory_model = true,
590       .KHR_workgroup_memory_explicit_layout = true,
591       .KHR_zero_initialize_workgroup_memory = true,
592       .EXT_4444_formats = true,
593       .EXT_attachment_feedback_loop_dynamic_state = true,
594       .EXT_attachment_feedback_loop_layout = true,
595       .EXT_border_color_swizzle = pdev->info.gfx_level >= GFX10,
596       .EXT_buffer_device_address = true,
597       .EXT_calibrated_timestamps = radv_calibrated_timestamps_enabled(pdev),
598       .EXT_color_write_enable = true,
599       .EXT_conditional_rendering = true,
600       .EXT_conservative_rasterization = pdev->info.gfx_level >= GFX9,
601       .EXT_custom_border_color = true,
602       .EXT_debug_marker = instance->vk.trace_mode & RADV_TRACE_MODE_RGP,
603       .EXT_depth_bias_control = true,
604       .EXT_depth_clamp_zero_one = true,
605       .EXT_depth_clip_control = true,
606       .EXT_depth_clip_enable = true,
607       .EXT_depth_range_unrestricted = true,
608       .EXT_descriptor_buffer = true,
609       .EXT_descriptor_indexing = true,
610       .EXT_device_address_binding_report = true,
611       .EXT_device_fault = pdev->info.has_gpuvm_fault_query,
612       .EXT_discard_rectangles = true,
613 #ifdef VK_USE_PLATFORM_DISPLAY_KHR
614       .EXT_display_control = true,
615 #endif
616       .EXT_dynamic_rendering_unused_attachments = true,
617       .EXT_extended_dynamic_state = true,
618       .EXT_extended_dynamic_state2 = true,
619       .EXT_extended_dynamic_state3 = true,
620       .EXT_external_memory_acquire_unmodified = true,
621       .EXT_external_memory_dma_buf = true,
622       .EXT_external_memory_host = pdev->info.has_userptr,
623       .EXT_fragment_shader_interlock = radv_has_pops(pdev),
624       .EXT_global_priority = true,
625       .EXT_global_priority_query = true,
626       .EXT_graphics_pipeline_library = !pdev->use_llvm && !(instance->debug_flags & RADV_DEBUG_NO_GPL),
627       .EXT_host_query_reset = true,
628       .EXT_image_2d_view_of_3d = true,
629       .EXT_image_compression_control = true,
630       .EXT_image_drm_format_modifier = pdev->info.gfx_level >= GFX9,
631       .EXT_image_robustness = true,
632       .EXT_image_sliced_view_of_3d = pdev->info.gfx_level >= GFX10,
633       .EXT_image_view_min_lod = true,
634       .EXT_index_type_uint8 = pdev->info.gfx_level >= GFX8,
635       .EXT_inline_uniform_block = true,
636       .EXT_legacy_vertex_attributes = !pdev->use_llvm,
637       .EXT_line_rasterization = true,
638       .EXT_load_store_op_none = true,
639       .EXT_map_memory_placed = true,
640       .EXT_memory_budget = true,
641       .EXT_memory_priority = true,
642       .EXT_mesh_shader = radv_taskmesh_enabled(pdev),
643       .EXT_multi_draw = true,
644       .EXT_mutable_descriptor_type = true, /* Trivial promotion from VALVE. */
645       .EXT_nested_command_buffer = true,
646       .EXT_non_seamless_cube_map = true,
647       .EXT_pci_bus_info = true,
648 #ifndef _WIN32
649       .EXT_physical_device_drm = true,
650 #endif
651       .EXT_pipeline_creation_cache_control = true,
652       .EXT_pipeline_creation_feedback = true,
653       .EXT_pipeline_library_group_handles = radv_enable_rt(pdev, true),
654       .EXT_pipeline_robustness = !pdev->use_llvm,
655       .EXT_post_depth_coverage = pdev->info.gfx_level >= GFX10,
656       .EXT_primitive_topology_list_restart = true,
657       .EXT_primitives_generated_query = true,
658       .EXT_private_data = true,
659       .EXT_provoking_vertex = true,
660       .EXT_queue_family_foreign = true,
661       .EXT_robustness2 = true,
662       .EXT_sample_locations = pdev->info.gfx_level < GFX10,
663       .EXT_sampler_filter_minmax = radv_filter_minmax_enabled(pdev),
664       .EXT_scalar_block_layout = pdev->info.gfx_level >= GFX7,
665       .EXT_separate_stencil_usage = true,
666       .EXT_shader_atomic_float = true,
667       .EXT_shader_atomic_float2 = true,
668       .EXT_shader_demote_to_helper_invocation = true,
669       .EXT_shader_image_atomic_int64 = true,
670       .EXT_shader_module_identifier = true,
671       .EXT_shader_object = !pdev->use_llvm && !(instance->debug_flags & RADV_DEBUG_NO_ESO),
672       .EXT_shader_replicated_composites = true,
673       .EXT_shader_stencil_export = true,
674       .EXT_shader_subgroup_ballot = true,
675       .EXT_shader_subgroup_vote = true,
676       .EXT_shader_viewport_index_layer = true,
677       .EXT_subgroup_size_control = true,
678 #ifdef RADV_USE_WSI_PLATFORM
679       .EXT_swapchain_maintenance1 = true,
680 #endif
681       .EXT_texel_buffer_alignment = true,
682       .EXT_tooling_info = true,
683       .EXT_transform_feedback = true,
684       .EXT_vertex_attribute_divisor = true,
685       .EXT_vertex_input_dynamic_state = !pdev->use_llvm,
686       .EXT_ycbcr_image_arrays = true,
687       .AMD_buffer_marker = true,
688       .AMD_device_coherent_memory = true,
689       .AMD_draw_indirect_count = true,
690       .AMD_gcn_shader = true,
691       .AMD_gpu_shader_half_float = pdev->info.has_packed_math_16bit,
692       .AMD_gpu_shader_int16 = pdev->info.has_packed_math_16bit,
693       .AMD_memory_overallocation_behavior = true,
694       .AMD_mixed_attachment_samples = true,
695       .AMD_rasterization_order = pdev->info.has_out_of_order_rast,
696       .AMD_shader_ballot = true,
697       .AMD_shader_core_properties = true,
698       .AMD_shader_core_properties2 = true,
699       .AMD_shader_early_and_late_fragment_tests = true,
700       .AMD_shader_explicit_vertex_parameter = true,
701       .AMD_shader_fragment_mask = pdev->use_fmask,
702       .AMD_shader_image_load_store_lod = true,
703       .AMD_shader_trinary_minmax = true,
704       .AMD_texture_gather_bias_lod = pdev->info.gfx_level < GFX11,
705 #if DETECT_OS_ANDROID
706       .ANDROID_external_memory_android_hardware_buffer = RADV_SUPPORT_ANDROID_HARDWARE_BUFFER,
707       .ANDROID_native_buffer = true,
708 #endif
709       .GOOGLE_decorate_string = true,
710       .GOOGLE_hlsl_functionality1 = true,
711       .GOOGLE_user_type = true,
712       .INTEL_shader_integer_functions2 = true,
713       .MESA_image_alignment_control = pdev->info.gfx_level >= GFX9 && pdev->info.gfx_level <= GFX11_5,
714       .NV_compute_shader_derivatives = true,
715       .NV_device_generated_commands = instance->drirc.enable_dgc,
716       .NV_device_generated_commands_compute = instance->drirc.enable_dgc,
717       /* Undocumented extension purely for vkd3d-proton. This check is to prevent anyone else from
718        * using it.
719        */
720       .VALVE_descriptor_set_host_mapping =
721          pdev->vk.instance->app_info.engine_name && strcmp(pdev->vk.instance->app_info.engine_name, "vkd3d") == 0,
722       .VALVE_mutable_descriptor_type = true,
723    };
724    *out_ext = ext;
725 }
726 
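/* Report the supported core and extension features for this physical device. */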
727 static void
728 radv_physical_device_get_features(const struct radv_physical_device *pdev, struct vk_features *features)
729 {
730    const struct radv_instance *instance = radv_physical_device_instance(pdev);
731    bool taskmesh_en = radv_taskmesh_enabled(pdev);
732    bool has_perf_query = radv_perf_query_supported(pdev);
733    bool has_shader_image_float_minmax = pdev->info.gfx_level != GFX8 && pdev->info.gfx_level != GFX9 &&
734                                         pdev->info.gfx_level != GFX11 && pdev->info.gfx_level != GFX11_5;
735    bool has_fragment_shader_interlock = radv_has_pops(pdev);
736 
737    *features = (struct vk_features){
738       /* Vulkan 1.0 */
739       .robustBufferAccess = true,
740       .fullDrawIndexUint32 = true,
741       .imageCubeArray = true,
742       .independentBlend = true,
743       .geometryShader = true,
744       .tessellationShader = true,
745       .sampleRateShading = true,
746       .dualSrcBlend = true,
747       .logicOp = true,
748       .multiDrawIndirect = true,
749       .drawIndirectFirstInstance = true,
750       .depthClamp = true,
751       .depthBiasClamp = true,
752       .fillModeNonSolid = true,
753       .depthBounds = true,
754       .wideLines = true,
755       .largePoints = true,
756       .alphaToOne = true,
757       .multiViewport = true,
758       .samplerAnisotropy = true,
759       .textureCompressionETC2 = pdev->info.has_etc_support || pdev->emulate_etc2,
760       .textureCompressionASTC_LDR = pdev->emulate_astc,
761       .textureCompressionBC = true,
762       .occlusionQueryPrecise = true,
763       .pipelineStatisticsQuery = true,
764       .vertexPipelineStoresAndAtomics = true,
765       .fragmentStoresAndAtomics = true,
766       .shaderTessellationAndGeometryPointSize = true,
767       .shaderImageGatherExtended = true,
768       .shaderStorageImageExtendedFormats = true,
769       .shaderStorageImageMultisample = true,
770       .shaderUniformBufferArrayDynamicIndexing = true,
771       .shaderSampledImageArrayDynamicIndexing = true,
772       .shaderStorageBufferArrayDynamicIndexing = true,
773       .shaderStorageImageArrayDynamicIndexing = true,
774       .shaderStorageImageReadWithoutFormat = true,
775       .shaderStorageImageWriteWithoutFormat = true,
776       .shaderClipDistance = true,
777       .shaderCullDistance = true,
778       .shaderFloat64 = true,
779       .shaderInt64 = true,
780       .shaderInt16 = true,
781       .sparseBinding = true,
782       .sparseResidencyBuffer = pdev->info.family >= CHIP_POLARIS10,
783       .sparseResidencyImage2D = pdev->info.family >= CHIP_POLARIS10,
784       .sparseResidencyImage3D = pdev->info.family >= CHIP_POLARIS10,
785       .sparseResidencyAliased = pdev->info.family >= CHIP_POLARIS10,
786       .variableMultisampleRate = true,
787       .shaderResourceMinLod = true,
788       .shaderResourceResidency = true,
789       .inheritedQueries = true,
790 
791       /* Vulkan 1.1 */
792       .storageBuffer16BitAccess = true,
793       .uniformAndStorageBuffer16BitAccess = true,
794       .storagePushConstant16 = true,
795       .storageInputOutput16 = pdev->info.has_packed_math_16bit,
796       .multiview = true,
797       .multiviewGeometryShader = true,
798       .multiviewTessellationShader = true,
799       .variablePointersStorageBuffer = true,
800       .variablePointers = true,
801       .protectedMemory = false,
802       .samplerYcbcrConversion = true,
803       .shaderDrawParameters = true,
804 
805       /* Vulkan 1.2 */
806       .samplerMirrorClampToEdge = true,
807       .drawIndirectCount = true,
808       .storageBuffer8BitAccess = true,
809       .uniformAndStorageBuffer8BitAccess = true,
810       .storagePushConstant8 = true,
811       .shaderBufferInt64Atomics = true,
812       .shaderSharedInt64Atomics = true,
813       .shaderFloat16 = pdev->info.has_packed_math_16bit,
814       .shaderInt8 = true,
815 
816       .descriptorIndexing = true,
817       .shaderInputAttachmentArrayDynamicIndexing = true,
818       .shaderUniformTexelBufferArrayDynamicIndexing = true,
819       .shaderStorageTexelBufferArrayDynamicIndexing = true,
820       .shaderUniformBufferArrayNonUniformIndexing = true,
821       .shaderSampledImageArrayNonUniformIndexing = true,
822       .shaderStorageBufferArrayNonUniformIndexing = true,
823       .shaderStorageImageArrayNonUniformIndexing = true,
824       .shaderInputAttachmentArrayNonUniformIndexing = true,
825       .shaderUniformTexelBufferArrayNonUniformIndexing = true,
826       .shaderStorageTexelBufferArrayNonUniformIndexing = true,
827       .descriptorBindingUniformBufferUpdateAfterBind = true,
828       .descriptorBindingSampledImageUpdateAfterBind = true,
829       .descriptorBindingStorageImageUpdateAfterBind = true,
830       .descriptorBindingStorageBufferUpdateAfterBind = true,
831       .descriptorBindingUniformTexelBufferUpdateAfterBind = true,
832       .descriptorBindingStorageTexelBufferUpdateAfterBind = true,
833       .descriptorBindingUpdateUnusedWhilePending = true,
834       .descriptorBindingPartiallyBound = true,
835       .descriptorBindingVariableDescriptorCount = true,
836       .runtimeDescriptorArray = true,
837 
838       .samplerFilterMinmax = true,
839       .scalarBlockLayout = pdev->info.gfx_level >= GFX7,
840       .imagelessFramebuffer = true,
841       .uniformBufferStandardLayout = true,
842       .shaderSubgroupExtendedTypes = true,
843       .separateDepthStencilLayouts = true,
844       .hostQueryReset = true,
845       .timelineSemaphore = true,
846       .bufferDeviceAddress = true,
847       .bufferDeviceAddressCaptureReplay = true,
848       .bufferDeviceAddressMultiDevice = false,
849       .vulkanMemoryModel = true,
850       .vulkanMemoryModelDeviceScope = true,
851       .vulkanMemoryModelAvailabilityVisibilityChains = false,
852       .shaderOutputViewportIndex = true,
853       .shaderOutputLayer = true,
854       .subgroupBroadcastDynamicId = true,
855 
856       /* Vulkan 1.3 */
857       .robustImageAccess = true,
858       .inlineUniformBlock = true,
859       .descriptorBindingInlineUniformBlockUpdateAfterBind = true,
860       .pipelineCreationCacheControl = true,
861       .privateData = true,
862       .shaderDemoteToHelperInvocation = true,
863       .shaderTerminateInvocation = true,
864       .subgroupSizeControl = true,
865       .computeFullSubgroups = true,
866       .synchronization2 = true,
867       .textureCompressionASTC_HDR = false,
868       .shaderZeroInitializeWorkgroupMemory = true,
869       .dynamicRendering = true,
870       .shaderIntegerDotProduct = true,
871       .maintenance4 = true,
872 
873       /* VK_EXT_conditional_rendering */
874       .conditionalRendering = true,
875       .inheritedConditionalRendering = false,
876 
877       /* VK_KHR_vertex_attribute_divisor */
878       .vertexAttributeInstanceRateDivisor = true,
879       .vertexAttributeInstanceRateZeroDivisor = true,
880 
881       /* VK_EXT_transform_feedback */
882       .transformFeedback = true,
883       .geometryStreams = true,
884 
885       /* VK_EXT_memory_priority */
886       .memoryPriority = true,
887 
888       /* VK_EXT_depth_clip_enable */
889       .depthClipEnable = true,
890 
891       /* VK_KHR_compute_shader_derivatives */
892       .computeDerivativeGroupQuads = false,
893       .computeDerivativeGroupLinear = true,
894 
895       /* VK_EXT_ycbcr_image_arrays */
896       .ycbcrImageArrays = true,
897 
898       /* VK_KHR_index_type_uint8 */
899       .indexTypeUint8 = pdev->info.gfx_level >= GFX8,
900 
901       /* VK_KHR_pipeline_executable_properties */
902       .pipelineExecutableInfo = true,
903 
904       /* VK_KHR_shader_clock */
905       .shaderSubgroupClock = true,
906       .shaderDeviceClock = pdev->info.gfx_level >= GFX8,
907 
908       /* VK_EXT_texel_buffer_alignment */
909       .texelBufferAlignment = true,
910 
911       /* VK_AMD_device_coherent_memory */
912       .deviceCoherentMemory = pdev->info.has_l2_uncached,
913 
914       /* VK_KHR_line_rasterization */
915       .rectangularLines = true,
916       .bresenhamLines = true,
917       .smoothLines = true,
918       .stippledRectangularLines = false,
919       .stippledBresenhamLines = true,
920       .stippledSmoothLines = false,
921 
922       /* VK_EXT_robustness2 */
923       .robustBufferAccess2 = true,
924       .robustImageAccess2 = true,
925       .nullDescriptor = true,
926 
927       /* VK_EXT_custom_border_color */
928       .customBorderColors = true,
929       .customBorderColorWithoutFormat = true,
930 
931       /* VK_EXT_extended_dynamic_state */
932       .extendedDynamicState = true,
933 
934       /* VK_EXT_shader_atomic_float */
935       .shaderBufferFloat32Atomics = true,
936       .shaderBufferFloat32AtomicAdd = pdev->info.gfx_level >= GFX11,
937       .shaderBufferFloat64Atomics = true,
938       .shaderBufferFloat64AtomicAdd = false,
939       .shaderSharedFloat32Atomics = true,
940       .shaderSharedFloat32AtomicAdd = pdev->info.gfx_level >= GFX8,
941       .shaderSharedFloat64Atomics = true,
942       .shaderSharedFloat64AtomicAdd = false,
943       .shaderImageFloat32Atomics = true,
944       .shaderImageFloat32AtomicAdd = false,
945       .sparseImageFloat32Atomics = true,
946       .sparseImageFloat32AtomicAdd = false,
947 
948       /* VK_EXT_4444_formats */
949       .formatA4R4G4B4 = true,
950       .formatA4B4G4R4 = true,
951 
952       /* VK_EXT_shader_image_atomic_int64 */
953       .shaderImageInt64Atomics = true,
954       .sparseImageInt64Atomics = true,
955 
956       /* VK_EXT_mutable_descriptor_type */
957       .mutableDescriptorType = true,
958 
959       /* VK_KHR_fragment_shading_rate */
960       .pipelineFragmentShadingRate = true,
961       .primitiveFragmentShadingRate = true,
962       .attachmentFragmentShadingRate = radv_vrs_attachment_enabled(pdev),
963 
964       /* VK_KHR_workgroup_memory_explicit_layout */
965       .workgroupMemoryExplicitLayout = true,
966       .workgroupMemoryExplicitLayoutScalarBlockLayout = true,
967       .workgroupMemoryExplicitLayout8BitAccess = true,
968       .workgroupMemoryExplicitLayout16BitAccess = true,
969 
970       /* VK_EXT_provoking_vertex */
971       .provokingVertexLast = true,
972       .transformFeedbackPreservesProvokingVertex = true,
973 
974       /* VK_EXT_extended_dynamic_state2 */
975       .extendedDynamicState2 = true,
976       .extendedDynamicState2LogicOp = true,
977       .extendedDynamicState2PatchControlPoints = true,
978 
979       /* VK_EXT_global_priority_query */
980       .globalPriorityQuery = true,
981 
982       /* VK_KHR_acceleration_structure */
983       .accelerationStructure = true,
984       .accelerationStructureCaptureReplay = true,
985       .accelerationStructureIndirectBuild = false,
986       .accelerationStructureHostCommands = false,
987       .descriptorBindingAccelerationStructureUpdateAfterBind = true,
988 
989       /* VK_EXT_buffer_device_address */
990       .bufferDeviceAddressCaptureReplayEXT = true,
991 
992       /* VK_KHR_shader_subgroup_uniform_control_flow */
993       .shaderSubgroupUniformControlFlow = true,
994 
995       /* VK_EXT_map_memory_placed */
996       .memoryMapPlaced = true,
997       .memoryMapRangePlaced = false,
998       .memoryUnmapReserve = true,
999 
1000       /* VK_EXT_multi_draw */
1001       .multiDraw = true,
1002 
1003       /* VK_EXT_color_write_enable */
1004       .colorWriteEnable = true,
1005 
1006       /* VK_EXT_shader_atomic_float2 */
1007       .shaderBufferFloat16Atomics = false,
1008       .shaderBufferFloat16AtomicAdd = false,
1009       .shaderBufferFloat16AtomicMinMax = false,
1010       .shaderBufferFloat32AtomicMinMax = radv_has_shader_buffer_float_minmax(pdev, 32),
1011       .shaderBufferFloat64AtomicMinMax = radv_has_shader_buffer_float_minmax(pdev, 64),
1012       .shaderSharedFloat16Atomics = false,
1013       .shaderSharedFloat16AtomicAdd = false,
1014       .shaderSharedFloat16AtomicMinMax = false,
1015       .shaderSharedFloat32AtomicMinMax = true,
1016       .shaderSharedFloat64AtomicMinMax = true,
1017       .shaderImageFloat32AtomicMinMax = has_shader_image_float_minmax,
1018       .sparseImageFloat32AtomicMinMax = has_shader_image_float_minmax,
1019 
1020       /* VK_KHR_present_id */
1021       .presentId = pdev->vk.supported_extensions.KHR_present_id,
1022 
1023       /* VK_KHR_present_wait */
1024       .presentWait = pdev->vk.supported_extensions.KHR_present_wait,
1025 
1026       /* VK_EXT_primitive_topology_list_restart */
1027       .primitiveTopologyListRestart = true,
1028       .primitiveTopologyPatchListRestart = false,
1029 
1030       /* VK_KHR_ray_query */
1031       .rayQuery = true,
1032 
1033       /* VK_EXT_pipeline_library_group_handles */
1034       .pipelineLibraryGroupHandles = true,
1035 
1036       /* VK_KHR_ray_tracing_pipeline */
1037       .rayTracingPipeline = true,
1038       .rayTracingPipelineShaderGroupHandleCaptureReplay = true,
1039       .rayTracingPipelineShaderGroupHandleCaptureReplayMixed = false,
1040       .rayTracingPipelineTraceRaysIndirect = true,
1041       .rayTraversalPrimitiveCulling = true,
1042 
1043       /* VK_KHR_ray_tracing_maintenance1 */
1044       .rayTracingMaintenance1 = true,
1045       .rayTracingPipelineTraceRaysIndirect2 = radv_enable_rt(pdev, true),
1046 
1047       /* VK_KHR_ray_tracing_position_fetch */
1048       .rayTracingPositionFetch = true,
1049 
1050       /* VK_EXT_vertex_input_dynamic_state */
1051       .vertexInputDynamicState = true,
1052 
1053       /* VK_EXT_image_view_min_lod */
1054       .minLod = true,
1055 
1056       /* VK_EXT_mesh_shader */
1057       .meshShader = taskmesh_en,
1058       .taskShader = taskmesh_en,
1059       .multiviewMeshShader = taskmesh_en,
1060       .primitiveFragmentShadingRateMeshShader = taskmesh_en,
1061       .meshShaderQueries = false,
1062 
1063       /* VK_VALVE_descriptor_set_host_mapping */
1064       .descriptorSetHostMapping = true,
1065 
1066       /* VK_EXT_depth_clip_control */
1067       .depthClipControl = true,
1068 
1069       /* VK_EXT_image_2d_view_of_3d  */
1070       .image2DViewOf3D = true,
1071       .sampler2DViewOf3D = false,
1072 
1073       /* VK_INTEL_shader_integer_functions2 */
1074       .shaderIntegerFunctions2 = true,
1075 
1076       /* VK_EXT_primitives_generated_query */
1077       .primitivesGeneratedQuery = true,
1078       .primitivesGeneratedQueryWithRasterizerDiscard = true,
1079       .primitivesGeneratedQueryWithNonZeroStreams = true,
1080 
1081       /* VK_EXT_non_seamless_cube_map */
1082       .nonSeamlessCubeMap = true,
1083 
1084       /* VK_EXT_border_color_swizzle */
1085       .borderColorSwizzle = true,
1086       .borderColorSwizzleFromImage = true,
1087 
1088       /* VK_EXT_shader_module_identifier */
1089       .shaderModuleIdentifier = true,
1090 
1091       /* VK_KHR_performance_query */
1092       .performanceCounterQueryPools = has_perf_query,
1093       .performanceCounterMultipleQueryPools = has_perf_query,
1094 
1095       /* VK_NV_device_generated_commands */
1096       .deviceGeneratedCommandsNV = true,
1097 
1098       /* VK_EXT_attachment_feedback_loop_layout */
1099       .attachmentFeedbackLoopLayout = true,
1100 
1101       /* VK_EXT_graphics_pipeline_library */
1102       .graphicsPipelineLibrary = true,
1103 
1104       /* VK_EXT_extended_dynamic_state3 */
1105       .extendedDynamicState3TessellationDomainOrigin = true,
1106       .extendedDynamicState3PolygonMode = true,
1107       .extendedDynamicState3SampleMask = true,
1108       .extendedDynamicState3AlphaToCoverageEnable = !pdev->use_llvm,
1109       .extendedDynamicState3LogicOpEnable = true,
1110       .extendedDynamicState3LineStippleEnable = true,
1111       .extendedDynamicState3ColorBlendEnable = !pdev->use_llvm,
1112       .extendedDynamicState3DepthClipEnable = true,
1113       .extendedDynamicState3ConservativeRasterizationMode = pdev->info.gfx_level >= GFX9,
1114       .extendedDynamicState3DepthClipNegativeOneToOne = true,
1115       .extendedDynamicState3ProvokingVertexMode = true,
1116       .extendedDynamicState3DepthClampEnable = true,
1117       .extendedDynamicState3ColorWriteMask = !pdev->use_llvm,
1118       .extendedDynamicState3RasterizationSamples = true,
1119       .extendedDynamicState3ColorBlendEquation = !pdev->use_llvm,
1120       .extendedDynamicState3SampleLocationsEnable = pdev->info.gfx_level < GFX10,
1121       .extendedDynamicState3LineRasterizationMode = true,
1122       .extendedDynamicState3ExtraPrimitiveOverestimationSize = false,
1123       .extendedDynamicState3AlphaToOneEnable = !pdev->use_llvm,
1124       .extendedDynamicState3RasterizationStream = false,
1125       .extendedDynamicState3ColorBlendAdvanced = false,
1126       .extendedDynamicState3ViewportWScalingEnable = false,
1127       .extendedDynamicState3ViewportSwizzle = false,
1128       .extendedDynamicState3CoverageToColorEnable = false,
1129       .extendedDynamicState3CoverageToColorLocation = false,
1130       .extendedDynamicState3CoverageModulationMode = false,
1131       .extendedDynamicState3CoverageModulationTableEnable = false,
1132       .extendedDynamicState3CoverageModulationTable = false,
1133       .extendedDynamicState3CoverageReductionMode = false,
1134       .extendedDynamicState3RepresentativeFragmentTestEnable = false,
1135       .extendedDynamicState3ShadingRateImageEnable = false,
1136 
1137       /* VK_EXT_descriptor_buffer */
1138       .descriptorBuffer = true,
1139       .descriptorBufferCaptureReplay = false,
1140       .descriptorBufferImageLayoutIgnored = true,
1141       .descriptorBufferPushDescriptors = true,
1142 
1143       /* VK_AMD_shader_early_and_late_fragment_tests */
1144       .shaderEarlyAndLateFragmentTests = true,
1145 
1146       /* VK_EXT_image_sliced_view_of_3d */
1147       .imageSlicedViewOf3D = true,
1148 
1149 #ifdef RADV_USE_WSI_PLATFORM
1150       /* VK_EXT_swapchain_maintenance1 */
1151       .swapchainMaintenance1 = true,
1152 #endif
1153 
1154       /* VK_EXT_attachment_feedback_loop_dynamic_state */
1155       .attachmentFeedbackLoopDynamicState = true,
1156 
1157       /* VK_EXT_dynamic_rendering_unused_attachments */
1158       .dynamicRenderingUnusedAttachments = true,
1159 
1160       /* VK_KHR_fragment_shader_barycentric */
1161       .fragmentShaderBarycentric = true,
1162 
1163       /* VK_EXT_depth_bias_control */
1164       .depthBiasControl = true,
1165       .leastRepresentableValueForceUnormRepresentation = true,
1166       .floatRepresentation = true,
1167       .depthBiasExact = true,
1168 
1169       /* VK_EXT_fragment_shader_interlock */
1170       .fragmentShaderSampleInterlock = has_fragment_shader_interlock,
1171       .fragmentShaderPixelInterlock = has_fragment_shader_interlock,
1172       .fragmentShaderShadingRateInterlock = false,
1173 
1174       /* VK_EXT_pipeline_robustness */
1175       .pipelineRobustness = true,
1176 
1177       /* VK_KHR_maintenance5 */
1178       .maintenance5 = true,
1179 
1180       /* VK_NV_device_generated_commands_compute */
1181       .deviceGeneratedCompute = true,
1182       .deviceGeneratedComputePipelines = true,
1183       .deviceGeneratedComputeCaptureReplay = false,
1184 
1185       /* VK_KHR_cooperative_matrix */
1186       .cooperativeMatrix = pdev->info.gfx_level >= GFX11 && !pdev->use_llvm,
1187       .cooperativeMatrixRobustBufferAccess = pdev->info.gfx_level >= GFX11 && !pdev->use_llvm,
1188 
1189       /* VK_EXT_image_compression_control */
1190       .imageCompressionControl = true,
1191 
1192       /* VK_EXT_device_fault */
1193       .deviceFault = true,
1194       .deviceFaultVendorBinary = instance->debug_flags & RADV_DEBUG_HANG,
1195 
1196       /* VK_EXT_depth_clamp_zero_one */
1197       .depthClampZeroOne = true,
1198 
1199       /* VK_KHR_maintenance6 */
1200       .maintenance6 = true,
1201 
1202       /* VK_KHR_shader_subgroup_rotate */
1203       .shaderSubgroupRotate = true,
1204       .shaderSubgroupRotateClustered = true,
1205 
1206       /* VK_EXT_shader_object */
1207       .shaderObject = true,
1208 
1209       /* VK_KHR_shader_expect_assume */
1210       .shaderExpectAssume = true,
1211 
1212       /* VK_KHR_shader_maximal_reconvergence */
1213       .shaderMaximalReconvergence = true,
1214 
1215       /* VK_KHR_shader_quad_control */
1216       .shaderQuadControl = true,
1217 
1218       /* VK_EXT_address_binding_report */
1219       .reportAddressBinding = true,
1220 
1221       /* VK_EXT_nested_command_buffer */
1222       .nestedCommandBuffer = true,
1223       .nestedCommandBufferRendering = true,
1224       .nestedCommandBufferSimultaneousUse = true,
1225 
1226       /* VK_KHR_dynamic_rendering_local_read */
1227       .dynamicRenderingLocalRead = true,
1228 
1229       /* VK_EXT_legacy_vertex_attributes */
1230       .legacyVertexAttributes = true,
1231 
1232       /* VK_MESA_image_alignment_control */
1233       .imageAlignmentControl = true,
1234 
1235       /* VK_EXT_shader_replicated_composites */
1236       .shaderReplicatedComposites = true,
1237 
1238       /* VK_KHR_maintenance7 */
1239       .maintenance7 = true,
1240 
1241       /* VK_KHR_video_maintenance1 */
1242       .videoMaintenance1 = true,
1243 
1244       /* VK_KHR_pipeline_binary */
1245       .pipelineBinaries = true,
1246 
1247       /* VK_KHR_shader_relaxed_extended_instruction */
1248       .shaderRelaxedExtendedInstruction = true,
1249 
1250       /* VK_KHR_shader_float_controls2 */
1251       .shaderFloatControls2 = true,
1252    };
1253 }
1254 
1255 static size_t
1256 radv_max_descriptor_set_size()
1257 {
1258    /* make sure that the entire descriptor set is addressable with a signed
1259     * 32-bit int. So the sum of all limits scaled by descriptor size has to
1260     * be at most 2 GiB. The combined image & sampler object counts as one of
1261     * both. This limit is for the pipeline layout, not for the set layout, but
1262     * there is no set limit, so we just set a pipeline limit. I don't think
1263     * any app is going to hit this soon. */
1264    return ((1ull << 31) - 16 * MAX_DYNAMIC_BUFFERS - MAX_INLINE_UNIFORM_BLOCK_SIZE * MAX_INLINE_UNIFORM_BLOCK_COUNT) /
1265           (32 /* uniform buffer, 32 due to potential space wasted on alignment */ +
1266            32 /* storage buffer, 32 due to potential space wasted on alignment */ +
1267            32 /* sampler, largest when combined with image */ + 64 /* sampled image */ + 64 /* storage image */);
1268 }
1269 
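/* minUniformBufferOffsetAlignment: the drirc override if it is a power of two, otherwise the
 * 4-byte hardware minimum. */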
1270 static uint32_t
1271 radv_uniform_buffer_offset_alignment(const struct radv_physical_device *pdev)
1272 {
1273    const struct radv_instance *instance = radv_physical_device_instance(pdev);
1274    uint32_t uniform_offset_alignment = instance->drirc.override_uniform_offset_alignment;
1275    if (!util_is_power_of_two_or_zero(uniform_offset_alignment)) {
1276       fprintf(stderr,
1277               "ERROR: invalid radv_override_uniform_offset_alignment setting %d:"
1278               "not a power of two\n",
1279               uniform_offset_alignment);
1280       uniform_offset_alignment = 0;
1281    }
1282 
1283    /* Take at least the hardware limit. */
1284    return MAX2(uniform_offset_alignment, 4);
1285 }
1286 
1287 static const char *
1288 radv_get_compiler_string(struct radv_physical_device *pdev)
1289 {
1290    const struct radv_instance *instance = radv_physical_device_instance(pdev);
1291 
1292    if (!pdev->use_llvm) {
1293       /* Some games like SotTR apply shader workarounds if the LLVM
1294        * version is too old or if the LLVM version string is
1295        * missing. This gives 2-5% performance with SotTR and ACO.
1296        */
1297       if (instance->drirc.report_llvm9_version_string) {
1298          return " (LLVM 9.0.1)";
1299       }
1300 
1301       return "";
1302    }
1303 
1304 #if AMD_LLVM_AVAILABLE
1305    return " (LLVM " MESA_LLVM_VERSION_STRING ")";
1306 #else
1307    unreachable("LLVM is not available");
1308 #endif
1309 }
1310 
1311 static void
1312 radv_get_physical_device_properties(struct radv_physical_device *pdev)
1313 {
1314    VkSampleCountFlags sample_counts = 0xf;
1315 
1316    size_t max_descriptor_set_size = radv_max_descriptor_set_size();
1317 
1318    VkPhysicalDeviceType device_type;
1319    if (pdev->info.has_dedicated_vram) {
1320       device_type = VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU;
1321    } else {
1322       device_type = VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU;
1323    }
1324 
1325    pdev->vk.properties = (struct vk_properties){
1326       .apiVersion = RADV_API_VERSION,
1327       .driverVersion = vk_get_driver_version(),
1328       .vendorID = ATI_VENDOR_ID,
1329       .deviceID = pdev->info.pci_id,
1330       .deviceType = device_type,
1331       .maxImageDimension1D = (1 << 14),
1332       .maxImageDimension2D = (1 << 14),
1333       .maxImageDimension3D = (1 << 11),
1334       .maxImageDimensionCube = (1 << 14),
1335       .maxImageArrayLayers = (1 << 11),
1336       .maxTexelBufferElements = UINT32_MAX,
1337       .maxUniformBufferRange = UINT32_MAX,
1338       .maxStorageBufferRange = UINT32_MAX,
1339       .maxPushConstantsSize = MAX_PUSH_CONSTANTS_SIZE,
1340       .maxMemoryAllocationCount = UINT32_MAX,
1341       .maxSamplerAllocationCount = 64 * 1024,
1342       .bufferImageGranularity = 1,
1343       .sparseAddressSpaceSize = RADV_MAX_MEMORY_ALLOCATION_SIZE, /* buffer max size */
1344       .maxBoundDescriptorSets = MAX_SETS,
1345       .maxPerStageDescriptorSamplers = max_descriptor_set_size,
1346       .maxPerStageDescriptorUniformBuffers = max_descriptor_set_size,
1347       .maxPerStageDescriptorStorageBuffers = max_descriptor_set_size,
1348       .maxPerStageDescriptorSampledImages = max_descriptor_set_size,
1349       .maxPerStageDescriptorStorageImages = max_descriptor_set_size,
1350       .maxPerStageDescriptorInputAttachments = max_descriptor_set_size,
1351       .maxPerStageResources = max_descriptor_set_size,
1352       .maxDescriptorSetSamplers = max_descriptor_set_size,
1353       .maxDescriptorSetUniformBuffers = max_descriptor_set_size,
1354       .maxDescriptorSetUniformBuffersDynamic = MAX_DYNAMIC_UNIFORM_BUFFERS,
1355       .maxDescriptorSetStorageBuffers = max_descriptor_set_size,
1356       .maxDescriptorSetStorageBuffersDynamic = MAX_DYNAMIC_STORAGE_BUFFERS,
1357       .maxDescriptorSetSampledImages = max_descriptor_set_size,
1358       .maxDescriptorSetStorageImages = max_descriptor_set_size,
1359       .maxDescriptorSetInputAttachments = max_descriptor_set_size,
1360       .maxVertexInputAttributes = MAX_VERTEX_ATTRIBS,
1361       .maxVertexInputBindings = MAX_VBS,
1362       .maxVertexInputAttributeOffset = UINT32_MAX,
1363       .maxVertexInputBindingStride = 2048,
1364       .maxVertexOutputComponents = 128,
1365       .maxTessellationGenerationLevel = 64,
1366       .maxTessellationPatchSize = 32,
1367       .maxTessellationControlPerVertexInputComponents = 128,
1368       .maxTessellationControlPerVertexOutputComponents = 128,
1369       .maxTessellationControlPerPatchOutputComponents = 120,
1370       .maxTessellationControlTotalOutputComponents = 4096,
1371       .maxTessellationEvaluationInputComponents = 128,
1372       .maxTessellationEvaluationOutputComponents = 128,
1373       .maxGeometryShaderInvocations = 127,
1374       .maxGeometryInputComponents = 64,
1375       .maxGeometryOutputComponents = 128,
1376       .maxGeometryOutputVertices = 256,
1377       .maxGeometryTotalOutputComponents = 1024,
1378       .maxFragmentInputComponents = 128,
1379       .maxFragmentOutputAttachments = 8,
1380       .maxFragmentDualSrcAttachments = 1,
1381       .maxFragmentCombinedOutputResources = max_descriptor_set_size,
1382       .maxComputeSharedMemorySize = pdev->max_shared_size,
1383       .maxComputeWorkGroupCount = {65535, 65535, 65535},
1384       .maxComputeWorkGroupInvocations = 1024,
1385       .maxComputeWorkGroupSize = {1024, 1024, 1024},
1386       .subPixelPrecisionBits = 8,
1387       .subTexelPrecisionBits = 8,
1388       .mipmapPrecisionBits = 8,
1389       .maxDrawIndexedIndexValue = UINT32_MAX,
1390       .maxDrawIndirectCount = UINT32_MAX,
1391       .maxSamplerLodBias = 16,
1392       .maxSamplerAnisotropy = 16,
1393       .maxViewports = MAX_VIEWPORTS,
1394       .maxViewportDimensions = {(1 << 14), (1 << 14)},
1395       .viewportBoundsRange = {INT16_MIN, INT16_MAX},
1396       .viewportSubPixelBits = 8,
1397       .minMemoryMapAlignment = 4096, /* A page */
1398       .minTexelBufferOffsetAlignment = 4,
1399       .minUniformBufferOffsetAlignment = radv_uniform_buffer_offset_alignment(pdev),
1400       .minStorageBufferOffsetAlignment = 4,
1401       .minTexelOffset = -32,
1402       .maxTexelOffset = 31,
1403       .minTexelGatherOffset = -32,
1404       .maxTexelGatherOffset = 31,
1405       .minInterpolationOffset = -2,
1406       .maxInterpolationOffset = 2,
1407       .subPixelInterpolationOffsetBits = 8,
1408       .maxFramebufferWidth = MAX_FRAMEBUFFER_WIDTH,
1409       .maxFramebufferHeight = MAX_FRAMEBUFFER_HEIGHT,
1410       .maxFramebufferLayers = (1 << 10),
1411       .framebufferColorSampleCounts = sample_counts,
1412       .framebufferDepthSampleCounts = sample_counts,
1413       .framebufferStencilSampleCounts = sample_counts,
1414       .framebufferNoAttachmentsSampleCounts = sample_counts,
1415       .maxColorAttachments = MAX_RTS,
1416       .sampledImageColorSampleCounts = sample_counts,
1417       .sampledImageIntegerSampleCounts = sample_counts,
1418       .sampledImageDepthSampleCounts = sample_counts,
1419       .sampledImageStencilSampleCounts = sample_counts,
1420       .storageImageSampleCounts = sample_counts,
1421       .maxSampleMaskWords = 1,
1422       .timestampComputeAndGraphics = true,
1423       .timestampPeriod = 1000000.0 / pdev->info.clock_crystal_freq,
1424       .maxClipDistances = 8,
1425       .maxCullDistances = 8,
1426       .maxCombinedClipAndCullDistances = 8,
1427       .discreteQueuePriorities = 2,
1428       .pointSizeRange = {0.0, 8191.875},
1429       .lineWidthRange = {0.0, 8.0},
1430       .pointSizeGranularity = (1.0 / 8.0),
1431       .lineWidthGranularity = (1.0 / 8.0),
1432       .strictLines = false, /* FINISHME */
1433       .standardSampleLocations = true,
1434       .optimalBufferCopyOffsetAlignment = 1,
1435       .optimalBufferCopyRowPitchAlignment = 1,
1436       .nonCoherentAtomSize = 64,
1437       .sparseResidencyNonResidentStrict = pdev->info.family >= CHIP_POLARIS10,
1438       .sparseResidencyStandard2DBlockShape = pdev->info.family >= CHIP_POLARIS10,
1439       .sparseResidencyStandard3DBlockShape = pdev->info.gfx_level >= GFX9,
1440    };
1441 
1442    struct vk_properties *p = &pdev->vk.properties;
1443 
1444    /* Vulkan 1.1 */
1445    strcpy(p->deviceName, pdev->marketing_name);
1446    memcpy(p->pipelineCacheUUID, pdev->cache_uuid, VK_UUID_SIZE);
1447 
1448    memcpy(p->deviceUUID, pdev->device_uuid, VK_UUID_SIZE);
1449    memcpy(p->driverUUID, pdev->driver_uuid, VK_UUID_SIZE);
1450    memset(p->deviceLUID, 0, VK_LUID_SIZE);
1451    /* The LUID is for Windows. */
1452    p->deviceLUIDValid = false;
1453    p->deviceNodeMask = 0;
1454 
1455    p->subgroupSize = RADV_SUBGROUP_SIZE;
1456    p->subgroupSupportedStages = VK_SHADER_STAGE_ALL_GRAPHICS | VK_SHADER_STAGE_COMPUTE_BIT;
1457    if (radv_taskmesh_enabled(pdev))
1458       p->subgroupSupportedStages |= VK_SHADER_STAGE_MESH_BIT_EXT | VK_SHADER_STAGE_TASK_BIT_EXT;
1459 
1460    if (radv_enable_rt(pdev, true))
1461       p->subgroupSupportedStages |= RADV_RT_STAGE_BITS;
1462    p->subgroupSupportedOperations = VK_SUBGROUP_FEATURE_BASIC_BIT | VK_SUBGROUP_FEATURE_VOTE_BIT |
1463                                     VK_SUBGROUP_FEATURE_ARITHMETIC_BIT | VK_SUBGROUP_FEATURE_BALLOT_BIT |
1464                                     VK_SUBGROUP_FEATURE_CLUSTERED_BIT | VK_SUBGROUP_FEATURE_QUAD_BIT |
1465                                     VK_SUBGROUP_FEATURE_SHUFFLE_BIT | VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT |
1466                                     VK_SUBGROUP_FEATURE_ROTATE_BIT_KHR | VK_SUBGROUP_FEATURE_ROTATE_CLUSTERED_BIT_KHR;
1467    p->subgroupQuadOperationsInAllStages = true;
1468 
1469    p->pointClippingBehavior = VK_POINT_CLIPPING_BEHAVIOR_ALL_CLIP_PLANES;
1470    p->maxMultiviewViewCount = MAX_VIEWS;
1471    p->maxMultiviewInstanceIndex = INT_MAX;
1472    p->protectedNoFault = false;
1473    p->maxPerSetDescriptors = RADV_MAX_PER_SET_DESCRIPTORS;
1474    p->maxMemoryAllocationSize = RADV_MAX_MEMORY_ALLOCATION_SIZE;
1475 
1476    /* Vulkan 1.2 */
1477    p->driverID = VK_DRIVER_ID_MESA_RADV;
1478    snprintf(p->driverName, VK_MAX_DRIVER_NAME_SIZE, "radv");
1479    snprintf(p->driverInfo, VK_MAX_DRIVER_INFO_SIZE, "Mesa " PACKAGE_VERSION MESA_GIT_SHA1 "%s",
1480             radv_get_compiler_string(pdev));
1481 
1482    if (radv_is_conformant(pdev)) {
1483       if (pdev->info.gfx_level >= GFX10_3) {
1484          p->conformanceVersion = (VkConformanceVersion){
1485             .major = 1,
1486             .minor = 3,
1487             .subminor = 0,
1488             .patch = 0,
1489          };
1490       } else {
1491          p->conformanceVersion = (VkConformanceVersion){
1492             .major = 1,
1493             .minor = 2,
1494             .subminor = 7,
1495             .patch = 1,
1496          };
1497       }
1498    } else {
1499       p->conformanceVersion = (VkConformanceVersion){
1500          .major = 0,
1501          .minor = 0,
1502          .subminor = 0,
1503          .patch = 0,
1504       };
1505    }
1506 
1507    /* On AMD hardware, denormals and rounding modes for fp16/fp64 are
1508     * controlled by the same config register.
1509     */
1510    if (pdev->info.has_packed_math_16bit) {
1511       p->denormBehaviorIndependence = VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_32_BIT_ONLY;
1512       p->roundingModeIndependence = VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_32_BIT_ONLY;
1513    } else {
1514       p->denormBehaviorIndependence = VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL;
1515       p->roundingModeIndependence = VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL;
1516    }
1517 
1518    /* With LLVM, do not allow both preserving and flushing denorms because
1519     * different shaders in the same pipeline can have different settings and
1520     * this won't work for merged shaders. Making it work would require LLVM
1521     * support for changing the register. The same logic applies to the
1522     * rounding modes because they are configured with the same config
1523     * register.
1524     */
1525    p->shaderDenormFlushToZeroFloat32 = true;
1526    p->shaderDenormPreserveFloat32 = !pdev->use_llvm;
1527    p->shaderRoundingModeRTEFloat32 = true;
1528    p->shaderRoundingModeRTZFloat32 = !pdev->use_llvm;
1529    p->shaderSignedZeroInfNanPreserveFloat32 = true;
1530 
1531    p->shaderDenormFlushToZeroFloat16 = pdev->info.has_packed_math_16bit && !pdev->use_llvm;
1532    p->shaderDenormPreserveFloat16 = pdev->info.has_packed_math_16bit;
1533    p->shaderRoundingModeRTEFloat16 = pdev->info.has_packed_math_16bit;
1534    p->shaderRoundingModeRTZFloat16 = pdev->info.has_packed_math_16bit && !pdev->use_llvm;
1535    p->shaderSignedZeroInfNanPreserveFloat16 = pdev->info.has_packed_math_16bit;
1536 
1537    p->shaderDenormFlushToZeroFloat64 = pdev->info.gfx_level >= GFX8 && !pdev->use_llvm;
1538    p->shaderDenormPreserveFloat64 = pdev->info.gfx_level >= GFX8;
1539    p->shaderRoundingModeRTEFloat64 = pdev->info.gfx_level >= GFX8;
1540    p->shaderRoundingModeRTZFloat64 = pdev->info.gfx_level >= GFX8 && !pdev->use_llvm;
1541    p->shaderSignedZeroInfNanPreserveFloat64 = pdev->info.gfx_level >= GFX8;
1542 
1543    p->maxUpdateAfterBindDescriptorsInAllPools = UINT32_MAX / 64;
1544    p->shaderUniformBufferArrayNonUniformIndexingNative = false;
1545    p->shaderSampledImageArrayNonUniformIndexingNative = false;
1546    p->shaderStorageBufferArrayNonUniformIndexingNative = false;
1547    p->shaderStorageImageArrayNonUniformIndexingNative = false;
1548    p->shaderInputAttachmentArrayNonUniformIndexingNative = false;
1549    p->robustBufferAccessUpdateAfterBind = true;
1550    p->quadDivergentImplicitLod = false;
1551 
1552    p->maxPerStageDescriptorUpdateAfterBindSamplers = max_descriptor_set_size;
1553    p->maxPerStageDescriptorUpdateAfterBindUniformBuffers = max_descriptor_set_size;
1554    p->maxPerStageDescriptorUpdateAfterBindStorageBuffers = max_descriptor_set_size;
1555    p->maxPerStageDescriptorUpdateAfterBindSampledImages = max_descriptor_set_size;
1556    p->maxPerStageDescriptorUpdateAfterBindStorageImages = max_descriptor_set_size;
1557    p->maxPerStageDescriptorUpdateAfterBindInputAttachments = max_descriptor_set_size;
1558    p->maxPerStageUpdateAfterBindResources = max_descriptor_set_size;
1559    p->maxDescriptorSetUpdateAfterBindSamplers = max_descriptor_set_size;
1560    p->maxDescriptorSetUpdateAfterBindUniformBuffers = max_descriptor_set_size;
1561    p->maxDescriptorSetUpdateAfterBindUniformBuffersDynamic = MAX_DYNAMIC_UNIFORM_BUFFERS;
1562    p->maxDescriptorSetUpdateAfterBindStorageBuffers = max_descriptor_set_size;
1563    p->maxDescriptorSetUpdateAfterBindStorageBuffersDynamic = MAX_DYNAMIC_STORAGE_BUFFERS;
1564    p->maxDescriptorSetUpdateAfterBindSampledImages = max_descriptor_set_size;
1565    p->maxDescriptorSetUpdateAfterBindStorageImages = max_descriptor_set_size;
1566    p->maxDescriptorSetUpdateAfterBindInputAttachments = max_descriptor_set_size;
1567 
1568    /* We support all of the depth resolve modes */
1569    p->supportedDepthResolveModes =
1570       VK_RESOLVE_MODE_SAMPLE_ZERO_BIT | VK_RESOLVE_MODE_AVERAGE_BIT | VK_RESOLVE_MODE_MIN_BIT | VK_RESOLVE_MODE_MAX_BIT;
1571 
1572    /* Average doesn't make sense for stencil so we don't support that */
1573    p->supportedStencilResolveModes =
1574       VK_RESOLVE_MODE_SAMPLE_ZERO_BIT | VK_RESOLVE_MODE_MIN_BIT | VK_RESOLVE_MODE_MAX_BIT;
1575 
1576    p->independentResolveNone = true;
1577    p->independentResolve = true;
1578 
1579    /* GFX6-8 only support single channel min/max filter. */
1580    p->filterMinmaxImageComponentMapping = pdev->info.gfx_level >= GFX9;
1581    p->filterMinmaxSingleComponentFormats = true;
1582 
1583    p->maxTimelineSemaphoreValueDifference = UINT64_MAX;
1584 
1585    p->framebufferIntegerColorSampleCounts = VK_SAMPLE_COUNT_1_BIT;
1586 
1587    /* Vulkan 1.3 */
1588    p->minSubgroupSize = 64;
1589    p->maxSubgroupSize = 64;
1590    p->maxComputeWorkgroupSubgroups = UINT32_MAX;
1591    p->requiredSubgroupSizeStages = 0;
1592    if (pdev->info.gfx_level >= GFX10) {
1593       /* Only GFX10+ supports wave32. */
1594       p->minSubgroupSize = 32;
1595       p->requiredSubgroupSizeStages = VK_SHADER_STAGE_COMPUTE_BIT;
1596 
1597       if (radv_taskmesh_enabled(pdev)) {
1598          p->requiredSubgroupSizeStages |= VK_SHADER_STAGE_MESH_BIT_EXT | VK_SHADER_STAGE_TASK_BIT_EXT;
1599       }
1600    }
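
   /* Illustrative, app-side sketch: requesting the minimum (wave32) subgroup
    * size advertised above for a compute stage, via VK_EXT_subgroup_size_control
    * (core in Vulkan 1.3):
    *
    *    VkPipelineShaderStageRequiredSubgroupSizeCreateInfo req = {
    *       .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO,
    *       .requiredSubgroupSize = 32,
    *    };
    *
    * chained into VkPipelineShaderStageCreateInfo::pNext.
    */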
1601 
1602    p->maxInlineUniformBlockSize = MAX_INLINE_UNIFORM_BLOCK_SIZE;
1603    p->maxPerStageDescriptorInlineUniformBlocks = MAX_INLINE_UNIFORM_BLOCK_SIZE * MAX_SETS;
1604    p->maxPerStageDescriptorUpdateAfterBindInlineUniformBlocks = MAX_INLINE_UNIFORM_BLOCK_SIZE * MAX_SETS;
1605    p->maxDescriptorSetInlineUniformBlocks = MAX_INLINE_UNIFORM_BLOCK_COUNT;
1606    p->maxDescriptorSetUpdateAfterBindInlineUniformBlocks = MAX_INLINE_UNIFORM_BLOCK_COUNT;
1607    p->maxInlineUniformTotalSize = UINT16_MAX;
1608 
1609    bool accel_dot = pdev->info.has_accelerated_dot_product;
1610    bool gfx11plus = pdev->info.gfx_level >= GFX11;
1611    p->integerDotProduct8BitUnsignedAccelerated = accel_dot;
1612    p->integerDotProduct8BitSignedAccelerated = accel_dot;
1613    p->integerDotProduct8BitMixedSignednessAccelerated = accel_dot && gfx11plus;
1614    p->integerDotProduct4x8BitPackedUnsignedAccelerated = accel_dot;
1615    p->integerDotProduct4x8BitPackedSignedAccelerated = accel_dot;
1616    p->integerDotProduct4x8BitPackedMixedSignednessAccelerated = accel_dot && gfx11plus;
1617    p->integerDotProduct16BitUnsignedAccelerated = accel_dot && !gfx11plus;
1618    p->integerDotProduct16BitSignedAccelerated = accel_dot && !gfx11plus;
1619    p->integerDotProduct16BitMixedSignednessAccelerated = false;
1620    p->integerDotProduct32BitUnsignedAccelerated = false;
1621    p->integerDotProduct32BitSignedAccelerated = false;
1622    p->integerDotProduct32BitMixedSignednessAccelerated = false;
1623    p->integerDotProduct64BitUnsignedAccelerated = false;
1624    p->integerDotProduct64BitSignedAccelerated = false;
1625    p->integerDotProduct64BitMixedSignednessAccelerated = false;
1626    p->integerDotProductAccumulatingSaturating8BitUnsignedAccelerated = accel_dot;
1627    p->integerDotProductAccumulatingSaturating8BitSignedAccelerated = accel_dot;
1628    p->integerDotProductAccumulatingSaturating8BitMixedSignednessAccelerated = accel_dot && gfx11plus;
1629    p->integerDotProductAccumulatingSaturating4x8BitPackedUnsignedAccelerated = accel_dot;
1630    p->integerDotProductAccumulatingSaturating4x8BitPackedSignedAccelerated = accel_dot;
1631    p->integerDotProductAccumulatingSaturating4x8BitPackedMixedSignednessAccelerated = accel_dot && gfx11plus;
1632    p->integerDotProductAccumulatingSaturating16BitUnsignedAccelerated = accel_dot && !gfx11plus;
1633    p->integerDotProductAccumulatingSaturating16BitSignedAccelerated = accel_dot && !gfx11plus;
1634    p->integerDotProductAccumulatingSaturating16BitMixedSignednessAccelerated = false;
1635    p->integerDotProductAccumulatingSaturating32BitUnsignedAccelerated = false;
1636    p->integerDotProductAccumulatingSaturating32BitSignedAccelerated = false;
1637    p->integerDotProductAccumulatingSaturating32BitMixedSignednessAccelerated = false;
1638    p->integerDotProductAccumulatingSaturating64BitUnsignedAccelerated = false;
1639    p->integerDotProductAccumulatingSaturating64BitSignedAccelerated = false;
1640    p->integerDotProductAccumulatingSaturating64BitMixedSignednessAccelerated = false;
1641 
1642    p->storageTexelBufferOffsetAlignmentBytes = 4;
1643    p->storageTexelBufferOffsetSingleTexelAlignment = true;
1644    p->uniformTexelBufferOffsetAlignmentBytes = 4;
1645    p->uniformTexelBufferOffsetSingleTexelAlignment = true;
1646 
1647    p->maxBufferSize = RADV_MAX_MEMORY_ALLOCATION_SIZE;
1648 
1649    /* VK_KHR_push_descriptor */
1650    p->maxPushDescriptors = MAX_PUSH_DESCRIPTORS;
1651 
1652    /* VK_EXT_discard_rectangles */
1653    p->maxDiscardRectangles = MAX_DISCARD_RECTANGLES;
1654 
1655    /* VK_EXT_external_memory_host */
1656    p->minImportedHostPointerAlignment = 4096;
1657 
1658    /* VK_AMD_shader_core_properties */
1659    /* Shader engines. */
1660    p->shaderEngineCount = pdev->info.max_se;
1661    p->shaderArraysPerEngineCount = pdev->info.max_sa_per_se;
1662    p->computeUnitsPerShaderArray = pdev->info.min_good_cu_per_sa;
1663    p->simdPerComputeUnit = pdev->info.num_simd_per_compute_unit;
1664    p->wavefrontsPerSimd = pdev->info.max_waves_per_simd;
1665    p->wavefrontSize = 64;
1666 
1667    /* SGPR. */
1668    p->sgprsPerSimd = pdev->info.num_physical_sgprs_per_simd;
1669    p->minSgprAllocation = pdev->info.min_sgpr_alloc;
1670    p->maxSgprAllocation = pdev->info.max_sgpr_alloc;
1671    p->sgprAllocationGranularity = pdev->info.sgpr_alloc_granularity;
1672 
1673    /* VGPR. */
1674    p->vgprsPerSimd = pdev->info.num_physical_wave64_vgprs_per_simd;
1675    p->minVgprAllocation = pdev->info.min_wave64_vgpr_alloc;
1676    p->maxVgprAllocation = pdev->info.max_vgpr_alloc;
1677    p->vgprAllocationGranularity = pdev->info.wave64_vgpr_alloc_granularity;
1678 
1679    /* VK_AMD_shader_core_properties2 */
1680    p->shaderCoreFeatures = 0;
1681    p->activeComputeUnitCount = pdev->info.num_cu;
1682 
1683    /* VK_KHR_vertex_attribute_divisor */
1684    p->maxVertexAttribDivisor = UINT32_MAX;
1685    p->supportsNonZeroFirstInstance = true;
1686 
1687    /* VK_EXT_conservative_rasterization */
1688    p->primitiveOverestimationSize = 0;
1689    p->maxExtraPrimitiveOverestimationSize = 0;
1690    p->extraPrimitiveOverestimationSizeGranularity = 0;
1691    p->primitiveUnderestimation = true;
1692    p->conservativePointAndLineRasterization = false;
1693    p->degenerateTrianglesRasterized = true;
1694    p->degenerateLinesRasterized = false;
1695    p->fullyCoveredFragmentShaderInputVariable = true;
1696    p->conservativeRasterizationPostDepthCoverage = false;
1697 
1698    /* VK_EXT_pci_bus_info */
1699 #ifndef _WIN32
1700    p->pciDomain = pdev->bus_info.domain;
1701    p->pciBus = pdev->bus_info.bus;
1702    p->pciDevice = pdev->bus_info.dev;
1703    p->pciFunction = pdev->bus_info.func;
1704 #endif
1705 
1706    /* VK_EXT_transform_feedback */
1707    p->maxTransformFeedbackStreams = MAX_SO_STREAMS;
1708    p->maxTransformFeedbackBuffers = MAX_SO_BUFFERS;
1709    p->maxTransformFeedbackBufferSize = UINT32_MAX;
1710    p->maxTransformFeedbackStreamDataSize = 512;
1711    p->maxTransformFeedbackBufferDataSize = 512;
1712    p->maxTransformFeedbackBufferDataStride = 512;
1713    p->transformFeedbackQueries = true;
1714    p->transformFeedbackStreamsLinesTriangles = true;
1715    p->transformFeedbackRasterizationStreamSelect = false;
1716    p->transformFeedbackDraw = true;
1717 
1718    /* VK_EXT_sample_locations */
1719    p->sampleLocationSampleCounts = VK_SAMPLE_COUNT_2_BIT | VK_SAMPLE_COUNT_4_BIT | VK_SAMPLE_COUNT_8_BIT;
1720    p->maxSampleLocationGridSize = (VkExtent2D){2, 2};
1721    p->sampleLocationCoordinateRange[0] = 0.0f;
1722    p->sampleLocationCoordinateRange[1] = 0.9375f;
1723    p->sampleLocationSubPixelBits = 4;
1724    p->variableSampleLocations = false;
1725 
1726    /* VK_KHR_line_rasterization */
1727    p->lineSubPixelPrecisionBits = 4;
1728 
1729    /* VK_EXT_robustness2 */
1730    p->robustStorageBufferAccessSizeAlignment = 4;
1731    p->robustUniformBufferAccessSizeAlignment = 4;
1732 
1733    /* VK_EXT_custom_border_color */
1734    p->maxCustomBorderColorSamplers = RADV_BORDER_COLOR_COUNT;
1735 
1736    /* VK_KHR_fragment_shading_rate */
1737    if (radv_vrs_attachment_enabled(pdev)) {
1738       p->minFragmentShadingRateAttachmentTexelSize = (VkExtent2D){8, 8};
1739       p->maxFragmentShadingRateAttachmentTexelSize = (VkExtent2D){8, 8};
1740    } else {
1741       p->minFragmentShadingRateAttachmentTexelSize = (VkExtent2D){0, 0};
1742       p->maxFragmentShadingRateAttachmentTexelSize = (VkExtent2D){0, 0};
1743    }
1744    p->maxFragmentShadingRateAttachmentTexelSizeAspectRatio = 1;
1745    p->primitiveFragmentShadingRateWithMultipleViewports = true;
1746    p->layeredShadingRateAttachments = false; /* TODO */
1747    p->fragmentShadingRateNonTrivialCombinerOps = true;
1748    p->maxFragmentSize = (VkExtent2D){2, 2};
1749    p->maxFragmentSizeAspectRatio = 2;
1750    p->maxFragmentShadingRateCoverageSamples = 32;
1751    p->maxFragmentShadingRateRasterizationSamples = VK_SAMPLE_COUNT_8_BIT;
1752    p->fragmentShadingRateWithShaderDepthStencilWrites = !pdev->info.has_vrs_ds_export_bug;
1753    p->fragmentShadingRateWithSampleMask = true;
1754    p->fragmentShadingRateWithShaderSampleMask = false;
1755    p->fragmentShadingRateWithConservativeRasterization = true;
1756    p->fragmentShadingRateWithFragmentShaderInterlock = pdev->info.gfx_level >= GFX11 && radv_has_pops(pdev);
1757    p->fragmentShadingRateWithCustomSampleLocations = false;
1758    p->fragmentShadingRateStrictMultiplyCombiner = true;
1759 
1760    /* VK_EXT_provoking_vertex */
1761    p->provokingVertexModePerPipeline = true;
1762    p->transformFeedbackPreservesTriangleFanProvokingVertex = true;
1763 
1764    /* VK_KHR_acceleration_structure */
1765    p->maxGeometryCount = (1 << 24) - 1;
1766    p->maxInstanceCount = (1 << 24) - 1;
1767    p->maxPrimitiveCount = (1 << 29) - 1;
1768    p->maxPerStageDescriptorAccelerationStructures = p->maxPerStageDescriptorStorageBuffers;
1769    p->maxPerStageDescriptorUpdateAfterBindAccelerationStructures = p->maxPerStageDescriptorStorageBuffers;
1770    p->maxDescriptorSetAccelerationStructures = p->maxDescriptorSetStorageBuffers;
1771    p->maxDescriptorSetUpdateAfterBindAccelerationStructures = p->maxDescriptorSetStorageBuffers;
1772    p->minAccelerationStructureScratchOffsetAlignment = 128;
1773 
1774    /* VK_EXT_physical_device_drm */
1775 #ifndef _WIN32
1776    if (pdev->available_nodes & (1 << DRM_NODE_PRIMARY)) {
1777       p->drmHasPrimary = true;
1778       p->drmPrimaryMajor = (int64_t)major(pdev->primary_devid);
1779       p->drmPrimaryMinor = (int64_t)minor(pdev->primary_devid);
1780    } else {
1781       p->drmHasPrimary = false;
1782    }
1783    if (pdev->available_nodes & (1 << DRM_NODE_RENDER)) {
1784       p->drmHasRender = true;
1785       p->drmRenderMajor = (int64_t)major(pdev->render_devid);
1786       p->drmRenderMinor = (int64_t)minor(pdev->render_devid);
1787    } else {
1788       p->drmHasRender = false;
1789    }
1790 #endif
1791 
1792    /* VK_EXT_multi_draw */
1793    p->maxMultiDrawCount = 2048;
1794 
1795    /* VK_KHR_ray_tracing_pipeline */
1796 
1797    p->shaderGroupHandleSize = RADV_RT_HANDLE_SIZE;
1798    p->maxRayRecursionDepth = 31;    /* Minimum allowed for DXR. */
1799    p->maxShaderGroupStride = 16384; /* dummy */
1800    /* This isn't strictly necessary, but Doom Eternal breaks if the
1801     * alignment is any lower. */
1802    p->shaderGroupBaseAlignment = RADV_RT_HANDLE_SIZE;
1803    p->shaderGroupHandleCaptureReplaySize = sizeof(struct radv_rt_capture_replay_handle);
1804    p->maxRayDispatchInvocationCount = 1024 * 1024 * 64;
1805    p->shaderGroupHandleAlignment = 16;
1806    p->maxRayHitAttributeSize = RADV_MAX_HIT_ATTRIB_SIZE;
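
   /* Illustrative, app-side sketch of how the handle size/alignment limits above
    * are commonly combined when laying out a shader binding table (align_up() is
    * a hypothetical helper that rounds up to a multiple):
    *
    *    uint32_t stride = align_up(props.shaderGroupHandleSize,
    *                               props.shaderGroupHandleAlignment);
    *    VkStridedDeviceAddressRegionKHR hit_region = {
    *       .deviceAddress = align_up(sbt_base, props.shaderGroupBaseAlignment),
    *       .stride = stride,
    *       .size = stride * hit_group_count,
    *    };
    */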
1807 
1808    /* VK_EXT_shader_module_identifier */
1809    STATIC_ASSERT(sizeof(vk_shaderModuleIdentifierAlgorithmUUID) == sizeof(p->shaderModuleIdentifierAlgorithmUUID));
1810    memcpy(p->shaderModuleIdentifierAlgorithmUUID, vk_shaderModuleIdentifierAlgorithmUUID,
1811           sizeof(p->shaderModuleIdentifierAlgorithmUUID));
1812 
1813    /* VK_KHR_performance_query */
1814    p->allowCommandBufferQueryCopies = false;
1815 
1816    /* VK_NV_device_generated_commands */
1817    p->maxIndirectCommandsStreamCount = 1;
1818    p->maxIndirectCommandsStreamStride = UINT32_MAX;
1819    p->maxIndirectCommandsTokenCount = 512;
1820    p->maxIndirectCommandsTokenOffset = UINT16_MAX;
1821    p->minIndirectCommandsBufferOffsetAlignment = 4;
1822    p->minSequencesCountBufferOffsetAlignment = 4;
1823    p->minSequencesIndexBufferOffsetAlignment = 4;
1824    /* Don't support even a shader group count of 1 until we support shader
1825     * overrides during pipeline creation. */
1826    p->maxGraphicsShaderGroupCount = 0;
1827    /* MSB reserved for signalling indirect count enablement. */
1828    p->maxIndirectSequenceCount = UINT32_MAX >> 1;
1829 
1830    /* VK_EXT_graphics_pipeline_library */
1831    p->graphicsPipelineLibraryFastLinking = true;
1832    p->graphicsPipelineLibraryIndependentInterpolationDecoration = true;
1833 
1834    /* VK_EXT_mesh_shader */
1835    p->maxTaskWorkGroupTotalCount = 4194304; /* 2^22 min required */
1836    p->maxTaskWorkGroupCount[0] = 65535;
1837    p->maxTaskWorkGroupCount[1] = 65535;
1838    p->maxTaskWorkGroupCount[2] = 65535;
1839    p->maxTaskWorkGroupInvocations = 1024;
1840    p->maxTaskWorkGroupSize[0] = 1024;
1841    p->maxTaskWorkGroupSize[1] = 1024;
1842    p->maxTaskWorkGroupSize[2] = 1024;
1843    p->maxTaskPayloadSize = 16384; /* 16K min required */
1844    p->maxTaskSharedMemorySize = 65536;
1845    p->maxTaskPayloadAndSharedMemorySize = 65536;
1846 
1847    p->maxMeshWorkGroupTotalCount = 4194304; /* 2^22 min required */
1848    p->maxMeshWorkGroupCount[0] = 65535;
1849    p->maxMeshWorkGroupCount[1] = 65535;
1850    p->maxMeshWorkGroupCount[2] = 65535;
1851    p->maxMeshWorkGroupInvocations = 256; /* Max NGG HW limit */
1852    p->maxMeshWorkGroupSize[0] = 256;
1853    p->maxMeshWorkGroupSize[1] = 256;
1854    p->maxMeshWorkGroupSize[2] = 256;
1855    p->maxMeshOutputMemorySize = 32 * 1024;                                                    /* 32K min required */
1856    p->maxMeshSharedMemorySize = 28672;                                                        /* 28K min required */
1857    p->maxMeshPayloadAndSharedMemorySize = p->maxTaskPayloadSize + p->maxMeshSharedMemorySize; /* 28K min required */
1858    p->maxMeshPayloadAndOutputMemorySize = p->maxTaskPayloadSize + p->maxMeshOutputMemorySize; /* 47K min required */
1859    p->maxMeshOutputComponents = 128; /* 32x vec4 min required */
1860    p->maxMeshOutputVertices = 256;
1861    p->maxMeshOutputPrimitives = 256;
1862    p->maxMeshOutputLayers = 8;
1863    p->maxMeshMultiviewViewCount = MAX_VIEWS;
1864    p->meshOutputPerVertexGranularity = 1;
1865    p->meshOutputPerPrimitiveGranularity = 1;
1866 
1867    p->maxPreferredTaskWorkGroupInvocations = 64;
1868    p->maxPreferredMeshWorkGroupInvocations = 128;
1869    p->prefersLocalInvocationVertexOutput = true;
1870    p->prefersLocalInvocationPrimitiveOutput = true;
1871    p->prefersCompactVertexOutput = true;
1872    p->prefersCompactPrimitiveOutput = false;
1873 
1874    /* VK_EXT_extended_dynamic_state3 */
1875    p->dynamicPrimitiveTopologyUnrestricted = false;
1876 
1877    /* VK_EXT_descriptor_buffer */
1878    p->combinedImageSamplerDescriptorSingleArray = true;
1879    p->bufferlessPushDescriptors = true;
1880    p->allowSamplerImageViewPostSubmitCreation = false;
1881    p->descriptorBufferOffsetAlignment = 4;
1882    p->maxDescriptorBufferBindings = MAX_SETS;
1883    p->maxResourceDescriptorBufferBindings = MAX_SETS;
1884    p->maxSamplerDescriptorBufferBindings = MAX_SETS;
1885    p->maxEmbeddedImmutableSamplerBindings = MAX_SETS;
1886    p->maxEmbeddedImmutableSamplers = radv_max_descriptor_set_size();
1887    p->bufferCaptureReplayDescriptorDataSize = 0;
1888    p->imageCaptureReplayDescriptorDataSize = 0;
1889    p->imageViewCaptureReplayDescriptorDataSize = 0;
1890    p->samplerCaptureReplayDescriptorDataSize = 0;
1891    p->accelerationStructureCaptureReplayDescriptorDataSize = 0;
1892    p->samplerDescriptorSize = 16;
1893    p->combinedImageSamplerDescriptorSize = 96;
1894    p->sampledImageDescriptorSize = 64;
1895    p->storageImageDescriptorSize = 32;
1896    p->uniformTexelBufferDescriptorSize = 16;
1897    p->robustUniformTexelBufferDescriptorSize = 16;
1898    p->storageTexelBufferDescriptorSize = 16;
1899    p->robustStorageTexelBufferDescriptorSize = 16;
1900    p->uniformBufferDescriptorSize = 16;
1901    p->robustUniformBufferDescriptorSize = 16;
1902    p->storageBufferDescriptorSize = 16;
1903    p->robustStorageBufferDescriptorSize = 16;
1904    p->inputAttachmentDescriptorSize = 64;
1905    p->accelerationStructureDescriptorSize = 16;
1906    p->maxSamplerDescriptorBufferRange = UINT32_MAX;
1907    p->maxResourceDescriptorBufferRange = UINT32_MAX;
1908    p->samplerDescriptorBufferAddressSpaceSize = RADV_MAX_MEMORY_ALLOCATION_SIZE;
1909    p->resourceDescriptorBufferAddressSpaceSize = RADV_MAX_MEMORY_ALLOCATION_SIZE;
1910    p->descriptorBufferAddressSpaceSize = RADV_MAX_MEMORY_ALLOCATION_SIZE;
1911 
1912    /* VK_KHR_fragment_shader_barycentric */
1913    p->triStripVertexOrderIndependentOfProvokingVertex = false;
1914 
1915    /* VK_EXT_pipeline_robustness */
1916    p->defaultRobustnessStorageBuffers = VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_ROBUST_BUFFER_ACCESS_EXT;
1917    p->defaultRobustnessUniformBuffers = VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_ROBUST_BUFFER_ACCESS_EXT;
1918    p->defaultRobustnessVertexInputs = VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_DISABLED_EXT;
1919    p->defaultRobustnessImages = VK_PIPELINE_ROBUSTNESS_IMAGE_BEHAVIOR_ROBUST_IMAGE_ACCESS_2_EXT;
1920 
1921    /* VK_KHR_maintenance5 */
1922    p->earlyFragmentMultisampleCoverageAfterSampleCounting = true;
1923    p->earlyFragmentSampleMaskTestBeforeSampleCounting = true;
1924    p->depthStencilSwizzleOneSupport = true;
1925    p->polygonModePointSize = true;
1926    p->nonStrictSinglePixelWideLinesUseParallelogram = true;
1927    p->nonStrictWideLinesUseParallelogram = true;
1928 
1929    /* VK_KHR_cooperative_matrix */
1930    p->cooperativeMatrixSupportedStages = VK_SHADER_STAGE_COMPUTE_BIT;
1931 
1932    /* VK_KHR_maintenance6 */
1933    p->blockTexelViewCompatibleMultipleLayers = true;
1934    p->maxCombinedImageSamplerDescriptorCount = 1;
1935    p->fragmentShadingRateClampCombinerInputs = true;
1936 
1937    /* VK_EXT_shader_object */
1938    radv_device_get_cache_uuid(pdev, p->shaderBinaryUUID);
1939    p->shaderBinaryVersion = 1;
1940 
1941    /* VK_EXT_map_memory_placed */
1942    uint64_t os_page_size = 4096;
1943    os_get_page_size(&os_page_size);
1944    p->minPlacedMemoryMapAlignment = os_page_size;
1945 
1946    /* VK_EXT_nested_command_buffer */
1947    p->maxCommandBufferNestingLevel = UINT32_MAX;
1948 
1949    /* VK_EXT_legacy_vertex_attributes */
1950    p->nativeUnalignedPerformance = false;
1951 
1952    /* VK_MESA_image_alignment_control */
1953    p->supportedImageAlignmentMask = (4 * 1024) | (64 * 1024);
1954    if (gfx11plus)
1955       p->supportedImageAlignmentMask |= 256 * 1024;
1956 
1957    /* VK_KHR_maintenance7 */
1958    p->robustFragmentShadingRateAttachmentAccess = true;
1959    p->separateDepthStencilAttachmentAccess = true;
1960    p->maxDescriptorSetTotalUniformBuffersDynamic = MAX_DYNAMIC_UNIFORM_BUFFERS;
1961    p->maxDescriptorSetTotalStorageBuffersDynamic = MAX_DYNAMIC_STORAGE_BUFFERS;
1962    p->maxDescriptorSetTotalBuffersDynamic = MAX_DYNAMIC_BUFFERS;
1963    p->maxDescriptorSetUpdateAfterBindTotalUniformBuffersDynamic = MAX_DYNAMIC_UNIFORM_BUFFERS;
1964    p->maxDescriptorSetUpdateAfterBindTotalStorageBuffersDynamic = MAX_DYNAMIC_STORAGE_BUFFERS;
1965    p->maxDescriptorSetUpdateAfterBindTotalBuffersDynamic = MAX_DYNAMIC_BUFFERS;
1966 
1967    /* VK_KHR_pipeline_binary */
1968    p->pipelineBinaryInternalCache = true;
1969    p->pipelineBinaryInternalCacheControl = true;
1970    p->pipelineBinaryPrefersInternalCache = false;
1971    p->pipelineBinaryPrecompiledInternalCache = false;
1972    p->pipelineBinaryCompressedData = false;
1973 
1974    /* VK_KHR_compute_shader_derivatives */
1975    p->meshAndTaskShaderDerivatives = radv_taskmesh_enabled(pdev);
1976 }
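
/* Illustrative, app-side sketch (not driver code): the properties filled in by
 * the function above are retrieved through vkGetPhysicalDeviceProperties2, e.g.:
 *
 *    VkPhysicalDeviceVulkan12Properties vk12 = {
 *       .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_PROPERTIES,
 *    };
 *    VkPhysicalDeviceProperties2 props2 = {
 *       .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2,
 *       .pNext = &vk12,
 *    };
 *    vkGetPhysicalDeviceProperties2(physical_device, &props2);
 *    printf("%s: %s\n", vk12.driverName, vk12.driverInfo);
 */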
1977 
1978 static VkResult
1979 radv_physical_device_try_create(struct radv_instance *instance, drmDevicePtr drm_device,
1980                                 struct radv_physical_device **pdev_out)
1981 {
1982    VkResult result;
1983    int fd = -1;
1984    int master_fd = -1;
1985 
1986 #ifdef _WIN32
1987    assert(drm_device == NULL);
1988 #else
1989    if (drm_device) {
1990       const char *path = drm_device->nodes[DRM_NODE_RENDER];
1991       drmVersionPtr version;
1992 
1993       fd = open(path, O_RDWR | O_CLOEXEC);
1994       if (fd < 0) {
1995          return vk_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER, "Could not open device %s: %m", path);
1996       }
1997 
1998       version = drmGetVersion(fd);
1999       if (!version) {
2000          close(fd);
2001 
2002          return vk_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER,
2003                           "Could not get the kernel driver version for device %s: %m", path);
2004       }
2005 
2006       if (strcmp(version->name, "amdgpu")) {
2007          drmFreeVersion(version);
2008          close(fd);
2009 
2010          return vk_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER,
2011                           "Device '%s' is not using the AMDGPU kernel driver: %m", path);
2012       }
2013       drmFreeVersion(version);
2014 
2015       if (instance->debug_flags & RADV_DEBUG_STARTUP)
2016          fprintf(stderr, "radv: info: Found compatible device '%s'.\n", path);
2017    }
2018 #endif
2019 
2020    struct radv_physical_device *pdev =
2021       vk_zalloc2(&instance->vk.alloc, NULL, sizeof(*pdev), 8, VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
2022    if (!pdev) {
2023       result = vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY);
2024       goto fail_fd;
2025    }
2026 
2027    struct vk_physical_device_dispatch_table dispatch_table;
2028    vk_physical_device_dispatch_table_from_entrypoints(&dispatch_table, &radv_physical_device_entrypoints, true);
2029    vk_physical_device_dispatch_table_from_entrypoints(&dispatch_table, &wsi_physical_device_entrypoints, false);
2030 
2031    result = vk_physical_device_init(&pdev->vk, &instance->vk, NULL, NULL, NULL, &dispatch_table);
2032    if (result != VK_SUCCESS) {
2033       goto fail_alloc;
2034    }
2035 
2036 #ifdef _WIN32
2037    pdev->ws = radv_null_winsys_create();
2038 #else
2039    if (drm_device) {
2040       bool reserve_vmid = instance->vk.trace_mode & RADV_TRACE_MODE_RGP;
2041 
2042       pdev->ws = radv_amdgpu_winsys_create(fd, instance->debug_flags, instance->perftest_flags, reserve_vmid);
2043    } else {
2044       pdev->ws = radv_null_winsys_create();
2045    }
2046 #endif
2047 
2048    if (!pdev->ws) {
2049       result = vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED, "failed to initialize winsys");
2050       goto fail_base;
2051    }
2052 
2053    pdev->vk.supported_sync_types = pdev->ws->get_sync_types(pdev->ws);
2054 
2055 #ifndef _WIN32
2056    if (drm_device && instance->vk.enabled_extensions.KHR_display) {
2057       master_fd = open(drm_device->nodes[DRM_NODE_PRIMARY], O_RDWR | O_CLOEXEC);
2058       if (master_fd >= 0) {
2059          uint32_t accel_working = 0;
2060          struct drm_amdgpu_info request = {.return_pointer = (uintptr_t)&accel_working,
2061                                            .return_size = sizeof(accel_working),
2062                                            .query = AMDGPU_INFO_ACCEL_WORKING};
2063 
2064          if (drmCommandWrite(master_fd, DRM_AMDGPU_INFO, &request, sizeof(struct drm_amdgpu_info)) < 0 ||
2065              !accel_working) {
2066             close(master_fd);
2067             master_fd = -1;
2068          }
2069       }
2070    }
2071 #endif
2072 
2073    pdev->master_fd = master_fd;
2074    pdev->local_fd = fd;
2075    pdev->ws->query_info(pdev->ws, &pdev->info);
2076 
2077    pdev->use_llvm = instance->debug_flags & RADV_DEBUG_LLVM;
2078 #if !AMD_LLVM_AVAILABLE
2079    if (pdev->use_llvm) {
2080       fprintf(stderr, "ERROR: LLVM compiler backend selected for radv, but LLVM support was not "
2081                       "enabled at build time.\n");
2082       abort();
2083    }
2084 #endif
2085 
2086 #if DETECT_OS_ANDROID
2087    pdev->emulate_etc2 = !pdev->info.has_etc_support;
2088    pdev->emulate_astc = true;
2089 #else
2090    pdev->emulate_etc2 = !pdev->info.has_etc_support && instance->drirc.vk_require_etc2;
2091    pdev->emulate_astc = instance->drirc.vk_require_astc;
2092 #endif
2093 
2094    snprintf(pdev->name, sizeof(pdev->name), "AMD RADV %s%s", pdev->info.name, radv_get_compiler_string(pdev));
2095 
2096    const char *marketing_name = pdev->ws->get_chip_name(pdev->ws);
2097    snprintf(pdev->marketing_name, sizeof(pdev->marketing_name), "%s (RADV %s%s)",
2098             marketing_name ? marketing_name : "AMD Unknown", pdev->info.name, radv_get_compiler_string(pdev));
2099 
2100    if (!radv_is_conformant(pdev))
2101       vk_warn_non_conformant_implementation("radv");
2102 
2103    radv_get_driver_uuid(&pdev->driver_uuid);
2104    radv_get_device_uuid(&pdev->info, &pdev->device_uuid);
2105 
2106    pdev->dcc_msaa_allowed = (instance->perftest_flags & RADV_PERFTEST_DCC_MSAA);
2107 
2108    pdev->use_fmask = pdev->info.gfx_level < GFX11 && !(instance->debug_flags & RADV_DEBUG_NO_FMASK);
2109 
2110    pdev->use_ngg = (pdev->info.gfx_level >= GFX10 && pdev->info.family != CHIP_NAVI14 &&
2111                     !(instance->debug_flags & RADV_DEBUG_NO_NGG)) ||
2112                    pdev->info.gfx_level >= GFX11;
2113 
2114    /* TODO: Investigate if NGG culling helps on GFX11. */
2115    pdev->use_ngg_culling = pdev->use_ngg && pdev->info.max_render_backends > 1 &&
2116                            (pdev->info.gfx_level == GFX10_3 || (instance->perftest_flags & RADV_PERFTEST_NGGC)) &&
2117                            !(instance->debug_flags & RADV_DEBUG_NO_NGGC);
2118 
2119    pdev->use_ngg_streamout = pdev->info.gfx_level >= GFX11;
2120 
2121    pdev->emulate_ngg_gs_query_pipeline_stat = pdev->use_ngg && pdev->info.gfx_level < GFX11;
2122 
2123    pdev->mesh_fast_launch_2 = pdev->info.gfx_level >= GFX11;
2124 
2125    pdev->emulate_mesh_shader_queries = pdev->info.gfx_level == GFX10_3;
2126 
2127    /* Determine the number of threads per wave for all stages. */
2128    pdev->cs_wave_size = 64;
2129    pdev->ps_wave_size = 64;
2130    pdev->ge_wave_size = 64;
2131    pdev->rt_wave_size = 64;
2132 
2133    if (pdev->info.gfx_level >= GFX10) {
2134       if (instance->perftest_flags & RADV_PERFTEST_CS_WAVE_32)
2135          pdev->cs_wave_size = 32;
2136 
2137       /* For pixel shaders, wave64 is recommended. */
2138       if (instance->perftest_flags & RADV_PERFTEST_PS_WAVE_32)
2139          pdev->ps_wave_size = 32;
2140 
2141       if (instance->perftest_flags & RADV_PERFTEST_GE_WAVE_32)
2142          pdev->ge_wave_size = 32;
2143 
2144       /* Default to wave32 on RDNA1-2 as that gives better performance due to
2145        * fewer issues with divergence. However, on RDNA3+ default to wave64 as
2146        * implicit dual issuing is likely better than wave32 VOPD for VALU-dependent
2147        * code (and the SALU count also becomes more problematic with wave32).
2148        */
2149       if (instance->perftest_flags & RADV_PERFTEST_RT_WAVE_32 || pdev->info.gfx_level < GFX11)
2150          pdev->rt_wave_size = 32;
2151 
2152       if (instance->perftest_flags & RADV_PERFTEST_RT_WAVE_64 || instance->drirc.force_rt_wave64)
2153          pdev->rt_wave_size = 64;
2154    }
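
   /* Illustrative: the perftest flags checked above are normally set through
    * the RADV_PERFTEST environment variable (flag spellings per Mesa's envvar
    * documentation), e.g.:
    *
    *    RADV_PERFTEST=cswave32,gewave32 ./my_app
    */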
2155 
2156    radv_probe_video_decode(pdev);
2157    radv_probe_video_encode(pdev);
2158 
2159    pdev->max_shared_size = pdev->info.gfx_level >= GFX7 ? 65536 : 32768;
2160 
2161    radv_physical_device_init_mem_types(pdev);
2162 
2163    radv_physical_device_get_supported_extensions(pdev, &pdev->vk.supported_extensions);
2164    radv_physical_device_get_features(pdev, &pdev->vk.supported_features);
2165 
2166    radv_get_nir_options(pdev);
2167 
2168 #ifndef _WIN32
2169    if (drm_device) {
2170       struct stat primary_stat = {0}, render_stat = {0};
2171 
2172       pdev->available_nodes = drm_device->available_nodes;
2173       pdev->bus_info = *drm_device->businfo.pci;
2174 
2175       if ((drm_device->available_nodes & (1 << DRM_NODE_PRIMARY)) &&
2176           stat(drm_device->nodes[DRM_NODE_PRIMARY], &primary_stat) != 0) {
2177          result = vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED, "failed to stat DRM primary node %s",
2178                             drm_device->nodes[DRM_NODE_PRIMARY]);
2179          goto fail_perfcounters;
2180       }
2181       pdev->primary_devid = primary_stat.st_rdev;
2182 
2183       if ((drm_device->available_nodes & (1 << DRM_NODE_RENDER)) &&
2184           stat(drm_device->nodes[DRM_NODE_RENDER], &render_stat) != 0) {
2185          result = vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED, "failed to stat DRM render node %s",
2186                             drm_device->nodes[DRM_NODE_RENDER]);
2187          goto fail_perfcounters;
2188       }
2189       pdev->render_devid = render_stat.st_rdev;
2190    }
2191 #endif
2192 
2193    radv_physical_device_init_cache_key(pdev);
2194 
2195    if (radv_device_get_cache_uuid(pdev, pdev->cache_uuid)) {
2196       result = vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED, "cannot generate UUID");
2197       goto fail_wsi;
2198    }
2199 
2200    /* The GPU id is already embedded in the UUID, so we just pass "radv"
2201     * when creating the cache.
2202     */
2203    char buf[VK_UUID_SIZE * 2 + 1];
2204    mesa_bytes_to_hex(buf, pdev->cache_uuid, VK_UUID_SIZE);
2205    pdev->vk.disk_cache = disk_cache_create(pdev->name, buf, 0);
2206 
2207    radv_get_physical_device_properties(pdev);
2208 
2209    if ((instance->debug_flags & RADV_DEBUG_INFO))
2210       ac_print_gpu_info(&pdev->info, stdout);
2211 
2212    radv_init_physical_device_decoder(pdev);
2213    radv_init_physical_device_encoder(pdev);
2214 
2215    radv_physical_device_init_queue_table(pdev);
2216 
2217    /* We don't check the error code; we check later whether it was initialized. */
2218    ac_init_perfcounters(&pdev->info, false, false, &pdev->ac_perfcounters);
2219 
2220    /* The WSI is structured as a layer on top of the driver, so this has
2221     * to be the last part of initialization (at least until we get other
2222     * semi-layers).
2223     */
2224    result = radv_init_wsi(pdev);
2225    if (result != VK_SUCCESS) {
2226       vk_error(instance, result);
2227       goto fail_perfcounters;
2228    }
2229 
2230    pdev->gs_table_depth = ac_get_gs_table_depth(pdev->info.gfx_level, pdev->info.family);
2231 
2232    ac_get_hs_info(&pdev->info, &pdev->hs);
2233    ac_get_task_info(&pdev->info, &pdev->task_info);
2234    radv_get_binning_settings(pdev, &pdev->binning_settings);
2235 
2236    if (pdev->info.has_distributed_tess) {
2237       if (pdev->info.family == CHIP_FIJI || pdev->info.family >= CHIP_POLARIS10)
2238          pdev->tess_distribution_mode = V_028B6C_TRAPEZOIDS;
2239       else
2240          pdev->tess_distribution_mode = V_028B6C_DONUTS;
2241    } else {
2242       pdev->tess_distribution_mode = V_028B6C_NO_DIST;
2243    }
2244 
2245    *pdev_out = pdev;
2246 
2247    return VK_SUCCESS;
2248 
2249 fail_perfcounters:
2250    ac_destroy_perfcounters(&pdev->ac_perfcounters);
2251    disk_cache_destroy(pdev->vk.disk_cache);
2252 fail_wsi:
2253    pdev->ws->destroy(pdev->ws);
2254 fail_base:
2255    vk_physical_device_finish(&pdev->vk);
2256 fail_alloc:
2257    vk_free(&instance->vk.alloc, pdev);
2258 fail_fd:
2259    if (fd != -1)
2260       close(fd);
2261    if (master_fd != -1)
2262       close(master_fd);
2263    return result;
2264 }
2265 
2266 VkResult
2267 create_null_physical_device(struct vk_instance *vk_instance)
2268 {
2269    struct radv_instance *instance = container_of(vk_instance, struct radv_instance, vk);
2270    struct radv_physical_device *pdev;
2271 
2272    VkResult result = radv_physical_device_try_create(instance, NULL, &pdev);
2273    if (result != VK_SUCCESS)
2274       return result;
2275 
2276    list_addtail(&pdev->vk.link, &instance->vk.physical_devices.list);
2277    return VK_SUCCESS;
2278 }
2279 
2280 VkResult
2281 create_drm_physical_device(struct vk_instance *vk_instance, struct _drmDevice *device, struct vk_physical_device **out)
2282 {
2283 #ifndef _WIN32
2284    if (!(device->available_nodes & (1 << DRM_NODE_RENDER)) || device->bustype != DRM_BUS_PCI ||
2285        device->deviceinfo.pci->vendor_id != ATI_VENDOR_ID)
2286       return VK_ERROR_INCOMPATIBLE_DRIVER;
2287 
2288    return radv_physical_device_try_create((struct radv_instance *)vk_instance, device,
2289                                           (struct radv_physical_device **)out);
2290 #else
2291    return VK_SUCCESS;
2292 #endif
2293 }
2294 
2295 void
2296 radv_physical_device_destroy(struct vk_physical_device *vk_device)
2297 {
2298    struct radv_physical_device *pdev = container_of(vk_device, struct radv_physical_device, vk);
2299    const struct radv_instance *instance = radv_physical_device_instance(pdev);
2300 
2301    radv_finish_wsi(pdev);
2302    ac_destroy_perfcounters(&pdev->ac_perfcounters);
2303    pdev->ws->destroy(pdev->ws);
2304    disk_cache_destroy(pdev->vk.disk_cache);
2305    if (pdev->local_fd != -1)
2306       close(pdev->local_fd);
2307    if (pdev->master_fd != -1)
2308       close(pdev->master_fd);
2309    vk_physical_device_finish(&pdev->vk);
2310    vk_free(&instance->vk.alloc, pdev);
2311 }
2312 
2313 static void
2314 radv_get_physical_device_queue_family_properties(struct radv_physical_device *pdev, uint32_t *pCount,
2315                                                  VkQueueFamilyProperties **pQueueFamilyProperties)
2316 {
2317    const struct radv_instance *instance = radv_physical_device_instance(pdev);
2318    int num_queue_families = 1;
2319    int idx;
2320    if (pdev->info.ip[AMD_IP_COMPUTE].num_queues > 0 && !(instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE))
2321       num_queue_families++;
2322 
2323    if (pdev->video_decode_enabled) {
2324       if (pdev->info.ip[pdev->vid_decode_ip].num_queues > 0)
2325          num_queue_families++;
2326    }
2327 
2328    if (radv_transfer_queue_enabled(pdev)) {
2329       num_queue_families++;
2330    }
2331 
2332    if (pdev->video_encode_enabled) {
2333      if (pdev->info.ip[AMD_IP_VCN_ENC].num_queues > 0)
2334        num_queue_families++;
2335    }
2336 
2337    if (radv_sparse_queue_enabled(pdev)) {
2338       num_queue_families++;
2339    }
2340 
2341    if (pQueueFamilyProperties == NULL) {
2342       *pCount = num_queue_families;
2343       return;
2344    }
2345 
2346    if (!*pCount)
2347       return;
2348 
2349    idx = 0;
2350    if (*pCount >= 1) {
2351       VkQueueFlags gfx_flags = VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT | VK_QUEUE_TRANSFER_BIT;
2352       if (!radv_sparse_queue_enabled(pdev))
2353          gfx_flags |= VK_QUEUE_SPARSE_BINDING_BIT;
2354       *pQueueFamilyProperties[idx] = (VkQueueFamilyProperties){
2355          .queueFlags = gfx_flags,
2356          .queueCount = 1,
2357          .timestampValidBits = 64,
2358          .minImageTransferGranularity = (VkExtent3D){1, 1, 1},
2359       };
2360       idx++;
2361    }
2362 
2363    if (pdev->info.ip[AMD_IP_COMPUTE].num_queues > 0 && !(instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE)) {
2364       VkQueueFlags compute_flags = VK_QUEUE_COMPUTE_BIT | VK_QUEUE_TRANSFER_BIT;
2365       if (!radv_sparse_queue_enabled(pdev))
2366          compute_flags |= VK_QUEUE_SPARSE_BINDING_BIT;
2367       if (*pCount > idx) {
2368          *pQueueFamilyProperties[idx] = (VkQueueFamilyProperties){
2369             .queueFlags = compute_flags,
2370             .queueCount = pdev->info.ip[AMD_IP_COMPUTE].num_queues,
2371             .timestampValidBits = 64,
2372             .minImageTransferGranularity = (VkExtent3D){1, 1, 1},
2373          };
2374          idx++;
2375       }
2376    }
2377 
2378    if (pdev->video_decode_enabled) {
2379       if (pdev->info.ip[pdev->vid_decode_ip].num_queues > 0) {
2380          if (*pCount > idx) {
2381             *pQueueFamilyProperties[idx] = (VkQueueFamilyProperties){
2382                .queueFlags = VK_QUEUE_VIDEO_DECODE_BIT_KHR,
2383                .queueCount = pdev->info.ip[pdev->vid_decode_ip].num_queues,
2384                .timestampValidBits = 0,
2385                .minImageTransferGranularity = (VkExtent3D){1, 1, 1},
2386             };
2387             idx++;
2388          }
2389       }
2390    }
2391 
2392    if (radv_transfer_queue_enabled(pdev)) {
2393       if (*pCount > idx) {
2394          *pQueueFamilyProperties[idx] = (VkQueueFamilyProperties){
2395             .queueFlags = VK_QUEUE_TRANSFER_BIT,
2396             .queueCount = pdev->info.ip[AMD_IP_SDMA].num_queues,
2397             .timestampValidBits = 64,
2398             .minImageTransferGranularity = (VkExtent3D){16, 16, 8},
2399          };
2400          idx++;
2401       }
2402    }
2403 
2404    if (pdev->video_encode_enabled) {
2405       if (pdev->info.ip[AMD_IP_VCN_ENC].num_queues > 0) {
2406          if (*pCount > idx) {
2407             *pQueueFamilyProperties[idx] = (VkQueueFamilyProperties){
2408                .queueFlags = VK_QUEUE_VIDEO_ENCODE_BIT_KHR,
2409                .queueCount = pdev->info.ip[AMD_IP_VCN_ENC].num_queues,
2410                .timestampValidBits = 0,
2411                .minImageTransferGranularity = (VkExtent3D){1, 1, 1},
2412             };
2413             idx++;
2414          }
2415       }
2416    }
2417 
2418    if (radv_sparse_queue_enabled(pdev)) {
2419       if (*pCount > idx) {
2420          *pQueueFamilyProperties[idx] = (VkQueueFamilyProperties){
2421             .queueFlags = VK_QUEUE_SPARSE_BINDING_BIT,
2422             .queueCount = 1,
2423             .timestampValidBits = 64,
2424             .minImageTransferGranularity = (VkExtent3D){1, 1, 1},
2425          };
2426          idx++;
2427       }
2428    }
2429 
2430    *pCount = idx;
2431 }
2432 
2433 static const VkQueueGlobalPriorityKHR radv_global_queue_priorities[] = {
2434    VK_QUEUE_GLOBAL_PRIORITY_LOW_KHR,
2435    VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR,
2436    VK_QUEUE_GLOBAL_PRIORITY_HIGH_KHR,
2437    VK_QUEUE_GLOBAL_PRIORITY_REALTIME_KHR,
2438 };
2439 
2440 VKAPI_ATTR void VKAPI_CALL
2441 radv_GetPhysicalDeviceQueueFamilyProperties2(VkPhysicalDevice physicalDevice, uint32_t *pCount,
2442                                              VkQueueFamilyProperties2 *pQueueFamilyProperties)
2443 {
2444    VK_FROM_HANDLE(radv_physical_device, pdev, physicalDevice);
2445    if (!pQueueFamilyProperties) {
2446       radv_get_physical_device_queue_family_properties(pdev, pCount, NULL);
2447       return;
2448    }
2449    VkQueueFamilyProperties *properties[] = {
2450       &pQueueFamilyProperties[0].queueFamilyProperties, &pQueueFamilyProperties[1].queueFamilyProperties,
2451       &pQueueFamilyProperties[2].queueFamilyProperties, &pQueueFamilyProperties[3].queueFamilyProperties,
2452       &pQueueFamilyProperties[4].queueFamilyProperties, &pQueueFamilyProperties[5].queueFamilyProperties,
2453    };
2454    radv_get_physical_device_queue_family_properties(pdev, pCount, properties);
2455    assert(*pCount <= 6);
2456 
2457    for (uint32_t i = 0; i < *pCount; i++) {
2458       vk_foreach_struct (ext, pQueueFamilyProperties[i].pNext) {
2459          switch (ext->sType) {
2460          case VK_STRUCTURE_TYPE_QUEUE_FAMILY_GLOBAL_PRIORITY_PROPERTIES_KHR: {
2461             VkQueueFamilyGlobalPriorityPropertiesKHR *prop = (VkQueueFamilyGlobalPriorityPropertiesKHR *)ext;
2462             STATIC_ASSERT(ARRAY_SIZE(radv_global_queue_priorities) <= VK_MAX_GLOBAL_PRIORITY_SIZE_KHR);
2463             prop->priorityCount = ARRAY_SIZE(radv_global_queue_priorities);
2464             memcpy(&prop->priorities, radv_global_queue_priorities, sizeof(radv_global_queue_priorities));
2465             break;
2466          }
2467          case VK_STRUCTURE_TYPE_QUEUE_FAMILY_QUERY_RESULT_STATUS_PROPERTIES_KHR: {
2468             VkQueueFamilyQueryResultStatusPropertiesKHR *prop = (VkQueueFamilyQueryResultStatusPropertiesKHR *)ext;
2469             prop->queryResultStatusSupport = VK_FALSE;
2470             break;
2471          }
2472          case VK_STRUCTURE_TYPE_QUEUE_FAMILY_VIDEO_PROPERTIES_KHR: {
2473             VkQueueFamilyVideoPropertiesKHR *prop = (VkQueueFamilyVideoPropertiesKHR *)ext;
2474             prop->videoCodecOperations = 0;
2475             if (pQueueFamilyProperties[i].queueFamilyProperties.queueFlags & VK_QUEUE_VIDEO_DECODE_BIT_KHR) {
2476                if (VIDEO_CODEC_H264DEC)
2477                   prop->videoCodecOperations |= VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR;
2478                if (VIDEO_CODEC_H265DEC)
2479                   prop->videoCodecOperations |= VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR;
2480                if (VIDEO_CODEC_AV1DEC && pdev->info.vcn_ip_version >= VCN_3_0_0 &&
2481                    pdev->info.vcn_ip_version != VCN_3_0_33)
2482                   prop->videoCodecOperations |= VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR;
2483             }
2484             if (pQueueFamilyProperties[i].queueFamilyProperties.queueFlags & VK_QUEUE_VIDEO_ENCODE_BIT_KHR) {
2485                if (VIDEO_CODEC_H264ENC)
2486                   prop->videoCodecOperations |= VK_VIDEO_CODEC_OPERATION_ENCODE_H264_BIT_KHR;
2487                if (VIDEO_CODEC_H265ENC)
2488                   prop->videoCodecOperations |= VK_VIDEO_CODEC_OPERATION_ENCODE_H265_BIT_KHR;
2489             }
2490             break;
2491          }
2492          default:
2493             break;
2494          }
2495       }
2496    }
2497 }
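
/* Editor's sketch, not part of the driver: a minimal app-side example of how the pNext
 * handling above is consumed. It chains VkQueueFamilyGlobalPriorityPropertiesKHR into the
 * second call of the usual two-call enumeration idiom. Assumes <vulkan/vulkan.h>; the
 * function and variable names are hypothetical and error handling is omitted.
 */
#if 0 /* illustrative only */
static void
example_query_queue_family_global_priorities(VkPhysicalDevice phys_dev)
{
   uint32_t count = 0;
   vkGetPhysicalDeviceQueueFamilyProperties2(phys_dev, &count, NULL);
   if (count > 8)
      count = 8; /* RADV exposes at most 6 queue families, see the assert above. */

   VkQueueFamilyGlobalPriorityPropertiesKHR prio[8] = {0};
   VkQueueFamilyProperties2 props[8] = {0};

   for (uint32_t i = 0; i < count; i++) {
      prio[i].sType = VK_STRUCTURE_TYPE_QUEUE_FAMILY_GLOBAL_PRIORITY_PROPERTIES_KHR;
      props[i].sType = VK_STRUCTURE_TYPE_QUEUE_FAMILY_PROPERTIES_2;
      props[i].pNext = &prio[i];
   }

   vkGetPhysicalDeviceQueueFamilyProperties2(phys_dev, &count, props);

   /* prio[i].priorities[0..priorityCount-1] now lists the global priorities supported by
    * queue family i. */
}
#endif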
2498 
2499 static void
2500 radv_get_memory_budget_properties(VkPhysicalDevice physicalDevice,
2501                                   VkPhysicalDeviceMemoryBudgetPropertiesEXT *memoryBudget)
2502 {
2503    VK_FROM_HANDLE(radv_physical_device, pdev, physicalDevice);
2504    const struct radv_instance *instance = radv_physical_device_instance(pdev);
2505    VkPhysicalDeviceMemoryProperties *memory_properties = &pdev->memory_properties;
2506 
2507    /* For all memory heaps, the budget is computed as follows:
2508     *	heap_budget = heap_size - global_heap_usage + app_heap_usage
2509     *
2510     * The Vulkan spec 1.1.97 says that the budget should include any
2511     * currently allocated device memory.
2512     *
2513     * Note that the application heap usages are not really accurate (e.g.
2514     * in the presence of shared buffers).
2515     */
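   /* Editor's note, illustrative numbers only: with heap_size = 8 GiB, global_heap_usage = 5 GiB
    * and app_heap_usage = 2 GiB, the formula above yields heap_budget = 8 - 5 + 2 = 5 GiB, i.e.
    * this process's own allocations plus whatever is still free system-wide.
    */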
2516    if (!pdev->info.has_dedicated_vram) {
2517       if (instance->drirc.enable_unified_heap_on_apu) {
2518          /* When the heaps are unified, only the visible VRAM heap is exposed on APUs. */
2519          assert(pdev->heaps == RADV_HEAP_VRAM_VIS);
2520          assert(pdev->memory_properties.memoryHeaps[0].flags == VK_MEMORY_HEAP_DEVICE_LOCAL_BIT);
2521          const uint8_t vram_vis_heap_idx = 0;
2522 
2523          /* Get the total heap size which is the visible VRAM heap size. */
2524          uint64_t total_heap_size = pdev->memory_properties.memoryHeaps[vram_vis_heap_idx].size;
2525 
2526          /* Get the different memory usages. */
2527          uint64_t vram_vis_internal_usage = pdev->ws->query_value(pdev->ws, RADEON_ALLOCATED_VRAM_VIS) +
2528                                             pdev->ws->query_value(pdev->ws, RADEON_ALLOCATED_VRAM);
2529          uint64_t gtt_internal_usage = pdev->ws->query_value(pdev->ws, RADEON_ALLOCATED_GTT);
2530          uint64_t total_internal_usage = vram_vis_internal_usage + gtt_internal_usage;
2531          uint64_t total_system_usage =
2532             pdev->ws->query_value(pdev->ws, RADEON_VRAM_VIS_USAGE) + pdev->ws->query_value(pdev->ws, RADEON_GTT_USAGE);
2533          uint64_t total_usage = MAX2(total_internal_usage, total_system_usage);
2534 
2535          /* Compute the total free space that can be allocated for this process across all heaps. */
2536          uint64_t total_free_space = total_heap_size - MIN2(total_heap_size, total_usage);
2537 
2538          memoryBudget->heapBudget[vram_vis_heap_idx] = total_free_space + total_internal_usage;
2539          memoryBudget->heapUsage[vram_vis_heap_idx] = total_internal_usage;
2540       } else {
2541          /* On APUs, the driver exposes fake heaps to the application because the carveout is
2542           * usually too small for games, so the budgets need to be redistributed accordingly.
2543           */
2544          assert(pdev->heaps == (RADV_HEAP_GTT | RADV_HEAP_VRAM_VIS));
2545          assert(pdev->memory_properties.memoryHeaps[0].flags == 0); /* GTT */
2546          assert(pdev->memory_properties.memoryHeaps[1].flags == VK_MEMORY_HEAP_DEVICE_LOCAL_BIT);
2547          const uint8_t gtt_heap_idx = 0, vram_vis_heap_idx = 1;
2548 
2549          /* Get the visible VRAM/GTT heap sizes and internal usages. */
2550          uint64_t gtt_heap_size = pdev->memory_properties.memoryHeaps[gtt_heap_idx].size;
2551          uint64_t vram_vis_heap_size = pdev->memory_properties.memoryHeaps[vram_vis_heap_idx].size;
2552 
2553          uint64_t vram_vis_internal_usage = pdev->ws->query_value(pdev->ws, RADEON_ALLOCATED_VRAM_VIS) +
2554                                             pdev->ws->query_value(pdev->ws, RADEON_ALLOCATED_VRAM);
2555          uint64_t gtt_internal_usage = pdev->ws->query_value(pdev->ws, RADEON_ALLOCATED_GTT);
2556 
2557          /* Compute the total heap size, internal and system usage. */
2558          uint64_t total_heap_size = vram_vis_heap_size + gtt_heap_size;
2559          uint64_t total_internal_usage = vram_vis_internal_usage + gtt_internal_usage;
2560          uint64_t total_system_usage =
2561             pdev->ws->query_value(pdev->ws, RADEON_VRAM_VIS_USAGE) + pdev->ws->query_value(pdev->ws, RADEON_GTT_USAGE);
2562 
2563          uint64_t total_usage = MAX2(total_internal_usage, total_system_usage);
2564 
2565          /* Compute the total free space that can be allocated for this process across all heaps. */
2566          uint64_t total_free_space = total_heap_size - MIN2(total_heap_size, total_usage);
2567 
2568          /* Compute the remaining visible VRAM size for this process. */
2569          uint64_t vram_vis_free_space = vram_vis_heap_size - MIN2(vram_vis_heap_size, vram_vis_internal_usage);
2570 
2571          /* Distribute the total free space (2/3 as VRAM and 1/3 as GTT) to match the heap
2572           * sizes, and align down to the page size to be conservative.
2573           */
2574          vram_vis_free_space =
2575             ROUND_DOWN_TO(MIN2((total_free_space * 2) / 3, vram_vis_free_space), pdev->info.gart_page_size);
2576          uint64_t gtt_free_space = total_free_space - vram_vis_free_space;
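         /* Editor's note, illustrative numbers only: with total_free_space = 3 GiB and
          * vram_vis_free_space = 4 GiB, the MIN2 above picks (3 * 2) / 3 = 2 GiB for visible
          * VRAM (then rounded down to the GART page size), leaving gtt_free_space = 1 GiB.
          */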
2577 
2578          memoryBudget->heapBudget[vram_vis_heap_idx] = vram_vis_free_space + vram_vis_internal_usage;
2579          memoryBudget->heapUsage[vram_vis_heap_idx] = vram_vis_internal_usage;
2580          memoryBudget->heapBudget[gtt_heap_idx] = gtt_free_space + gtt_internal_usage;
2581          memoryBudget->heapUsage[gtt_heap_idx] = gtt_internal_usage;
2582       }
2583    } else {
2584       unsigned mask = pdev->heaps;
2585       unsigned heap = 0;
2586       while (mask) {
2587          uint64_t internal_usage = 0, system_usage = 0;
2588          unsigned type = 1u << u_bit_scan(&mask);
2589 
2590          switch (type) {
2591          case RADV_HEAP_VRAM:
2592             internal_usage = pdev->ws->query_value(pdev->ws, RADEON_ALLOCATED_VRAM);
2593             system_usage = pdev->ws->query_value(pdev->ws, RADEON_VRAM_USAGE);
2594             break;
2595          case RADV_HEAP_VRAM_VIS:
2596             internal_usage = pdev->ws->query_value(pdev->ws, RADEON_ALLOCATED_VRAM_VIS);
2597             if (!(pdev->heaps & RADV_HEAP_VRAM))
2598                internal_usage += pdev->ws->query_value(pdev->ws, RADEON_ALLOCATED_VRAM);
2599             system_usage = pdev->ws->query_value(pdev->ws, RADEON_VRAM_VIS_USAGE);
2600             break;
2601          case RADV_HEAP_GTT:
2602             internal_usage = pdev->ws->query_value(pdev->ws, RADEON_ALLOCATED_GTT);
2603             system_usage = pdev->ws->query_value(pdev->ws, RADEON_GTT_USAGE);
2604             break;
2605          }
2606 
2607          uint64_t total_usage = MAX2(internal_usage, system_usage);
2608 
2609          uint64_t free_space = pdev->memory_properties.memoryHeaps[heap].size -
2610                                MIN2(pdev->memory_properties.memoryHeaps[heap].size, total_usage);
2611          memoryBudget->heapBudget[heap] = free_space + internal_usage;
2612          memoryBudget->heapUsage[heap] = internal_usage;
2613          ++heap;
2614       }
2615 
2616       assert(heap == memory_properties->memoryHeapCount);
2617    }
2618 
2619    /* The heapBudget value must be less than or equal to VkMemoryHeap::size for each heap. */
2620    for (uint32_t i = 0; i < memory_properties->memoryHeapCount; i++) {
2621       memoryBudget->heapBudget[i] = MIN2(memory_properties->memoryHeaps[i].size, memoryBudget->heapBudget[i]);
2622    }
2623 
2624    /* The heapBudget and heapUsage values must be zero for array elements
2625     * greater than or equal to
2626     * VkPhysicalDeviceMemoryProperties::memoryHeapCount.
2627     */
2628    for (uint32_t i = memory_properties->memoryHeapCount; i < VK_MAX_MEMORY_HEAPS; i++) {
2629       memoryBudget->heapBudget[i] = 0;
2630       memoryBudget->heapUsage[i] = 0;
2631    }
2632 }
2633 
2634 VKAPI_ATTR void VKAPI_CALL
2635 radv_GetPhysicalDeviceMemoryProperties2(VkPhysicalDevice physicalDevice,
2636                                         VkPhysicalDeviceMemoryProperties2 *pMemoryProperties)
2637 {
2638    VK_FROM_HANDLE(radv_physical_device, pdev, physicalDevice);
2639 
2640    pMemoryProperties->memoryProperties = pdev->memory_properties;
2641 
2642    VkPhysicalDeviceMemoryBudgetPropertiesEXT *memory_budget =
2643       vk_find_struct(pMemoryProperties->pNext, PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT);
2644    if (memory_budget)
2645       radv_get_memory_budget_properties(physicalDevice, memory_budget);
2646 }
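
/* Editor's sketch, not part of the driver: how an application reaches
 * radv_get_memory_budget_properties() above, by chaining the VK_EXT_memory_budget structure
 * through pNext. Assumes <vulkan/vulkan.h>; the function and variable names are hypothetical.
 */
#if 0 /* illustrative only */
static void
example_query_memory_budget(VkPhysicalDevice phys_dev)
{
   VkPhysicalDeviceMemoryBudgetPropertiesEXT budget = {
      .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT,
   };
   VkPhysicalDeviceMemoryProperties2 props = {
      .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_PROPERTIES_2,
      .pNext = &budget,
   };

   vkGetPhysicalDeviceMemoryProperties2(phys_dev, &props);

   /* For i < props.memoryProperties.memoryHeapCount:
    *   budget.heapBudget[i] - how much this process may still allocate from heap i,
    *   budget.heapUsage[i]  - how much this process has already allocated from heap i. */
}
#endif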
2647 
2648 static const VkTimeDomainKHR radv_time_domains[] = {
2649    VK_TIME_DOMAIN_DEVICE_KHR,
2650    VK_TIME_DOMAIN_CLOCK_MONOTONIC_KHR,
2651 #ifdef CLOCK_MONOTONIC_RAW
2652    VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_KHR,
2653 #endif
2654 };
2655 
2656 VKAPI_ATTR VkResult VKAPI_CALL
2657 radv_GetPhysicalDeviceCalibrateableTimeDomainsKHR(VkPhysicalDevice physicalDevice, uint32_t *pTimeDomainCount,
2658                                                   VkTimeDomainKHR *pTimeDomains)
2659 {
2660    int d;
2661    VK_OUTARRAY_MAKE_TYPED(VkTimeDomainKHR, out, pTimeDomains, pTimeDomainCount);
2662 
2663    for (d = 0; d < ARRAY_SIZE(radv_time_domains); d++) {
2664       vk_outarray_append_typed(VkTimeDomainKHR, &out, i)
2665       {
2666          *i = radv_time_domains[d];
2667       }
2668    }
2669 
2670    return vk_outarray_status(&out);
2671 }
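
/* Editor's sketch, not part of the driver: consuming the time domains advertised above with
 * vkGetCalibratedTimestampsKHR on a logical device. Assumes <vulkan/vulkan.h> and that both
 * requested domains are supported (DEVICE and CLOCK_MONOTONIC, as listed in
 * radv_time_domains); names are hypothetical.
 */
#if 0 /* illustrative only */
static void
example_calibrated_timestamps(VkDevice dev)
{
   const VkCalibratedTimestampInfoKHR infos[2] = {
      {.sType = VK_STRUCTURE_TYPE_CALIBRATED_TIMESTAMP_INFO_KHR, .timeDomain = VK_TIME_DOMAIN_DEVICE_KHR},
      {.sType = VK_STRUCTURE_TYPE_CALIBRATED_TIMESTAMP_INFO_KHR, .timeDomain = VK_TIME_DOMAIN_CLOCK_MONOTONIC_KHR},
   };
   uint64_t timestamps[2];
   uint64_t max_deviation;

   vkGetCalibratedTimestampsKHR(dev, 2, infos, timestamps, &max_deviation);

   /* timestamps[0] (device ticks) and timestamps[1] (CLOCK_MONOTONIC, nanoseconds) were
    * sampled together, within max_deviation nanoseconds of each other. */
}
#endif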
2672 
2673 VKAPI_ATTR void VKAPI_CALL
2674 radv_GetPhysicalDeviceMultisamplePropertiesEXT(VkPhysicalDevice physicalDevice, VkSampleCountFlagBits samples,
2675                                                VkMultisamplePropertiesEXT *pMultisampleProperties)
2676 {
2677    VkSampleCountFlagBits supported_samples = VK_SAMPLE_COUNT_2_BIT | VK_SAMPLE_COUNT_4_BIT | VK_SAMPLE_COUNT_8_BIT;
2678 
2679    if (samples & supported_samples) {
2680       pMultisampleProperties->maxSampleLocationGridSize = (VkExtent2D){2, 2};
2681    } else {
2682       pMultisampleProperties->maxSampleLocationGridSize = (VkExtent2D){0, 0};
2683    }
2684 }
2685 
2686 VKAPI_ATTR VkResult VKAPI_CALL
2687 radv_GetPhysicalDeviceFragmentShadingRatesKHR(VkPhysicalDevice physicalDevice, uint32_t *pFragmentShadingRateCount,
2688                                               VkPhysicalDeviceFragmentShadingRateKHR *pFragmentShadingRates)
2689 {
2690    VK_OUTARRAY_MAKE_TYPED(VkPhysicalDeviceFragmentShadingRateKHR, out, pFragmentShadingRates,
2691                           pFragmentShadingRateCount);
2692 
2693 #define append_rate(w, h, s)                                                                                           \
2694    {                                                                                                                   \
2695       VkPhysicalDeviceFragmentShadingRateKHR rate = {                                                                  \
2696          .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADING_RATE_PROPERTIES_KHR,                              \
2697          .sampleCounts = s,                                                                                            \
2698          .fragmentSize = {.width = w, .height = h},                                                                    \
2699       };                                                                                                               \
2700       vk_outarray_append_typed(VkPhysicalDeviceFragmentShadingRateKHR, &out, r) *r = rate;                             \
2701    }
2702 
2703    for (uint32_t x = 2; x >= 1; x--) {
2704       for (uint32_t y = 2; y >= 1; y--) {
2705          VkSampleCountFlagBits samples;
2706 
2707          if (x == 1 && y == 1) {
2708             samples = ~0;
2709          } else {
2710             samples = VK_SAMPLE_COUNT_1_BIT | VK_SAMPLE_COUNT_2_BIT | VK_SAMPLE_COUNT_4_BIT | VK_SAMPLE_COUNT_8_BIT;
2711          }
2712 
2713          append_rate(x, y, samples);
2714       }
2715    }
2716 #undef append_rate
2717 
2718    return vk_outarray_status(&out);
2719 }
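
/* Editor's note: the loops above enumerate the shading rates in the order 2x2, 2x1, 1x2, 1x1.
 * The 1x1 rate reports every sample count (samples = ~0), while the coarser rates are limited
 * to 1x/2x/4x/8x MSAA.
 */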
2720 
2721 /* VK_EXT_tooling_info */
2722 VKAPI_ATTR VkResult VKAPI_CALL
2723 radv_GetPhysicalDeviceToolProperties(VkPhysicalDevice physicalDevice, uint32_t *pToolCount,
2724                                      VkPhysicalDeviceToolProperties *pToolProperties)
2725 {
2726    VK_FROM_HANDLE(radv_physical_device, pdev, physicalDevice);
2727    const struct radv_instance *instance = radv_physical_device_instance(pdev);
2728    VK_OUTARRAY_MAKE_TYPED(VkPhysicalDeviceToolProperties, out, pToolProperties, pToolCount);
2729    bool rgp_enabled, rmv_enabled, rra_enabled;
2730    uint32_t tool_count = 0;
2731 
2732    /* RGP */
2733    rgp_enabled = instance->vk.trace_mode & RADV_TRACE_MODE_RGP;
2734    if (rgp_enabled)
2735       tool_count++;
2736 
2737    /* RMV */
2738    rmv_enabled = instance->vk.trace_mode & VK_TRACE_MODE_RMV;
2739    if (rmv_enabled)
2740       tool_count++;
2741 
2742    /* RRA */
2743    rra_enabled = instance->vk.trace_mode & RADV_TRACE_MODE_RRA;
2744    if (rra_enabled)
2745       tool_count++;
2746 
2747    if (!pToolProperties) {
2748       *pToolCount = tool_count;
2749       return VK_SUCCESS;
2750    }
2751 
2752    if (rgp_enabled) {
2753       VkPhysicalDeviceToolProperties tool = {
2754          .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TOOL_PROPERTIES,
2755          .name = "Radeon GPU Profiler",
2756          .version = "1.15",
2757          .description = "A ground-breaking low-level optimization tool that provides detailed "
2758                         "information on Radeon GPUs.",
2759          .purposes = VK_TOOL_PURPOSE_PROFILING_BIT | VK_TOOL_PURPOSE_TRACING_BIT |
2760                      /* VK_EXT_debug_marker is only exposed if SQTT is enabled. */
2761                      VK_TOOL_PURPOSE_ADDITIONAL_FEATURES_BIT | VK_TOOL_PURPOSE_DEBUG_MARKERS_BIT_EXT,
2762       };
2763       vk_outarray_append_typed(VkPhysicalDeviceToolProperties, &out, t) *t = tool;
2764    }
2765 
2766    if (rmv_enabled) {
2767       VkPhysicalDeviceToolProperties tool = {
2768          .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TOOL_PROPERTIES,
2769          .name = "Radeon Memory Visualizer",
2770          .version = "1.6",
2771          .description = "A tool to allow you to gain a deep understanding of how your application "
2772                         "uses memory for graphics resources.",
2773          .purposes = VK_TOOL_PURPOSE_PROFILING_BIT | VK_TOOL_PURPOSE_TRACING_BIT,
2774       };
2775       vk_outarray_append_typed(VkPhysicalDeviceToolProperties, &out, t) *t = tool;
2776    }
2777 
2778    if (rra_enabled) {
2779       VkPhysicalDeviceToolProperties tool = {
2780          .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TOOL_PROPERTIES,
2781          .name = "Radeon Raytracing Analyzer",
2782          .version = "1.2",
2783          .description = "A tool to investigate the performance of your ray tracing applications and "
2784                         "highlight potential bottlenecks.",
2785          .purposes = VK_TOOL_PURPOSE_PROFILING_BIT | VK_TOOL_PURPOSE_TRACING_BIT,
2786       };
2787       vk_outarray_append_typed(VkPhysicalDeviceToolProperties, &out, t) *t = tool;
2788    }
2789 
2790    return vk_outarray_status(&out);
2791 }
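
/* Editor's sketch, not part of the driver: the standard two-call idiom against the tooling
 * query above. Which tools appear depends on the RGP/RMV/RRA trace modes enabled on the
 * instance. Assumes <vulkan/vulkan.h> and <stdio.h>; names are hypothetical.
 */
#if 0 /* illustrative only */
static void
example_list_active_tools(VkPhysicalDevice phys_dev)
{
   uint32_t count = 0;
   vkGetPhysicalDeviceToolProperties(phys_dev, &count, NULL);

   VkPhysicalDeviceToolProperties tools[4] = {0};
   if (count > 4)
      count = 4; /* RADV reports at most three tools. */
   vkGetPhysicalDeviceToolProperties(phys_dev, &count, tools);

   for (uint32_t i = 0; i < count; i++)
      printf("tool: %s %s\n", tools[i].name, tools[i].version);
}
#endif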
2792 
2793 VKAPI_ATTR VkResult VKAPI_CALL
2794 radv_GetPhysicalDeviceCooperativeMatrixPropertiesKHR(VkPhysicalDevice physicalDevice, uint32_t *pPropertyCount,
2795                                                      VkCooperativeMatrixPropertiesKHR *pProperties)
2796 {
2797    VK_OUTARRAY_MAKE_TYPED(VkCooperativeMatrixPropertiesKHR, out, pProperties, pPropertyCount);
2798 
2799    vk_outarray_append_typed(VkCooperativeMatrixPropertiesKHR, &out, p)
2800    {
2801       *p = (struct VkCooperativeMatrixPropertiesKHR){.sType = VK_STRUCTURE_TYPE_COOPERATIVE_MATRIX_PROPERTIES_KHR,
2802                                                      .MSize = 16,
2803                                                      .NSize = 16,
2804                                                      .KSize = 16,
2805                                                      .AType = VK_COMPONENT_TYPE_FLOAT16_KHR,
2806                                                      .BType = VK_COMPONENT_TYPE_FLOAT16_KHR,
2807                                                      .CType = VK_COMPONENT_TYPE_FLOAT16_KHR,
2808                                                      .ResultType = VK_COMPONENT_TYPE_FLOAT16_KHR,
2809                                                      .saturatingAccumulation = false,
2810                                                      .scope = VK_SCOPE_SUBGROUP_KHR};
2811    }
2812 
2813    vk_outarray_append_typed(VkCooperativeMatrixPropertiesKHR, &out, p)
2814    {
2815       *p = (struct VkCooperativeMatrixPropertiesKHR){.sType = VK_STRUCTURE_TYPE_COOPERATIVE_MATRIX_PROPERTIES_KHR,
2816                                                      .MSize = 16,
2817                                                      .NSize = 16,
2818                                                      .KSize = 16,
2819                                                      .AType = VK_COMPONENT_TYPE_FLOAT16_KHR,
2820                                                      .BType = VK_COMPONENT_TYPE_FLOAT16_KHR,
2821                                                      .CType = VK_COMPONENT_TYPE_FLOAT32_KHR,
2822                                                      .ResultType = VK_COMPONENT_TYPE_FLOAT32_KHR,
2823                                                      .saturatingAccumulation = false,
2824                                                      .scope = VK_SCOPE_SUBGROUP_KHR};
2825    }
2826 
2827    for (unsigned asigned = 0; asigned < 2; asigned++) {
2828       for (unsigned bsigned = 0; bsigned < 2; bsigned++) {
2829          for (unsigned csigned = 0; csigned < 2; csigned++) {
2830             for (unsigned saturate = 0; saturate < 2; saturate++) {
2831                if (!csigned && saturate)
2832                   continue; /* The HW only supports saturation with signed acc. */
2833                vk_outarray_append_typed(VkCooperativeMatrixPropertiesKHR, &out, p)
2834                {
2835                   *p = (struct VkCooperativeMatrixPropertiesKHR){
2836                      .sType = VK_STRUCTURE_TYPE_COOPERATIVE_MATRIX_PROPERTIES_KHR,
2837                      .MSize = 16,
2838                      .NSize = 16,
2839                      .KSize = 16,
2840                      .AType = asigned ? VK_COMPONENT_TYPE_SINT8_KHR : VK_COMPONENT_TYPE_UINT8_KHR,
2841                      .BType = bsigned ? VK_COMPONENT_TYPE_SINT8_KHR : VK_COMPONENT_TYPE_UINT8_KHR,
2842                      .CType = csigned ? VK_COMPONENT_TYPE_SINT32_KHR : VK_COMPONENT_TYPE_UINT32_KHR,
2843                      .ResultType = csigned ? VK_COMPONENT_TYPE_SINT32_KHR : VK_COMPONENT_TYPE_UINT32_KHR,
2844                      .saturatingAccumulation = saturate,
2845                      .scope = VK_SCOPE_SUBGROUP_KHR};
2846                }
2847             }
2848          }
2849       }
2850    }
2851 
2852    return vk_outarray_status(&out);
2853 }
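
/* Editor's note: every cooperative matrix configuration advertised above uses a 16x16x16
 * shape at subgroup scope: fp16 x fp16 with fp16 or fp32 accumulation, plus the signed/
 * unsigned int8 combinations with 32-bit accumulators, where saturating accumulation is only
 * exposed for signed accumulators (14 entries in total).
 */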
2854