xref: /aosp_15_r20/external/mesa3d/src/intel/vulkan/anv_physical_device.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /* Copyright © 2024 Intel Corporation
2  * SPDX-License-Identifier: MIT
3  */
4 
5 #include "anv_private.h"
6 #include "anv_api_version.h"
7 #include "anv_measure.h"
8 
9 #include "i915/anv_device.h"
10 #include "xe/anv_device.h"
11 
12 #include "common/intel_common.h"
13 #include "common/intel_uuid.h"
14 
15 #include "perf/intel_perf.h"
16 
17 #include "git_sha1.h"
18 
19 #include "util/disk_cache.h"
20 #include "util/mesa-sha1.h"
21 
22 #include <xf86drm.h>
23 #include <fcntl.h>
24 #ifdef MAJOR_IN_SYSMACROS
25 #include <sys/sysmacros.h>
26 #endif
27 
/* This is probably far too big but it reflects the max size used for
 * messages in OpenGL's KHR_debug.
 */
31 #define MAX_DEBUG_MESSAGE_LENGTH    4096
32 
33 static void
compiler_debug_log(void * data,UNUSED unsigned * id,const char * fmt,...)34 compiler_debug_log(void *data, UNUSED unsigned *id, const char *fmt, ...)
35 {
36    char str[MAX_DEBUG_MESSAGE_LENGTH];
37    struct anv_device *device = (struct anv_device *)data;
38    UNUSED struct anv_instance *instance = device->physical->instance;
39 
40    va_list args;
41    va_start(args, fmt);
42    (void) vsnprintf(str, MAX_DEBUG_MESSAGE_LENGTH, fmt, args);
43    va_end(args);
44 
45    //vk_logd(VK_LOG_NO_OBJS(&instance->vk), "%s", str);
46 }
47 
48 static void
compiler_perf_log(UNUSED void * data,UNUSED unsigned * id,const char * fmt,...)49 compiler_perf_log(UNUSED void *data, UNUSED unsigned *id, const char *fmt, ...)
50 {
51    va_list args;
52    va_start(args, fmt);
53 
54    if (INTEL_DEBUG(DEBUG_PERF))
55       mesa_logd_v(fmt, args);
56 
57    va_end(args);
58 }
59 
/* Fill *ext with the table of device extensions this physical device
 * advertises.  Most entries are unconditionally true; the rest are gated
 * on HW generation (device->info.ver / verx10), kernel/driver
 * capabilities probed at device-open time, build-time options
 * (ANV_SUPPORT_RT, VIDEO_CODEC_*), environment/driconf opt-ins, or
 * instance settings.
 */
static void
get_device_extensions(const struct anv_physical_device *device,
                      struct vk_device_extension_table *ext)
{
   /* CPU-waitable syncobjs gate the external-fence extensions below. */
   const bool has_syncobj_wait =
      (device->sync_syncobj_type.features & VK_SYNC_FEATURE_CPU_WAIT) != 0;

   /* Ray tracing needs both build-time support and HW support. */
   const bool rt_enabled = ANV_SUPPORT_RT && device->info.has_ray_tracing;

   *ext = (struct vk_device_extension_table) {
      .KHR_8bit_storage                      = true,
      .KHR_16bit_storage                     = !device->instance->no_16bit,
      .KHR_acceleration_structure            = rt_enabled,
      .KHR_bind_memory2                      = true,
      .KHR_buffer_device_address             = true,
      .KHR_calibrated_timestamps             = device->has_reg_timestamp,
      .KHR_compute_shader_derivatives        = true,
      .KHR_copy_commands2                    = true,
      .KHR_cooperative_matrix                = anv_has_cooperative_matrix(device),
      .KHR_create_renderpass2                = true,
      .KHR_dedicated_allocation              = true,
      .KHR_deferred_host_operations          = true,
      .KHR_depth_stencil_resolve             = true,
      .KHR_descriptor_update_template        = true,
      .KHR_device_group                      = true,
      .KHR_draw_indirect_count               = true,
      .KHR_driver_properties                 = true,
      .KHR_dynamic_rendering                 = true,
      .KHR_external_fence                    = has_syncobj_wait,
      .KHR_external_fence_fd                 = has_syncobj_wait,
      .KHR_external_memory                   = true,
      .KHR_external_memory_fd                = true,
      .KHR_external_semaphore                = true,
      .KHR_external_semaphore_fd             = true,
      .KHR_format_feature_flags2             = true,
      .KHR_fragment_shading_rate             = device->info.ver >= 11,
      .KHR_get_memory_requirements2          = true,
      .KHR_global_priority                   = device->max_context_priority >=
                                               VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR,
      .KHR_image_format_list                 = true,
      .KHR_imageless_framebuffer             = true,
#ifdef ANV_USE_WSI_PLATFORM
      .KHR_incremental_present               = true,
#endif
      .KHR_index_type_uint8                  = true,
      .KHR_line_rasterization                = true,
      .KHR_load_store_op_none                = true,
      .KHR_maintenance1                      = true,
      .KHR_maintenance2                      = true,
      .KHR_maintenance3                      = true,
      .KHR_maintenance4                      = true,
      .KHR_maintenance5                      = true,
      .KHR_maintenance6                      = true,
      .KHR_maintenance7                      = true,
      .KHR_map_memory2                       = true,
      .KHR_multiview                         = true,
      /* Needs perf support plus either preemption hold or the
       * no-OA-config debug path; also depends on call-secondary support.
       */
      .KHR_performance_query =
         device->perf &&
         (intel_perf_has_hold_preemption(device->perf) ||
          INTEL_DEBUG(DEBUG_NO_OACONFIG)) &&
         device->use_call_secondary,
      .KHR_pipeline_executable_properties    = true,
      .KHR_pipeline_library                  = true,
      /* Hide these behind dri configs for now since we cannot implement it reliably on
       * all surfaces yet. There is no surface capability query for present wait/id,
       * but the feature is useful enough to hide behind an opt-in mechanism for now.
       * If the instance only enables surface extensions that unconditionally support present wait,
       * we can also expose the extension that way. */
      .KHR_present_id =
         driQueryOptionb(&device->instance->dri_options, "vk_khr_present_wait") ||
         wsi_common_vk_instance_supports_present_wait(&device->instance->vk),
      .KHR_present_wait =
         driQueryOptionb(&device->instance->dri_options, "vk_khr_present_wait") ||
         wsi_common_vk_instance_supports_present_wait(&device->instance->vk),
      .KHR_push_descriptor                   = true,
      .KHR_ray_query                         = rt_enabled,
      .KHR_ray_tracing_maintenance1          = rt_enabled,
      .KHR_ray_tracing_pipeline              = rt_enabled,
      .KHR_ray_tracing_position_fetch        = rt_enabled,
      .KHR_relaxed_block_layout              = true,
      .KHR_sampler_mirror_clamp_to_edge      = true,
      .KHR_sampler_ycbcr_conversion          = true,
      .KHR_separate_depth_stencil_layouts    = true,
      .KHR_shader_atomic_int64               = true,
      .KHR_shader_clock                      = true,
      .KHR_shader_draw_parameters            = true,
      .KHR_shader_expect_assume              = true,
      .KHR_shader_float16_int8               = !device->instance->no_16bit,
      .KHR_shader_float_controls             = true,
      .KHR_shader_float_controls2            = true,
      .KHR_shader_integer_dot_product        = true,
      .KHR_shader_maximal_reconvergence      = true,
      .KHR_shader_non_semantic_info          = true,
      .KHR_shader_quad_control               = true,
      .KHR_shader_relaxed_extended_instruction = true,
      .KHR_shader_subgroup_extended_types    = true,
      .KHR_shader_subgroup_rotate            = true,
      .KHR_shader_subgroup_uniform_control_flow = true,
      .KHR_shader_terminate_invocation       = true,
      .KHR_spirv_1_4                         = true,
      .KHR_storage_buffer_storage_class      = true,
#ifdef ANV_USE_WSI_PLATFORM
      .KHR_swapchain                         = true,
      .KHR_swapchain_mutable_format          = true,
#endif
      .KHR_synchronization2                  = true,
      .KHR_timeline_semaphore                = true,
      .KHR_uniform_buffer_standard_layout    = true,
      .KHR_variable_pointers                 = true,
      .KHR_vertex_attribute_divisor          = true,
      /* Video queue is exposed whenever either decode or encode is on;
       * individual codec extensions are further gated on build-time codec
       * support (and Gfx12+ for HEVC encode).
       */
      .KHR_video_queue                       = device->video_decode_enabled || device->video_encode_enabled,
      .KHR_video_decode_queue                = device->video_decode_enabled,
      .KHR_video_decode_h264                 = VIDEO_CODEC_H264DEC && device->video_decode_enabled,
      .KHR_video_decode_h265                 = VIDEO_CODEC_H265DEC && device->video_decode_enabled,
      .KHR_video_encode_queue                = device->video_encode_enabled,
      .KHR_video_encode_h264                 = VIDEO_CODEC_H264ENC && device->video_encode_enabled,
      .KHR_video_encode_h265                 = device->info.ver >= 12 && VIDEO_CODEC_H265ENC && device->video_encode_enabled,
      .KHR_vulkan_memory_model               = true,
      .KHR_workgroup_memory_explicit_layout  = true,
      .KHR_zero_initialize_workgroup_memory  = true,
      .EXT_4444_formats                      = true,
      .EXT_attachment_feedback_loop_layout   = true,
      .EXT_attachment_feedback_loop_dynamic_state = true,
      .EXT_border_color_swizzle              = true,
      .EXT_buffer_device_address             = true,
      .EXT_calibrated_timestamps             = device->has_reg_timestamp,
      .EXT_color_write_enable                = true,
      .EXT_conditional_rendering             = true,
      .EXT_conservative_rasterization        = true,
      .EXT_custom_border_color               = true,
      .EXT_depth_bias_control                = true,
      .EXT_depth_clamp_zero_one              = true,
      .EXT_depth_clip_control                = true,
      .EXT_depth_range_unrestricted          = device->info.ver >= 20,
      .EXT_depth_clip_enable                 = true,
      .EXT_descriptor_buffer                 = true,
      .EXT_descriptor_indexing               = true,
#ifdef VK_USE_PLATFORM_DISPLAY_KHR
      .EXT_display_control                   = true,
#endif
      .EXT_dynamic_rendering_unused_attachments = true,
      .EXT_extended_dynamic_state            = true,
      .EXT_extended_dynamic_state2           = true,
      .EXT_extended_dynamic_state3           = true,
      .EXT_external_memory_dma_buf           = true,
      .EXT_external_memory_host              = true,
      .EXT_fragment_shader_interlock         = true,
      .EXT_global_priority                   = device->max_context_priority >=
                                               VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR,
      .EXT_global_priority_query             = device->max_context_priority >=
                                               VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR,
      /* Graphics pipeline library can be disabled via environment. */
      .EXT_graphics_pipeline_library         = !debug_get_bool_option("ANV_NO_GPL", false),
      .EXT_host_query_reset                  = true,
      .EXT_image_2d_view_of_3d               = true,
      /* Because of Xe2 PAT selected compression and the Vulkan spec
       * requirement to always return the same memory types for Images with
       * same properties we can't support EXT_image_compression_control on Xe2+
       */
      .EXT_image_compression_control         = device->instance->compression_control_enabled &&
                                               device->info.ver < 20,
      .EXT_image_robustness                  = true,
      .EXT_image_drm_format_modifier         = true,
      .EXT_image_sliced_view_of_3d           = true,
      .EXT_image_view_min_lod                = true,
      .EXT_index_type_uint8                  = true,
      .EXT_inline_uniform_block              = true,
      .EXT_legacy_dithering                  = true,
      .EXT_legacy_vertex_attributes          = true,
      .EXT_line_rasterization                = true,
      .EXT_load_store_op_none                = true,
      .EXT_map_memory_placed                 = device->info.has_mmap_offset,
      /* Enable the extension only if we have support on both the local &
       * system memory
       */
      .EXT_memory_budget                     = (!device->info.has_local_mem ||
                                                device->vram_mappable.available > 0) &&
                                               device->sys.available,
      .EXT_mesh_shader                       = device->info.has_mesh_shading,
      .EXT_mutable_descriptor_type           = true,
      .EXT_nested_command_buffer             = true,
      .EXT_non_seamless_cube_map             = true,
      .EXT_pci_bus_info                      = true,
      .EXT_physical_device_drm               = true,
      .EXT_pipeline_creation_cache_control   = true,
      .EXT_pipeline_creation_feedback        = true,
      .EXT_pipeline_library_group_handles    = rt_enabled,
      .EXT_pipeline_robustness               = true,
      .EXT_post_depth_coverage               = true,
      .EXT_primitives_generated_query        = true,
      .EXT_primitive_topology_list_restart   = true,
      .EXT_private_data                      = true,
      .EXT_provoking_vertex                  = true,
      .EXT_queue_family_foreign              = true,
      .EXT_robustness2                       = true,
      .EXT_sample_locations                  = true,
      .EXT_sampler_filter_minmax             = true,
      .EXT_scalar_block_layout               = true,
      .EXT_separate_stencil_usage            = true,
      .EXT_shader_atomic_float               = true,
      .EXT_shader_atomic_float2              = true,
      .EXT_shader_demote_to_helper_invocation = true,
      .EXT_shader_module_identifier          = true,
      .EXT_shader_replicated_composites      = true,
      .EXT_shader_stencil_export             = true,
      .EXT_shader_subgroup_ballot            = true,
      .EXT_shader_subgroup_vote              = true,
      .EXT_shader_viewport_index_layer       = true,
      .EXT_subgroup_size_control             = true,
#ifdef ANV_USE_WSI_PLATFORM
      .EXT_swapchain_maintenance1            = true,
#endif
      .EXT_texel_buffer_alignment            = true,
      .EXT_tooling_info                      = true,
      .EXT_transform_feedback                = true,
      .EXT_vertex_attribute_divisor          = true,
      .EXT_vertex_input_dynamic_state        = true,
      .EXT_ycbcr_image_arrays                = true,
      .AMD_buffer_marker                     = true,
      .AMD_texture_gather_bias_lod           = device->info.ver >= 20,
#if DETECT_OS_ANDROID
      .ANDROID_external_memory_android_hardware_buffer = true,
      .ANDROID_native_buffer                 = true,
#endif
      .GOOGLE_decorate_string                = true,
      .GOOGLE_hlsl_functionality1            = true,
      .GOOGLE_user_type                      = true,
      /* Unlike KHR_performance_query, the Intel query extension only needs
       * preemption hold (no call-secondary requirement).
       */
      .INTEL_performance_query               = device->perf &&
                                               intel_perf_has_hold_preemption(device->perf),
      .INTEL_shader_integer_functions2       = true,
      .EXT_multi_draw                        = true,
      .NV_compute_shader_derivatives         = true,
      .MESA_image_alignment_control          = true,
      .VALVE_mutable_descriptor_type         = true,
   };
}
295 
296 static void
get_features(const struct anv_physical_device * pdevice,struct vk_features * features)297 get_features(const struct anv_physical_device *pdevice,
298              struct vk_features *features)
299 {
300    struct vk_app_info *app_info = &pdevice->instance->vk.app_info;
301 
302    const bool rt_enabled = ANV_SUPPORT_RT && pdevice->info.has_ray_tracing;
303 
304    const bool mesh_shader =
305       pdevice->vk.supported_extensions.EXT_mesh_shader;
306 
307    const bool has_sparse_or_fake = pdevice->sparse_type != ANV_SPARSE_TYPE_NOT_SUPPORTED;
308 
309    *features = (struct vk_features) {
310       /* Vulkan 1.0 */
311       .robustBufferAccess                       = true,
312       .fullDrawIndexUint32                      = true,
313       .imageCubeArray                           = true,
314       .independentBlend                         = true,
315       .geometryShader                           = true,
316       .tessellationShader                       = true,
317       .sampleRateShading                        = true,
318       .dualSrcBlend                             = true,
319       .logicOp                                  = true,
320       .multiDrawIndirect                        = true,
321       .drawIndirectFirstInstance                = true,
322       .depthClamp                               = true,
323       .depthBiasClamp                           = true,
324       .fillModeNonSolid                         = true,
325       .depthBounds                              = pdevice->info.ver >= 12,
326       .wideLines                                = true,
327       .largePoints                              = true,
328       .alphaToOne                               = true,
329       .multiViewport                            = true,
330       .samplerAnisotropy                        = true,
331       .textureCompressionETC2                   = true,
332       .textureCompressionASTC_LDR               = pdevice->has_astc_ldr ||
333                                                   pdevice->emu_astc_ldr,
334       .textureCompressionBC                     = true,
335       .occlusionQueryPrecise                    = true,
336       .pipelineStatisticsQuery                  = true,
337       .vertexPipelineStoresAndAtomics           = true,
338       .fragmentStoresAndAtomics                 = true,
339       .shaderTessellationAndGeometryPointSize   = true,
340       .shaderImageGatherExtended                = true,
341       .shaderStorageImageExtendedFormats        = true,
342       .shaderStorageImageMultisample            = false,
343       /* Gfx12.5 has all the required format supported in HW for typed
344        * read/writes
345        */
346       .shaderStorageImageReadWithoutFormat      = pdevice->info.verx10 >= 125,
347       .shaderStorageImageWriteWithoutFormat     = true,
348       .shaderUniformBufferArrayDynamicIndexing  = true,
349       .shaderSampledImageArrayDynamicIndexing   = true,
350       .shaderStorageBufferArrayDynamicIndexing  = true,
351       .shaderStorageImageArrayDynamicIndexing   = true,
352       .shaderClipDistance                       = true,
353       .shaderCullDistance                       = true,
354       .shaderFloat64                            = pdevice->info.has_64bit_float ||
355                                                   pdevice->instance->fp64_workaround_enabled,
356       .shaderInt64                              = true,
357       .shaderInt16                              = true,
358       .shaderResourceMinLod                     = true,
359       .shaderResourceResidency                  = has_sparse_or_fake,
360       .sparseBinding                            = has_sparse_or_fake,
361       .sparseResidencyAliased                   = has_sparse_or_fake,
362       .sparseResidencyBuffer                    = has_sparse_or_fake,
363       .sparseResidencyImage2D                   = has_sparse_or_fake,
364       .sparseResidencyImage3D                   = has_sparse_or_fake,
365       .sparseResidency2Samples                  = has_sparse_or_fake,
366       .sparseResidency4Samples                  = has_sparse_or_fake,
367       .sparseResidency8Samples                  = has_sparse_or_fake &&
368                                                   pdevice->info.verx10 != 125,
369       .sparseResidency16Samples                 = has_sparse_or_fake &&
370                                                   pdevice->info.verx10 != 125,
371       .variableMultisampleRate                  = true,
372       .inheritedQueries                         = true,
373 
374       /* Vulkan 1.1 */
375       .storageBuffer16BitAccess            = !pdevice->instance->no_16bit,
376       .uniformAndStorageBuffer16BitAccess  = !pdevice->instance->no_16bit,
377       .storagePushConstant16               = true,
378       .storageInputOutput16                = false,
379       .multiview                           = true,
380       .multiviewGeometryShader             = true,
381       .multiviewTessellationShader         = true,
382       .variablePointersStorageBuffer       = true,
383       .variablePointers                    = true,
384       .protectedMemory                     = pdevice->has_protected_contexts,
385       .samplerYcbcrConversion              = true,
386       .shaderDrawParameters                = true,
387 
388       /* Vulkan 1.2 */
389       .samplerMirrorClampToEdge            = true,
390       .drawIndirectCount                   = true,
391       .storageBuffer8BitAccess             = true,
392       .uniformAndStorageBuffer8BitAccess   = true,
393       .storagePushConstant8                = true,
394       .shaderBufferInt64Atomics            = true,
395       .shaderSharedInt64Atomics            = false,
396       .shaderFloat16                       = !pdevice->instance->no_16bit,
397       .shaderInt8                          = !pdevice->instance->no_16bit,
398 
399       .descriptorIndexing                                 = true,
400       .shaderInputAttachmentArrayDynamicIndexing          = false,
401       .shaderUniformTexelBufferArrayDynamicIndexing       = true,
402       .shaderStorageTexelBufferArrayDynamicIndexing       = true,
403       .shaderUniformBufferArrayNonUniformIndexing         = true,
404       .shaderSampledImageArrayNonUniformIndexing          = true,
405       .shaderStorageBufferArrayNonUniformIndexing         = true,
406       .shaderStorageImageArrayNonUniformIndexing          = true,
407       .shaderInputAttachmentArrayNonUniformIndexing       = false,
408       .shaderUniformTexelBufferArrayNonUniformIndexing    = true,
409       .shaderStorageTexelBufferArrayNonUniformIndexing    = true,
410       .descriptorBindingUniformBufferUpdateAfterBind      = true,
411       .descriptorBindingSampledImageUpdateAfterBind       = true,
412       .descriptorBindingStorageImageUpdateAfterBind       = true,
413       .descriptorBindingStorageBufferUpdateAfterBind      = true,
414       .descriptorBindingUniformTexelBufferUpdateAfterBind = true,
415       .descriptorBindingStorageTexelBufferUpdateAfterBind = true,
416       .descriptorBindingUpdateUnusedWhilePending          = true,
417       .descriptorBindingPartiallyBound                    = true,
418       .descriptorBindingVariableDescriptorCount           = true,
419       .runtimeDescriptorArray                             = true,
420 
421       .samplerFilterMinmax                 = true,
422       .scalarBlockLayout                   = true,
423       .imagelessFramebuffer                = true,
424       .uniformBufferStandardLayout         = true,
425       .shaderSubgroupExtendedTypes         = true,
426       .separateDepthStencilLayouts         = true,
427       .hostQueryReset                      = true,
428       .timelineSemaphore                   = true,
429       .bufferDeviceAddress                 = true,
430       .bufferDeviceAddressCaptureReplay    = true,
431       .bufferDeviceAddressMultiDevice      = false,
432       .vulkanMemoryModel                   = true,
433       .vulkanMemoryModelDeviceScope        = true,
434       .vulkanMemoryModelAvailabilityVisibilityChains = true,
435       .shaderOutputViewportIndex           = true,
436       .shaderOutputLayer                   = true,
437       .subgroupBroadcastDynamicId          = true,
438 
439       /* Vulkan 1.3 */
440       .robustImageAccess = true,
441       .inlineUniformBlock = true,
442       .descriptorBindingInlineUniformBlockUpdateAfterBind = true,
443       .pipelineCreationCacheControl = true,
444       .privateData = true,
445       .shaderDemoteToHelperInvocation = true,
446       .shaderTerminateInvocation = true,
447       .subgroupSizeControl = true,
448       .computeFullSubgroups = true,
449       .synchronization2 = true,
450       .textureCompressionASTC_HDR = false,
451       .shaderZeroInitializeWorkgroupMemory = true,
452       .dynamicRendering = true,
453       .shaderIntegerDotProduct = true,
454       .maintenance4 = true,
455 
456       /* VK_EXT_4444_formats */
457       .formatA4R4G4B4 = true,
458       .formatA4B4G4R4 = false,
459 
460       /* VK_KHR_acceleration_structure */
461       .accelerationStructure = rt_enabled,
462       .accelerationStructureCaptureReplay = false, /* TODO */
463       .accelerationStructureIndirectBuild = false, /* TODO */
464       .accelerationStructureHostCommands = false,
465       .descriptorBindingAccelerationStructureUpdateAfterBind = rt_enabled,
466 
467       /* VK_EXT_border_color_swizzle */
468       .borderColorSwizzle = true,
469       .borderColorSwizzleFromImage = true,
470 
471       /* VK_EXT_color_write_enable */
472       .colorWriteEnable = true,
473 
474       /* VK_EXT_image_2d_view_of_3d  */
475       .image2DViewOf3D = true,
476       .sampler2DViewOf3D = true,
477 
478       /* VK_EXT_image_sliced_view_of_3d */
479       .imageSlicedViewOf3D = true,
480 
481       /* VK_KHR_compute_shader_derivatives */
482       .computeDerivativeGroupQuads = true,
483       .computeDerivativeGroupLinear = true,
484 
485       /* VK_EXT_conditional_rendering */
486       .conditionalRendering = true,
487       .inheritedConditionalRendering = true,
488 
489       /* VK_EXT_custom_border_color */
490       .customBorderColors = true,
491       .customBorderColorWithoutFormat = true,
492 
493       /* VK_EXT_depth_clamp_zero_one */
494       .depthClampZeroOne = true,
495 
496       /* VK_EXT_depth_clip_enable */
497       .depthClipEnable = true,
498 
499       /* VK_EXT_fragment_shader_interlock */
500       .fragmentShaderSampleInterlock = true,
501       .fragmentShaderPixelInterlock = true,
502       .fragmentShaderShadingRateInterlock = false,
503 
504       /* VK_EXT_global_priority_query */
505       .globalPriorityQuery = true,
506 
507       /* VK_EXT_graphics_pipeline_library */
508       .graphicsPipelineLibrary =
509          pdevice->vk.supported_extensions.EXT_graphics_pipeline_library,
510 
511       /* VK_KHR_fragment_shading_rate */
512       .pipelineFragmentShadingRate = true,
513       .primitiveFragmentShadingRate =
514          pdevice->info.has_coarse_pixel_primitive_and_cb,
515       .attachmentFragmentShadingRate =
516          pdevice->info.has_coarse_pixel_primitive_and_cb,
517 
518       /* VK_EXT_image_view_min_lod */
519       .minLod = true,
520 
521       /* VK_EXT_index_type_uint8 */
522       .indexTypeUint8 = true,
523 
524       /* VK_EXT_line_rasterization */
525       /* Rectangular lines must use the strict algorithm, which is not
526        * supported for wide lines prior to ICL.  See rasterization_mode for
527        * details and how the HW states are programmed.
528        */
529       .rectangularLines = pdevice->info.ver >= 10,
530       .bresenhamLines = true,
531       /* Support for Smooth lines with MSAA was removed on gfx11.  From the
532        * BSpec section "Multisample ModesState" table for "AA Line Support
533        * Requirements":
534        *
535        *    GFX10:BUG:######## 	NUM_MULTISAMPLES == 1
536        *
537        * Fortunately, this isn't a case most people care about.
538        */
539       .smoothLines = pdevice->info.ver < 10,
540       .stippledRectangularLines = false,
541       .stippledBresenhamLines = true,
542       .stippledSmoothLines = false,
543 
544       /* VK_NV_mesh_shader */
545       .taskShaderNV = false,
546       .meshShaderNV = false,
547 
548       /* VK_EXT_mesh_shader */
549       .taskShader = mesh_shader,
550       .meshShader = mesh_shader,
551       .multiviewMeshShader = false,
552       .primitiveFragmentShadingRateMeshShader = mesh_shader,
553       .meshShaderQueries = mesh_shader,
554 
555       /* VK_EXT_mutable_descriptor_type */
556       .mutableDescriptorType = true,
557 
558       /* VK_KHR_performance_query */
559       .performanceCounterQueryPools = true,
560       /* HW only supports a single configuration at a time. */
561       .performanceCounterMultipleQueryPools = false,
562 
563       /* VK_KHR_pipeline_executable_properties */
564       .pipelineExecutableInfo = true,
565 
566       /* VK_EXT_primitives_generated_query */
567       .primitivesGeneratedQuery = true,
568       .primitivesGeneratedQueryWithRasterizerDiscard = false,
569       .primitivesGeneratedQueryWithNonZeroStreams = false,
570 
571       /* VK_EXT_pipeline_library_group_handles */
572       .pipelineLibraryGroupHandles = true,
573 
574       /* VK_EXT_provoking_vertex */
575       .provokingVertexLast = true,
576       .transformFeedbackPreservesProvokingVertex = true,
577 
578       /* VK_KHR_ray_query */
579       .rayQuery = rt_enabled,
580 
581       /* VK_KHR_ray_tracing_maintenance1 */
582       .rayTracingMaintenance1 = rt_enabled,
583       .rayTracingPipelineTraceRaysIndirect2 = rt_enabled,
584 
585       /* VK_KHR_ray_tracing_pipeline */
586       .rayTracingPipeline = rt_enabled,
587       .rayTracingPipelineShaderGroupHandleCaptureReplay = false,
588       .rayTracingPipelineShaderGroupHandleCaptureReplayMixed = false,
589       .rayTracingPipelineTraceRaysIndirect = rt_enabled,
590       .rayTraversalPrimitiveCulling = rt_enabled,
591 
592       /* VK_EXT_robustness2 */
593       .robustBufferAccess2 = true,
594       .robustImageAccess2 = true,
595       .nullDescriptor = true,
596 
597       /* VK_EXT_shader_replicated_composites */
598       .shaderReplicatedComposites = true,
599 
600       /* VK_EXT_shader_atomic_float */
601       .shaderBufferFloat32Atomics =    true,
602       .shaderBufferFloat32AtomicAdd =  pdevice->info.has_lsc,
603       .shaderBufferFloat64Atomics =
604          pdevice->info.has_64bit_float && pdevice->info.has_lsc,
605       .shaderBufferFloat64AtomicAdd =  pdevice->info.ver >= 20,
606       .shaderSharedFloat32Atomics =    true,
607       .shaderSharedFloat32AtomicAdd =  false,
608       .shaderSharedFloat64Atomics =    false,
609       .shaderSharedFloat64AtomicAdd =  false,
610       .shaderImageFloat32Atomics =     true,
611       .shaderImageFloat32AtomicAdd =   pdevice->info.ver >= 20,
612       .sparseImageFloat32Atomics =     false,
613       .sparseImageFloat32AtomicAdd =   false,
614 
615       /* VK_EXT_shader_atomic_float2 */
616       .shaderBufferFloat16Atomics      = pdevice->info.has_lsc,
617       .shaderBufferFloat16AtomicAdd    = false,
618       .shaderBufferFloat16AtomicMinMax = pdevice->info.has_lsc,
619       .shaderBufferFloat32AtomicMinMax = true,
620       .shaderBufferFloat64AtomicMinMax =
621          pdevice->info.has_64bit_float && pdevice->info.has_lsc &&
622          pdevice->info.ver < 20,
623       .shaderSharedFloat16Atomics      = pdevice->info.has_lsc,
624       .shaderSharedFloat16AtomicAdd    = false,
625       .shaderSharedFloat16AtomicMinMax = pdevice->info.has_lsc,
626       .shaderSharedFloat32AtomicMinMax = true,
627       .shaderSharedFloat64AtomicMinMax = false,
628       .shaderImageFloat32AtomicMinMax  = false,
629       .sparseImageFloat32AtomicMinMax  = false,
630 
631       /* VK_KHR_shader_clock */
632       .shaderSubgroupClock = true,
633       .shaderDeviceClock = false,
634 
635       /* VK_INTEL_shader_integer_functions2 */
636       .shaderIntegerFunctions2 = true,
637 
638       /* VK_EXT_shader_module_identifier */
639       .shaderModuleIdentifier = true,
640 
641       /* VK_KHR_shader_subgroup_uniform_control_flow */
642       .shaderSubgroupUniformControlFlow = true,
643 
644       /* VK_EXT_texel_buffer_alignment */
645       .texelBufferAlignment = true,
646 
647       /* VK_EXT_transform_feedback */
648       .transformFeedback = true,
649       .geometryStreams = true,
650 
651       /* VK_KHR_vertex_attribute_divisor */
652       .vertexAttributeInstanceRateDivisor = true,
653       .vertexAttributeInstanceRateZeroDivisor = true,
654 
655       /* VK_KHR_workgroup_memory_explicit_layout */
656       .workgroupMemoryExplicitLayout = true,
657       .workgroupMemoryExplicitLayoutScalarBlockLayout = true,
658       .workgroupMemoryExplicitLayout8BitAccess = true,
659       .workgroupMemoryExplicitLayout16BitAccess = true,
660 
661       /* VK_EXT_ycbcr_image_arrays */
662       .ycbcrImageArrays = true,
663 
664       /* VK_EXT_extended_dynamic_state */
665       .extendedDynamicState = true,
666 
667       /* VK_EXT_extended_dynamic_state2 */
668       .extendedDynamicState2 = true,
669       .extendedDynamicState2LogicOp = true,
670       .extendedDynamicState2PatchControlPoints = true,
671 
672       /* VK_EXT_extended_dynamic_state3 */
673       .extendedDynamicState3PolygonMode = true,
674       .extendedDynamicState3TessellationDomainOrigin = true,
675       .extendedDynamicState3RasterizationStream = true,
676       .extendedDynamicState3LineStippleEnable = true,
677       .extendedDynamicState3LineRasterizationMode = true,
678       .extendedDynamicState3LogicOpEnable = true,
679       .extendedDynamicState3AlphaToOneEnable = true,
680       .extendedDynamicState3DepthClipEnable = true,
681       .extendedDynamicState3DepthClampEnable = true,
682       .extendedDynamicState3DepthClipNegativeOneToOne = true,
683       .extendedDynamicState3ProvokingVertexMode = true,
684       .extendedDynamicState3ColorBlendEnable = true,
685       .extendedDynamicState3ColorWriteMask = true,
686       .extendedDynamicState3ColorBlendEquation = true,
687       .extendedDynamicState3SampleLocationsEnable = true,
688       .extendedDynamicState3SampleMask = true,
689       .extendedDynamicState3ConservativeRasterizationMode = true,
690       .extendedDynamicState3AlphaToCoverageEnable = true,
691       .extendedDynamicState3RasterizationSamples = true,
692 
693       .extendedDynamicState3ExtraPrimitiveOverestimationSize = false,
694       .extendedDynamicState3ViewportWScalingEnable = false,
695       .extendedDynamicState3ViewportSwizzle = false,
696       .extendedDynamicState3ShadingRateImageEnable = false,
697       .extendedDynamicState3CoverageToColorEnable = false,
698       .extendedDynamicState3CoverageToColorLocation = false,
699       .extendedDynamicState3CoverageModulationMode = false,
700       .extendedDynamicState3CoverageModulationTableEnable = false,
701       .extendedDynamicState3CoverageModulationTable = false,
702       .extendedDynamicState3CoverageReductionMode = false,
703       .extendedDynamicState3RepresentativeFragmentTestEnable = false,
704       .extendedDynamicState3ColorBlendAdvanced = false,
705 
706       /* VK_EXT_multi_draw */
707       .multiDraw = true,
708 
709       /* VK_EXT_non_seamless_cube_map */
710       .nonSeamlessCubeMap = true,
711 
712       /* VK_EXT_primitive_topology_list_restart */
713       .primitiveTopologyListRestart = true,
714       .primitiveTopologyPatchListRestart = true,
715 
716       /* VK_EXT_depth_clip_control */
717       .depthClipControl = true,
718 
719       /* VK_KHR_present_id */
720       .presentId = pdevice->vk.supported_extensions.KHR_present_id,
721 
722       /* VK_KHR_present_wait */
723       .presentWait = pdevice->vk.supported_extensions.KHR_present_wait,
724 
725       /* VK_EXT_vertex_input_dynamic_state */
726       .vertexInputDynamicState = true,
727 
728       /* VK_KHR_ray_tracing_position_fetch */
729       .rayTracingPositionFetch = rt_enabled,
730 
731       /* VK_EXT_dynamic_rendering_unused_attachments */
732       .dynamicRenderingUnusedAttachments = true,
733 
734       /* VK_EXT_depth_bias_control */
735       .depthBiasControl = true,
736       .floatRepresentation = true,
737       .leastRepresentableValueForceUnormRepresentation = false,
738       .depthBiasExact = true,
739 
740       /* VK_EXT_pipeline_robustness */
741       .pipelineRobustness = true,
742 
743       /* VK_KHR_maintenance5 */
744       .maintenance5 = true,
745 
746       /* VK_KHR_maintenance6 */
747       .maintenance6 = true,
748 
749       /* VK_EXT_nested_command_buffer */
750       .nestedCommandBuffer = true,
751       .nestedCommandBufferRendering = true,
752       .nestedCommandBufferSimultaneousUse = false,
753 
754       /* VK_KHR_cooperative_matrix */
755       .cooperativeMatrix = anv_has_cooperative_matrix(pdevice),
756 
757       /* VK_KHR_shader_maximal_reconvergence */
758       .shaderMaximalReconvergence = true,
759 
760       /* VK_KHR_shader_subgroup_rotate */
761       .shaderSubgroupRotate = true,
762       .shaderSubgroupRotateClustered = true,
763 
764       /* VK_EXT_attachment_feedback_loop_layout */
765       .attachmentFeedbackLoopLayout = true,
766 
767       /* VK_EXT_attachment_feedback_loop_dynamic_state */
768       .attachmentFeedbackLoopDynamicState = true,
769 
770       /* VK_KHR_shader_expect_assume */
771       .shaderExpectAssume = true,
772 
773       /* VK_EXT_descriptor_buffer */
774       .descriptorBuffer = true,
775       .descriptorBufferCaptureReplay = true,
776       .descriptorBufferImageLayoutIgnored = false,
777       .descriptorBufferPushDescriptors = true,
778 
779       /* VK_EXT_map_memory_placed */
780       .memoryMapPlaced = true,
781       .memoryMapRangePlaced = false,
782       .memoryUnmapReserve = true,
783 
784       /* VK_KHR_shader_quad_control */
785       .shaderQuadControl = true,
786 
787 #ifdef ANV_USE_WSI_PLATFORM
788       /* VK_EXT_swapchain_maintenance1 */
789       .swapchainMaintenance1 = true,
790 #endif
791 
792       /* VK_EXT_image_compression_control */
793       .imageCompressionControl = true,
794 
795       /* VK_KHR_shader_float_controls2 */
796       .shaderFloatControls2 = true,
797 
798       /* VK_EXT_legacy_vertex_attributes */
799       .legacyVertexAttributes = true,
800 
801       /* VK_EXT_legacy_dithering */
802       .legacyDithering = true,
803 
804       /* VK_MESA_image_alignment_control */
805       .imageAlignmentControl = true,
806 
807       /* VK_KHR_maintenance7 */
808       .maintenance7 = true,
809 
810       /* VK_KHR_shader_relaxed_extended_instruction */
811       .shaderRelaxedExtendedInstruction = true,
812    };
813 
814    /* The new DOOM and Wolfenstein games require depthBounds without
815     * checking for it.  They seem to run fine without it so just claim it's
816     * there and accept the consequences.
817     */
818    if (app_info->engine_name && strcmp(app_info->engine_name, "idTech") == 0)
819       features->depthBounds = true;
820 }
821 
822 #define MAX_PER_STAGE_DESCRIPTOR_UNIFORM_BUFFERS   64
823 
824 #define MAX_PER_STAGE_DESCRIPTOR_INPUT_ATTACHMENTS 64
825 #define MAX_DESCRIPTOR_SET_INPUT_ATTACHMENTS       256
826 
827 static VkDeviceSize
anx_get_physical_device_max_heap_size(const struct anv_physical_device * pdevice)828 anx_get_physical_device_max_heap_size(const struct anv_physical_device *pdevice)
829 {
830    VkDeviceSize ret = 0;
831 
832    for (uint32_t i = 0; i < pdevice->memory.heap_count; i++) {
833       if (pdevice->memory.heaps[i].size > ret)
834          ret = pdevice->memory.heaps[i].size;
835    }
836 
837    return ret;
838 }
839 
840 static void
get_properties_1_1(const struct anv_physical_device * pdevice,struct vk_properties * p)841 get_properties_1_1(const struct anv_physical_device *pdevice,
842                    struct vk_properties *p)
843 {
844    memcpy(p->deviceUUID, pdevice->device_uuid, VK_UUID_SIZE);
845    memcpy(p->driverUUID, pdevice->driver_uuid, VK_UUID_SIZE);
846    memset(p->deviceLUID, 0, VK_LUID_SIZE);
847    p->deviceNodeMask = 0;
848    p->deviceLUIDValid = false;
849 
850    p->subgroupSize = BRW_SUBGROUP_SIZE;
851    VkShaderStageFlags scalar_stages = 0;
852    for (unsigned stage = 0; stage < MESA_SHADER_STAGES; stage++) {
853       scalar_stages |= mesa_to_vk_shader_stage(stage);
854    }
855    if (pdevice->vk.supported_extensions.KHR_ray_tracing_pipeline) {
856       scalar_stages |= VK_SHADER_STAGE_RAYGEN_BIT_KHR |
857                        VK_SHADER_STAGE_ANY_HIT_BIT_KHR |
858                        VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR |
859                        VK_SHADER_STAGE_MISS_BIT_KHR |
860                        VK_SHADER_STAGE_INTERSECTION_BIT_KHR |
861                        VK_SHADER_STAGE_CALLABLE_BIT_KHR;
862    }
863    if (pdevice->vk.supported_extensions.EXT_mesh_shader) {
864       scalar_stages |= VK_SHADER_STAGE_TASK_BIT_EXT |
865                        VK_SHADER_STAGE_MESH_BIT_EXT;
866    }
867    p->subgroupSupportedStages = scalar_stages;
868    p->subgroupSupportedOperations = VK_SUBGROUP_FEATURE_BASIC_BIT |
869                                     VK_SUBGROUP_FEATURE_VOTE_BIT |
870                                     VK_SUBGROUP_FEATURE_BALLOT_BIT |
871                                     VK_SUBGROUP_FEATURE_SHUFFLE_BIT |
872                                     VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT |
873                                     VK_SUBGROUP_FEATURE_QUAD_BIT |
874                                     VK_SUBGROUP_FEATURE_ARITHMETIC_BIT |
875                                     VK_SUBGROUP_FEATURE_CLUSTERED_BIT |
876                                     VK_SUBGROUP_FEATURE_ROTATE_BIT_KHR |
877                                     VK_SUBGROUP_FEATURE_ROTATE_CLUSTERED_BIT_KHR;
878    p->subgroupQuadOperationsInAllStages = true;
879 
880    p->pointClippingBehavior      = VK_POINT_CLIPPING_BEHAVIOR_USER_CLIP_PLANES_ONLY;
881    p->maxMultiviewViewCount      = 16;
882    p->maxMultiviewInstanceIndex  = UINT32_MAX / 16;
883    /* Our protected implementation is a memory encryption mechanism, it
884     * shouldn't page fault, but it hangs the HW so in terms of user visibility
885     * it's similar to a fault.
886     */
887    p->protectedNoFault           = false;
888    /* This value doesn't matter for us today as our per-stage descriptors are
889     * the real limit.
890     */
891    p->maxPerSetDescriptors       = 1024;
892 
893    for (uint32_t i = 0; i < pdevice->memory.heap_count; i++) {
894       p->maxMemoryAllocationSize = MAX2(p->maxMemoryAllocationSize,
895                                         pdevice->memory.heaps[i].size);
896    }
897 }
898 
/* Fill out the Vulkan 1.2 core properties of the physical device:
 * driver identification, float-controls behavior, descriptor-indexing
 * limits, and depth/stencil resolve capabilities.
 */
static void
get_properties_1_2(const struct anv_physical_device *pdevice,
                   struct vk_properties *p)
{
   p->driverID = VK_DRIVER_ID_INTEL_OPEN_SOURCE_MESA;
   /* Zero the whole buffers first so the unused tail bytes are
    * deterministic, then write the NUL-terminated strings.
    */
   memset(p->driverName, 0, sizeof(p->driverName));
   snprintf(p->driverName, VK_MAX_DRIVER_NAME_SIZE,
            "Intel open-source Mesa driver");
   memset(p->driverInfo, 0, sizeof(p->driverInfo));
   snprintf(p->driverInfo, VK_MAX_DRIVER_INFO_SIZE,
            "Mesa " PACKAGE_VERSION MESA_GIT_SHA1);

   /* CTS version this driver claims conformance against. */
   p->conformanceVersion = (VkConformanceVersion) {
      .major = 1,
      .minor = 3,
      .subminor = 6,
      .patch = 0,
   };

   p->denormBehaviorIndependence =
      VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL;
   p->roundingModeIndependence =
      VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_NONE;

   /* Broadwell does not support HF denorms and there are restrictions on
    * other gens. According to Kabylake's PRM:
    *
    * "math - Extended Math Function
    * [...]
    * Restriction : Half-float denorms are always retained."
    */
   p->shaderDenormFlushToZeroFloat16         = false;
   p->shaderDenormPreserveFloat16            = true;
   p->shaderRoundingModeRTEFloat16           = true;
   p->shaderRoundingModeRTZFloat16           = true;
   p->shaderSignedZeroInfNanPreserveFloat16  = true;

   p->shaderDenormFlushToZeroFloat32         = true;
   p->shaderDenormPreserveFloat32            = true;
   p->shaderRoundingModeRTEFloat32           = true;
   p->shaderRoundingModeRTZFloat32           = true;
   p->shaderSignedZeroInfNanPreserveFloat32  = true;

   p->shaderDenormFlushToZeroFloat64         = true;
   p->shaderDenormPreserveFloat64            = true;
   p->shaderRoundingModeRTEFloat64           = true;
   p->shaderRoundingModeRTZFloat64           = true;
   p->shaderSignedZeroInfNanPreserveFloat64  = true;

   /* It's a bit hard to exactly map our implementation to the limits
    * described by Vulkan.  The bindless surface handle in the extended
    * message descriptors is 20 bits and it's an index into the table of
    * RENDER_SURFACE_STATE structs that starts at bindless surface base
    * address.  This means that we can have at most 1M surface states
    * allocated at any given time.  Since most image views take two
    * descriptors, this means we have a limit of about 500K image views.
    *
    * However, since we allocate surface states at vkCreateImageView time,
    * this means our limit is actually something on the order of 500K image
    * views allocated at any time.  The actual limit described by Vulkan, on
    * the other hand, is a limit of how many you can have in a descriptor set.
    * Assuming anyone using 1M descriptors will be using the same image view
    * twice a bunch of times (or a bunch of null descriptors), we can safely
    * advertise a larger limit here.
    */
   const unsigned max_bindless_views =
      anv_physical_device_bindless_heap_size(pdevice, false) / ANV_SURFACE_STATE_SIZE;
   p->maxUpdateAfterBindDescriptorsInAllPools            = max_bindless_views;
   p->shaderUniformBufferArrayNonUniformIndexingNative   = false;
   p->shaderSampledImageArrayNonUniformIndexingNative    = false;
   p->shaderStorageBufferArrayNonUniformIndexingNative   = true;
   p->shaderStorageImageArrayNonUniformIndexingNative    = false;
   p->shaderInputAttachmentArrayNonUniformIndexingNative = false;
   p->robustBufferAccessUpdateAfterBind                  = true;
   p->quadDivergentImplicitLod                           = false;
   p->maxPerStageDescriptorUpdateAfterBindSamplers       = max_bindless_views;
   p->maxPerStageDescriptorUpdateAfterBindUniformBuffers = MAX_PER_STAGE_DESCRIPTOR_UNIFORM_BUFFERS;
   p->maxPerStageDescriptorUpdateAfterBindStorageBuffers = UINT32_MAX;
   p->maxPerStageDescriptorUpdateAfterBindSampledImages  = max_bindless_views;
   p->maxPerStageDescriptorUpdateAfterBindStorageImages  = max_bindless_views;
   p->maxPerStageDescriptorUpdateAfterBindInputAttachments = MAX_PER_STAGE_DESCRIPTOR_INPUT_ATTACHMENTS;
   p->maxPerStageUpdateAfterBindResources                = UINT32_MAX;
   p->maxDescriptorSetUpdateAfterBindSamplers            = max_bindless_views;
   /* 6 == number of shader stages that can see a descriptor set. */
   p->maxDescriptorSetUpdateAfterBindUniformBuffers      = 6 * MAX_PER_STAGE_DESCRIPTOR_UNIFORM_BUFFERS;
   p->maxDescriptorSetUpdateAfterBindUniformBuffersDynamic = MAX_DYNAMIC_BUFFERS / 2;
   p->maxDescriptorSetUpdateAfterBindStorageBuffers      = UINT32_MAX;
   p->maxDescriptorSetUpdateAfterBindStorageBuffersDynamic = MAX_DYNAMIC_BUFFERS / 2;
   p->maxDescriptorSetUpdateAfterBindSampledImages       = max_bindless_views;
   p->maxDescriptorSetUpdateAfterBindStorageImages       = max_bindless_views;
   p->maxDescriptorSetUpdateAfterBindInputAttachments    = MAX_DESCRIPTOR_SET_INPUT_ATTACHMENTS;

   /* We support all of the depth resolve modes */
   p->supportedDepthResolveModes    = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT |
                                      VK_RESOLVE_MODE_AVERAGE_BIT |
                                      VK_RESOLVE_MODE_MIN_BIT |
                                      VK_RESOLVE_MODE_MAX_BIT;
   /* Average doesn't make sense for stencil so we don't support that */
   p->supportedStencilResolveModes  = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT |
                                      VK_RESOLVE_MODE_MIN_BIT |
                                      VK_RESOLVE_MODE_MAX_BIT;
   p->independentResolveNone  = true;
   p->independentResolve      = true;

   p->filterMinmaxSingleComponentFormats  = true;
   p->filterMinmaxImageComponentMapping   = true;

   p->maxTimelineSemaphoreValueDifference = UINT64_MAX;

   p->framebufferIntegerColorSampleCounts =
      isl_device_get_sample_counts(&pdevice->isl_dev);
}
1010 
1011 static void
get_properties_1_3(const struct anv_physical_device * pdevice,struct vk_properties * p)1012 get_properties_1_3(const struct anv_physical_device *pdevice,
1013                    struct vk_properties *p)
1014 {
1015    if (pdevice->info.ver >= 20)
1016       p->minSubgroupSize = 16;
1017    else
1018       p->minSubgroupSize = 8;
1019    p->maxSubgroupSize = 32;
1020    p->maxComputeWorkgroupSubgroups = pdevice->info.max_cs_workgroup_threads;
1021    p->requiredSubgroupSizeStages = VK_SHADER_STAGE_COMPUTE_BIT |
1022                                    VK_SHADER_STAGE_TASK_BIT_EXT |
1023                                    VK_SHADER_STAGE_MESH_BIT_EXT;
1024 
1025    p->maxInlineUniformBlockSize = MAX_INLINE_UNIFORM_BLOCK_SIZE;
1026    p->maxPerStageDescriptorInlineUniformBlocks =
1027       MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS;
1028    p->maxPerStageDescriptorUpdateAfterBindInlineUniformBlocks =
1029       MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS;
1030    p->maxDescriptorSetInlineUniformBlocks =
1031       MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS;
1032    p->maxDescriptorSetUpdateAfterBindInlineUniformBlocks =
1033       MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS;
1034    p->maxInlineUniformTotalSize = UINT16_MAX;
1035 
1036    p->integerDotProduct8BitUnsignedAccelerated = false;
1037    p->integerDotProduct8BitSignedAccelerated = false;
1038    p->integerDotProduct8BitMixedSignednessAccelerated = false;
1039    p->integerDotProduct4x8BitPackedUnsignedAccelerated = pdevice->info.ver >= 12;
1040    p->integerDotProduct4x8BitPackedSignedAccelerated = pdevice->info.ver >= 12;
1041    p->integerDotProduct4x8BitPackedMixedSignednessAccelerated = pdevice->info.ver >= 12;
1042    p->integerDotProduct16BitUnsignedAccelerated = false;
1043    p->integerDotProduct16BitSignedAccelerated = false;
1044    p->integerDotProduct16BitMixedSignednessAccelerated = false;
1045    p->integerDotProduct32BitUnsignedAccelerated = false;
1046    p->integerDotProduct32BitSignedAccelerated = false;
1047    p->integerDotProduct32BitMixedSignednessAccelerated = false;
1048    p->integerDotProduct64BitUnsignedAccelerated = false;
1049    p->integerDotProduct64BitSignedAccelerated = false;
1050    p->integerDotProduct64BitMixedSignednessAccelerated = false;
1051    p->integerDotProductAccumulatingSaturating8BitUnsignedAccelerated = false;
1052    p->integerDotProductAccumulatingSaturating8BitSignedAccelerated = false;
1053    p->integerDotProductAccumulatingSaturating8BitMixedSignednessAccelerated = false;
1054    p->integerDotProductAccumulatingSaturating4x8BitPackedUnsignedAccelerated = pdevice->info.ver >= 12;
1055    p->integerDotProductAccumulatingSaturating4x8BitPackedSignedAccelerated = pdevice->info.ver >= 12;
1056    p->integerDotProductAccumulatingSaturating4x8BitPackedMixedSignednessAccelerated = pdevice->info.ver >= 12;
1057    p->integerDotProductAccumulatingSaturating16BitUnsignedAccelerated = false;
1058    p->integerDotProductAccumulatingSaturating16BitSignedAccelerated = false;
1059    p->integerDotProductAccumulatingSaturating16BitMixedSignednessAccelerated = false;
1060    p->integerDotProductAccumulatingSaturating32BitUnsignedAccelerated = false;
1061    p->integerDotProductAccumulatingSaturating32BitSignedAccelerated = false;
1062    p->integerDotProductAccumulatingSaturating32BitMixedSignednessAccelerated = false;
1063    p->integerDotProductAccumulatingSaturating64BitUnsignedAccelerated = false;
1064    p->integerDotProductAccumulatingSaturating64BitSignedAccelerated = false;
1065    p->integerDotProductAccumulatingSaturating64BitMixedSignednessAccelerated = false;
1066 
1067    /* From the SKL PRM Vol. 2d, docs for RENDER_SURFACE_STATE::Surface
1068     * Base Address:
1069     *
1070     *    "For SURFTYPE_BUFFER non-rendertarget surfaces, this field
1071     *    specifies the base address of the first element of the surface,
1072     *    computed in software by adding the surface base address to the
1073     *    byte offset of the element in the buffer. The base address must
1074     *    be aligned to element size."
1075     *
1076     * The typed dataport messages require that things be texel aligned.
1077     * Otherwise, we may just load/store the wrong data or, in the worst
1078     * case, there may be hangs.
1079     */
1080    p->storageTexelBufferOffsetAlignmentBytes = 16;
1081    p->storageTexelBufferOffsetSingleTexelAlignment = true;
1082 
1083    /* The sampler, however, is much more forgiving and it can handle
1084     * arbitrary byte alignment for linear and buffer surfaces.  It's
1085     * hard to find a good PRM citation for this but years of empirical
1086     * experience demonstrate that this is true.
1087     */
1088    p->uniformTexelBufferOffsetAlignmentBytes = 1;
1089    p->uniformTexelBufferOffsetSingleTexelAlignment = true;
1090 
1091    p->maxBufferSize = pdevice->isl_dev.max_buffer_size;
1092 }
1093 
1094 static void
get_properties(const struct anv_physical_device * pdevice,struct vk_properties * props)1095 get_properties(const struct anv_physical_device *pdevice,
1096                struct vk_properties *props)
1097 {
1098 
1099       const struct intel_device_info *devinfo = &pdevice->info;
1100 
1101    const uint32_t max_ssbos = UINT16_MAX;
1102    const uint32_t max_textures = UINT16_MAX;
1103    const uint32_t max_samplers = UINT16_MAX;
1104    const uint32_t max_images = UINT16_MAX;
1105    const VkDeviceSize max_heap_size = anx_get_physical_device_max_heap_size(pdevice);
1106 
1107    /* Claim a high per-stage limit since we have bindless. */
1108    const uint32_t max_per_stage = UINT32_MAX;
1109 
1110    const uint32_t max_workgroup_size =
1111       MIN2(1024, 32 * devinfo->max_cs_workgroup_threads);
1112 
1113    const bool has_sparse_or_fake = pdevice->sparse_type != ANV_SPARSE_TYPE_NOT_SUPPORTED;
1114    const bool sparse_uses_trtt = pdevice->sparse_type == ANV_SPARSE_TYPE_TRTT;
1115 
1116    uint64_t sparse_addr_space_size =
1117       !has_sparse_or_fake ? 0 :
1118       sparse_uses_trtt ? pdevice->va.trtt.size :
1119       pdevice->va.high_heap.size;
1120 
1121    VkSampleCountFlags sample_counts =
1122       isl_device_get_sample_counts(&pdevice->isl_dev);
1123 
1124 #if DETECT_OS_ANDROID
1125    /* Used to fill struct VkPhysicalDevicePresentationPropertiesANDROID */
1126    uint64_t front_rendering_usage = 0;
1127    struct u_gralloc *gralloc = u_gralloc_create(U_GRALLOC_TYPE_AUTO);
1128    if (gralloc != NULL) {
1129       u_gralloc_get_front_rendering_usage(gralloc, &front_rendering_usage);
1130       u_gralloc_destroy(&gralloc);
1131    }
1132 #endif /* DETECT_OS_ANDROID */
1133 
1134    *props = (struct vk_properties) {
1135       .apiVersion = ANV_API_VERSION,
1136       .driverVersion = vk_get_driver_version(),
1137       .vendorID = pdevice->instance->force_vk_vendor != 0 ?
1138                   pdevice->instance->force_vk_vendor : 0x8086,
1139       .deviceID = pdevice->info.pci_device_id,
1140       .deviceType = pdevice->info.has_local_mem ?
1141                     VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU :
1142                     VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU,
1143 
1144       /* Limits: */
1145       .maxImageDimension1D                      = (1 << 14),
1146       .maxImageDimension2D                      = (1 << 14),
1147       .maxImageDimension3D                      = (1 << 11),
1148       .maxImageDimensionCube                    = (1 << 14),
1149       .maxImageArrayLayers                      = (1 << 11),
1150       .maxTexelBufferElements                   = 128 * 1024 * 1024,
1151       .maxUniformBufferRange                    = pdevice->compiler->indirect_ubos_use_sampler ? (1u << 27) : (1u << 30),
1152       .maxStorageBufferRange                    = MIN3(pdevice->isl_dev.max_buffer_size, max_heap_size, UINT32_MAX),
1153       .maxPushConstantsSize                     = MAX_PUSH_CONSTANTS_SIZE,
1154       .maxMemoryAllocationCount                 = UINT32_MAX,
1155       .maxSamplerAllocationCount                = 64 * 1024,
1156       .bufferImageGranularity                   = 1,
1157       .sparseAddressSpaceSize                   = sparse_addr_space_size,
1158       .maxBoundDescriptorSets                   = MAX_SETS,
1159       .maxPerStageDescriptorSamplers            = max_samplers,
1160       .maxPerStageDescriptorUniformBuffers      = MAX_PER_STAGE_DESCRIPTOR_UNIFORM_BUFFERS,
1161       .maxPerStageDescriptorStorageBuffers      = max_ssbos,
1162       .maxPerStageDescriptorSampledImages       = max_textures,
1163       .maxPerStageDescriptorStorageImages       = max_images,
1164       .maxPerStageDescriptorInputAttachments    = MAX_PER_STAGE_DESCRIPTOR_INPUT_ATTACHMENTS,
1165       .maxPerStageResources                     = max_per_stage,
1166       .maxDescriptorSetSamplers                 = 6 * max_samplers, /* number of stages * maxPerStageDescriptorSamplers */
1167       .maxDescriptorSetUniformBuffers           = 6 * MAX_PER_STAGE_DESCRIPTOR_UNIFORM_BUFFERS,           /* number of stages * maxPerStageDescriptorUniformBuffers */
1168       .maxDescriptorSetUniformBuffersDynamic    = MAX_DYNAMIC_BUFFERS / 2,
1169       .maxDescriptorSetStorageBuffers           = 6 * max_ssbos,    /* number of stages * maxPerStageDescriptorStorageBuffers */
1170       .maxDescriptorSetStorageBuffersDynamic    = MAX_DYNAMIC_BUFFERS / 2,
1171       .maxDescriptorSetSampledImages            = 6 * max_textures, /* number of stages * maxPerStageDescriptorSampledImages */
1172       .maxDescriptorSetStorageImages            = 6 * max_images,   /* number of stages * maxPerStageDescriptorStorageImages */
1173       .maxDescriptorSetInputAttachments         = MAX_DESCRIPTOR_SET_INPUT_ATTACHMENTS,
1174       .maxVertexInputAttributes                 = MAX_VES,
1175       .maxVertexInputBindings                   = MAX_VBS,
1176       /* Broadwell PRMs: Volume 2d: Command Reference: Structures:
1177        *
1178        * VERTEX_ELEMENT_STATE::Source Element Offset: [0,2047]
1179        */
1180       .maxVertexInputAttributeOffset            = 2047,
1181       /* Skylake PRMs: Volume 2d: Command Reference: Structures:
1182        *
1183        * VERTEX_BUFFER_STATE::Buffer Pitch: [0,4095]
1184        */
1185       .maxVertexInputBindingStride              = 4095,
1186       .maxVertexOutputComponents                = 128,
1187       .maxTessellationGenerationLevel           = 64,
1188       .maxTessellationPatchSize                 = 32,
1189       .maxTessellationControlPerVertexInputComponents = 128,
1190       .maxTessellationControlPerVertexOutputComponents = 128,
1191       .maxTessellationControlPerPatchOutputComponents = 128,
1192       .maxTessellationControlTotalOutputComponents = 2048,
1193       .maxTessellationEvaluationInputComponents = 128,
1194       .maxTessellationEvaluationOutputComponents = 128,
1195       .maxGeometryShaderInvocations             = 32,
1196       .maxGeometryInputComponents               = 128,
1197       .maxGeometryOutputComponents              = 128,
1198       .maxGeometryOutputVertices                = 256,
1199       .maxGeometryTotalOutputComponents         = 1024,
1200       .maxFragmentInputComponents               = 116, /* 128 components - (PSIZ, CLIP_DIST0, CLIP_DIST1) */
1201       .maxFragmentOutputAttachments             = 8,
1202       .maxFragmentDualSrcAttachments            = 1,
1203       .maxFragmentCombinedOutputResources       = MAX_RTS + max_ssbos + max_images,
1204       .maxComputeSharedMemorySize               = intel_device_info_get_max_slm_size(&pdevice->info),
1205       .maxComputeWorkGroupCount                 = { 65535, 65535, 65535 },
1206       .maxComputeWorkGroupInvocations           = max_workgroup_size,
1207       .maxComputeWorkGroupSize = {
1208          max_workgroup_size,
1209          max_workgroup_size,
1210          max_workgroup_size,
1211       },
1212       .subPixelPrecisionBits                    = 8,
1213       .subTexelPrecisionBits                    = 8,
1214       .mipmapPrecisionBits                      = 8,
1215       .maxDrawIndexedIndexValue                 = UINT32_MAX,
1216       .maxDrawIndirectCount                     = UINT32_MAX,
1217       .maxSamplerLodBias                        = 16,
1218       .maxSamplerAnisotropy                     = 16,
1219       .maxViewports                             = MAX_VIEWPORTS,
1220       .maxViewportDimensions                    = { (1 << 14), (1 << 14) },
1221       .viewportBoundsRange                      = { INT16_MIN, INT16_MAX },
1222       .viewportSubPixelBits                     = 13, /* We take a float? */
1223       .minMemoryMapAlignment                    = 4096, /* A page */
1224       /* The dataport requires texel alignment so we need to assume a worst
1225        * case of R32G32B32A32 which is 16 bytes.
1226        */
1227       .minTexelBufferOffsetAlignment            = 16,
1228       .minUniformBufferOffsetAlignment          = ANV_UBO_ALIGNMENT,
1229       .minStorageBufferOffsetAlignment          = ANV_SSBO_ALIGNMENT,
1230       .minTexelOffset                           = -8,
1231       .maxTexelOffset                           = 7,
1232       .minTexelGatherOffset                     = -32,
1233       .maxTexelGatherOffset                     = 31,
1234       .minInterpolationOffset                   = -0.5,
1235       .maxInterpolationOffset                   = 0.4375,
1236       .subPixelInterpolationOffsetBits          = 4,
1237       .maxFramebufferWidth                      = (1 << 14),
1238       .maxFramebufferHeight                     = (1 << 14),
1239       .maxFramebufferLayers                     = (1 << 11),
1240       .framebufferColorSampleCounts             = sample_counts,
1241       .framebufferDepthSampleCounts             = sample_counts,
1242       .framebufferStencilSampleCounts           = sample_counts,
1243       .framebufferNoAttachmentsSampleCounts     = sample_counts,
1244       .maxColorAttachments                      = MAX_RTS,
1245       .sampledImageColorSampleCounts            = sample_counts,
1246       .sampledImageIntegerSampleCounts          = sample_counts,
1247       .sampledImageDepthSampleCounts            = sample_counts,
1248       .sampledImageStencilSampleCounts          = sample_counts,
1249       .storageImageSampleCounts                 = VK_SAMPLE_COUNT_1_BIT,
1250       .maxSampleMaskWords                       = 1,
1251       .timestampComputeAndGraphics              = true,
1252       .timestampPeriod                          = 1000000000.0 / devinfo->timestamp_frequency,
1253       .maxClipDistances                         = 8,
1254       .maxCullDistances                         = 8,
1255       .maxCombinedClipAndCullDistances          = 8,
1256       .discreteQueuePriorities                  = 2,
1257       .pointSizeRange                           = { 0.125, 255.875 },
1258       /* While SKL and up support much wider lines than we are setting here,
1259        * in practice we run into conformance issues if we go past this limit.
1260        * Since the Windows driver does the same, it's probably fair to assume
1261        * that no one needs more than this.
1262        */
1263       .lineWidthRange                           = { 0.0, 8.0 },
1264       .pointSizeGranularity                     = (1.0 / 8.0),
1265       .lineWidthGranularity                     = (1.0 / 128.0),
1266       .strictLines                              = false,
1267       .standardSampleLocations                  = true,
1268       .optimalBufferCopyOffsetAlignment         = 128,
1269       .optimalBufferCopyRowPitchAlignment       = 128,
1270       .nonCoherentAtomSize                      = 64,
1271 
1272       /* Sparse: */
1273       .sparseResidencyStandard2DBlockShape = has_sparse_or_fake,
1274       .sparseResidencyStandard2DMultisampleBlockShape = false,
1275       .sparseResidencyStandard3DBlockShape = has_sparse_or_fake,
1276       .sparseResidencyAlignedMipSize = false,
1277       .sparseResidencyNonResidentStrict = has_sparse_or_fake,
1278 
1279       /* VK_KHR_cooperative_matrix */
1280       .cooperativeMatrixSupportedStages = VK_SHADER_STAGE_COMPUTE_BIT,
1281    };
1282 
1283    snprintf(props->deviceName, sizeof(props->deviceName),
1284             "%s", pdevice->info.name);
1285    memcpy(props->pipelineCacheUUID,
1286           pdevice->pipeline_cache_uuid, VK_UUID_SIZE);
1287 
1288    get_properties_1_1(pdevice, props);
1289    get_properties_1_2(pdevice, props);
1290    get_properties_1_3(pdevice, props);
1291 
1292    /* VK_KHR_acceleration_structure */
1293    {
1294       props->maxGeometryCount = (1u << 24) - 1;
1295       props->maxInstanceCount = (1u << 24) - 1;
1296       props->maxPrimitiveCount = (1u << 29) - 1;
1297       props->maxPerStageDescriptorAccelerationStructures = UINT16_MAX;
1298       props->maxPerStageDescriptorUpdateAfterBindAccelerationStructures = UINT16_MAX;
1299       props->maxDescriptorSetAccelerationStructures = UINT16_MAX;
1300       props->maxDescriptorSetUpdateAfterBindAccelerationStructures = UINT16_MAX;
1301       props->minAccelerationStructureScratchOffsetAlignment = 64;
1302    }
1303 
1304    /* VK_KHR_compute_shader_derivatives */
1305    {
1306       props->meshAndTaskShaderDerivatives = pdevice->info.has_mesh_shading;
1307    }
1308 
1309    /* VK_KHR_fragment_shading_rate */
1310    {
1311       props->primitiveFragmentShadingRateWithMultipleViewports =
1312          pdevice->info.has_coarse_pixel_primitive_and_cb;
1313       props->layeredShadingRateAttachments =
1314       pdevice->info.has_coarse_pixel_primitive_and_cb;
1315       props->fragmentShadingRateNonTrivialCombinerOps =
1316          pdevice->info.has_coarse_pixel_primitive_and_cb;
1317       props->maxFragmentSize = (VkExtent2D) { 4, 4 };
1318       props->maxFragmentSizeAspectRatio =
1319          pdevice->info.has_coarse_pixel_primitive_and_cb ?
1320          2 : 4;
1321       props->maxFragmentShadingRateCoverageSamples = 4 * 4 *
1322          (pdevice->info.has_coarse_pixel_primitive_and_cb ? 4 : 16);
1323       props->maxFragmentShadingRateRasterizationSamples =
1324       pdevice->info.has_coarse_pixel_primitive_and_cb ?
1325          VK_SAMPLE_COUNT_4_BIT :  VK_SAMPLE_COUNT_16_BIT;
1326       props->fragmentShadingRateWithShaderDepthStencilWrites = false;
1327       props->fragmentShadingRateWithSampleMask = true;
1328       props->fragmentShadingRateWithShaderSampleMask = false;
1329       props->fragmentShadingRateWithConservativeRasterization = true;
1330       props->fragmentShadingRateWithFragmentShaderInterlock = true;
1331       props->fragmentShadingRateWithCustomSampleLocations = true;
1332       props->fragmentShadingRateStrictMultiplyCombiner = true;
1333 
1334       if (pdevice->info.has_coarse_pixel_primitive_and_cb) {
1335          props->minFragmentShadingRateAttachmentTexelSize = (VkExtent2D) { 8, 8 };
1336          props->maxFragmentShadingRateAttachmentTexelSize = (VkExtent2D) { 8, 8 };
1337          props->maxFragmentShadingRateAttachmentTexelSizeAspectRatio = 1;
1338       } else {
1339          /* Those must be 0 if attachmentFragmentShadingRate is not supported. */
1340          props->minFragmentShadingRateAttachmentTexelSize = (VkExtent2D) { 0, 0 };
1341          props->maxFragmentShadingRateAttachmentTexelSize = (VkExtent2D) { 0, 0 };
1342          props->maxFragmentShadingRateAttachmentTexelSizeAspectRatio = 0;
1343       }
1344    }
1345 
1346    /* VK_KHR_maintenance5 */
1347    {
1348       props->earlyFragmentMultisampleCoverageAfterSampleCounting = false;
1349       props->earlyFragmentSampleMaskTestBeforeSampleCounting = false;
1350       props->depthStencilSwizzleOneSupport = true;
1351       props->polygonModePointSize = true;
1352       props->nonStrictSinglePixelWideLinesUseParallelogram = false;
1353       props->nonStrictWideLinesUseParallelogram = false;
1354    }
1355 
1356    /* VK_KHR_maintenance6 */
1357    {
1358       props->blockTexelViewCompatibleMultipleLayers = true;
1359       props->maxCombinedImageSamplerDescriptorCount = 3;
1360       props->fragmentShadingRateClampCombinerInputs = true;
1361    }
1362 
1363    /* VK_KHR_maintenance7 */
1364    {
1365       props->robustFragmentShadingRateAttachmentAccess = true;
1366       props->separateDepthStencilAttachmentAccess = true;
1367       props->maxDescriptorSetTotalUniformBuffersDynamic = MAX_DYNAMIC_BUFFERS;
1368       props->maxDescriptorSetTotalStorageBuffersDynamic = MAX_DYNAMIC_BUFFERS;
1369       props->maxDescriptorSetTotalBuffersDynamic = MAX_DYNAMIC_BUFFERS;
1370       props->maxDescriptorSetUpdateAfterBindTotalUniformBuffersDynamic = MAX_DYNAMIC_BUFFERS;
1371       props->maxDescriptorSetUpdateAfterBindTotalStorageBuffersDynamic = MAX_DYNAMIC_BUFFERS;
1372       props->maxDescriptorSetUpdateAfterBindTotalBuffersDynamic = MAX_DYNAMIC_BUFFERS;
1373    }
1374 
1375    /* VK_KHR_performance_query */
1376    {
1377       props->allowCommandBufferQueryCopies = false;
1378    }
1379 
1380    /* VK_KHR_push_descriptor */
1381    {
1382       props->maxPushDescriptors = MAX_PUSH_DESCRIPTORS;
1383    }
1384 
1385    /* VK_KHR_ray_tracing_pipeline */
1386    {
1387       /* TODO */
1388       props->shaderGroupHandleSize = 32;
1389       props->maxRayRecursionDepth = 31;
1390       /* MemRay::hitGroupSRStride is 16 bits */
1391       props->maxShaderGroupStride = UINT16_MAX;
1392       /* MemRay::hitGroupSRBasePtr requires 16B alignment */
1393       props->shaderGroupBaseAlignment = 16;
1394       props->shaderGroupHandleAlignment = 16;
1395       props->shaderGroupHandleCaptureReplaySize = 32;
1396       props->maxRayDispatchInvocationCount = 1U << 30; /* required min limit */
1397       props->maxRayHitAttributeSize = BRW_RT_SIZEOF_HIT_ATTRIB_DATA;
1398    }
1399 
1400    /* VK_KHR_vertex_attribute_divisor */
1401    {
1402       props->maxVertexAttribDivisor = UINT32_MAX / 16;
1403       props->supportsNonZeroFirstInstance = true;
1404    }
1405 
1406    /* VK_EXT_conservative_rasterization */
1407    {
1408       /* There's nothing in the public docs about this value as far as I can
1409        * tell. However, this is the value the Windows driver reports and
1410        * there's a comment on a rejected HW feature in the internal docs that
1411        * says:
1412        *
1413        *    "This is similar to conservative rasterization, except the
1414        *    primitive area is not extended by 1/512 and..."
1415        *
1416        * That's a bit of an obtuse reference but it's the best we've got for
1417        * now.
1418        */
1419       props->primitiveOverestimationSize = 1.0f / 512.0f;
1420       props->maxExtraPrimitiveOverestimationSize = 0.0f;
1421       props->extraPrimitiveOverestimationSizeGranularity = 0.0f;
1422       props->primitiveUnderestimation = false;
1423       props->conservativePointAndLineRasterization = false;
1424       props->degenerateTrianglesRasterized = true;
1425       props->degenerateLinesRasterized = false;
1426       props->fullyCoveredFragmentShaderInputVariable = false;
1427       props->conservativeRasterizationPostDepthCoverage = true;
1428    }
1429 
1430    /* VK_EXT_custom_border_color */
1431    {
1432       props->maxCustomBorderColorSamplers = MAX_CUSTOM_BORDER_COLORS;
1433    }
1434 
1435    /* VK_EXT_descriptor_buffer */
1436    {
1437       props->combinedImageSamplerDescriptorSingleArray = true;
1438       props->bufferlessPushDescriptors = true;
1439       /* Written to the buffer before a timeline semaphore is signaled, but
1440        * after vkQueueSubmit().
1441        */
1442       props->allowSamplerImageViewPostSubmitCreation = true;
1443       props->descriptorBufferOffsetAlignment = ANV_SURFACE_STATE_SIZE;
1444 
1445       if (pdevice->uses_ex_bso) {
1446          props->maxDescriptorBufferBindings = MAX_SETS;
1447          props->maxResourceDescriptorBufferBindings = MAX_SETS;
1448          props->maxSamplerDescriptorBufferBindings = MAX_SETS;
1449          props->maxEmbeddedImmutableSamplerBindings = MAX_SETS;
1450       } else {
1451          props->maxDescriptorBufferBindings = 3; /* resources, samplers, push (we don't care about push) */
1452          props->maxResourceDescriptorBufferBindings = 1;
1453          props->maxSamplerDescriptorBufferBindings = 1;
1454          props->maxEmbeddedImmutableSamplerBindings = 1;
1455       }
1456       props->maxEmbeddedImmutableSamplers = MAX_EMBEDDED_SAMPLERS;
1457 
1458       /* Storing a 64bit address */
1459       props->bufferCaptureReplayDescriptorDataSize = 8;
1460       props->imageCaptureReplayDescriptorDataSize = 8;
1461       /* Offset inside the reserved border color pool */
1462       props->samplerCaptureReplayDescriptorDataSize = 4;
1463 
1464       /* Not affected by replay */
1465       props->imageViewCaptureReplayDescriptorDataSize = 0;
1466       /* The acceleration structure virtual address backing is coming from a
1467        * buffer, so as long as that buffer is captured/replayed correctly we
1468        * should always get the same address.
1469        */
1470       props->accelerationStructureCaptureReplayDescriptorDataSize = 0;
1471 
1472       props->samplerDescriptorSize = ANV_SAMPLER_STATE_SIZE;
1473       props->combinedImageSamplerDescriptorSize = align(ANV_SURFACE_STATE_SIZE + ANV_SAMPLER_STATE_SIZE,
1474                                                         ANV_SURFACE_STATE_SIZE);
1475       props->sampledImageDescriptorSize = ANV_SURFACE_STATE_SIZE;
1476       props->storageImageDescriptorSize = ANV_SURFACE_STATE_SIZE;
1477       props->uniformTexelBufferDescriptorSize = ANV_SURFACE_STATE_SIZE;
1478       props->robustUniformTexelBufferDescriptorSize = ANV_SURFACE_STATE_SIZE;
1479       props->storageTexelBufferDescriptorSize = ANV_SURFACE_STATE_SIZE;
1480       props->robustStorageTexelBufferDescriptorSize = ANV_SURFACE_STATE_SIZE;
1481       props->uniformBufferDescriptorSize = ANV_SURFACE_STATE_SIZE;
1482       props->robustUniformBufferDescriptorSize = ANV_SURFACE_STATE_SIZE;
1483       props->storageBufferDescriptorSize = ANV_SURFACE_STATE_SIZE;
1484       props->robustStorageBufferDescriptorSize = ANV_SURFACE_STATE_SIZE;
1485       props->inputAttachmentDescriptorSize = ANV_SURFACE_STATE_SIZE;
1486       props->accelerationStructureDescriptorSize = sizeof(struct anv_address_range_descriptor);
1487       props->maxSamplerDescriptorBufferRange = pdevice->va.dynamic_visible_pool.size;
1488       props->maxResourceDescriptorBufferRange = anv_physical_device_bindless_heap_size(pdevice,
1489                                                                                        true);
1490       props->resourceDescriptorBufferAddressSpaceSize = pdevice->va.dynamic_visible_pool.size;
1491       props->descriptorBufferAddressSpaceSize = pdevice->va.dynamic_visible_pool.size;
1492       props->samplerDescriptorBufferAddressSpaceSize = pdevice->va.dynamic_visible_pool.size;
1493    }
1494 
1495    /* VK_EXT_extended_dynamic_state3 */
1496    {
1497       props->dynamicPrimitiveTopologyUnrestricted = true;
1498    }
1499 
1500    /* VK_EXT_external_memory_host */
1501    {
1502       props->minImportedHostPointerAlignment = 4096;
1503    }
1504 
1505    /* VK_EXT_graphics_pipeline_library */
1506    {
1507       props->graphicsPipelineLibraryFastLinking = true;
1508       props->graphicsPipelineLibraryIndependentInterpolationDecoration = true;
1509    }
1510 
1511    /* VK_EXT_legacy_vertex_attributes */
1512    {
1513       props->nativeUnalignedPerformance = true;
1514    }
1515 
1516    /* VK_EXT_line_rasterization */
1517    {
1518       /* In the Skylake PRM Vol. 7, subsection titled "GIQ (Diamond) Sampling
1519        * Rules - Legacy Mode", it says the following:
1520        *
1521        *    "Note that the device divides a pixel into a 16x16 array of
1522        *     subpixels, referenced by their upper left corners."
1523        *
1524        * This is the only known reference in the PRMs to the subpixel
1525        * precision of line rasterization and a "16x16 array of subpixels"
1526        * implies 4 subpixel precision bits. Empirical testing has shown that 4
1527        * subpixel precision bits applies to all line rasterization types.
1528        */
1529       props->lineSubPixelPrecisionBits = 4;
1530    }
1531 
1532    /* VK_EXT_map_memory_placed */
1533    {
1534       props->minPlacedMemoryMapAlignment = 4096;
1535    }
1536 
1537    /* VK_EXT_mesh_shader */
1538    {
1539       /* Bounded by the maximum representable size in
1540        * 3DSTATE_MESH_SHADER_BODY::SharedLocalMemorySize.  Same for Task.
1541        */
1542       const uint32_t max_slm_size = 64 * 1024;
1543 
1544       /* Bounded by the maximum representable size in
1545        * 3DSTATE_MESH_SHADER_BODY::LocalXMaximum.  Same for Task.
1546        */
1547       const uint32_t max_workgroup_size = 1 << 10;
1548 
1549       /* 3DMESH_3D limitation. */
1550       const uint32_t max_threadgroup_count = 1 << 22;
1551 
1552       /* 3DMESH_3D limitation. */
1553       const uint32_t max_threadgroup_xyz = 65535;
1554 
1555       const uint32_t max_urb_size = 64 * 1024;
1556 
1557       props->maxTaskWorkGroupTotalCount = max_threadgroup_count;
1558       props->maxTaskWorkGroupCount[0] = max_threadgroup_xyz;
1559       props->maxTaskWorkGroupCount[1] = max_threadgroup_xyz;
1560       props->maxTaskWorkGroupCount[2] = max_threadgroup_xyz;
1561 
1562       props->maxTaskWorkGroupInvocations = max_workgroup_size;
1563       props->maxTaskWorkGroupSize[0] = max_workgroup_size;
1564       props->maxTaskWorkGroupSize[1] = max_workgroup_size;
1565       props->maxTaskWorkGroupSize[2] = max_workgroup_size;
1566 
1567       /* TUE header with padding */
1568       const uint32_t task_payload_reserved = 32;
1569 
1570       props->maxTaskPayloadSize = max_urb_size - task_payload_reserved;
1571       props->maxTaskSharedMemorySize = max_slm_size;
1572       props->maxTaskPayloadAndSharedMemorySize =
1573          props->maxTaskPayloadSize +
1574          props->maxTaskSharedMemorySize;
1575 
1576       props->maxMeshWorkGroupTotalCount = max_threadgroup_count;
1577       props->maxMeshWorkGroupCount[0] = max_threadgroup_xyz;
1578       props->maxMeshWorkGroupCount[1] = max_threadgroup_xyz;
1579       props->maxMeshWorkGroupCount[2] = max_threadgroup_xyz;
1580 
1581       props->maxMeshWorkGroupInvocations = max_workgroup_size;
1582       props->maxMeshWorkGroupSize[0] = max_workgroup_size;
1583       props->maxMeshWorkGroupSize[1] = max_workgroup_size;
1584       props->maxMeshWorkGroupSize[2] = max_workgroup_size;
1585 
1586       props->maxMeshSharedMemorySize = max_slm_size;
1587       props->maxMeshPayloadAndSharedMemorySize =
1588          props->maxTaskPayloadSize +
1589          props->maxMeshSharedMemorySize;
1590 
1591       /* Unfortunately spec's formula for the max output size doesn't match our hardware
1592        * (because some per-primitive and per-vertex attributes have alignment restrictions),
1593        * so we have to advertise the minimum value mandated by the spec to not overflow it.
1594        */
1595       props->maxMeshOutputPrimitives = 256;
1596       props->maxMeshOutputVertices = 256;
1597 
1598       /* NumPrim + Primitive Data List */
1599       const uint32_t max_indices_memory =
1600          ALIGN(sizeof(uint32_t) +
1601                sizeof(uint32_t) * props->maxMeshOutputVertices, 32);
1602 
1603       props->maxMeshOutputMemorySize = MIN2(max_urb_size - max_indices_memory, 32768);
1604 
1605       props->maxMeshPayloadAndOutputMemorySize =
1606          props->maxTaskPayloadSize +
1607          props->maxMeshOutputMemorySize;
1608 
1609       props->maxMeshOutputComponents = 128;
1610 
1611       /* RTAIndex is 11-bits wide */
1612       props->maxMeshOutputLayers = 1 << 11;
1613 
1614       props->maxMeshMultiviewViewCount = 1;
1615 
1616       /* Elements in Vertex Data Array must be aligned to 32 bytes (8 dwords). */
1617       props->meshOutputPerVertexGranularity = 8;
1618       /* Elements in Primitive Data Array must be aligned to 32 bytes (8 dwords). */
1619       props->meshOutputPerPrimitiveGranularity = 8;
1620 
1621       /* SIMD16 */
1622       props->maxPreferredTaskWorkGroupInvocations = 16;
1623       props->maxPreferredMeshWorkGroupInvocations = 16;
1624 
1625       props->prefersLocalInvocationVertexOutput = false;
1626       props->prefersLocalInvocationPrimitiveOutput = false;
1627       props->prefersCompactVertexOutput = false;
1628       props->prefersCompactPrimitiveOutput = false;
1629 
1630       /* Spec minimum values */
1631       assert(props->maxTaskWorkGroupTotalCount >= (1U << 22));
1632       assert(props->maxTaskWorkGroupCount[0] >= 65535);
1633       assert(props->maxTaskWorkGroupCount[1] >= 65535);
1634       assert(props->maxTaskWorkGroupCount[2] >= 65535);
1635 
1636       assert(props->maxTaskWorkGroupInvocations >= 128);
1637       assert(props->maxTaskWorkGroupSize[0] >= 128);
1638       assert(props->maxTaskWorkGroupSize[1] >= 128);
1639       assert(props->maxTaskWorkGroupSize[2] >= 128);
1640 
1641       assert(props->maxTaskPayloadSize >= 16384);
1642       assert(props->maxTaskSharedMemorySize >= 32768);
1643       assert(props->maxTaskPayloadAndSharedMemorySize >= 32768);
1644 
1645 
1646       assert(props->maxMeshWorkGroupTotalCount >= (1U << 22));
1647       assert(props->maxMeshWorkGroupCount[0] >= 65535);
1648       assert(props->maxMeshWorkGroupCount[1] >= 65535);
1649       assert(props->maxMeshWorkGroupCount[2] >= 65535);
1650 
1651       assert(props->maxMeshWorkGroupInvocations >= 128);
1652       assert(props->maxMeshWorkGroupSize[0] >= 128);
1653       assert(props->maxMeshWorkGroupSize[1] >= 128);
1654       assert(props->maxMeshWorkGroupSize[2] >= 128);
1655 
1656       assert(props->maxMeshSharedMemorySize >= 28672);
1657       assert(props->maxMeshPayloadAndSharedMemorySize >= 28672);
1658       assert(props->maxMeshOutputMemorySize >= 32768);
1659       assert(props->maxMeshPayloadAndOutputMemorySize >= 48128);
1660 
1661       assert(props->maxMeshOutputComponents >= 128);
1662 
1663       assert(props->maxMeshOutputVertices >= 256);
1664       assert(props->maxMeshOutputPrimitives >= 256);
1665       assert(props->maxMeshOutputLayers >= 8);
1666       assert(props->maxMeshMultiviewViewCount >= 1);
1667    }
1668 
1669    /* VK_EXT_multi_draw */
1670    {
1671       props->maxMultiDrawCount = 2048;
1672    }
1673 
1674    /* VK_EXT_nested_command_buffer */
1675    {
1676       props->maxCommandBufferNestingLevel = UINT32_MAX;
1677    }
1678 
1679    /* VK_EXT_pci_bus_info */
1680    {
1681       props->pciDomain = pdevice->info.pci_domain;
1682       props->pciBus = pdevice->info.pci_bus;
1683       props->pciDevice = pdevice->info.pci_dev;
1684       props->pciFunction = pdevice->info.pci_func;
1685    }
1686 
1687    /* VK_EXT_physical_device_drm */
1688    {
1689       props->drmHasPrimary = pdevice->has_master;
1690       props->drmPrimaryMajor = pdevice->master_major;
1691       props->drmPrimaryMinor = pdevice->master_minor;
1692       props->drmHasRender = pdevice->has_local;
1693       props->drmRenderMajor = pdevice->local_major;
1694       props->drmRenderMinor = pdevice->local_minor;
1695    }
1696 
1697    /* VK_EXT_pipeline_robustness */
1698    {
1699       props->defaultRobustnessStorageBuffers =
1700          VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_DISABLED_EXT;
1701       props->defaultRobustnessUniformBuffers =
1702          VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_DISABLED_EXT;
1703       props->defaultRobustnessVertexInputs =
1704          VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_ROBUST_BUFFER_ACCESS_2_EXT;
1705       props->defaultRobustnessImages =
1706          VK_PIPELINE_ROBUSTNESS_IMAGE_BEHAVIOR_ROBUST_IMAGE_ACCESS_2_EXT;
1707    }
1708 
1709    /* VK_EXT_provoking_vertex */
1710    {
1711       props->provokingVertexModePerPipeline = true;
1712       props->transformFeedbackPreservesTriangleFanProvokingVertex = false;
1713    }
1714 
1715    /* VK_EXT_robustness2 */
1716    {
1717       props->robustStorageBufferAccessSizeAlignment =
1718          ANV_SSBO_BOUNDS_CHECK_ALIGNMENT;
1719       props->robustUniformBufferAccessSizeAlignment =
1720          ANV_UBO_ALIGNMENT;
1721    }
1722 
1723    /* VK_EXT_sample_locations */
1724    {
1725       props->sampleLocationSampleCounts =
1726          isl_device_get_sample_counts(&pdevice->isl_dev);
1727 
1728       /* See also anv_GetPhysicalDeviceMultisamplePropertiesEXT */
1729       props->maxSampleLocationGridSize.width = 1;
1730       props->maxSampleLocationGridSize.height = 1;
1731 
1732       props->sampleLocationCoordinateRange[0] = 0;
1733       props->sampleLocationCoordinateRange[1] = 0.9375;
1734       props->sampleLocationSubPixelBits = 4;
1735 
1736       props->variableSampleLocations = true;
1737    }
1738 
1739    /* VK_EXT_shader_module_identifier */
1740    {
1741       STATIC_ASSERT(sizeof(vk_shaderModuleIdentifierAlgorithmUUID) ==
1742                     sizeof(props->shaderModuleIdentifierAlgorithmUUID));
1743       memcpy(props->shaderModuleIdentifierAlgorithmUUID,
1744              vk_shaderModuleIdentifierAlgorithmUUID,
1745              sizeof(props->shaderModuleIdentifierAlgorithmUUID));
1746    }
1747 
1748    /* VK_EXT_transform_feedback */
1749    {
1750       props->maxTransformFeedbackStreams = MAX_XFB_STREAMS;
1751       props->maxTransformFeedbackBuffers = MAX_XFB_BUFFERS;
1752       props->maxTransformFeedbackBufferSize = (1ull << 32);
1753       props->maxTransformFeedbackStreamDataSize = 128 * 4;
1754       props->maxTransformFeedbackBufferDataSize = 128 * 4;
1755       props->maxTransformFeedbackBufferDataStride = 2048;
1756       props->transformFeedbackQueries = true;
1757       props->transformFeedbackStreamsLinesTriangles = false;
1758       props->transformFeedbackRasterizationStreamSelect = false;
1759       props->transformFeedbackDraw = true;
1760    }
1761 
1762    /* VK_ANDROID_native_buffer */
1763 #if DETECT_OS_ANDROID
1764    {
1765       props->sharedImage = front_rendering_usage ? VK_TRUE : VK_FALSE;
1766    }
1767 #endif /* DETECT_OS_ANDROID */
1768 
1769 
1770    /* VK_MESA_image_alignment_control */
1771    {
1772       /* We support 4k/64k tiling alignments on most platforms */
1773       props->supportedImageAlignmentMask = (1 << 12) | (1 << 16);
1774    }
1775 }
1776 
1777 static VkResult MUST_CHECK
anv_init_meminfo(struct anv_physical_device * device,int fd)1778 anv_init_meminfo(struct anv_physical_device *device, int fd)
1779 {
1780    const struct intel_device_info *devinfo = &device->info;
1781 
1782    device->sys.region = &devinfo->mem.sram.mem;
1783    device->sys.size = devinfo->mem.sram.mappable.size;
1784    device->sys.available = devinfo->mem.sram.mappable.free;
1785 
1786    device->vram_mappable.region = &devinfo->mem.vram.mem;
1787    device->vram_mappable.size = devinfo->mem.vram.mappable.size;
1788    device->vram_mappable.available = devinfo->mem.vram.mappable.free;
1789 
1790    device->vram_non_mappable.region = &devinfo->mem.vram.mem;
1791    device->vram_non_mappable.size = devinfo->mem.vram.unmappable.size;
1792    device->vram_non_mappable.available = devinfo->mem.vram.unmappable.free;
1793 
1794    return VK_SUCCESS;
1795 }
1796 
1797 static void
anv_update_meminfo(struct anv_physical_device * device,int fd)1798 anv_update_meminfo(struct anv_physical_device *device, int fd)
1799 {
1800    if (!intel_device_info_update_memory_info(&device->info, fd))
1801       return;
1802 
1803    const struct intel_device_info *devinfo = &device->info;
1804    device->sys.available = devinfo->mem.sram.mappable.free;
1805    device->vram_mappable.available = devinfo->mem.vram.mappable.free;
1806    device->vram_non_mappable.available = devinfo->mem.vram.unmappable.free;
1807 }
1808 
/* Build the Vulkan memory heap and memory type tables for this physical
 * device.
 *
 * Phases:
 *   1. Snapshot memory region info (anv_init_meminfo).
 *   2. Declare 1 heap (integrated) or 2-3 heaps (discrete with VRAM).
 *   3. Let the KMD backend (i915 or Xe) declare the base memory types.
 *   4. Optionally duplicate device-local types as non-local (app workaround).
 *   5. Duplicate non-protected, non-compressed types as "dynamic visible"
 *      variants and compute the derived type bitfields.
 *   6. Decide whether host-visible/non-coherent types require CPU flushing.
 *
 * Returns VK_SUCCESS or an initialization error from a sub-step.
 */
static VkResult
anv_physical_device_init_heaps(struct anv_physical_device *device, int fd)
{
   VkResult result = anv_init_meminfo(device, fd);
   if (result != VK_SUCCESS)
      return result;

   /* A device with no system memory reported would be nonsensical. */
   assert(device->sys.size != 0);

   if (anv_physical_device_has_vram(device)) {
      /* We can create 2 or 3 different heaps when we have local memory
       * support, first heap with local memory size and second with system
       * memory size and the third is added only if part of the vram is
       * mappable to the host.
       */
      device->memory.heap_count = 2;
      device->memory.heaps[0] = (struct anv_memory_heap) {
         /* If there is a vram_non_mappable, use that for the device only
          * heap. Otherwise use the vram_mappable.
          */
         .size = device->vram_non_mappable.size != 0 ?
                 device->vram_non_mappable.size : device->vram_mappable.size,
         .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
         .is_local_mem = true,
      };
      device->memory.heaps[1] = (struct anv_memory_heap) {
         .size = device->sys.size,
         .flags = 0,
         .is_local_mem = false,
      };
      /* Add an additional smaller vram mappable heap if we can't map all the
       * vram to the host.
       */
      if (device->vram_non_mappable.size > 0) {
         device->memory.heap_count++;
         device->memory.heaps[2] = (struct anv_memory_heap) {
            .size = device->vram_mappable.size,
            .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
            .is_local_mem = true,
         };
      }
   } else {
      /* Integrated GPU: a single system-memory heap that is still advertised
       * as DEVICE_LOCAL (it is the only memory the GPU has).
       */
      device->memory.heap_count = 1;
      device->memory.heaps[0] = (struct anv_memory_heap) {
         .size = device->sys.size,
         .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
         .is_local_mem = false,
      };
   }

   /* The base memory types depend on which kernel driver backs the device. */
   switch (device->info.kmd_type) {
   case INTEL_KMD_TYPE_XE:
      result = anv_xe_physical_device_init_memory_types(device);
      break;
   case INTEL_KMD_TYPE_I915:
   default:
      result = anv_i915_physical_device_init_memory_types(device);
      break;
   }

   /* Strictly less-than: the duplication passes below may still append
    * entries to the type table.
    */
   assert(device->memory.type_count < ARRAY_SIZE(device->memory.types));

   if (result != VK_SUCCESS)
      return result;

   /* Some games (e.g., Total War: WARHAMMER III) sometimes seem to expect to
    * find memory types both with and without
    * VK_MEMORY_TYPE_PROPERTY_DEVICE_LOCAL_BIT. So here we duplicate all our
    * memory types just to make these games happy.
    * This behavior is not spec-compliant as we still only have one heap that
    * is now inconsistent with some of the memory types, but the game doesn't
    * seem to care about it.
    */
   if (device->instance->anv_fake_nonlocal_memory &&
       !anv_physical_device_has_vram(device)) {
      const uint32_t base_types_count = device->memory.type_count;
      for (int i = 0; i < base_types_count; i++) {
         if (!(device->memory.types[i].propertyFlags &
               VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT))
            continue;

         assert(device->memory.type_count < ARRAY_SIZE(device->memory.types));
         /* Append a copy that keeps DEVICE_LOCAL, then strip the bit from
          * the original entry.
          */
         struct anv_memory_type *new_type =
            &device->memory.types[device->memory.type_count++];
         *new_type = device->memory.types[i];

         device->memory.types[i].propertyFlags &=
            ~VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
      }
   }

   /* Replicate all non protected memory types for descriptor buffers because
    * we want to identify memory allocations to place them in the right memory
    * heap.
    */
   device->memory.default_buffer_mem_types =
      BITFIELD_RANGE(0, device->memory.type_count);
   device->memory.protected_mem_types = 0;
   device->memory.dynamic_visible_mem_types = 0;
   device->memory.compressed_mem_types = 0;

   /* Iterate only over the types present at this point; the loop appends
    * dynamic-visible duplicates past base_types_count.
    */
   const uint32_t base_types_count = device->memory.type_count;
   for (int i = 0; i < base_types_count; i++) {
      bool skip = false;

      if (device->memory.types[i].propertyFlags &
          VK_MEMORY_PROPERTY_PROTECTED_BIT) {
         device->memory.protected_mem_types |= BITFIELD_BIT(i);
         device->memory.default_buffer_mem_types &= (~BITFIELD_BIT(i));
         skip = true;
      }

      if (device->memory.types[i].compressed) {
         device->memory.compressed_mem_types |= BITFIELD_BIT(i);
         device->memory.default_buffer_mem_types &= (~BITFIELD_BIT(i));
         skip = true;
      }

      /* Protected and compressed types get no dynamic-visible duplicate. */
      if (skip)
         continue;

      device->memory.dynamic_visible_mem_types |=
         BITFIELD_BIT(device->memory.type_count);

      assert(device->memory.type_count < ARRAY_SIZE(device->memory.types));
      struct anv_memory_type *new_type =
         &device->memory.types[device->memory.type_count++];
      *new_type = device->memory.types[i];
      new_type->dynamic_visible = true;
   }

   assert(device->memory.type_count <= VK_MAX_MEMORY_TYPES);

   /* Host-visible but non-coherent memory needs explicit CPU cache flushes;
    * that path is only implemented for integrated GPU builds.
    */
   for (unsigned i = 0; i < device->memory.type_count; i++) {
      VkMemoryPropertyFlags props = device->memory.types[i].propertyFlags;
      if ((props & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) &&
          !(props & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT))
#ifdef SUPPORT_INTEL_INTEGRATED_GPUS
         device->memory.need_flush = true;
#else
         return vk_errorf(device, VK_ERROR_INITIALIZATION_FAILED,
                          "Memory configuration requires flushing, but it's not implemented for this architecture");
#endif
   }

   return VK_SUCCESS;
}
1956 
1957 static VkResult
anv_physical_device_init_uuids(struct anv_physical_device * device)1958 anv_physical_device_init_uuids(struct anv_physical_device *device)
1959 {
1960    const struct build_id_note *note =
1961       build_id_find_nhdr_for_addr(anv_physical_device_init_uuids);
1962    if (!note) {
1963       return vk_errorf(device, VK_ERROR_INITIALIZATION_FAILED,
1964                        "Failed to find build-id");
1965    }
1966 
1967    unsigned build_id_len = build_id_length(note);
1968    if (build_id_len < 20) {
1969       return vk_errorf(device, VK_ERROR_INITIALIZATION_FAILED,
1970                        "build-id too short.  It needs to be a SHA");
1971    }
1972 
1973    memcpy(device->driver_build_sha1, build_id_data(note), 20);
1974 
1975    struct mesa_sha1 sha1_ctx;
1976    uint8_t sha1[20];
1977    STATIC_ASSERT(VK_UUID_SIZE <= sizeof(sha1));
1978 
1979    /* The pipeline cache UUID is used for determining when a pipeline cache is
1980     * invalid.  It needs both a driver build and the PCI ID of the device.
1981     */
1982    _mesa_sha1_init(&sha1_ctx);
1983    _mesa_sha1_update(&sha1_ctx, build_id_data(note), build_id_len);
1984    brw_device_sha1_update(&sha1_ctx, &device->info);
1985    _mesa_sha1_update(&sha1_ctx, &device->always_use_bindless,
1986                      sizeof(device->always_use_bindless));
1987    _mesa_sha1_final(&sha1_ctx, sha1);
1988    memcpy(device->pipeline_cache_uuid, sha1, VK_UUID_SIZE);
1989 
1990    intel_uuid_compute_driver_id(device->driver_uuid, &device->info, VK_UUID_SIZE);
1991    intel_uuid_compute_device_id(device->device_uuid, &device->info, VK_UUID_SIZE);
1992 
1993    return VK_SUCCESS;
1994 }
1995 
static void
anv_physical_device_init_disk_cache(struct anv_physical_device *device)
{
#ifdef ENABLE_SHADER_CACHE
   /* Cache identity is "anv_<pci-id>"; the driver build sha1 serves as the
    * cache timestamp so a new build invalidates old entries.
    */
   char renderer[10];
   ASSERTED int written = snprintf(renderer, sizeof(renderer), "anv_%04x",
                                   device->info.pci_device_id);
   assert(written == sizeof(renderer) - 2);

   char build_sha1_str[41];
   _mesa_sha1_format(build_sha1_str, device->driver_build_sha1);

   const uint64_t driver_flags =
      brw_get_compiler_config_value(device->compiler);
   device->vk.disk_cache =
      disk_cache_create(renderer, build_sha1_str, driver_flags);
#endif
}
2013 
2014 static void
anv_physical_device_free_disk_cache(struct anv_physical_device * device)2015 anv_physical_device_free_disk_cache(struct anv_physical_device *device)
2016 {
2017 #ifdef ENABLE_SHADER_CACHE
2018    if (device->vk.disk_cache) {
2019       disk_cache_destroy(device->vk.disk_cache);
2020       device->vk.disk_cache = NULL;
2021    }
2022 #else
2023    assert(device->vk.disk_cache == NULL);
2024 #endif
2025 }
2026 
2027 /* The ANV_QUEUE_OVERRIDE environment variable is a comma separated list of
2028  * queue overrides.
2029  *
2030  * To override the number queues:
2031  *  * "gc" is for graphics queues with compute support
2032  *  * "g" is for graphics queues with no compute support
2033  *  * "c" is for compute queues with no graphics support
2034  *  * "v" is for video queues with no graphics support
2035  *  * "b" is for copy (blitter) queues with no graphics support
2036  *
2037  * For example, ANV_QUEUE_OVERRIDE=gc=2,c=1 would override the number of
2038  * advertised queues to be 2 queues with graphics+compute support, and 1 queue
2039  * with compute-only support.
2040  *
2041  * ANV_QUEUE_OVERRIDE=c=1 would override the number of advertised queues to
2042  * include 1 queue with compute-only support, but it will not change the
2043  * number of graphics+compute queues.
2044  *
2045  * ANV_QUEUE_OVERRIDE=gc=0,c=1 would override the number of advertised queues
2046  * to include 1 queue with compute-only support, and it would override the
2047  * number of graphics+compute queues to be 0.
2048  */
static void
anv_override_engine_counts(int *gc_count, int *g_count, int *c_count, int *v_count, int *blit_count)
{
   /* Parse ANV_QUEUE_OVERRIDE (syntax documented in the comment block
    * above) and override the caller-provided per-engine-class queue counts.
    * Each count is only modified when its token is present.
    */
   int gc_override = -1;
   int g_override = -1;
   int c_override = -1;
   int v_override = -1;
   int blit_override = -1;
   const char *env_ = os_get_option("ANV_QUEUE_OVERRIDE");

   if (env_ == NULL)
      return;

   /* strtok_r() modifies the string, so work on a private copy.  Bail out
    * (keeping the default counts) if the copy cannot be allocated; passing
    * NULL to strtok_r would be undefined behavior.
    */
   char *env = strdup(env_);
   if (env == NULL)
      return;

   char *save = NULL;
   char *next = strtok_r(env, ",", &save);
   while (next != NULL) {
      if (strncmp(next, "gc=", 3) == 0) {
         gc_override = strtol(next + 3, NULL, 0);
      } else if (strncmp(next, "g=", 2) == 0) {
         g_override = strtol(next + 2, NULL, 0);
      } else if (strncmp(next, "c=", 2) == 0) {
         c_override = strtol(next + 2, NULL, 0);
      } else if (strncmp(next, "v=", 2) == 0) {
         v_override = strtol(next + 2, NULL, 0);
      } else if (strncmp(next, "b=", 2) == 0) {
         blit_override = strtol(next + 2, NULL, 0);
      } else {
         mesa_logw("Ignoring unsupported ANV_QUEUE_OVERRIDE token: %s", next);
      }
      next = strtok_r(NULL, ",", &save);
   }
   free(env);

   if (gc_override >= 0)
      *gc_count = gc_override;
   if (g_override >= 0)
      *g_count = g_override;
   /* Graphics-only queues without any graphics+compute queue breaks the
    * spec requirement that a graphics+compute family be advertised; warn
    * but honor the user's request.
    */
   if (*g_count > 0 && *gc_count <= 0 && (gc_override >= 0 || g_override >= 0))
      mesa_logw("ANV_QUEUE_OVERRIDE: gc=0 with g > 0 violates the "
                "Vulkan specification");
   if (c_override >= 0)
      *c_count = c_override;
   if (v_override >= 0)
      *v_count = v_override;
   if (blit_override >= 0)
      *blit_count = blit_override;
}
2096 
/* Populate pdevice->queue.families[] and queue.family_count from the
 * engines the kernel exposes, honoring ANV_QUEUE_OVERRIDE.
 *
 * Families are appended in a fixed order (graphics+compute, graphics-only,
 * compute-only, video, copy), each only when its engine count is positive.
 * Without engine info we fall back to a single render queue.
 */
static void
anv_physical_device_init_queue_families(struct anv_physical_device *pdevice)
{
   uint32_t family_count = 0;
   /* Sparse binding is folded into the gfx/compute family flags whenever
    * any sparse backing was selected at probe time.
    */
   VkQueueFlags sparse_flags = pdevice->sparse_type != ANV_SPARSE_TYPE_NOT_SUPPORTED ?
                               VK_QUEUE_SPARSE_BINDING_BIT : 0;
   VkQueueFlags protected_flag = pdevice->has_protected_contexts ?
                                 VK_QUEUE_PROTECTED_BIT : 0;

   if (pdevice->engine_info) {
      int gc_count =
         intel_engines_count(pdevice->engine_info,
                             INTEL_ENGINE_CLASS_RENDER);
      int v_count =
         intel_engines_count(pdevice->engine_info, INTEL_ENGINE_CLASS_VIDEO);
      int g_count = 0;
      int c_count = 0;
      /* Not only the Kernel needs to have vm_control, but it also needs to
       * have a new enough GuC and the interface to tell us so. This is
       * implemented in the common layer by is_guc_semaphore_functional() and
       * results in devinfo->engine_class_supported_count being adjusted,
       * which we read below.
       */
      const bool kernel_supports_non_render_engines = pdevice->has_vm_control;
      /* For now we're choosing to not expose non-render engines on i915.ko
       * even when the Kernel allows it. We have data suggesting it's not an
       * obvious win in terms of performance.
       */
      const bool can_use_non_render_engines =
         kernel_supports_non_render_engines &&
         pdevice->info.kmd_type == INTEL_KMD_TYPE_XE;

      if (can_use_non_render_engines) {
         c_count = pdevice->info.engine_class_supported_count[INTEL_ENGINE_CLASS_COMPUTE];
      }
      /* When no dedicated compute engine is usable, compute-only queues are
       * backed by the render engine instead.
       */
      enum intel_engine_class compute_class =
         c_count < 1 ? INTEL_ENGINE_CLASS_RENDER : INTEL_ENGINE_CLASS_COMPUTE;

      /* Copy (blitter) queues: only on Gfx12.5+ and only when non-render
       * engines can be used at all (i.e. xe.ko, see above).
       */
      int blit_count = 0;
      if (pdevice->info.verx10 >= 125 && can_use_non_render_engines) {
         blit_count = pdevice->info.engine_class_supported_count[INTEL_ENGINE_CLASS_COPY];
      }

      /* Apply ANV_QUEUE_OVERRIDE (see the comment block above). */
      anv_override_engine_counts(&gc_count, &g_count, &c_count, &v_count, &blit_count);

      if (gc_count > 0) {
         /* Graphics+compute family; the only one exposing perf queries. */
         pdevice->queue.families[family_count++] = (struct anv_queue_family) {
            .queueFlags = VK_QUEUE_GRAPHICS_BIT |
                          VK_QUEUE_COMPUTE_BIT |
                          VK_QUEUE_TRANSFER_BIT |
                          sparse_flags |
                          protected_flag,
            .queueCount = gc_count,
            .engine_class = INTEL_ENGINE_CLASS_RENDER,
            .supports_perf = true,
         };
      }
      if (g_count > 0) {
         /* Graphics-only family (only reachable via ANV_QUEUE_OVERRIDE). */
         pdevice->queue.families[family_count++] = (struct anv_queue_family) {
            .queueFlags = VK_QUEUE_GRAPHICS_BIT |
                          VK_QUEUE_TRANSFER_BIT |
                          sparse_flags |
                          protected_flag,
            .queueCount = g_count,
            .engine_class = INTEL_ENGINE_CLASS_RENDER,
         };
      }
      if (c_count > 0) {
         /* Compute-only family; engine class depends on compute_class above. */
         pdevice->queue.families[family_count++] = (struct anv_queue_family) {
            .queueFlags = VK_QUEUE_COMPUTE_BIT |
                          VK_QUEUE_TRANSFER_BIT |
                          sparse_flags |
                          protected_flag,
            .queueCount = c_count,
            .engine_class = compute_class,
         };
      }
      if (v_count > 0 && (pdevice->video_decode_enabled || pdevice->video_encode_enabled)) {
         /* HEVC support on Gfx9 is only available on VCS0. So limit the number of video queues
          * to the first VCS engine instance.
          *
          * We should be able to query HEVC support from the kernel using the engine query uAPI,
          * but this appears to be broken :
          *    https://gitlab.freedesktop.org/drm/intel/-/issues/8832
          *
          * When this bug is fixed we should be able to check HEVC support to determine the
          * correct number of queues.
          */
         /* TODO: enable protected content on video queue */
         pdevice->queue.families[family_count++] = (struct anv_queue_family) {
            .queueFlags = (pdevice->video_decode_enabled ? VK_QUEUE_VIDEO_DECODE_BIT_KHR : 0) |
                          (pdevice->video_encode_enabled ? VK_QUEUE_VIDEO_ENCODE_BIT_KHR : 0),
            .queueCount = pdevice->info.ver == 9 ? MIN2(1, v_count) : v_count,
            .engine_class = INTEL_ENGINE_CLASS_VIDEO,
         };
      }
      if (blit_count > 0) {
         /* Transfer-only (blitter) family. */
         pdevice->queue.families[family_count++] = (struct anv_queue_family) {
            .queueFlags = VK_QUEUE_TRANSFER_BIT |
                          protected_flag,
            .queueCount = blit_count,
            .engine_class = INTEL_ENGINE_CLASS_COPY,
         };
      }
   } else {
      /* Default to a single render queue */
      pdevice->queue.families[family_count++] = (struct anv_queue_family) {
         .queueFlags = VK_QUEUE_GRAPHICS_BIT |
                       VK_QUEUE_COMPUTE_BIT |
                       VK_QUEUE_TRANSFER_BIT |
                       sparse_flags,
         .queueCount = 1,
         .engine_class = INTEL_ENGINE_CLASS_RENDER,
      };
      family_count = 1; /* redundant with the post-increment above, kept as-is */
   }
   assert(family_count <= ANV_MAX_QUEUE_FAMILIES);
   pdevice->queue.family_count = family_count;
}
2216 
2217 static VkResult
anv_physical_device_get_parameters(struct anv_physical_device * device)2218 anv_physical_device_get_parameters(struct anv_physical_device *device)
2219 {
2220    switch (device->info.kmd_type) {
2221    case INTEL_KMD_TYPE_I915:
2222       return anv_i915_physical_device_get_parameters(device);
2223    case INTEL_KMD_TYPE_XE:
2224       return anv_xe_physical_device_get_parameters(device);
2225    default:
2226       unreachable("Missing");
2227       return VK_ERROR_UNKNOWN;
2228    }
2229 }
2230 
2231 VkResult
anv_physical_device_try_create(struct vk_instance * vk_instance,struct _drmDevice * drm_device,struct vk_physical_device ** out)2232 anv_physical_device_try_create(struct vk_instance *vk_instance,
2233                                struct _drmDevice *drm_device,
2234                                struct vk_physical_device **out)
2235 {
2236    struct anv_instance *instance =
2237       container_of(vk_instance, struct anv_instance, vk);
2238 
2239    if (!(drm_device->available_nodes & (1 << DRM_NODE_RENDER)) ||
2240        drm_device->bustype != DRM_BUS_PCI ||
2241        drm_device->deviceinfo.pci->vendor_id != 0x8086)
2242       return VK_ERROR_INCOMPATIBLE_DRIVER;
2243 
2244    const char *primary_path = drm_device->nodes[DRM_NODE_PRIMARY];
2245    const char *path = drm_device->nodes[DRM_NODE_RENDER];
2246    VkResult result;
2247    int fd;
2248    int master_fd = -1;
2249 
2250    process_intel_debug_variable();
2251 
2252    fd = open(path, O_RDWR | O_CLOEXEC);
2253    if (fd < 0) {
2254       if (errno == ENOMEM) {
2255          return vk_errorf(instance, VK_ERROR_OUT_OF_HOST_MEMORY,
2256                           "Unable to open device %s: out of memory", path);
2257       }
2258       return vk_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER,
2259                        "Unable to open device %s: %m", path);
2260    }
2261 
2262    struct intel_device_info devinfo;
2263    if (!intel_get_device_info_from_fd(fd, &devinfo, 9, -1)) {
2264       result = VK_ERROR_INCOMPATIBLE_DRIVER;
2265       goto fail_fd;
2266    }
2267 
2268    if (devinfo.ver < 9) {
2269       /* Silently fail here, hasvk should pick up this device. */
2270       result = VK_ERROR_INCOMPATIBLE_DRIVER;
2271       goto fail_fd;
2272    } else if (devinfo.probe_forced) {
2273       /* If INTEL_FORCE_PROBE was used, then the user has opted-in for
2274        * unsupported device support. No need to print a warning message.
2275        */
2276    } else if (devinfo.ver > 20) {
2277       result = vk_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER,
2278                          "Vulkan not yet supported on %s", devinfo.name);
2279       goto fail_fd;
2280    }
2281 
2282    if (devinfo.ver == 20 && instance->disable_xe2_ccs)
2283       intel_debug |= DEBUG_NO_CCS;
2284 
2285    /* Disable Wa_16013994831 on Gfx12.0 because we found other cases where we
2286     * need to always disable preemption :
2287     *    - https://gitlab.freedesktop.org/mesa/mesa/-/issues/5963
2288     *    - https://gitlab.freedesktop.org/mesa/mesa/-/issues/5662
2289     */
2290    if (devinfo.verx10 == 120)
2291       BITSET_CLEAR(devinfo.workarounds, INTEL_WA_16013994831);
2292 
2293    if (!devinfo.has_context_isolation) {
2294       result = vk_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER,
2295                          "Vulkan requires context isolation for %s", devinfo.name);
2296       goto fail_fd;
2297    }
2298 
2299    struct anv_physical_device *device =
2300       vk_zalloc(&instance->vk.alloc, sizeof(*device), 8,
2301                 VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
2302    if (device == NULL) {
2303       result = vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY);
2304       goto fail_fd;
2305    }
2306 
2307    struct vk_physical_device_dispatch_table dispatch_table;
2308    vk_physical_device_dispatch_table_from_entrypoints(
2309       &dispatch_table, &anv_physical_device_entrypoints, true);
2310    vk_physical_device_dispatch_table_from_entrypoints(
2311       &dispatch_table, &wsi_physical_device_entrypoints, false);
2312 
2313    result = vk_physical_device_init(&device->vk, &instance->vk,
2314                                     NULL, NULL, NULL, /* We set up extensions later */
2315                                     &dispatch_table);
2316    if (result != VK_SUCCESS) {
2317       vk_error(instance, result);
2318       goto fail_alloc;
2319    }
2320    device->instance = instance;
2321 
2322    assert(strlen(path) < ARRAY_SIZE(device->path));
2323    snprintf(device->path, ARRAY_SIZE(device->path), "%s", path);
2324 
2325    device->info = devinfo;
2326 
2327    device->local_fd = fd;
2328    result = anv_physical_device_get_parameters(device);
2329    if (result != VK_SUCCESS)
2330       goto fail_base;
2331 
2332    device->gtt_size = device->info.gtt_size ? device->info.gtt_size :
2333                                               device->info.aperture_bytes;
2334 
2335    if (device->gtt_size < (4ULL << 30 /* GiB */)) {
2336       vk_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER,
2337                 "GTT size too small: 0x%016"PRIx64, device->gtt_size);
2338       goto fail_base;
2339    }
2340 
2341    /* We currently only have the right bits for instructions in Gen12+. If the
2342     * kernel ever starts supporting that feature on previous generations,
2343     * we'll need to edit genxml prior to enabling here.
2344     */
2345    device->has_protected_contexts = device->info.ver >= 12 &&
2346       intel_gem_supports_protected_context(fd, device->info.kmd_type);
2347 
2348    /* Just pick one; they're all the same */
2349    device->has_astc_ldr =
2350       isl_format_supports_sampling(&device->info,
2351                                    ISL_FORMAT_ASTC_LDR_2D_4X4_FLT16);
2352    if (!device->has_astc_ldr &&
2353        driQueryOptionb(&device->instance->dri_options, "vk_require_astc"))
2354       device->emu_astc_ldr = true;
2355    if (devinfo.ver == 9 && !intel_device_info_is_9lp(&devinfo)) {
2356       device->flush_astc_ldr_void_extent_denorms =
2357          device->has_astc_ldr && !device->emu_astc_ldr;
2358    }
2359    device->disable_fcv = device->info.verx10 >= 125 ||
2360                          instance->disable_fcv;
2361 
2362    result = anv_physical_device_init_heaps(device, fd);
2363    if (result != VK_SUCCESS)
2364       goto fail_base;
2365 
2366    if (debug_get_bool_option("ANV_QUEUE_THREAD_DISABLE", false))
2367       device->has_exec_timeline = false;
2368 
2369    device->has_cooperative_matrix =
2370       device->info.cooperative_matrix_configurations[0].scope != INTEL_CMAT_SCOPE_NONE;
2371 
2372    unsigned st_idx = 0;
2373 
2374    device->sync_syncobj_type = vk_drm_syncobj_get_type(fd);
2375    if (!device->has_exec_timeline)
2376       device->sync_syncobj_type.features &= ~VK_SYNC_FEATURE_TIMELINE;
2377    device->sync_types[st_idx++] = &device->sync_syncobj_type;
2378 
2379    /* anv_bo_sync_type is only supported with i915 for now  */
2380    if (device->info.kmd_type == INTEL_KMD_TYPE_I915) {
2381       if (!(device->sync_syncobj_type.features & VK_SYNC_FEATURE_CPU_WAIT))
2382          device->sync_types[st_idx++] = &anv_bo_sync_type;
2383 
2384       if (!(device->sync_syncobj_type.features & VK_SYNC_FEATURE_TIMELINE)) {
2385          device->sync_timeline_type = vk_sync_timeline_get_type(&anv_bo_sync_type);
2386          device->sync_types[st_idx++] = &device->sync_timeline_type.sync;
2387       }
2388    } else {
2389       assert(vk_sync_type_is_drm_syncobj(&device->sync_syncobj_type));
2390       assert(device->sync_syncobj_type.features & VK_SYNC_FEATURE_TIMELINE);
2391       assert(device->sync_syncobj_type.features & VK_SYNC_FEATURE_CPU_WAIT);
2392    }
2393 
2394    device->sync_types[st_idx++] = NULL;
2395    assert(st_idx <= ARRAY_SIZE(device->sync_types));
2396    device->vk.supported_sync_types = device->sync_types;
2397 
2398    device->vk.pipeline_cache_import_ops = anv_cache_import_ops;
2399 
2400    device->always_use_bindless =
2401       debug_get_bool_option("ANV_ALWAYS_BINDLESS", false);
2402 
2403    device->use_call_secondary =
2404       !debug_get_bool_option("ANV_DISABLE_SECONDARY_CMD_BUFFER_CALLS", false);
2405 
2406    device->video_decode_enabled = debug_get_bool_option("ANV_VIDEO_DECODE", false);
2407    device->video_encode_enabled = debug_get_bool_option("ANV_VIDEO_ENCODE", false);
2408 
2409    device->uses_ex_bso = device->info.verx10 >= 125;
2410 
2411    /* For now always use indirect descriptors. We'll update this
2412     * to !uses_ex_bso when all the infrastructure is built up.
2413     */
2414    device->indirect_descriptors =
2415       !device->uses_ex_bso ||
2416       driQueryOptionb(&instance->dri_options, "force_indirect_descriptors");
2417 
2418    device->alloc_aux_tt_mem =
2419       device->info.has_aux_map && device->info.verx10 >= 125;
2420    /* Check if we can read the GPU timestamp register from the CPU */
2421    uint64_t u64_ignore;
2422    device->has_reg_timestamp = intel_gem_read_render_timestamp(fd,
2423                                                                device->info.kmd_type,
2424                                                                &u64_ignore);
2425 
2426    device->uses_relocs = device->info.kmd_type != INTEL_KMD_TYPE_XE;
2427 
2428    /* While xe.ko can use both vm_bind and TR-TT, i915.ko only has TR-TT. */
2429    if (debug_get_bool_option("ANV_SPARSE", true)) {
2430       if (device->info.kmd_type == INTEL_KMD_TYPE_XE) {
2431          if (debug_get_bool_option("ANV_SPARSE_USE_TRTT", false))
2432             device->sparse_type = ANV_SPARSE_TYPE_TRTT;
2433          else
2434             device->sparse_type = ANV_SPARSE_TYPE_VM_BIND;
2435       } else {
2436          if (device->info.ver >= 12 && device->has_exec_timeline)
2437             device->sparse_type = ANV_SPARSE_TYPE_TRTT;
2438       }
2439    }
2440    if (device->sparse_type == ANV_SPARSE_TYPE_NOT_SUPPORTED) {
2441       if (instance->has_fake_sparse)
2442          device->sparse_type = ANV_SPARSE_TYPE_FAKE;
2443    }
2444 
2445    device->always_flush_cache = INTEL_DEBUG(DEBUG_STALL) ||
2446       driQueryOptionb(&instance->dri_options, "always_flush_cache");
2447 
2448    device->compiler = brw_compiler_create(NULL, &device->info);
2449    if (device->compiler == NULL) {
2450       result = vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY);
2451       goto fail_base;
2452    }
2453    device->compiler->shader_debug_log = compiler_debug_log;
2454    device->compiler->shader_perf_log = compiler_perf_log;
2455    device->compiler->extended_bindless_surface_offset = device->uses_ex_bso;
2456    device->compiler->use_bindless_sampler_offset = false;
2457    device->compiler->spilling_rate =
2458       driQueryOptioni(&instance->dri_options, "shader_spilling_rate");
2459 
2460    isl_device_init(&device->isl_dev, &device->info);
2461    device->isl_dev.buffer_length_in_aux_addr = !intel_needs_workaround(device->isl_dev.info, 14019708328);
2462    device->isl_dev.sampler_route_to_lsc =
2463       driQueryOptionb(&instance->dri_options, "intel_sampler_route_to_lsc");
2464 
2465    result = anv_physical_device_init_uuids(device);
2466    if (result != VK_SUCCESS)
2467       goto fail_compiler;
2468 
2469    anv_physical_device_init_va_ranges(device);
2470 
2471    anv_physical_device_init_disk_cache(device);
2472 
2473    if (instance->vk.enabled_extensions.KHR_display) {
2474       master_fd = open(primary_path, O_RDWR | O_CLOEXEC);
2475       if (master_fd >= 0) {
2476          /* fail if we don't have permission to even render on this device */
2477          if (!intel_gem_can_render_on_fd(master_fd, device->info.kmd_type)) {
2478             close(master_fd);
2479             master_fd = -1;
2480          }
2481       }
2482    }
2483    device->master_fd = master_fd;
2484 
2485    device->engine_info = intel_engine_get_info(fd, device->info.kmd_type);
2486    intel_common_update_device_info(fd, &device->info);
2487 
2488    anv_physical_device_init_queue_families(device);
2489 
2490    anv_physical_device_init_perf(device, fd);
2491 
2492    /* Gather major/minor before WSI. */
2493    struct stat st;
2494 
2495    if (stat(primary_path, &st) == 0) {
2496       device->has_master = true;
2497       device->master_major = major(st.st_rdev);
2498       device->master_minor = minor(st.st_rdev);
2499    } else {
2500       device->has_master = false;
2501       device->master_major = 0;
2502       device->master_minor = 0;
2503    }
2504 
2505    if (stat(path, &st) == 0) {
2506       device->has_local = true;
2507       device->local_major = major(st.st_rdev);
2508       device->local_minor = minor(st.st_rdev);
2509    } else {
2510       device->has_local = false;
2511       device->local_major = 0;
2512       device->local_minor = 0;
2513    }
2514 
2515    get_device_extensions(device, &device->vk.supported_extensions);
2516    get_features(device, &device->vk.supported_features);
2517    get_properties(device, &device->vk.properties);
2518 
2519    result = anv_init_wsi(device);
2520    if (result != VK_SUCCESS)
2521       goto fail_perf;
2522 
2523    anv_measure_device_init(device);
2524 
2525    anv_genX(&device->info, init_physical_device_state)(device);
2526 
2527    *out = &device->vk;
2528 
2529    return VK_SUCCESS;
2530 
2531 fail_perf:
2532    intel_perf_free(device->perf);
2533    free(device->engine_info);
2534    anv_physical_device_free_disk_cache(device);
2535 fail_compiler:
2536    ralloc_free(device->compiler);
2537 fail_base:
2538    vk_physical_device_finish(&device->vk);
2539 fail_alloc:
2540    vk_free(&instance->vk.alloc, device);
2541 fail_fd:
2542    close(fd);
2543    if (master_fd != -1)
2544       close(master_fd);
2545    return result;
2546 }
2547 
2548 void
anv_physical_device_destroy(struct vk_physical_device * vk_device)2549 anv_physical_device_destroy(struct vk_physical_device *vk_device)
2550 {
2551    struct anv_physical_device *device =
2552       container_of(vk_device, struct anv_physical_device, vk);
2553 
2554    anv_finish_wsi(device);
2555    anv_measure_device_destroy(device);
2556    free(device->engine_info);
2557    anv_physical_device_free_disk_cache(device);
2558    ralloc_free(device->compiler);
2559    intel_perf_free(device->perf);
2560    close(device->local_fd);
2561    if (device->master_fd >= 0)
2562       close(device->master_fd);
2563    vk_physical_device_finish(&device->vk);
2564    vk_free(&device->instance->vk.alloc, device);
2565 }
2566 
2567 static const VkQueueFamilyProperties
get_anv_queue_family_properties_template(const struct anv_physical_device * device)2568 get_anv_queue_family_properties_template(const struct anv_physical_device *device)
2569 {
2570 
2571    /*
2572     * For Xe2+:
2573     * Bspec 60411: Timestamp register can hold 64-bit value
2574     *
2575     * Platforms < Xe2:
2576     * Bpsec 46111: Timestamp register can hold only 36-bit
2577     *              value
2578     */
2579    const VkQueueFamilyProperties anv_queue_family_properties_template =
2580    {
2581       .timestampValidBits = device->info.ver >= 20 ? 64 : 36,
2582       .minImageTransferGranularity = { 1, 1, 1 },
2583    };
2584 
2585    return anv_queue_family_properties_template;
2586 }
2587 
2588 static VkQueueFamilyProperties
anv_device_physical_get_queue_properties(const struct anv_physical_device * device,uint32_t family_index)2589 anv_device_physical_get_queue_properties(const struct anv_physical_device *device,
2590                                          uint32_t family_index)
2591 {
2592    const struct anv_queue_family *family = &device->queue.families[family_index];
2593    VkQueueFamilyProperties properties =
2594       get_anv_queue_family_properties_template(device);
2595 
2596    properties.queueFlags = family->queueFlags;
2597    properties.queueCount = family->queueCount;
2598    return properties;
2599 }
2600 
/* vkGetPhysicalDeviceQueueFamilyProperties2: report the queue families
 * computed in anv_physical_device_init_queue_families(), plus the
 * extension structs chained on pNext.
 */
void anv_GetPhysicalDeviceQueueFamilyProperties2(
    VkPhysicalDevice                            physicalDevice,
    uint32_t*                                   pQueueFamilyPropertyCount,
    VkQueueFamilyProperties2*                   pQueueFamilyProperties)
{
   ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice);
   VK_OUTARRAY_MAKE_TYPED(VkQueueFamilyProperties2, out,
                          pQueueFamilyProperties, pQueueFamilyPropertyCount);

   for (uint32_t i = 0; i < pdevice->queue.family_count; i++) {
      struct anv_queue_family *queue_family = &pdevice->queue.families[i];
      vk_outarray_append_typed(VkQueueFamilyProperties2, &out, p) {
         p->queueFamilyProperties =
            anv_device_physical_get_queue_properties(pdevice, i);

         vk_foreach_struct(ext, p->pNext) {
            switch (ext->sType) {
            case VK_STRUCTURE_TYPE_QUEUE_FAMILY_GLOBAL_PRIORITY_PROPERTIES_KHR: {
               VkQueueFamilyGlobalPriorityPropertiesKHR *properties =
                  (VkQueueFamilyGlobalPriorityPropertiesKHR *)ext;

               /* Deliberately sorted low to high */
               const VkQueueGlobalPriorityKHR all_priorities[] = {
                  VK_QUEUE_GLOBAL_PRIORITY_LOW_KHR,
                  VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR,
                  VK_QUEUE_GLOBAL_PRIORITY_HIGH_KHR,
                  VK_QUEUE_GLOBAL_PRIORITY_REALTIME_KHR,
               };

               /* Use a distinct loop index: the original shadowed the outer
                * family loop's 'i', which is error-prone.
                */
               uint32_t count = 0;
               for (unsigned pri = 0; pri < ARRAY_SIZE(all_priorities); pri++) {
                  if (all_priorities[pri] > pdevice->max_context_priority)
                     break;

                  properties->priorities[count++] = all_priorities[pri];
               }
               properties->priorityCount = count;
               break;
            }
            case VK_STRUCTURE_TYPE_QUEUE_FAMILY_QUERY_RESULT_STATUS_PROPERTIES_KHR: {
               VkQueueFamilyQueryResultStatusPropertiesKHR *prop =
                  (VkQueueFamilyQueryResultStatusPropertiesKHR *)ext;
               prop->queryResultStatusSupport = VK_TRUE;
               break;
            }
            case VK_STRUCTURE_TYPE_QUEUE_FAMILY_VIDEO_PROPERTIES_KHR: {
               VkQueueFamilyVideoPropertiesKHR *prop =
                  (VkQueueFamilyVideoPropertiesKHR *)ext;

               /* BUGFIX: build the flags in a zero-initialized local and
                * always assign them.  The original only wrote the field in
                * the decode case and OR'd in the encode case, so an
                * encode-only family OR'd into an uninitialized output field
                * and non-video families left it unwritten.
                */
               VkVideoCodecOperationFlagsKHR ops = 0;
               if (queue_family->queueFlags & VK_QUEUE_VIDEO_DECODE_BIT_KHR) {
                  ops |= VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR |
                         VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR;
               }

               if (queue_family->queueFlags & VK_QUEUE_VIDEO_ENCODE_BIT_KHR) {
                  ops |= VK_VIDEO_CODEC_OPERATION_ENCODE_H264_BIT_KHR |
                         VK_VIDEO_CODEC_OPERATION_ENCODE_H265_BIT_KHR;
               }
               prop->videoCodecOperations = ops;
               break;
            }
            default:
               vk_debug_ignored_stype(ext->sType);
            }
         }
      }
   }
}
2667 
/* vkGetPhysicalDeviceMemoryProperties: copy the memory types and heaps
 * computed at physical-device init into the Vulkan output structure.
 */
void anv_GetPhysicalDeviceMemoryProperties(
    VkPhysicalDevice                            physicalDevice,
    VkPhysicalDeviceMemoryProperties*           pMemoryProperties)
{
   ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice);

   const uint32_t type_count = physical_device->memory.type_count;
   const uint32_t heap_count = physical_device->memory.heap_count;

   pMemoryProperties->memoryTypeCount = type_count;
   for (uint32_t t = 0; t < type_count; t++) {
      const struct anv_memory_type *type = &physical_device->memory.types[t];
      pMemoryProperties->memoryTypes[t] = (VkMemoryType) {
         .propertyFlags = type->propertyFlags,
         .heapIndex     = type->heapIndex,
      };
   }

   pMemoryProperties->memoryHeapCount = heap_count;
   for (uint32_t h = 0; h < heap_count; h++) {
      const struct anv_memory_heap *heap = &physical_device->memory.heaps[h];
      pMemoryProperties->memoryHeaps[h] = (VkMemoryHeap) {
         .size    = heap->size,
         .flags   = heap->flags,
      };
   }
}
2690 
/* Fill in VkPhysicalDeviceMemoryBudgetPropertiesEXT for every heap.
 *
 * Each heap's budget is its current usage plus a proportional share of the
 * memory currently available in its backing store (VRAM or system RAM),
 * capped at the heap size and scaled back to 90% to avoid starving the
 * system.  Elements past memoryHeapCount are zeroed as the spec requires.
 */
static void
anv_get_memory_budget(VkPhysicalDevice physicalDevice,
                      VkPhysicalDeviceMemoryBudgetPropertiesEXT *memoryBudget)
{
   ANV_FROM_HANDLE(anv_physical_device, device, physicalDevice);

   /* Nothing to report if the extension isn't supported on this device. */
   if (!device->vk.supported_extensions.EXT_memory_budget)
      return;

   /* Refresh the cached vram/sys availability numbers from the kernel. */
   anv_update_meminfo(device, device->local_fd);

   /* First pass: total heap sizes per backing store, so each heap's share
    * of the available memory can be computed proportionally below.
    */
   VkDeviceSize total_sys_heaps_size = 0, total_vram_heaps_size = 0;
   for (size_t i = 0; i < device->memory.heap_count; i++) {
      if (device->memory.heaps[i].is_local_mem) {
         total_vram_heaps_size += device->memory.heaps[i].size;
      } else {
         total_sys_heaps_size += device->memory.heaps[i].size;
      }
   }

   /* Second pass: compute per-heap usage and budget. */
   for (size_t i = 0; i < device->memory.heap_count; i++) {
      VkDeviceSize heap_size = device->memory.heaps[i].size;
      VkDeviceSize heap_used = device->memory.heaps[i].used;
      VkDeviceSize heap_budget, total_heaps_size;
      uint64_t mem_available = 0;

      if (device->memory.heaps[i].is_local_mem) {
         total_heaps_size = total_vram_heaps_size;
         /* When a non-mappable VRAM region exists, heap 0 is backed by it;
          * other VRAM heaps use the mappable region.
          */
         if (device->vram_non_mappable.size > 0 && i == 0) {
            mem_available = device->vram_non_mappable.available;
         } else {
            mem_available = device->vram_mappable.available;
         }
      } else {
         total_heaps_size = total_sys_heaps_size;
         mem_available = MIN2(device->sys.available, total_heaps_size);
      }

      /* This heap's proportional slice of what's still available. */
      double heap_proportion = (double) heap_size / total_heaps_size;
      VkDeviceSize available_prop = mem_available * heap_proportion;

      /*
       * Let's not incite the app to starve the system: report at most 90% of
       * the available heap memory.
       */
      uint64_t heap_available = available_prop * 9 / 10;
      heap_budget = MIN2(heap_size, heap_used + heap_available);

      /*
       * Round down to the nearest MB
       */
      heap_budget &= ~((1ull << 20) - 1);

      /*
       * The heapBudget value must be non-zero for array elements less than
       * VkPhysicalDeviceMemoryProperties::memoryHeapCount. The heapBudget
       * value must be less than or equal to VkMemoryHeap::size for each heap.
       */
      assert(0 < heap_budget && heap_budget <= heap_size);

      memoryBudget->heapUsage[i] = heap_used;
      memoryBudget->heapBudget[i] = heap_budget;
   }

   /* The heapBudget and heapUsage values must be zero for array elements
    * greater than or equal to VkPhysicalDeviceMemoryProperties::memoryHeapCount
    */
   for (uint32_t i = device->memory.heap_count; i < VK_MAX_MEMORY_HEAPS; i++) {
      memoryBudget->heapBudget[i] = 0;
      memoryBudget->heapUsage[i] = 0;
   }
}
2763 
void anv_GetPhysicalDeviceMemoryProperties2(
    VkPhysicalDevice                            physicalDevice,
    VkPhysicalDeviceMemoryProperties2*          pMemoryProperties)
{
   /* Core memory properties first, then walk the pNext chain for any
    * extension structs we know how to fill.
    */
   anv_GetPhysicalDeviceMemoryProperties(physicalDevice,
                                         &pMemoryProperties->memoryProperties);

   vk_foreach_struct(ext, pMemoryProperties->pNext) {
      if (ext->sType ==
          VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT) {
         anv_get_memory_budget(physicalDevice,
                               (VkPhysicalDeviceMemoryBudgetPropertiesEXT *)ext);
      } else {
         vk_debug_ignored_stype(ext->sType);
      }
   }
}
2782 
void anv_GetPhysicalDeviceMultisamplePropertiesEXT(
    VkPhysicalDevice                            physicalDevice,
    VkSampleCountFlagBits                       samples,
    VkMultisamplePropertiesEXT*                 pMultisampleProperties)
{
   ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice);

   assert(pMultisampleProperties->sType ==
          VK_STRUCTURE_TYPE_MULTISAMPLE_PROPERTIES_EXT);

   /* A supported sample count gets a 1x1 sample-location grid; an
    * unsupported one gets 0x0.
    */
   const bool count_supported =
      (samples & isl_device_get_sample_counts(&physical_device->isl_dev)) != 0;
   const uint32_t dim = count_supported ? 1 : 0;
   pMultisampleProperties->maxSampleLocationGridSize =
      (VkExtent2D) { .width = dim, .height = dim };

   vk_foreach_struct(ext, pMultisampleProperties->pNext)
      vk_debug_ignored_stype(ext->sType);
}
2806 
/* Enumerate the fragment shading rates (coarse pixel sizes) the device
 * supports, from the largest (4x4) down to 1x1, together with the sample
 * counts valid for each size.  Uses the vk_outarray two-call idiom to
 * either count or fill pFragmentShadingRates.
 */
VkResult anv_GetPhysicalDeviceFragmentShadingRatesKHR(
    VkPhysicalDevice                            physicalDevice,
    uint32_t*                                   pFragmentShadingRateCount,
    VkPhysicalDeviceFragmentShadingRateKHR*     pFragmentShadingRates)
{
   ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice);
   VK_OUTARRAY_MAKE_TYPED(VkPhysicalDeviceFragmentShadingRateKHR, out,
                          pFragmentShadingRates, pFragmentShadingRateCount);

/* Append one {sampleCounts, width x height} entry to the out-array. */
#define append_rate(_samples, _width, _height)                                      \
   do {                                                                             \
      vk_outarray_append_typed(VkPhysicalDeviceFragmentShadingRateKHR, &out, __r) { \
         __r->sampleCounts = _samples;                                              \
         __r->fragmentSize = (VkExtent2D) {                                         \
            .width = _width,                                                        \
            .height = _height,                                                      \
         };                                                                         \
      }                                                                             \
   } while (0)

   VkSampleCountFlags sample_counts =
      isl_device_get_sample_counts(&physical_device->isl_dev);

   /* BSpec 47003: There are a number of restrictions on the sample count
    * based off the coarse pixel size.
    */
   static const VkSampleCountFlags cp_size_sample_limits[] = {
      [1]  = ISL_SAMPLE_COUNT_16_BIT | ISL_SAMPLE_COUNT_8_BIT |
             ISL_SAMPLE_COUNT_4_BIT | ISL_SAMPLE_COUNT_2_BIT | ISL_SAMPLE_COUNT_1_BIT,
      [2]  = ISL_SAMPLE_COUNT_4_BIT | ISL_SAMPLE_COUNT_2_BIT | ISL_SAMPLE_COUNT_1_BIT,
      [4]  = ISL_SAMPLE_COUNT_4_BIT | ISL_SAMPLE_COUNT_2_BIT | ISL_SAMPLE_COUNT_1_BIT,
      [8]  = ISL_SAMPLE_COUNT_2_BIT | ISL_SAMPLE_COUNT_1_BIT,
      [16] = ISL_SAMPLE_COUNT_1_BIT,
   };

   /* Walk sizes 4, 2, 1 in each dimension (x/=2 reaches 0 and exits after
    * the x==1 iteration), emitting entries from largest to smallest.
    */
   for (uint32_t x = 4; x >= 1; x /= 2) {
       for (uint32_t y = 4; y >= 1; y /= 2) {
          if (physical_device->info.has_coarse_pixel_primitive_and_cb) {
             /* BSpec 47003:
              *   "CPsize 1x4 and 4x1 are not supported"
              */
             if ((x == 1 && y == 4) || (x == 4 && y == 1))
                continue;

             /* For size {1, 1}, the sample count must be ~0
              *
              * 4x2 is also a special case.
              */
             if (x == 1 && y == 1)
                append_rate(~0, x, y);
             else if (x == 4 && y == 2)
                append_rate(ISL_SAMPLE_COUNT_1_BIT, x, y);
             else
                append_rate(cp_size_sample_limits[x * y], x, y);
          } else {
             /* For size {1, 1}, the sample count must be ~0 */
             if (x == 1 && y == 1)
                append_rate(~0, x, y);
             else
                append_rate(sample_counts, x, y);
          }
       }
   }

#undef append_rate

   return vk_outarray_status(&out);
}
2875 
2876 static VkComponentTypeKHR
convert_component_type(enum intel_cooperative_matrix_component_type t)2877 convert_component_type(enum intel_cooperative_matrix_component_type t)
2878 {
2879    switch (t) {
2880    case INTEL_CMAT_FLOAT16: return VK_COMPONENT_TYPE_FLOAT16_KHR;
2881    case INTEL_CMAT_FLOAT32: return VK_COMPONENT_TYPE_FLOAT32_KHR;
2882    case INTEL_CMAT_SINT32:  return VK_COMPONENT_TYPE_SINT32_KHR;
2883    case INTEL_CMAT_SINT8:   return VK_COMPONENT_TYPE_SINT8_KHR;
2884    case INTEL_CMAT_UINT32:  return VK_COMPONENT_TYPE_UINT32_KHR;
2885    case INTEL_CMAT_UINT8:   return VK_COMPONENT_TYPE_UINT8_KHR;
2886    }
2887    unreachable("invalid cooperative matrix component type in configuration");
2888 }
2889 
2890 static VkScopeKHR
convert_scope(enum intel_cmat_scope scope)2891 convert_scope(enum intel_cmat_scope scope)
2892 {
2893    switch (scope) {
2894    case INTEL_CMAT_SCOPE_SUBGROUP: return VK_SCOPE_SUBGROUP_KHR;
2895    default:
2896       unreachable("invalid cooperative matrix scope in configuration");
2897    }
2898 }
2899 
anv_GetPhysicalDeviceCooperativeMatrixPropertiesKHR(VkPhysicalDevice physicalDevice,uint32_t * pPropertyCount,VkCooperativeMatrixPropertiesKHR * pProperties)2900 VkResult anv_GetPhysicalDeviceCooperativeMatrixPropertiesKHR(
2901    VkPhysicalDevice                            physicalDevice,
2902    uint32_t*                                   pPropertyCount,
2903    VkCooperativeMatrixPropertiesKHR*           pProperties)
2904 {
2905    ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice);
2906    const struct intel_device_info *devinfo = &pdevice->info;
2907 
2908    assert(anv_has_cooperative_matrix(pdevice));
2909 
2910    VK_OUTARRAY_MAKE_TYPED(VkCooperativeMatrixPropertiesKHR, out, pProperties, pPropertyCount);
2911 
2912    for (int i = 0; i < ARRAY_SIZE(devinfo->cooperative_matrix_configurations); i++) {
2913       const struct intel_cooperative_matrix_configuration *cfg =
2914          &devinfo->cooperative_matrix_configurations[i];
2915 
2916       if (cfg->scope == INTEL_CMAT_SCOPE_NONE)
2917          break;
2918 
2919       vk_outarray_append_typed(VkCooperativeMatrixPropertiesKHR, &out, prop) {
2920          prop->sType = VK_STRUCTURE_TYPE_COOPERATIVE_MATRIX_PROPERTIES_KHR;
2921 
2922          prop->MSize = cfg->m;
2923          prop->NSize = cfg->n;
2924          prop->KSize = cfg->k;
2925 
2926          prop->AType      = convert_component_type(cfg->a);
2927          prop->BType      = convert_component_type(cfg->b);
2928          prop->CType      = convert_component_type(cfg->c);
2929          prop->ResultType = convert_component_type(cfg->result);
2930 
2931          prop->saturatingAccumulation = VK_FALSE;
2932          prop->scope = convert_scope(cfg->scope);
2933       }
2934 
2935       /* VUID-RuntimeSpirv-saturatingAccumulation-08983 says:
2936        *
2937        *    For OpCooperativeMatrixMulAddKHR, the SaturatingAccumulation
2938        *    cooperative matrix operand must be present if and only if
2939        *    VkCooperativeMatrixPropertiesKHR::saturatingAccumulation is
2940        *    VK_TRUE.
2941        *
2942        * As a result, we have to advertise integer configs both with and
2943        * without this flag set.
2944        *
2945        * The DPAS instruction does not support the .sat modifier, so only
2946        * advertise the configurations when the DPAS would be lowered.
2947        *
2948        * FINISHME: It should be possible to do better than full lowering on
2949        * platforms that support DPAS. Emit a DPAS with a NULL accumulator
2950        * argument, then perform the correct sequence of saturating add
2951        * instructions.
2952        */
2953       if (cfg->a != INTEL_CMAT_FLOAT16 &&
2954           (devinfo->verx10 < 125 || debug_get_bool_option("INTEL_LOWER_DPAS", false))) {
2955          vk_outarray_append_typed(VkCooperativeMatrixPropertiesKHR, &out, prop) {
2956             prop->sType = VK_STRUCTURE_TYPE_COOPERATIVE_MATRIX_PROPERTIES_KHR;
2957 
2958             prop->MSize = cfg->m;
2959             prop->NSize = cfg->n;
2960             prop->KSize = cfg->k;
2961 
2962             prop->AType      = convert_component_type(cfg->a);
2963             prop->BType      = convert_component_type(cfg->b);
2964             prop->CType      = convert_component_type(cfg->c);
2965             prop->ResultType = convert_component_type(cfg->result);
2966 
2967             prop->saturatingAccumulation = VK_TRUE;
2968             prop->scope = convert_scope(cfg->scope);
2969          }
2970       }
2971    }
2972 
2973    return vk_outarray_status(&out);
2974 }
2975 
/* Time domains the driver can calibrate against each other; the raw
 * monotonic clock is only advertised where the platform defines
 * CLOCK_MONOTONIC_RAW.
 */
static const VkTimeDomainKHR anv_time_domains[] = {
   VK_TIME_DOMAIN_DEVICE_KHR,
   VK_TIME_DOMAIN_CLOCK_MONOTONIC_KHR,
#ifdef CLOCK_MONOTONIC_RAW
   VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_KHR,
#endif
};
2983 
anv_GetPhysicalDeviceCalibrateableTimeDomainsKHR(VkPhysicalDevice physicalDevice,uint32_t * pTimeDomainCount,VkTimeDomainKHR * pTimeDomains)2984 VkResult anv_GetPhysicalDeviceCalibrateableTimeDomainsKHR(
2985    VkPhysicalDevice                             physicalDevice,
2986    uint32_t                                     *pTimeDomainCount,
2987    VkTimeDomainKHR                              *pTimeDomains)
2988 {
2989    int d;
2990    VK_OUTARRAY_MAKE_TYPED(VkTimeDomainKHR, out, pTimeDomains, pTimeDomainCount);
2991 
2992    for (d = 0; d < ARRAY_SIZE(anv_time_domains); d++) {
2993       vk_outarray_append_typed(VkTimeDomainKHR, &out, i) {
2994          *i = anv_time_domains[d];
2995       }
2996    }
2997 
2998    return vk_outarray_status(&out);
2999 }
3000