/* Copyright © 2024 Intel Corporation
 * SPDX-License-Identifier: MIT
 */

#include "anv_private.h"
#include "anv_api_version.h"
#include "anv_measure.h"

#include "i915/anv_device.h"
#include "xe/anv_device.h"

#include "common/intel_common.h"
#include "common/intel_uuid.h"

#include "perf/intel_perf.h"

#include "git_sha1.h"

#include "util/disk_cache.h"
#include "util/mesa-sha1.h"

#include <xf86drm.h>
#include <fcntl.h>
#ifdef MAJOR_IN_SYSMACROS
#include <sys/sysmacros.h>
#endif

/* This is probably far too big, but it reflects the max size used for
 * messages in OpenGL's KHR_debug.
 */
#define MAX_DEBUG_MESSAGE_LENGTH 4096

static void
compiler_debug_log(void *data, UNUSED unsigned *id, const char *fmt, ...)
{
   char str[MAX_DEBUG_MESSAGE_LENGTH];
   struct anv_device *device = (struct anv_device *)data;
   UNUSED struct anv_instance *instance = device->physical->instance;

   va_list args;
   va_start(args, fmt);
   (void) vsnprintf(str, MAX_DEBUG_MESSAGE_LENGTH, fmt, args);
   va_end(args);

   //vk_logd(VK_LOG_NO_OBJS(&instance->vk), "%s", str);
}

static void
compiler_perf_log(UNUSED void *data, UNUSED unsigned *id, const char *fmt, ...)
{
   va_list args;
   va_start(args, fmt);

   if (INTEL_DEBUG(DEBUG_PERF))
      mesa_logd_v(fmt, args);

   va_end(args);
}
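
/* A minimal sketch of how these callbacks are typically wired into the
 * backend compiler at physical-device creation time (assuming the usual
 * brw_compiler fields; the creation code itself is outside this section):
 *
 *    device->compiler = brw_compiler_create(NULL, &device->info);
 *    device->compiler->shader_debug_log = compiler_debug_log;
 *    device->compiler->shader_perf_log = compiler_perf_log;
 */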

static void
get_device_extensions(const struct anv_physical_device *device,
                      struct vk_device_extension_table *ext)
{
   const bool has_syncobj_wait =
      (device->sync_syncobj_type.features & VK_SYNC_FEATURE_CPU_WAIT) != 0;

   const bool rt_enabled = ANV_SUPPORT_RT && device->info.has_ray_tracing;

   *ext = (struct vk_device_extension_table) {
      .KHR_8bit_storage = true,
      .KHR_16bit_storage = !device->instance->no_16bit,
      .KHR_acceleration_structure = rt_enabled,
      .KHR_bind_memory2 = true,
      .KHR_buffer_device_address = true,
      .KHR_calibrated_timestamps = device->has_reg_timestamp,
      .KHR_compute_shader_derivatives = true,
      .KHR_copy_commands2 = true,
      .KHR_cooperative_matrix = anv_has_cooperative_matrix(device),
      .KHR_create_renderpass2 = true,
      .KHR_dedicated_allocation = true,
      .KHR_deferred_host_operations = true,
      .KHR_depth_stencil_resolve = true,
      .KHR_descriptor_update_template = true,
      .KHR_device_group = true,
      .KHR_draw_indirect_count = true,
      .KHR_driver_properties = true,
      .KHR_dynamic_rendering = true,
      .KHR_external_fence = has_syncobj_wait,
      .KHR_external_fence_fd = has_syncobj_wait,
      .KHR_external_memory = true,
      .KHR_external_memory_fd = true,
      .KHR_external_semaphore = true,
      .KHR_external_semaphore_fd = true,
      .KHR_format_feature_flags2 = true,
      .KHR_fragment_shading_rate = device->info.ver >= 11,
      .KHR_get_memory_requirements2 = true,
      .KHR_global_priority = device->max_context_priority >=
                             VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR,
      .KHR_image_format_list = true,
      .KHR_imageless_framebuffer = true,
#ifdef ANV_USE_WSI_PLATFORM
      .KHR_incremental_present = true,
#endif
      .KHR_index_type_uint8 = true,
      .KHR_line_rasterization = true,
      .KHR_load_store_op_none = true,
      .KHR_maintenance1 = true,
      .KHR_maintenance2 = true,
      .KHR_maintenance3 = true,
      .KHR_maintenance4 = true,
      .KHR_maintenance5 = true,
      .KHR_maintenance6 = true,
      .KHR_maintenance7 = true,
      .KHR_map_memory2 = true,
      .KHR_multiview = true,
      .KHR_performance_query =
         device->perf &&
         (intel_perf_has_hold_preemption(device->perf) ||
          INTEL_DEBUG(DEBUG_NO_OACONFIG)) &&
         device->use_call_secondary,
      .KHR_pipeline_executable_properties = true,
      .KHR_pipeline_library = true,
      /* Hide these behind dri configs for now since we cannot implement them
       * reliably on all surfaces yet. There is no surface capability query
       * for present wait/id, but the feature is useful enough to hide behind
       * an opt-in mechanism for now. If the instance only enables surface
       * extensions that unconditionally support present wait, we can also
       * expose the extension that way.
       */
      .KHR_present_id =
         driQueryOptionb(&device->instance->dri_options, "vk_khr_present_wait") ||
         wsi_common_vk_instance_supports_present_wait(&device->instance->vk),
      .KHR_present_wait =
         driQueryOptionb(&device->instance->dri_options, "vk_khr_present_wait") ||
         wsi_common_vk_instance_supports_present_wait(&device->instance->vk),
      .KHR_push_descriptor = true,
      .KHR_ray_query = rt_enabled,
      .KHR_ray_tracing_maintenance1 = rt_enabled,
      .KHR_ray_tracing_pipeline = rt_enabled,
      .KHR_ray_tracing_position_fetch = rt_enabled,
      .KHR_relaxed_block_layout = true,
      .KHR_sampler_mirror_clamp_to_edge = true,
      .KHR_sampler_ycbcr_conversion = true,
      .KHR_separate_depth_stencil_layouts = true,
      .KHR_shader_atomic_int64 = true,
      .KHR_shader_clock = true,
      .KHR_shader_draw_parameters = true,
      .KHR_shader_expect_assume = true,
      .KHR_shader_float16_int8 = !device->instance->no_16bit,
      .KHR_shader_float_controls = true,
      .KHR_shader_float_controls2 = true,
      .KHR_shader_integer_dot_product = true,
      .KHR_shader_maximal_reconvergence = true,
      .KHR_shader_non_semantic_info = true,
      .KHR_shader_quad_control = true,
      .KHR_shader_relaxed_extended_instruction = true,
      .KHR_shader_subgroup_extended_types = true,
      .KHR_shader_subgroup_rotate = true,
      .KHR_shader_subgroup_uniform_control_flow = true,
      .KHR_shader_terminate_invocation = true,
      .KHR_spirv_1_4 = true,
      .KHR_storage_buffer_storage_class = true,
#ifdef ANV_USE_WSI_PLATFORM
      .KHR_swapchain = true,
      .KHR_swapchain_mutable_format = true,
#endif
      .KHR_synchronization2 = true,
      .KHR_timeline_semaphore = true,
      .KHR_uniform_buffer_standard_layout = true,
      .KHR_variable_pointers = true,
      .KHR_vertex_attribute_divisor = true,
      .KHR_video_queue = device->video_decode_enabled || device->video_encode_enabled,
      .KHR_video_decode_queue = device->video_decode_enabled,
      .KHR_video_decode_h264 = VIDEO_CODEC_H264DEC && device->video_decode_enabled,
      .KHR_video_decode_h265 = VIDEO_CODEC_H265DEC && device->video_decode_enabled,
      .KHR_video_encode_queue = device->video_encode_enabled,
      .KHR_video_encode_h264 = VIDEO_CODEC_H264ENC && device->video_encode_enabled,
      .KHR_video_encode_h265 = device->info.ver >= 12 && VIDEO_CODEC_H265ENC &&
                               device->video_encode_enabled,
      .KHR_vulkan_memory_model = true,
      .KHR_workgroup_memory_explicit_layout = true,
      .KHR_zero_initialize_workgroup_memory = true,
      .EXT_4444_formats = true,
      .EXT_attachment_feedback_loop_layout = true,
      .EXT_attachment_feedback_loop_dynamic_state = true,
      .EXT_border_color_swizzle = true,
      .EXT_buffer_device_address = true,
      .EXT_calibrated_timestamps = device->has_reg_timestamp,
      .EXT_color_write_enable = true,
      .EXT_conditional_rendering = true,
      .EXT_conservative_rasterization = true,
      .EXT_custom_border_color = true,
      .EXT_depth_bias_control = true,
      .EXT_depth_clamp_zero_one = true,
      .EXT_depth_clip_control = true,
      .EXT_depth_range_unrestricted = device->info.ver >= 20,
      .EXT_depth_clip_enable = true,
      .EXT_descriptor_buffer = true,
      .EXT_descriptor_indexing = true,
#ifdef VK_USE_PLATFORM_DISPLAY_KHR
      .EXT_display_control = true,
#endif
      .EXT_dynamic_rendering_unused_attachments = true,
      .EXT_extended_dynamic_state = true,
      .EXT_extended_dynamic_state2 = true,
      .EXT_extended_dynamic_state3 = true,
      .EXT_external_memory_dma_buf = true,
      .EXT_external_memory_host = true,
      .EXT_fragment_shader_interlock = true,
      .EXT_global_priority = device->max_context_priority >=
                             VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR,
      .EXT_global_priority_query = device->max_context_priority >=
                                   VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR,
      .EXT_graphics_pipeline_library = !debug_get_bool_option("ANV_NO_GPL", false),
      .EXT_host_query_reset = true,
      .EXT_image_2d_view_of_3d = true,
      /* Compression on Xe2 is selected via PAT entries, and the Vulkan spec
       * requires that images with the same properties always report the same
       * memory types, so we can't support EXT_image_compression_control on
       * Xe2+.
       */
      .EXT_image_compression_control = device->instance->compression_control_enabled &&
                                       device->info.ver < 20,
      .EXT_image_robustness = true,
      .EXT_image_drm_format_modifier = true,
      .EXT_image_sliced_view_of_3d = true,
      .EXT_image_view_min_lod = true,
      .EXT_index_type_uint8 = true,
      .EXT_inline_uniform_block = true,
      .EXT_legacy_dithering = true,
      .EXT_legacy_vertex_attributes = true,
      .EXT_line_rasterization = true,
      .EXT_load_store_op_none = true,
      .EXT_map_memory_placed = device->info.has_mmap_offset,
      /* Enable the extension only if we have support on both local & system
       * memory.
       */
      .EXT_memory_budget = (!device->info.has_local_mem ||
                            device->vram_mappable.available > 0) &&
                           device->sys.available,
      .EXT_mesh_shader = device->info.has_mesh_shading,
      .EXT_mutable_descriptor_type = true,
      .EXT_nested_command_buffer = true,
      .EXT_non_seamless_cube_map = true,
      .EXT_pci_bus_info = true,
      .EXT_physical_device_drm = true,
      .EXT_pipeline_creation_cache_control = true,
      .EXT_pipeline_creation_feedback = true,
      .EXT_pipeline_library_group_handles = rt_enabled,
      .EXT_pipeline_robustness = true,
      .EXT_post_depth_coverage = true,
      .EXT_primitives_generated_query = true,
      .EXT_primitive_topology_list_restart = true,
      .EXT_private_data = true,
      .EXT_provoking_vertex = true,
      .EXT_queue_family_foreign = true,
      .EXT_robustness2 = true,
      .EXT_sample_locations = true,
      .EXT_sampler_filter_minmax = true,
      .EXT_scalar_block_layout = true,
      .EXT_separate_stencil_usage = true,
      .EXT_shader_atomic_float = true,
      .EXT_shader_atomic_float2 = true,
      .EXT_shader_demote_to_helper_invocation = true,
      .EXT_shader_module_identifier = true,
      .EXT_shader_replicated_composites = true,
      .EXT_shader_stencil_export = true,
      .EXT_shader_subgroup_ballot = true,
      .EXT_shader_subgroup_vote = true,
      .EXT_shader_viewport_index_layer = true,
      .EXT_subgroup_size_control = true,
#ifdef ANV_USE_WSI_PLATFORM
      .EXT_swapchain_maintenance1 = true,
#endif
      .EXT_texel_buffer_alignment = true,
      .EXT_tooling_info = true,
      .EXT_transform_feedback = true,
      .EXT_vertex_attribute_divisor = true,
      .EXT_vertex_input_dynamic_state = true,
      .EXT_ycbcr_image_arrays = true,
      .AMD_buffer_marker = true,
      .AMD_texture_gather_bias_lod = device->info.ver >= 20,
#if DETECT_OS_ANDROID
      .ANDROID_external_memory_android_hardware_buffer = true,
      .ANDROID_native_buffer = true,
#endif
      .GOOGLE_decorate_string = true,
      .GOOGLE_hlsl_functionality1 = true,
      .GOOGLE_user_type = true,
      .INTEL_performance_query = device->perf &&
                                 intel_perf_has_hold_preemption(device->perf),
      .INTEL_shader_integer_functions2 = true,
      .EXT_multi_draw = true,
      .NV_compute_shader_derivatives = true,
      .MESA_image_alignment_control = true,
      .VALVE_mutable_descriptor_type = true,
   };
}
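
/* For context, a rough sketch of how the table built above is consumed
 * during physical-device setup (hand-waving the exact vk_physical_device_init
 * signature, which varies across Mesa versions):
 *
 *    struct vk_device_extension_table supported_extensions;
 *    get_device_extensions(device, &supported_extensions);
 *    vk_physical_device_init(&device->vk, &instance->vk,
 *                            &supported_extensions, ...);
 */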

static void
get_features(const struct anv_physical_device *pdevice,
             struct vk_features *features)
{
   struct vk_app_info *app_info = &pdevice->instance->vk.app_info;

   const bool rt_enabled = ANV_SUPPORT_RT && pdevice->info.has_ray_tracing;

   const bool mesh_shader =
      pdevice->vk.supported_extensions.EXT_mesh_shader;

   const bool has_sparse_or_fake =
      pdevice->sparse_type != ANV_SPARSE_TYPE_NOT_SUPPORTED;

   *features = (struct vk_features) {
      /* Vulkan 1.0 */
      .robustBufferAccess = true,
      .fullDrawIndexUint32 = true,
      .imageCubeArray = true,
      .independentBlend = true,
      .geometryShader = true,
      .tessellationShader = true,
      .sampleRateShading = true,
      .dualSrcBlend = true,
      .logicOp = true,
      .multiDrawIndirect = true,
      .drawIndirectFirstInstance = true,
      .depthClamp = true,
      .depthBiasClamp = true,
      .fillModeNonSolid = true,
      .depthBounds = pdevice->info.ver >= 12,
      .wideLines = true,
      .largePoints = true,
      .alphaToOne = true,
      .multiViewport = true,
      .samplerAnisotropy = true,
      .textureCompressionETC2 = true,
      .textureCompressionASTC_LDR = pdevice->has_astc_ldr ||
                                    pdevice->emu_astc_ldr,
      .textureCompressionBC = true,
      .occlusionQueryPrecise = true,
      .pipelineStatisticsQuery = true,
      .vertexPipelineStoresAndAtomics = true,
      .fragmentStoresAndAtomics = true,
      .shaderTessellationAndGeometryPointSize = true,
      .shaderImageGatherExtended = true,
      .shaderStorageImageExtendedFormats = true,
      .shaderStorageImageMultisample = false,
      /* Gfx12.5 has all the required formats supported in HW for typed
       * reads/writes.
       */
      .shaderStorageImageReadWithoutFormat = pdevice->info.verx10 >= 125,
      .shaderStorageImageWriteWithoutFormat = true,
      .shaderUniformBufferArrayDynamicIndexing = true,
      .shaderSampledImageArrayDynamicIndexing = true,
      .shaderStorageBufferArrayDynamicIndexing = true,
      .shaderStorageImageArrayDynamicIndexing = true,
      .shaderClipDistance = true,
      .shaderCullDistance = true,
      .shaderFloat64 = pdevice->info.has_64bit_float ||
                       pdevice->instance->fp64_workaround_enabled,
      .shaderInt64 = true,
      .shaderInt16 = true,
      .shaderResourceMinLod = true,
      .shaderResourceResidency = has_sparse_or_fake,
      .sparseBinding = has_sparse_or_fake,
      .sparseResidencyAliased = has_sparse_or_fake,
      .sparseResidencyBuffer = has_sparse_or_fake,
      .sparseResidencyImage2D = has_sparse_or_fake,
      .sparseResidencyImage3D = has_sparse_or_fake,
      .sparseResidency2Samples = has_sparse_or_fake,
      .sparseResidency4Samples = has_sparse_or_fake,
      .sparseResidency8Samples = has_sparse_or_fake &&
                                 pdevice->info.verx10 != 125,
      .sparseResidency16Samples = has_sparse_or_fake &&
                                  pdevice->info.verx10 != 125,
      .variableMultisampleRate = true,
      .inheritedQueries = true,

      /* Vulkan 1.1 */
      .storageBuffer16BitAccess = !pdevice->instance->no_16bit,
      .uniformAndStorageBuffer16BitAccess = !pdevice->instance->no_16bit,
      .storagePushConstant16 = true,
      .storageInputOutput16 = false,
      .multiview = true,
      .multiviewGeometryShader = true,
      .multiviewTessellationShader = true,
      .variablePointersStorageBuffer = true,
      .variablePointers = true,
      .protectedMemory = pdevice->has_protected_contexts,
      .samplerYcbcrConversion = true,
      .shaderDrawParameters = true,

      /* Vulkan 1.2 */
      .samplerMirrorClampToEdge = true,
      .drawIndirectCount = true,
      .storageBuffer8BitAccess = true,
      .uniformAndStorageBuffer8BitAccess = true,
      .storagePushConstant8 = true,
      .shaderBufferInt64Atomics = true,
      .shaderSharedInt64Atomics = false,
      .shaderFloat16 = !pdevice->instance->no_16bit,
      .shaderInt8 = !pdevice->instance->no_16bit,

      .descriptorIndexing = true,
      .shaderInputAttachmentArrayDynamicIndexing = false,
      .shaderUniformTexelBufferArrayDynamicIndexing = true,
      .shaderStorageTexelBufferArrayDynamicIndexing = true,
      .shaderUniformBufferArrayNonUniformIndexing = true,
      .shaderSampledImageArrayNonUniformIndexing = true,
      .shaderStorageBufferArrayNonUniformIndexing = true,
      .shaderStorageImageArrayNonUniformIndexing = true,
      .shaderInputAttachmentArrayNonUniformIndexing = false,
      .shaderUniformTexelBufferArrayNonUniformIndexing = true,
      .shaderStorageTexelBufferArrayNonUniformIndexing = true,
      .descriptorBindingUniformBufferUpdateAfterBind = true,
      .descriptorBindingSampledImageUpdateAfterBind = true,
      .descriptorBindingStorageImageUpdateAfterBind = true,
      .descriptorBindingStorageBufferUpdateAfterBind = true,
      .descriptorBindingUniformTexelBufferUpdateAfterBind = true,
      .descriptorBindingStorageTexelBufferUpdateAfterBind = true,
      .descriptorBindingUpdateUnusedWhilePending = true,
      .descriptorBindingPartiallyBound = true,
      .descriptorBindingVariableDescriptorCount = true,
      .runtimeDescriptorArray = true,

      .samplerFilterMinmax = true,
      .scalarBlockLayout = true,
      .imagelessFramebuffer = true,
      .uniformBufferStandardLayout = true,
      .shaderSubgroupExtendedTypes = true,
      .separateDepthStencilLayouts = true,
      .hostQueryReset = true,
      .timelineSemaphore = true,
      .bufferDeviceAddress = true,
      .bufferDeviceAddressCaptureReplay = true,
      .bufferDeviceAddressMultiDevice = false,
      .vulkanMemoryModel = true,
      .vulkanMemoryModelDeviceScope = true,
      .vulkanMemoryModelAvailabilityVisibilityChains = true,
      .shaderOutputViewportIndex = true,
      .shaderOutputLayer = true,
      .subgroupBroadcastDynamicId = true,

      /* Vulkan 1.3 */
      .robustImageAccess = true,
      .inlineUniformBlock = true,
      .descriptorBindingInlineUniformBlockUpdateAfterBind = true,
      .pipelineCreationCacheControl = true,
      .privateData = true,
      .shaderDemoteToHelperInvocation = true,
      .shaderTerminateInvocation = true,
      .subgroupSizeControl = true,
      .computeFullSubgroups = true,
      .synchronization2 = true,
      .textureCompressionASTC_HDR = false,
      .shaderZeroInitializeWorkgroupMemory = true,
      .dynamicRendering = true,
      .shaderIntegerDotProduct = true,
      .maintenance4 = true,

      /* VK_EXT_4444_formats */
      .formatA4R4G4B4 = true,
      .formatA4B4G4R4 = false,

      /* VK_KHR_acceleration_structure */
      .accelerationStructure = rt_enabled,
      .accelerationStructureCaptureReplay = false, /* TODO */
      .accelerationStructureIndirectBuild = false, /* TODO */
      .accelerationStructureHostCommands = false,
      .descriptorBindingAccelerationStructureUpdateAfterBind = rt_enabled,

      /* VK_EXT_border_color_swizzle */
      .borderColorSwizzle = true,
      .borderColorSwizzleFromImage = true,

      /* VK_EXT_color_write_enable */
      .colorWriteEnable = true,

      /* VK_EXT_image_2d_view_of_3d */
      .image2DViewOf3D = true,
      .sampler2DViewOf3D = true,

      /* VK_EXT_image_sliced_view_of_3d */
      .imageSlicedViewOf3D = true,

      /* VK_KHR_compute_shader_derivatives */
      .computeDerivativeGroupQuads = true,
      .computeDerivativeGroupLinear = true,

      /* VK_EXT_conditional_rendering */
      .conditionalRendering = true,
      .inheritedConditionalRendering = true,

      /* VK_EXT_custom_border_color */
      .customBorderColors = true,
      .customBorderColorWithoutFormat = true,

      /* VK_EXT_depth_clamp_zero_one */
      .depthClampZeroOne = true,

      /* VK_EXT_depth_clip_enable */
      .depthClipEnable = true,

      /* VK_EXT_fragment_shader_interlock */
      .fragmentShaderSampleInterlock = true,
      .fragmentShaderPixelInterlock = true,
      .fragmentShaderShadingRateInterlock = false,

      /* VK_EXT_global_priority_query */
      .globalPriorityQuery = true,

      /* VK_EXT_graphics_pipeline_library */
      .graphicsPipelineLibrary =
         pdevice->vk.supported_extensions.EXT_graphics_pipeline_library,

      /* VK_KHR_fragment_shading_rate */
      .pipelineFragmentShadingRate = true,
      .primitiveFragmentShadingRate =
         pdevice->info.has_coarse_pixel_primitive_and_cb,
      .attachmentFragmentShadingRate =
         pdevice->info.has_coarse_pixel_primitive_and_cb,

      /* VK_EXT_image_view_min_lod */
      .minLod = true,

      /* VK_EXT_index_type_uint8 */
      .indexTypeUint8 = true,

      /* VK_EXT_line_rasterization */
      /* Rectangular lines must use the strict algorithm, which is not
       * supported for wide lines prior to ICL. See rasterization_mode for
       * details and how the HW states are programmed.
       */
      .rectangularLines = pdevice->info.ver >= 10,
      .bresenhamLines = true,
      /* Support for smooth lines with MSAA was removed on gfx11. From the
       * BSpec section "Multisample ModesState" table for "AA Line Support
       * Requirements":
       *
       *    GFX10:BUG:######## NUM_MULTISAMPLES == 1
       *
       * Fortunately, this isn't a case most people care about.
       */
      .smoothLines = pdevice->info.ver < 10,
      .stippledRectangularLines = false,
      .stippledBresenhamLines = true,
      .stippledSmoothLines = false,

      /* VK_NV_mesh_shader */
      .taskShaderNV = false,
      .meshShaderNV = false,

      /* VK_EXT_mesh_shader */
      .taskShader = mesh_shader,
      .meshShader = mesh_shader,
      .multiviewMeshShader = false,
      .primitiveFragmentShadingRateMeshShader = mesh_shader,
      .meshShaderQueries = mesh_shader,

      /* VK_EXT_mutable_descriptor_type */
      .mutableDescriptorType = true,

      /* VK_KHR_performance_query */
      .performanceCounterQueryPools = true,
      /* HW only supports a single configuration at a time. */
      .performanceCounterMultipleQueryPools = false,

      /* VK_KHR_pipeline_executable_properties */
      .pipelineExecutableInfo = true,

      /* VK_EXT_primitives_generated_query */
      .primitivesGeneratedQuery = true,
      .primitivesGeneratedQueryWithRasterizerDiscard = false,
      .primitivesGeneratedQueryWithNonZeroStreams = false,

      /* VK_EXT_pipeline_library_group_handles */
      .pipelineLibraryGroupHandles = true,

      /* VK_EXT_provoking_vertex */
      .provokingVertexLast = true,
      .transformFeedbackPreservesProvokingVertex = true,

      /* VK_KHR_ray_query */
      .rayQuery = rt_enabled,

      /* VK_KHR_ray_tracing_maintenance1 */
      .rayTracingMaintenance1 = rt_enabled,
      .rayTracingPipelineTraceRaysIndirect2 = rt_enabled,

      /* VK_KHR_ray_tracing_pipeline */
      .rayTracingPipeline = rt_enabled,
      .rayTracingPipelineShaderGroupHandleCaptureReplay = false,
      .rayTracingPipelineShaderGroupHandleCaptureReplayMixed = false,
      .rayTracingPipelineTraceRaysIndirect = rt_enabled,
      .rayTraversalPrimitiveCulling = rt_enabled,

      /* VK_EXT_robustness2 */
      .robustBufferAccess2 = true,
      .robustImageAccess2 = true,
      .nullDescriptor = true,

      /* VK_EXT_shader_replicated_composites */
      .shaderReplicatedComposites = true,

      /* VK_EXT_shader_atomic_float */
      .shaderBufferFloat32Atomics = true,
      .shaderBufferFloat32AtomicAdd = pdevice->info.has_lsc,
      .shaderBufferFloat64Atomics =
         pdevice->info.has_64bit_float && pdevice->info.has_lsc,
      .shaderBufferFloat64AtomicAdd = pdevice->info.ver >= 20,
      .shaderSharedFloat32Atomics = true,
      .shaderSharedFloat32AtomicAdd = false,
      .shaderSharedFloat64Atomics = false,
      .shaderSharedFloat64AtomicAdd = false,
      .shaderImageFloat32Atomics = true,
      .shaderImageFloat32AtomicAdd = pdevice->info.ver >= 20,
      .sparseImageFloat32Atomics = false,
      .sparseImageFloat32AtomicAdd = false,

      /* VK_EXT_shader_atomic_float2 */
      .shaderBufferFloat16Atomics = pdevice->info.has_lsc,
      .shaderBufferFloat16AtomicAdd = false,
      .shaderBufferFloat16AtomicMinMax = pdevice->info.has_lsc,
      .shaderBufferFloat32AtomicMinMax = true,
      .shaderBufferFloat64AtomicMinMax =
         pdevice->info.has_64bit_float && pdevice->info.has_lsc &&
         pdevice->info.ver < 20,
      .shaderSharedFloat16Atomics = pdevice->info.has_lsc,
      .shaderSharedFloat16AtomicAdd = false,
      .shaderSharedFloat16AtomicMinMax = pdevice->info.has_lsc,
      .shaderSharedFloat32AtomicMinMax = true,
      .shaderSharedFloat64AtomicMinMax = false,
      .shaderImageFloat32AtomicMinMax = false,
      .sparseImageFloat32AtomicMinMax = false,

      /* VK_KHR_shader_clock */
      .shaderSubgroupClock = true,
      .shaderDeviceClock = false,

      /* VK_INTEL_shader_integer_functions2 */
      .shaderIntegerFunctions2 = true,

      /* VK_EXT_shader_module_identifier */
      .shaderModuleIdentifier = true,

      /* VK_KHR_shader_subgroup_uniform_control_flow */
      .shaderSubgroupUniformControlFlow = true,

      /* VK_EXT_texel_buffer_alignment */
      .texelBufferAlignment = true,

      /* VK_EXT_transform_feedback */
      .transformFeedback = true,
      .geometryStreams = true,

      /* VK_KHR_vertex_attribute_divisor */
      .vertexAttributeInstanceRateDivisor = true,
      .vertexAttributeInstanceRateZeroDivisor = true,

      /* VK_KHR_workgroup_memory_explicit_layout */
      .workgroupMemoryExplicitLayout = true,
      .workgroupMemoryExplicitLayoutScalarBlockLayout = true,
      .workgroupMemoryExplicitLayout8BitAccess = true,
      .workgroupMemoryExplicitLayout16BitAccess = true,

      /* VK_EXT_ycbcr_image_arrays */
      .ycbcrImageArrays = true,

      /* VK_EXT_extended_dynamic_state */
      .extendedDynamicState = true,

      /* VK_EXT_extended_dynamic_state2 */
      .extendedDynamicState2 = true,
      .extendedDynamicState2LogicOp = true,
      .extendedDynamicState2PatchControlPoints = true,

      /* VK_EXT_extended_dynamic_state3 */
      .extendedDynamicState3PolygonMode = true,
      .extendedDynamicState3TessellationDomainOrigin = true,
      .extendedDynamicState3RasterizationStream = true,
      .extendedDynamicState3LineStippleEnable = true,
      .extendedDynamicState3LineRasterizationMode = true,
      .extendedDynamicState3LogicOpEnable = true,
      .extendedDynamicState3AlphaToOneEnable = true,
      .extendedDynamicState3DepthClipEnable = true,
      .extendedDynamicState3DepthClampEnable = true,
      .extendedDynamicState3DepthClipNegativeOneToOne = true,
      .extendedDynamicState3ProvokingVertexMode = true,
      .extendedDynamicState3ColorBlendEnable = true,
      .extendedDynamicState3ColorWriteMask = true,
      .extendedDynamicState3ColorBlendEquation = true,
      .extendedDynamicState3SampleLocationsEnable = true,
      .extendedDynamicState3SampleMask = true,
      .extendedDynamicState3ConservativeRasterizationMode = true,
      .extendedDynamicState3AlphaToCoverageEnable = true,
      .extendedDynamicState3RasterizationSamples = true,

      .extendedDynamicState3ExtraPrimitiveOverestimationSize = false,
      .extendedDynamicState3ViewportWScalingEnable = false,
      .extendedDynamicState3ViewportSwizzle = false,
      .extendedDynamicState3ShadingRateImageEnable = false,
      .extendedDynamicState3CoverageToColorEnable = false,
      .extendedDynamicState3CoverageToColorLocation = false,
      .extendedDynamicState3CoverageModulationMode = false,
      .extendedDynamicState3CoverageModulationTableEnable = false,
      .extendedDynamicState3CoverageModulationTable = false,
      .extendedDynamicState3CoverageReductionMode = false,
      .extendedDynamicState3RepresentativeFragmentTestEnable = false,
      .extendedDynamicState3ColorBlendAdvanced = false,

      /* VK_EXT_multi_draw */
      .multiDraw = true,

      /* VK_EXT_non_seamless_cube_map */
      .nonSeamlessCubeMap = true,

      /* VK_EXT_primitive_topology_list_restart */
      .primitiveTopologyListRestart = true,
      .primitiveTopologyPatchListRestart = true,

      /* VK_EXT_depth_clip_control */
      .depthClipControl = true,

      /* VK_KHR_present_id */
      .presentId = pdevice->vk.supported_extensions.KHR_present_id,

      /* VK_KHR_present_wait */
      .presentWait = pdevice->vk.supported_extensions.KHR_present_wait,

      /* VK_EXT_vertex_input_dynamic_state */
      .vertexInputDynamicState = true,

      /* VK_KHR_ray_tracing_position_fetch */
      .rayTracingPositionFetch = rt_enabled,

      /* VK_EXT_dynamic_rendering_unused_attachments */
      .dynamicRenderingUnusedAttachments = true,

      /* VK_EXT_depth_bias_control */
      .depthBiasControl = true,
      .floatRepresentation = true,
      .leastRepresentableValueForceUnormRepresentation = false,
      .depthBiasExact = true,

      /* VK_EXT_pipeline_robustness */
      .pipelineRobustness = true,

      /* VK_KHR_maintenance5 */
      .maintenance5 = true,

      /* VK_KHR_maintenance6 */
      .maintenance6 = true,

      /* VK_EXT_nested_command_buffer */
      .nestedCommandBuffer = true,
      .nestedCommandBufferRendering = true,
      .nestedCommandBufferSimultaneousUse = false,

      /* VK_KHR_cooperative_matrix */
      .cooperativeMatrix = anv_has_cooperative_matrix(pdevice),

      /* VK_KHR_shader_maximal_reconvergence */
      .shaderMaximalReconvergence = true,

      /* VK_KHR_shader_subgroup_rotate */
      .shaderSubgroupRotate = true,
      .shaderSubgroupRotateClustered = true,

      /* VK_EXT_attachment_feedback_loop_layout */
      .attachmentFeedbackLoopLayout = true,

      /* VK_EXT_attachment_feedback_loop_dynamic_state */
      .attachmentFeedbackLoopDynamicState = true,

      /* VK_KHR_shader_expect_assume */
      .shaderExpectAssume = true,

      /* VK_EXT_descriptor_buffer */
      .descriptorBuffer = true,
      .descriptorBufferCaptureReplay = true,
      .descriptorBufferImageLayoutIgnored = false,
      .descriptorBufferPushDescriptors = true,

      /* VK_EXT_map_memory_placed */
      .memoryMapPlaced = true,
      .memoryMapRangePlaced = false,
      .memoryUnmapReserve = true,

      /* VK_KHR_shader_quad_control */
      .shaderQuadControl = true,

#ifdef ANV_USE_WSI_PLATFORM
      /* VK_EXT_swapchain_maintenance1 */
      .swapchainMaintenance1 = true,
#endif

      /* VK_EXT_image_compression_control */
      .imageCompressionControl = true,

      /* VK_KHR_shader_float_controls2 */
      .shaderFloatControls2 = true,

      /* VK_EXT_legacy_vertex_attributes */
      .legacyVertexAttributes = true,

      /* VK_EXT_legacy_dithering */
      .legacyDithering = true,

      /* VK_MESA_image_alignment_control */
      .imageAlignmentControl = true,

      /* VK_KHR_maintenance7 */
      .maintenance7 = true,

      /* VK_KHR_shader_relaxed_extended_instruction */
      .shaderRelaxedExtendedInstruction = true,
   };

   /* The new DOOM and Wolfenstein games require depthBounds without
    * checking for it. They seem to run fine without it, so just claim it's
    * there and accept the consequences.
    */
   if (app_info->engine_name && strcmp(app_info->engine_name, "idTech") == 0)
      features->depthBounds = true;
}

#define MAX_PER_STAGE_DESCRIPTOR_UNIFORM_BUFFERS 64

#define MAX_PER_STAGE_DESCRIPTOR_INPUT_ATTACHMENTS 64
#define MAX_DESCRIPTOR_SET_INPUT_ATTACHMENTS 256

static VkDeviceSize
anx_get_physical_device_max_heap_size(const struct anv_physical_device *pdevice)
{
   VkDeviceSize ret = 0;

   for (uint32_t i = 0; i < pdevice->memory.heap_count; i++) {
      if (pdevice->memory.heaps[i].size > ret)
         ret = pdevice->memory.heaps[i].size;
   }

   return ret;
}

static void
get_properties_1_1(const struct anv_physical_device *pdevice,
                   struct vk_properties *p)
{
   memcpy(p->deviceUUID, pdevice->device_uuid, VK_UUID_SIZE);
   memcpy(p->driverUUID, pdevice->driver_uuid, VK_UUID_SIZE);
   memset(p->deviceLUID, 0, VK_LUID_SIZE);
   p->deviceNodeMask = 0;
   p->deviceLUIDValid = false;

   p->subgroupSize = BRW_SUBGROUP_SIZE;
   VkShaderStageFlags scalar_stages = 0;
   for (unsigned stage = 0; stage < MESA_SHADER_STAGES; stage++) {
      scalar_stages |= mesa_to_vk_shader_stage(stage);
   }
   if (pdevice->vk.supported_extensions.KHR_ray_tracing_pipeline) {
      scalar_stages |= VK_SHADER_STAGE_RAYGEN_BIT_KHR |
                       VK_SHADER_STAGE_ANY_HIT_BIT_KHR |
                       VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR |
                       VK_SHADER_STAGE_MISS_BIT_KHR |
                       VK_SHADER_STAGE_INTERSECTION_BIT_KHR |
                       VK_SHADER_STAGE_CALLABLE_BIT_KHR;
   }
   if (pdevice->vk.supported_extensions.EXT_mesh_shader) {
      scalar_stages |= VK_SHADER_STAGE_TASK_BIT_EXT |
                       VK_SHADER_STAGE_MESH_BIT_EXT;
   }
   p->subgroupSupportedStages = scalar_stages;
   p->subgroupSupportedOperations = VK_SUBGROUP_FEATURE_BASIC_BIT |
                                    VK_SUBGROUP_FEATURE_VOTE_BIT |
                                    VK_SUBGROUP_FEATURE_BALLOT_BIT |
                                    VK_SUBGROUP_FEATURE_SHUFFLE_BIT |
                                    VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT |
                                    VK_SUBGROUP_FEATURE_QUAD_BIT |
                                    VK_SUBGROUP_FEATURE_ARITHMETIC_BIT |
                                    VK_SUBGROUP_FEATURE_CLUSTERED_BIT |
                                    VK_SUBGROUP_FEATURE_ROTATE_BIT_KHR |
                                    VK_SUBGROUP_FEATURE_ROTATE_CLUSTERED_BIT_KHR;
   p->subgroupQuadOperationsInAllStages = true;

   p->pointClippingBehavior = VK_POINT_CLIPPING_BEHAVIOR_USER_CLIP_PLANES_ONLY;
   p->maxMultiviewViewCount = 16;
   p->maxMultiviewInstanceIndex = UINT32_MAX / 16;
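   /* A worked example of the limit above (our reading of the divisor, not a
    * spec citation): with maxMultiviewViewCount = 16, capping the instance
    * index at UINT32_MAX / 16 keeps instanceIndex * viewCount within 32
    * bits.
    */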
   /* Our protected implementation is a memory encryption mechanism. It
    * shouldn't page fault, but it hangs the HW, so in terms of user
    * visibility it's similar to a fault.
    */
   p->protectedNoFault = false;
   /* This value doesn't matter for us today as our per-stage descriptors are
    * the real limit.
    */
   p->maxPerSetDescriptors = 1024;

   for (uint32_t i = 0; i < pdevice->memory.heap_count; i++) {
      p->maxMemoryAllocationSize = MAX2(p->maxMemoryAllocationSize,
                                        pdevice->memory.heaps[i].size);
   }
}

static void
get_properties_1_2(const struct anv_physical_device *pdevice,
                   struct vk_properties *p)
{
   p->driverID = VK_DRIVER_ID_INTEL_OPEN_SOURCE_MESA;
   memset(p->driverName, 0, sizeof(p->driverName));
   snprintf(p->driverName, VK_MAX_DRIVER_NAME_SIZE,
            "Intel open-source Mesa driver");
   memset(p->driverInfo, 0, sizeof(p->driverInfo));
   snprintf(p->driverInfo, VK_MAX_DRIVER_INFO_SIZE,
            "Mesa " PACKAGE_VERSION MESA_GIT_SHA1);

   p->conformanceVersion = (VkConformanceVersion) {
      .major = 1,
      .minor = 3,
      .subminor = 6,
      .patch = 0,
   };

   p->denormBehaviorIndependence =
      VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL;
   p->roundingModeIndependence =
      VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_NONE;
   /* Broadwell does not support HF denorms and there are restrictions on
    * other gens. According to Kabylake's PRM:
    *
    *    "math - Extended Math Function
    *    [...]
    *    Restriction : Half-float denorms are always retained."
    */
   p->shaderDenormFlushToZeroFloat16 = false;
   p->shaderDenormPreserveFloat16 = true;
   p->shaderRoundingModeRTEFloat16 = true;
   p->shaderRoundingModeRTZFloat16 = true;
   p->shaderSignedZeroInfNanPreserveFloat16 = true;

   p->shaderDenormFlushToZeroFloat32 = true;
   p->shaderDenormPreserveFloat32 = true;
   p->shaderRoundingModeRTEFloat32 = true;
   p->shaderRoundingModeRTZFloat32 = true;
   p->shaderSignedZeroInfNanPreserveFloat32 = true;

   p->shaderDenormFlushToZeroFloat64 = true;
   p->shaderDenormPreserveFloat64 = true;
   p->shaderRoundingModeRTEFloat64 = true;
   p->shaderRoundingModeRTZFloat64 = true;
   p->shaderSignedZeroInfNanPreserveFloat64 = true;

   /* It's a bit hard to exactly map our implementation to the limits
    * described by Vulkan. The bindless surface handle in the extended
    * message descriptors is 20 bits and it's an index into the table of
    * RENDER_SURFACE_STATE structs that starts at bindless surface base
    * address. This means that we can have at most 1M surface states
    * allocated at any given time. Since most image views take two
    * descriptors, this means we have a limit of about 500K image views.
    *
    * However, since we allocate surface states at vkCreateImageView time,
    * this means our limit is actually something on the order of 500K image
    * views allocated at any time. The actual limit described by Vulkan, on
    * the other hand, is a limit on how many you can have in a descriptor
    * set. Assuming anyone using 1M descriptors will be binding the same
    * image view many times over (or a bunch of null descriptors), we can
    * safely advertise a larger limit here.
    */
   const unsigned max_bindless_views =
      anv_physical_device_bindless_heap_size(pdevice, false) /
      ANV_SURFACE_STATE_SIZE;
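   /* For illustration only (hypothetical numbers, not queried values): a
    * 64 MiB bindless heap with 64-byte surface states yields
    * 64 * 1024 * 1024 / 64 = 1M bindless views, matching the 1M handle
    * limit described above.
    */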
   p->maxUpdateAfterBindDescriptorsInAllPools = max_bindless_views;
   p->shaderUniformBufferArrayNonUniformIndexingNative = false;
   p->shaderSampledImageArrayNonUniformIndexingNative = false;
   p->shaderStorageBufferArrayNonUniformIndexingNative = true;
   p->shaderStorageImageArrayNonUniformIndexingNative = false;
   p->shaderInputAttachmentArrayNonUniformIndexingNative = false;
   p->robustBufferAccessUpdateAfterBind = true;
   p->quadDivergentImplicitLod = false;
   p->maxPerStageDescriptorUpdateAfterBindSamplers = max_bindless_views;
   p->maxPerStageDescriptorUpdateAfterBindUniformBuffers =
      MAX_PER_STAGE_DESCRIPTOR_UNIFORM_BUFFERS;
   p->maxPerStageDescriptorUpdateAfterBindStorageBuffers = UINT32_MAX;
   p->maxPerStageDescriptorUpdateAfterBindSampledImages = max_bindless_views;
   p->maxPerStageDescriptorUpdateAfterBindStorageImages = max_bindless_views;
   p->maxPerStageDescriptorUpdateAfterBindInputAttachments =
      MAX_PER_STAGE_DESCRIPTOR_INPUT_ATTACHMENTS;
   p->maxPerStageUpdateAfterBindResources = UINT32_MAX;
   p->maxDescriptorSetUpdateAfterBindSamplers = max_bindless_views;
   p->maxDescriptorSetUpdateAfterBindUniformBuffers =
      6 * MAX_PER_STAGE_DESCRIPTOR_UNIFORM_BUFFERS;
   p->maxDescriptorSetUpdateAfterBindUniformBuffersDynamic =
      MAX_DYNAMIC_BUFFERS / 2;
   p->maxDescriptorSetUpdateAfterBindStorageBuffers = UINT32_MAX;
   p->maxDescriptorSetUpdateAfterBindStorageBuffersDynamic =
      MAX_DYNAMIC_BUFFERS / 2;
   p->maxDescriptorSetUpdateAfterBindSampledImages = max_bindless_views;
   p->maxDescriptorSetUpdateAfterBindStorageImages = max_bindless_views;
   p->maxDescriptorSetUpdateAfterBindInputAttachments =
      MAX_DESCRIPTOR_SET_INPUT_ATTACHMENTS;

   /* We support all of the depth resolve modes */
   p->supportedDepthResolveModes = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT |
                                   VK_RESOLVE_MODE_AVERAGE_BIT |
                                   VK_RESOLVE_MODE_MIN_BIT |
                                   VK_RESOLVE_MODE_MAX_BIT;
   /* Average doesn't make sense for stencil so we don't support that */
   p->supportedStencilResolveModes = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT |
                                     VK_RESOLVE_MODE_MIN_BIT |
                                     VK_RESOLVE_MODE_MAX_BIT;
   p->independentResolveNone = true;
   p->independentResolve = true;

   p->filterMinmaxSingleComponentFormats = true;
   p->filterMinmaxImageComponentMapping = true;

   p->maxTimelineSemaphoreValueDifference = UINT64_MAX;

   p->framebufferIntegerColorSampleCounts =
      isl_device_get_sample_counts(&pdevice->isl_dev);
}

static void
get_properties_1_3(const struct anv_physical_device *pdevice,
                   struct vk_properties *p)
{
   if (pdevice->info.ver >= 20)
      p->minSubgroupSize = 16;
   else
      p->minSubgroupSize = 8;
   p->maxSubgroupSize = 32;
   p->maxComputeWorkgroupSubgroups = pdevice->info.max_cs_workgroup_threads;
   p->requiredSubgroupSizeStages = VK_SHADER_STAGE_COMPUTE_BIT |
                                   VK_SHADER_STAGE_TASK_BIT_EXT |
                                   VK_SHADER_STAGE_MESH_BIT_EXT;

   p->maxInlineUniformBlockSize = MAX_INLINE_UNIFORM_BLOCK_SIZE;
   p->maxPerStageDescriptorInlineUniformBlocks =
      MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS;
   p->maxPerStageDescriptorUpdateAfterBindInlineUniformBlocks =
      MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS;
   p->maxDescriptorSetInlineUniformBlocks =
      MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS;
   p->maxDescriptorSetUpdateAfterBindInlineUniformBlocks =
      MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS;
   p->maxInlineUniformTotalSize = UINT16_MAX;

   p->integerDotProduct8BitUnsignedAccelerated = false;
   p->integerDotProduct8BitSignedAccelerated = false;
   p->integerDotProduct8BitMixedSignednessAccelerated = false;
   p->integerDotProduct4x8BitPackedUnsignedAccelerated = pdevice->info.ver >= 12;
   p->integerDotProduct4x8BitPackedSignedAccelerated = pdevice->info.ver >= 12;
   p->integerDotProduct4x8BitPackedMixedSignednessAccelerated = pdevice->info.ver >= 12;
   p->integerDotProduct16BitUnsignedAccelerated = false;
   p->integerDotProduct16BitSignedAccelerated = false;
   p->integerDotProduct16BitMixedSignednessAccelerated = false;
   p->integerDotProduct32BitUnsignedAccelerated = false;
   p->integerDotProduct32BitSignedAccelerated = false;
   p->integerDotProduct32BitMixedSignednessAccelerated = false;
   p->integerDotProduct64BitUnsignedAccelerated = false;
   p->integerDotProduct64BitSignedAccelerated = false;
   p->integerDotProduct64BitMixedSignednessAccelerated = false;
   p->integerDotProductAccumulatingSaturating8BitUnsignedAccelerated = false;
   p->integerDotProductAccumulatingSaturating8BitSignedAccelerated = false;
   p->integerDotProductAccumulatingSaturating8BitMixedSignednessAccelerated = false;
   p->integerDotProductAccumulatingSaturating4x8BitPackedUnsignedAccelerated = pdevice->info.ver >= 12;
   p->integerDotProductAccumulatingSaturating4x8BitPackedSignedAccelerated = pdevice->info.ver >= 12;
   p->integerDotProductAccumulatingSaturating4x8BitPackedMixedSignednessAccelerated = pdevice->info.ver >= 12;
   p->integerDotProductAccumulatingSaturating16BitUnsignedAccelerated = false;
   p->integerDotProductAccumulatingSaturating16BitSignedAccelerated = false;
   p->integerDotProductAccumulatingSaturating16BitMixedSignednessAccelerated = false;
   p->integerDotProductAccumulatingSaturating32BitUnsignedAccelerated = false;
   p->integerDotProductAccumulatingSaturating32BitSignedAccelerated = false;
   p->integerDotProductAccumulatingSaturating32BitMixedSignednessAccelerated = false;
   p->integerDotProductAccumulatingSaturating64BitUnsignedAccelerated = false;
   p->integerDotProductAccumulatingSaturating64BitSignedAccelerated = false;
   p->integerDotProductAccumulatingSaturating64BitMixedSignednessAccelerated = false;

   /* From the SKL PRM Vol. 2d, docs for RENDER_SURFACE_STATE::Surface
    * Base Address:
    *
    *    "For SURFTYPE_BUFFER non-rendertarget surfaces, this field
    *    specifies the base address of the first element of the surface,
    *    computed in software by adding the surface base address to the
    *    byte offset of the element in the buffer. The base address must
    *    be aligned to element size."
    *
    * The typed dataport messages require that things be texel aligned.
    * Otherwise, we may just load/store the wrong data or, in the worst
    * case, there may be hangs.
    */
   p->storageTexelBufferOffsetAlignmentBytes = 16;
   p->storageTexelBufferOffsetSingleTexelAlignment = true;

   /* The sampler, however, is much more forgiving and it can handle
    * arbitrary byte alignment for linear and buffer surfaces. It's
    * hard to find a good PRM citation for this but years of empirical
    * experience demonstrate that this is true.
    */
   p->uniformTexelBufferOffsetAlignmentBytes = 1;
   p->uniformTexelBufferOffsetSingleTexelAlignment = true;

   p->maxBufferSize = pdevice->isl_dev.max_buffer_size;
}

static void
get_properties(const struct anv_physical_device *pdevice,
               struct vk_properties *props)
{
   const struct intel_device_info *devinfo = &pdevice->info;

   const uint32_t max_ssbos = UINT16_MAX;
   const uint32_t max_textures = UINT16_MAX;
   const uint32_t max_samplers = UINT16_MAX;
   const uint32_t max_images = UINT16_MAX;
   const VkDeviceSize max_heap_size =
      anx_get_physical_device_max_heap_size(pdevice);

   /* Claim a high per-stage limit since we have bindless. */
   const uint32_t max_per_stage = UINT32_MAX;

   const uint32_t max_workgroup_size =
      MIN2(1024, 32 * devinfo->max_cs_workgroup_threads);
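   /* For illustration only (hypothetical thread count): each CS hardware
    * thread covers at most 32 invocations (SIMD32), so a part with 64
    * max_cs_workgroup_threads would give 32 * 64 = 2048, clamped by the
    * MIN2 above to the 1024 we advertise.
    */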

   const bool has_sparse_or_fake =
      pdevice->sparse_type != ANV_SPARSE_TYPE_NOT_SUPPORTED;
   const bool sparse_uses_trtt =
      pdevice->sparse_type == ANV_SPARSE_TYPE_TRTT;

   uint64_t sparse_addr_space_size =
      !has_sparse_or_fake ? 0 :
      sparse_uses_trtt ? pdevice->va.trtt.size :
      pdevice->va.high_heap.size;

   VkSampleCountFlags sample_counts =
      isl_device_get_sample_counts(&pdevice->isl_dev);

#if DETECT_OS_ANDROID
   /* Used to fill struct VkPhysicalDevicePresentationPropertiesANDROID */
   uint64_t front_rendering_usage = 0;
   struct u_gralloc *gralloc = u_gralloc_create(U_GRALLOC_TYPE_AUTO);
   if (gralloc != NULL) {
      u_gralloc_get_front_rendering_usage(gralloc, &front_rendering_usage);
      u_gralloc_destroy(&gralloc);
   }
#endif /* DETECT_OS_ANDROID */

   *props = (struct vk_properties) {
      .apiVersion = ANV_API_VERSION,
      .driverVersion = vk_get_driver_version(),
      .vendorID = pdevice->instance->force_vk_vendor != 0 ?
                  pdevice->instance->force_vk_vendor : 0x8086,
      .deviceID = pdevice->info.pci_device_id,
      .deviceType = pdevice->info.has_local_mem ?
                    VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU :
                    VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU,

      /* Limits: */
      .maxImageDimension1D = (1 << 14),
      .maxImageDimension2D = (1 << 14),
      .maxImageDimension3D = (1 << 11),
      .maxImageDimensionCube = (1 << 14),
      .maxImageArrayLayers = (1 << 11),
      .maxTexelBufferElements = 128 * 1024 * 1024,
      .maxUniformBufferRange = pdevice->compiler->indirect_ubos_use_sampler ?
                               (1u << 27) : (1u << 30),
      .maxStorageBufferRange = MIN3(pdevice->isl_dev.max_buffer_size,
                                    max_heap_size, UINT32_MAX),
      .maxPushConstantsSize = MAX_PUSH_CONSTANTS_SIZE,
      .maxMemoryAllocationCount = UINT32_MAX,
      .maxSamplerAllocationCount = 64 * 1024,
      .bufferImageGranularity = 1,
      .sparseAddressSpaceSize = sparse_addr_space_size,
      .maxBoundDescriptorSets = MAX_SETS,
      .maxPerStageDescriptorSamplers = max_samplers,
      .maxPerStageDescriptorUniformBuffers = MAX_PER_STAGE_DESCRIPTOR_UNIFORM_BUFFERS,
      .maxPerStageDescriptorStorageBuffers = max_ssbos,
      .maxPerStageDescriptorSampledImages = max_textures,
      .maxPerStageDescriptorStorageImages = max_images,
      .maxPerStageDescriptorInputAttachments = MAX_PER_STAGE_DESCRIPTOR_INPUT_ATTACHMENTS,
      .maxPerStageResources = max_per_stage,
      .maxDescriptorSetSamplers = 6 * max_samplers, /* number of stages * maxPerStageDescriptorSamplers */
      .maxDescriptorSetUniformBuffers = 6 * MAX_PER_STAGE_DESCRIPTOR_UNIFORM_BUFFERS, /* number of stages * maxPerStageDescriptorUniformBuffers */
      .maxDescriptorSetUniformBuffersDynamic = MAX_DYNAMIC_BUFFERS / 2,
      .maxDescriptorSetStorageBuffers = 6 * max_ssbos, /* number of stages * maxPerStageDescriptorStorageBuffers */
      .maxDescriptorSetStorageBuffersDynamic = MAX_DYNAMIC_BUFFERS / 2,
      .maxDescriptorSetSampledImages = 6 * max_textures, /* number of stages * maxPerStageDescriptorSampledImages */
      .maxDescriptorSetStorageImages = 6 * max_images, /* number of stages * maxPerStageDescriptorStorageImages */
      .maxDescriptorSetInputAttachments = MAX_DESCRIPTOR_SET_INPUT_ATTACHMENTS,
      .maxVertexInputAttributes = MAX_VES,
      .maxVertexInputBindings = MAX_VBS,
      /* Broadwell PRMs: Volume 2d: Command Reference: Structures:
       *
       *    VERTEX_ELEMENT_STATE::Source Element Offset: [0,2047]
       */
      .maxVertexInputAttributeOffset = 2047,
      /* Skylake PRMs: Volume 2d: Command Reference: Structures:
       *
       *    VERTEX_BUFFER_STATE::Buffer Pitch: [0,4095]
       */
      .maxVertexInputBindingStride = 4095,
      .maxVertexOutputComponents = 128,
      .maxTessellationGenerationLevel = 64,
      .maxTessellationPatchSize = 32,
      .maxTessellationControlPerVertexInputComponents = 128,
      .maxTessellationControlPerVertexOutputComponents = 128,
      .maxTessellationControlPerPatchOutputComponents = 128,
      .maxTessellationControlTotalOutputComponents = 2048,
      .maxTessellationEvaluationInputComponents = 128,
      .maxTessellationEvaluationOutputComponents = 128,
      .maxGeometryShaderInvocations = 32,
      .maxGeometryInputComponents = 128,
      .maxGeometryOutputComponents = 128,
      .maxGeometryOutputVertices = 256,
      .maxGeometryTotalOutputComponents = 1024,
      .maxFragmentInputComponents = 116, /* 128 components - (PSIZ, CLIP_DIST0, CLIP_DIST1) */
      .maxFragmentOutputAttachments = 8,
      .maxFragmentDualSrcAttachments = 1,
      .maxFragmentCombinedOutputResources = MAX_RTS + max_ssbos + max_images,
      .maxComputeSharedMemorySize = intel_device_info_get_max_slm_size(&pdevice->info),
      .maxComputeWorkGroupCount = { 65535, 65535, 65535 },
      .maxComputeWorkGroupInvocations = max_workgroup_size,
      .maxComputeWorkGroupSize = {
         max_workgroup_size,
         max_workgroup_size,
         max_workgroup_size,
      },
      .subPixelPrecisionBits = 8,
      .subTexelPrecisionBits = 8,
      .mipmapPrecisionBits = 8,
      .maxDrawIndexedIndexValue = UINT32_MAX,
      .maxDrawIndirectCount = UINT32_MAX,
      .maxSamplerLodBias = 16,
      .maxSamplerAnisotropy = 16,
      .maxViewports = MAX_VIEWPORTS,
      .maxViewportDimensions = { (1 << 14), (1 << 14) },
      .viewportBoundsRange = { INT16_MIN, INT16_MAX },
      .viewportSubPixelBits = 13, /* We take a float? */
      .minMemoryMapAlignment = 4096, /* A page */
      /* The dataport requires texel alignment so we need to assume a worst
       * case of R32G32B32A32 which is 16 bytes.
       */
      .minTexelBufferOffsetAlignment = 16,
      .minUniformBufferOffsetAlignment = ANV_UBO_ALIGNMENT,
      .minStorageBufferOffsetAlignment = ANV_SSBO_ALIGNMENT,
      .minTexelOffset = -8,
      .maxTexelOffset = 7,
      .minTexelGatherOffset = -32,
      .maxTexelGatherOffset = 31,
      .minInterpolationOffset = -0.5,
      .maxInterpolationOffset = 0.4375,
      .subPixelInterpolationOffsetBits = 4,
      .maxFramebufferWidth = (1 << 14),
      .maxFramebufferHeight = (1 << 14),
      .maxFramebufferLayers = (1 << 11),
      .framebufferColorSampleCounts = sample_counts,
      .framebufferDepthSampleCounts = sample_counts,
      .framebufferStencilSampleCounts = sample_counts,
      .framebufferNoAttachmentsSampleCounts = sample_counts,
      .maxColorAttachments = MAX_RTS,
      .sampledImageColorSampleCounts = sample_counts,
      .sampledImageIntegerSampleCounts = sample_counts,
      .sampledImageDepthSampleCounts = sample_counts,
      .sampledImageStencilSampleCounts = sample_counts,
      .storageImageSampleCounts = VK_SAMPLE_COUNT_1_BIT,
      .maxSampleMaskWords = 1,
      .timestampComputeAndGraphics = true,
      .timestampPeriod = 1000000000.0 / devinfo->timestamp_frequency,
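      /* For example (hypothetical clock, not a queried value): a 19.2 MHz
       * timestamp frequency gives 1e9 / 19.2e6 ≈ 52.08 ns per timestamp
       * tick.
       */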
      .maxClipDistances = 8,
      .maxCullDistances = 8,
      .maxCombinedClipAndCullDistances = 8,
      .discreteQueuePriorities = 2,
      .pointSizeRange = { 0.125, 255.875 },
      /* While SKL and up support much wider lines than we are setting here,
       * in practice we run into conformance issues if we go past this limit.
       * Since the Windows driver does the same, it's probably fair to assume
       * that no one needs more than this.
       */
      .lineWidthRange = { 0.0, 8.0 },
      .pointSizeGranularity = (1.0 / 8.0),
      .lineWidthGranularity = (1.0 / 128.0),
      .strictLines = false,
      .standardSampleLocations = true,
      .optimalBufferCopyOffsetAlignment = 128,
      .optimalBufferCopyRowPitchAlignment = 128,
      .nonCoherentAtomSize = 64,

      /* Sparse: */
      .sparseResidencyStandard2DBlockShape = has_sparse_or_fake,
      .sparseResidencyStandard2DMultisampleBlockShape = false,
      .sparseResidencyStandard3DBlockShape = has_sparse_or_fake,
      .sparseResidencyAlignedMipSize = false,
      .sparseResidencyNonResidentStrict = has_sparse_or_fake,

      /* VK_KHR_cooperative_matrix */
      .cooperativeMatrixSupportedStages = VK_SHADER_STAGE_COMPUTE_BIT,
   };
1282
1283 snprintf(props->deviceName, sizeof(props->deviceName),
1284 "%s", pdevice->info.name);
1285 memcpy(props->pipelineCacheUUID,
1286 pdevice->pipeline_cache_uuid, VK_UUID_SIZE);
1287
1288 get_properties_1_1(pdevice, props);
1289 get_properties_1_2(pdevice, props);
1290 get_properties_1_3(pdevice, props);
1291
1292 /* VK_KHR_acceleration_structure */
1293 {
1294 props->maxGeometryCount = (1u << 24) - 1;
1295 props->maxInstanceCount = (1u << 24) - 1;
1296 props->maxPrimitiveCount = (1u << 29) - 1;
1297 props->maxPerStageDescriptorAccelerationStructures = UINT16_MAX;
1298 props->maxPerStageDescriptorUpdateAfterBindAccelerationStructures = UINT16_MAX;
1299 props->maxDescriptorSetAccelerationStructures = UINT16_MAX;
1300 props->maxDescriptorSetUpdateAfterBindAccelerationStructures = UINT16_MAX;
1301 props->minAccelerationStructureScratchOffsetAlignment = 64;
1302 }
1303
1304 /* VK_KHR_compute_shader_derivatives */
1305 {
1306 props->meshAndTaskShaderDerivatives = pdevice->info.has_mesh_shading;
1307 }
1308
1309 /* VK_KHR_fragment_shading_rate */
1310 {
1311 props->primitiveFragmentShadingRateWithMultipleViewports =
1312 pdevice->info.has_coarse_pixel_primitive_and_cb;
1313 props->layeredShadingRateAttachments =
1314 pdevice->info.has_coarse_pixel_primitive_and_cb;
1315 props->fragmentShadingRateNonTrivialCombinerOps =
1316 pdevice->info.has_coarse_pixel_primitive_and_cb;
1317 props->maxFragmentSize = (VkExtent2D) { 4, 4 };
1318 props->maxFragmentSizeAspectRatio =
1319 pdevice->info.has_coarse_pixel_primitive_and_cb ?
1320 2 : 4;
1321 props->maxFragmentShadingRateCoverageSamples = 4 * 4 *
1322 (pdevice->info.has_coarse_pixel_primitive_and_cb ? 4 : 16);
1323 props->maxFragmentShadingRateRasterizationSamples =
1324 pdevice->info.has_coarse_pixel_primitive_and_cb ?
1325 VK_SAMPLE_COUNT_4_BIT : VK_SAMPLE_COUNT_16_BIT;
1326 props->fragmentShadingRateWithShaderDepthStencilWrites = false;
1327 props->fragmentShadingRateWithSampleMask = true;
1328 props->fragmentShadingRateWithShaderSampleMask = false;
1329 props->fragmentShadingRateWithConservativeRasterization = true;
1330 props->fragmentShadingRateWithFragmentShaderInterlock = true;
1331 props->fragmentShadingRateWithCustomSampleLocations = true;
1332 props->fragmentShadingRateStrictMultiplyCombiner = true;
1333
1334 if (pdevice->info.has_coarse_pixel_primitive_and_cb) {
1335 props->minFragmentShadingRateAttachmentTexelSize = (VkExtent2D) { 8, 8 };
1336 props->maxFragmentShadingRateAttachmentTexelSize = (VkExtent2D) { 8, 8 };
1337 props->maxFragmentShadingRateAttachmentTexelSizeAspectRatio = 1;
1338 } else {
1339 /* Those must be 0 if attachmentFragmentShadingRate is not supported. */
1340 props->minFragmentShadingRateAttachmentTexelSize = (VkExtent2D) { 0, 0 };
1341 props->maxFragmentShadingRateAttachmentTexelSize = (VkExtent2D) { 0, 0 };
1342 props->maxFragmentShadingRateAttachmentTexelSizeAspectRatio = 0;
1343 }
1344 }
1345
1346 /* VK_KHR_maintenance5 */
1347 {
1348 props->earlyFragmentMultisampleCoverageAfterSampleCounting = false;
1349 props->earlyFragmentSampleMaskTestBeforeSampleCounting = false;
1350 props->depthStencilSwizzleOneSupport = true;
1351 props->polygonModePointSize = true;
1352 props->nonStrictSinglePixelWideLinesUseParallelogram = false;
1353 props->nonStrictWideLinesUseParallelogram = false;
1354 }
1355
1356 /* VK_KHR_maintenance6 */
1357 {
1358 props->blockTexelViewCompatibleMultipleLayers = true;
      props->maxCombinedImageSamplerDescriptorCount = 3;
      props->fragmentShadingRateClampCombinerInputs = true;
   }

   /* VK_KHR_maintenance7 */
   {
      props->robustFragmentShadingRateAttachmentAccess = true;
      props->separateDepthStencilAttachmentAccess = true;
      props->maxDescriptorSetTotalUniformBuffersDynamic = MAX_DYNAMIC_BUFFERS;
      props->maxDescriptorSetTotalStorageBuffersDynamic = MAX_DYNAMIC_BUFFERS;
      props->maxDescriptorSetTotalBuffersDynamic = MAX_DYNAMIC_BUFFERS;
      props->maxDescriptorSetUpdateAfterBindTotalUniformBuffersDynamic = MAX_DYNAMIC_BUFFERS;
      props->maxDescriptorSetUpdateAfterBindTotalStorageBuffersDynamic = MAX_DYNAMIC_BUFFERS;
      props->maxDescriptorSetUpdateAfterBindTotalBuffersDynamic = MAX_DYNAMIC_BUFFERS;
   }

   /* VK_KHR_performance_query */
   {
      props->allowCommandBufferQueryCopies = false;
   }

   /* VK_KHR_push_descriptor */
   {
      props->maxPushDescriptors = MAX_PUSH_DESCRIPTORS;
   }

   /* VK_KHR_ray_tracing_pipeline */
   {
      /* TODO */
      props->shaderGroupHandleSize = 32;
      props->maxRayRecursionDepth = 31;
      /* MemRay::hitGroupSRStride is 16 bits */
      props->maxShaderGroupStride = UINT16_MAX;
      /* MemRay::hitGroupSRBasePtr requires 16B alignment */
      props->shaderGroupBaseAlignment = 16;
      props->shaderGroupHandleAlignment = 16;
      props->shaderGroupHandleCaptureReplaySize = 32;
      props->maxRayDispatchInvocationCount = 1U << 30; /* required min limit */
      props->maxRayHitAttributeSize = BRW_RT_SIZEOF_HIT_ATTRIB_DATA;
   }

   /* VK_KHR_vertex_attribute_divisor */
   {
      props->maxVertexAttribDivisor = UINT32_MAX / 16;
      props->supportsNonZeroFirstInstance = true;
   }

   /* VK_EXT_conservative_rasterization */
   {
      /* There's nothing in the public docs about this value as far as I can
       * tell. However, this is the value the Windows driver reports and
       * there's a comment on a rejected HW feature in the internal docs that
       * says:
       *
       *    "This is similar to conservative rasterization, except the
       *    primitive area is not extended by 1/512 and..."
       *
       * That's a bit of an obtuse reference but it's the best we've got for
       * now.
       */
      props->primitiveOverestimationSize = 1.0f / 512.0f;
      props->maxExtraPrimitiveOverestimationSize = 0.0f;
      props->extraPrimitiveOverestimationSizeGranularity = 0.0f;
      props->primitiveUnderestimation = false;
      props->conservativePointAndLineRasterization = false;
      props->degenerateTrianglesRasterized = true;
      props->degenerateLinesRasterized = false;
      props->fullyCoveredFragmentShaderInputVariable = false;
      props->conservativeRasterizationPostDepthCoverage = true;
   }

   /* VK_EXT_custom_border_color */
   {
      props->maxCustomBorderColorSamplers = MAX_CUSTOM_BORDER_COLORS;
   }

   /* VK_EXT_descriptor_buffer */
   {
      props->combinedImageSamplerDescriptorSingleArray = true;
      props->bufferlessPushDescriptors = true;
      /* Written to the buffer before a timeline semaphore is signaled, but
       * after vkQueueSubmit().
       */
      props->allowSamplerImageViewPostSubmitCreation = true;
      props->descriptorBufferOffsetAlignment = ANV_SURFACE_STATE_SIZE;

      if (pdevice->uses_ex_bso) {
         props->maxDescriptorBufferBindings = MAX_SETS;
         props->maxResourceDescriptorBufferBindings = MAX_SETS;
         props->maxSamplerDescriptorBufferBindings = MAX_SETS;
         props->maxEmbeddedImmutableSamplerBindings = MAX_SETS;
      } else {
         props->maxDescriptorBufferBindings = 3; /* resources, samplers, push (we don't care about push) */
         props->maxResourceDescriptorBufferBindings = 1;
         props->maxSamplerDescriptorBufferBindings = 1;
         props->maxEmbeddedImmutableSamplerBindings = 1;
      }
      props->maxEmbeddedImmutableSamplers = MAX_EMBEDDED_SAMPLERS;

      /* Storing a 64bit address */
      props->bufferCaptureReplayDescriptorDataSize = 8;
      props->imageCaptureReplayDescriptorDataSize = 8;
      /* Offset inside the reserved border color pool */
      props->samplerCaptureReplayDescriptorDataSize = 4;

      /* Not affected by replay */
      props->imageViewCaptureReplayDescriptorDataSize = 0;
      /* The acceleration structure virtual address backing is coming from a
       * buffer, so as long as that buffer is captured/replayed correctly we
       * should always get the same address.
       */
      props->accelerationStructureCaptureReplayDescriptorDataSize = 0;

      props->samplerDescriptorSize = ANV_SAMPLER_STATE_SIZE;
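      /* A combined image/sampler descriptor is a surface state followed by
       * a sampler state, padded so descriptors remain surface-state aligned.
       */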
      props->combinedImageSamplerDescriptorSize = align(ANV_SURFACE_STATE_SIZE + ANV_SAMPLER_STATE_SIZE,
                                                        ANV_SURFACE_STATE_SIZE);
      props->sampledImageDescriptorSize = ANV_SURFACE_STATE_SIZE;
      props->storageImageDescriptorSize = ANV_SURFACE_STATE_SIZE;
      props->uniformTexelBufferDescriptorSize = ANV_SURFACE_STATE_SIZE;
      props->robustUniformTexelBufferDescriptorSize = ANV_SURFACE_STATE_SIZE;
      props->storageTexelBufferDescriptorSize = ANV_SURFACE_STATE_SIZE;
      props->robustStorageTexelBufferDescriptorSize = ANV_SURFACE_STATE_SIZE;
      props->uniformBufferDescriptorSize = ANV_SURFACE_STATE_SIZE;
      props->robustUniformBufferDescriptorSize = ANV_SURFACE_STATE_SIZE;
      props->storageBufferDescriptorSize = ANV_SURFACE_STATE_SIZE;
      props->robustStorageBufferDescriptorSize = ANV_SURFACE_STATE_SIZE;
      props->inputAttachmentDescriptorSize = ANV_SURFACE_STATE_SIZE;
      props->accelerationStructureDescriptorSize = sizeof(struct anv_address_range_descriptor);
      props->maxSamplerDescriptorBufferRange = pdevice->va.dynamic_visible_pool.size;
      props->maxResourceDescriptorBufferRange = anv_physical_device_bindless_heap_size(pdevice,
                                                                                       true);
      props->resourceDescriptorBufferAddressSpaceSize = pdevice->va.dynamic_visible_pool.size;
      props->descriptorBufferAddressSpaceSize = pdevice->va.dynamic_visible_pool.size;
      props->samplerDescriptorBufferAddressSpaceSize = pdevice->va.dynamic_visible_pool.size;
   }

   /* VK_EXT_extended_dynamic_state3 */
   {
      props->dynamicPrimitiveTopologyUnrestricted = true;
   }

   /* VK_EXT_external_memory_host */
   {
      props->minImportedHostPointerAlignment = 4096;
   }

   /* VK_EXT_graphics_pipeline_library */
   {
      props->graphicsPipelineLibraryFastLinking = true;
      props->graphicsPipelineLibraryIndependentInterpolationDecoration = true;
   }

   /* VK_EXT_legacy_vertex_attributes */
   {
      props->nativeUnalignedPerformance = true;
   }

   /* VK_EXT_line_rasterization */
   {
      /* In the Skylake PRM Vol. 7, subsection titled "GIQ (Diamond) Sampling
       * Rules - Legacy Mode", it says the following:
       *
       *    "Note that the device divides a pixel into a 16x16 array of
       *    subpixels, referenced by their upper left corners."
       *
       * This is the only known reference in the PRMs to the subpixel
       * precision of line rasterization and a "16x16 array of subpixels"
       * implies 4 subpixel precision bits. Empirical testing has shown that 4
       * subpixel precision bits applies to all line rasterization types.
       */
      props->lineSubPixelPrecisionBits = 4;
   }

   /* VK_EXT_map_memory_placed */
   {
      props->minPlacedMemoryMapAlignment = 4096;
   }

   /* VK_EXT_mesh_shader */
   {
      /* Bounded by the maximum representable size in
       * 3DSTATE_MESH_SHADER_BODY::SharedLocalMemorySize. Same for Task.
       */
      const uint32_t max_slm_size = 64 * 1024;

      /* Bounded by the maximum representable size in
       * 3DSTATE_MESH_SHADER_BODY::LocalXMaximum. Same for Task.
       */
      const uint32_t max_workgroup_size = 1 << 10;

      /* 3DMESH_3D limitation. */
      const uint32_t max_threadgroup_count = 1 << 22;

      /* 3DMESH_3D limitation. */
      const uint32_t max_threadgroup_xyz = 65535;

      const uint32_t max_urb_size = 64 * 1024;

      props->maxTaskWorkGroupTotalCount = max_threadgroup_count;
      props->maxTaskWorkGroupCount[0] = max_threadgroup_xyz;
      props->maxTaskWorkGroupCount[1] = max_threadgroup_xyz;
      props->maxTaskWorkGroupCount[2] = max_threadgroup_xyz;

      props->maxTaskWorkGroupInvocations = max_workgroup_size;
      props->maxTaskWorkGroupSize[0] = max_workgroup_size;
      props->maxTaskWorkGroupSize[1] = max_workgroup_size;
      props->maxTaskWorkGroupSize[2] = max_workgroup_size;

      /* TUE header with padding */
      const uint32_t task_payload_reserved = 32;

      props->maxTaskPayloadSize = max_urb_size - task_payload_reserved;
      props->maxTaskSharedMemorySize = max_slm_size;
      props->maxTaskPayloadAndSharedMemorySize =
         props->maxTaskPayloadSize +
         props->maxTaskSharedMemorySize;

      props->maxMeshWorkGroupTotalCount = max_threadgroup_count;
      props->maxMeshWorkGroupCount[0] = max_threadgroup_xyz;
      props->maxMeshWorkGroupCount[1] = max_threadgroup_xyz;
      props->maxMeshWorkGroupCount[2] = max_threadgroup_xyz;

      props->maxMeshWorkGroupInvocations = max_workgroup_size;
      props->maxMeshWorkGroupSize[0] = max_workgroup_size;
      props->maxMeshWorkGroupSize[1] = max_workgroup_size;
      props->maxMeshWorkGroupSize[2] = max_workgroup_size;

      props->maxMeshSharedMemorySize = max_slm_size;
      props->maxMeshPayloadAndSharedMemorySize =
         props->maxTaskPayloadSize +
         props->maxMeshSharedMemorySize;

      /* Unfortunately, the spec's formula for the max output size doesn't
       * match our hardware (some per-primitive and per-vertex attributes
       * have alignment restrictions), so we have to advertise the minimum
       * values mandated by the spec to avoid overflowing it.
       */
      props->maxMeshOutputPrimitives = 256;
      props->maxMeshOutputVertices = 256;

      /* NumPrim + Primitive Data List */
      const uint32_t max_indices_memory =
         ALIGN(sizeof(uint32_t) +
               sizeof(uint32_t) * props->maxMeshOutputVertices, 32);

      props->maxMeshOutputMemorySize = MIN2(max_urb_size - max_indices_memory, 32768);

      props->maxMeshPayloadAndOutputMemorySize =
         props->maxTaskPayloadSize +
         props->maxMeshOutputMemorySize;

      props->maxMeshOutputComponents = 128;

      /* RTAIndex is 11-bits wide */
      props->maxMeshOutputLayers = 1 << 11;

      props->maxMeshMultiviewViewCount = 1;

      /* Elements in Vertex Data Array must be aligned to 32 bytes (8 dwords). */
      props->meshOutputPerVertexGranularity = 8;
      /* Elements in Primitive Data Array must be aligned to 32 bytes (8 dwords). */
      props->meshOutputPerPrimitiveGranularity = 8;

      /* SIMD16 */
      props->maxPreferredTaskWorkGroupInvocations = 16;
      props->maxPreferredMeshWorkGroupInvocations = 16;

      props->prefersLocalInvocationVertexOutput = false;
      props->prefersLocalInvocationPrimitiveOutput = false;
      props->prefersCompactVertexOutput = false;
      props->prefersCompactPrimitiveOutput = false;

      /* Spec minimum values */
      assert(props->maxTaskWorkGroupTotalCount >= (1U << 22));
      assert(props->maxTaskWorkGroupCount[0] >= 65535);
      assert(props->maxTaskWorkGroupCount[1] >= 65535);
      assert(props->maxTaskWorkGroupCount[2] >= 65535);

      assert(props->maxTaskWorkGroupInvocations >= 128);
      assert(props->maxTaskWorkGroupSize[0] >= 128);
      assert(props->maxTaskWorkGroupSize[1] >= 128);
      assert(props->maxTaskWorkGroupSize[2] >= 128);

      assert(props->maxTaskPayloadSize >= 16384);
      assert(props->maxTaskSharedMemorySize >= 32768);
      assert(props->maxTaskPayloadAndSharedMemorySize >= 32768);

      assert(props->maxMeshWorkGroupTotalCount >= (1U << 22));
      assert(props->maxMeshWorkGroupCount[0] >= 65535);
      assert(props->maxMeshWorkGroupCount[1] >= 65535);
      assert(props->maxMeshWorkGroupCount[2] >= 65535);

      assert(props->maxMeshWorkGroupInvocations >= 128);
      assert(props->maxMeshWorkGroupSize[0] >= 128);
      assert(props->maxMeshWorkGroupSize[1] >= 128);
      assert(props->maxMeshWorkGroupSize[2] >= 128);

      assert(props->maxMeshSharedMemorySize >= 28672);
      assert(props->maxMeshPayloadAndSharedMemorySize >= 28672);
      assert(props->maxMeshOutputMemorySize >= 32768);
      assert(props->maxMeshPayloadAndOutputMemorySize >= 48128);

      assert(props->maxMeshOutputComponents >= 128);

      assert(props->maxMeshOutputVertices >= 256);
      assert(props->maxMeshOutputPrimitives >= 256);
      assert(props->maxMeshOutputLayers >= 8);
      assert(props->maxMeshMultiviewViewCount >= 1);
   }

   /* VK_EXT_multi_draw */
   {
      props->maxMultiDrawCount = 2048;
   }

   /* VK_EXT_nested_command_buffer */
   {
      props->maxCommandBufferNestingLevel = UINT32_MAX;
   }

   /* VK_EXT_pci_bus_info */
   {
      props->pciDomain = pdevice->info.pci_domain;
      props->pciBus = pdevice->info.pci_bus;
      props->pciDevice = pdevice->info.pci_dev;
      props->pciFunction = pdevice->info.pci_func;
   }

   /* VK_EXT_physical_device_drm */
   {
      props->drmHasPrimary = pdevice->has_master;
      props->drmPrimaryMajor = pdevice->master_major;
      props->drmPrimaryMinor = pdevice->master_minor;
      props->drmHasRender = pdevice->has_local;
      props->drmRenderMajor = pdevice->local_major;
      props->drmRenderMinor = pdevice->local_minor;
   }

   /* VK_EXT_pipeline_robustness */
   {
      props->defaultRobustnessStorageBuffers =
         VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_DISABLED_EXT;
      props->defaultRobustnessUniformBuffers =
         VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_DISABLED_EXT;
      props->defaultRobustnessVertexInputs =
         VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_ROBUST_BUFFER_ACCESS_2_EXT;
      props->defaultRobustnessImages =
         VK_PIPELINE_ROBUSTNESS_IMAGE_BEHAVIOR_ROBUST_IMAGE_ACCESS_2_EXT;
   }

   /* VK_EXT_provoking_vertex */
   {
      props->provokingVertexModePerPipeline = true;
      props->transformFeedbackPreservesTriangleFanProvokingVertex = false;
   }

   /* VK_EXT_robustness2 */
   {
      props->robustStorageBufferAccessSizeAlignment =
         ANV_SSBO_BOUNDS_CHECK_ALIGNMENT;
      props->robustUniformBufferAccessSizeAlignment =
         ANV_UBO_ALIGNMENT;
   }

   /* VK_EXT_sample_locations */
   {
      props->sampleLocationSampleCounts =
         isl_device_get_sample_counts(&pdevice->isl_dev);

      /* See also anv_GetPhysicalDeviceMultisamplePropertiesEXT */
      props->maxSampleLocationGridSize.width = 1;
      props->maxSampleLocationGridSize.height = 1;

      props->sampleLocationCoordinateRange[0] = 0;
      props->sampleLocationCoordinateRange[1] = 0.9375;
      props->sampleLocationSubPixelBits = 4;

      props->variableSampleLocations = true;
   }

   /* VK_EXT_shader_module_identifier */
   {
      STATIC_ASSERT(sizeof(vk_shaderModuleIdentifierAlgorithmUUID) ==
                    sizeof(props->shaderModuleIdentifierAlgorithmUUID));
      memcpy(props->shaderModuleIdentifierAlgorithmUUID,
             vk_shaderModuleIdentifierAlgorithmUUID,
             sizeof(props->shaderModuleIdentifierAlgorithmUUID));
   }

   /* VK_EXT_transform_feedback */
   {
      props->maxTransformFeedbackStreams = MAX_XFB_STREAMS;
      props->maxTransformFeedbackBuffers = MAX_XFB_BUFFERS;
      props->maxTransformFeedbackBufferSize = (1ull << 32);
      props->maxTransformFeedbackStreamDataSize = 128 * 4;
      props->maxTransformFeedbackBufferDataSize = 128 * 4;
      props->maxTransformFeedbackBufferDataStride = 2048;
      props->transformFeedbackQueries = true;
      props->transformFeedbackStreamsLinesTriangles = false;
      props->transformFeedbackRasterizationStreamSelect = false;
      props->transformFeedbackDraw = true;
   }

   /* VK_ANDROID_native_buffer */
#if DETECT_OS_ANDROID
   {
      props->sharedImage = front_rendering_usage ? VK_TRUE : VK_FALSE;
   }
#endif /* DETECT_OS_ANDROID */

   /* VK_MESA_image_alignment_control */
   {
      /* We support 4k/64k tiling alignments on most platforms */
      props->supportedImageAlignmentMask = (1 << 12) | (1 << 16);
   }
}

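/* Seed the heap sizes and availability from the memory regions the kernel
 * reported in intel_device_info: system RAM plus the mappable and
 * non-mappable parts of VRAM.
 */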
static VkResult MUST_CHECK
anv_init_meminfo(struct anv_physical_device *device, int fd)
{
   const struct intel_device_info *devinfo = &device->info;

   device->sys.region = &devinfo->mem.sram.mem;
   device->sys.size = devinfo->mem.sram.mappable.size;
   device->sys.available = devinfo->mem.sram.mappable.free;

   device->vram_mappable.region = &devinfo->mem.vram.mem;
   device->vram_mappable.size = devinfo->mem.vram.mappable.size;
   device->vram_mappable.available = devinfo->mem.vram.mappable.free;

   device->vram_non_mappable.region = &devinfo->mem.vram.mem;
   device->vram_non_mappable.size = devinfo->mem.vram.unmappable.size;
   device->vram_non_mappable.available = devinfo->mem.vram.unmappable.free;

   return VK_SUCCESS;
}

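/* Refresh the available-memory numbers from the kernel. These feed the
 * VK_EXT_memory_budget heap budgets computed in anv_get_memory_budget().
 */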
static void
anv_update_meminfo(struct anv_physical_device *device, int fd)
{
   if (!intel_device_info_update_memory_info(&device->info, fd))
      return;

   const struct intel_device_info *devinfo = &device->info;
   device->sys.available = devinfo->mem.sram.mappable.free;
   device->vram_mappable.available = devinfo->mem.vram.mappable.free;
   device->vram_non_mappable.available = devinfo->mem.vram.unmappable.free;
}

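/* Build the Vulkan memory heaps (and then memory types) advertised to the
 * application: discrete GPUs get separate VRAM and system heaps, integrated
 * GPUs a single system heap.
 */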
static VkResult
anv_physical_device_init_heaps(struct anv_physical_device *device, int fd)
{
   VkResult result = anv_init_meminfo(device, fd);
   if (result != VK_SUCCESS)
      return result;

   assert(device->sys.size != 0);

   if (anv_physical_device_has_vram(device)) {
      /* With local memory support we create two or three heaps: the first
       * covers local memory and the second covers system memory. A third is
       * added only when the host can map just part of the VRAM.
       */
      device->memory.heap_count = 2;
      device->memory.heaps[0] = (struct anv_memory_heap) {
         /* If there is a vram_non_mappable, use that for the device only
          * heap. Otherwise use the vram_mappable.
          */
         .size = device->vram_non_mappable.size != 0 ?
                 device->vram_non_mappable.size : device->vram_mappable.size,
         .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
         .is_local_mem = true,
      };
      device->memory.heaps[1] = (struct anv_memory_heap) {
         .size = device->sys.size,
         .flags = 0,
         .is_local_mem = false,
      };
      /* Add an additional smaller vram mappable heap if we can't map all the
       * vram to the host.
       */
      if (device->vram_non_mappable.size > 0) {
         device->memory.heap_count++;
         device->memory.heaps[2] = (struct anv_memory_heap) {
            .size = device->vram_mappable.size,
            .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
            .is_local_mem = true,
         };
      }
   } else {
      device->memory.heap_count = 1;
      device->memory.heaps[0] = (struct anv_memory_heap) {
         .size = device->sys.size,
         .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
         .is_local_mem = false,
      };
   }

   switch (device->info.kmd_type) {
   case INTEL_KMD_TYPE_XE:
      result = anv_xe_physical_device_init_memory_types(device);
      break;
   case INTEL_KMD_TYPE_I915:
   default:
      result = anv_i915_physical_device_init_memory_types(device);
      break;
   }

   assert(device->memory.type_count < ARRAY_SIZE(device->memory.types));

   if (result != VK_SUCCESS)
      return result;

   /* Some games (e.g., Total War: WARHAMMER III) sometimes seem to expect to
    * find memory types both with and without
    * VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT. So here we duplicate all our
    * memory types just to make these games happy.
    * This behavior is not spec-compliant as we still only have one heap that
    * is now inconsistent with some of the memory types, but the games don't
    * seem to care about it.
    */
   if (device->instance->anv_fake_nonlocal_memory &&
       !anv_physical_device_has_vram(device)) {
      const uint32_t base_types_count = device->memory.type_count;
      for (int i = 0; i < base_types_count; i++) {
         if (!(device->memory.types[i].propertyFlags &
               VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT))
            continue;

         assert(device->memory.type_count < ARRAY_SIZE(device->memory.types));
         struct anv_memory_type *new_type =
            &device->memory.types[device->memory.type_count++];
         *new_type = device->memory.types[i];

         device->memory.types[i].propertyFlags &=
            ~VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
      }
   }

   /* Replicate all non-protected memory types for descriptor buffers because
    * we want to identify memory allocations to place them in the right
    * memory heap.
    */
   device->memory.default_buffer_mem_types =
      BITFIELD_RANGE(0, device->memory.type_count);
   device->memory.protected_mem_types = 0;
   device->memory.dynamic_visible_mem_types = 0;
   device->memory.compressed_mem_types = 0;

   const uint32_t base_types_count = device->memory.type_count;
   for (int i = 0; i < base_types_count; i++) {
      bool skip = false;

      if (device->memory.types[i].propertyFlags &
          VK_MEMORY_PROPERTY_PROTECTED_BIT) {
         device->memory.protected_mem_types |= BITFIELD_BIT(i);
         device->memory.default_buffer_mem_types &= (~BITFIELD_BIT(i));
         skip = true;
      }

      if (device->memory.types[i].compressed) {
         device->memory.compressed_mem_types |= BITFIELD_BIT(i);
         device->memory.default_buffer_mem_types &= (~BITFIELD_BIT(i));
         skip = true;
      }

      if (skip)
         continue;

      device->memory.dynamic_visible_mem_types |=
         BITFIELD_BIT(device->memory.type_count);

      assert(device->memory.type_count < ARRAY_SIZE(device->memory.types));
      struct anv_memory_type *new_type =
         &device->memory.types[device->memory.type_count++];
      *new_type = device->memory.types[i];
      new_type->dynamic_visible = true;
   }

   assert(device->memory.type_count <= VK_MAX_MEMORY_TYPES);

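   /* Host-visible but non-coherent memory types need explicit CPU cache
    * flushes, which is only implemented for integrated GPUs.
    */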
   for (unsigned i = 0; i < device->memory.type_count; i++) {
      VkMemoryPropertyFlags props = device->memory.types[i].propertyFlags;
      if ((props & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) &&
          !(props & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT))
#ifdef SUPPORT_INTEL_INTEGRATED_GPUS
         device->memory.need_flush = true;
#else
         return vk_errorf(device, VK_ERROR_INITIALIZATION_FAILED,
                          "Memory configuration requires flushing, but it's "
                          "not implemented for this architecture");
#endif
   }

   return VK_SUCCESS;
}

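/* Derive the UUIDs we report (pipeline cache, driver and device) from the
 * driver's build-id and the device info.
 */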
static VkResult
anv_physical_device_init_uuids(struct anv_physical_device *device)
{
   const struct build_id_note *note =
      build_id_find_nhdr_for_addr(anv_physical_device_init_uuids);
   if (!note) {
      return vk_errorf(device, VK_ERROR_INITIALIZATION_FAILED,
                       "Failed to find build-id");
   }

   unsigned build_id_len = build_id_length(note);
   if (build_id_len < 20) {
      return vk_errorf(device, VK_ERROR_INITIALIZATION_FAILED,
                       "build-id too short. It needs to be a SHA");
   }

   memcpy(device->driver_build_sha1, build_id_data(note), 20);

   struct mesa_sha1 sha1_ctx;
   uint8_t sha1[20];
   STATIC_ASSERT(VK_UUID_SIZE <= sizeof(sha1));

   /* The pipeline cache UUID is used for determining when a pipeline cache is
    * invalid. It needs both a driver build and the PCI ID of the device.
    */
   _mesa_sha1_init(&sha1_ctx);
   _mesa_sha1_update(&sha1_ctx, build_id_data(note), build_id_len);
   brw_device_sha1_update(&sha1_ctx, &device->info);
   _mesa_sha1_update(&sha1_ctx, &device->always_use_bindless,
                     sizeof(device->always_use_bindless));
   _mesa_sha1_final(&sha1_ctx, sha1);
   memcpy(device->pipeline_cache_uuid, sha1, VK_UUID_SIZE);

   intel_uuid_compute_driver_id(device->driver_uuid, &device->info, VK_UUID_SIZE);
   intel_uuid_compute_device_id(device->device_uuid, &device->info, VK_UUID_SIZE);

   return VK_SUCCESS;
}

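/* The on-disk shader cache is keyed on the PCI device ID, the driver build
 * sha1 and the compiler configuration flags.
 */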
static void
anv_physical_device_init_disk_cache(struct anv_physical_device *device)
{
#ifdef ENABLE_SHADER_CACHE
   char renderer[10];
   ASSERTED int len = snprintf(renderer, sizeof(renderer), "anv_%04x",
                               device->info.pci_device_id);
   assert(len == sizeof(renderer) - 2);

   char timestamp[41];
   _mesa_sha1_format(timestamp, device->driver_build_sha1);

   const uint64_t driver_flags =
      brw_get_compiler_config_value(device->compiler);
   device->vk.disk_cache = disk_cache_create(renderer, timestamp, driver_flags);
#endif
}

static void
anv_physical_device_free_disk_cache(struct anv_physical_device *device)
{
#ifdef ENABLE_SHADER_CACHE
   if (device->vk.disk_cache) {
      disk_cache_destroy(device->vk.disk_cache);
      device->vk.disk_cache = NULL;
   }
#else
   assert(device->vk.disk_cache == NULL);
#endif
}

/* The ANV_QUEUE_OVERRIDE environment variable is a comma separated list of
 * queue overrides.
 *
 * To override the number of queues:
 *  * "gc" is for graphics queues with compute support
 *  * "g" is for graphics queues with no compute support
 *  * "c" is for compute queues with no graphics support
 *  * "v" is for video queues with no graphics support
 *  * "b" is for copy (blitter) queues with no graphics support
 *
 * For example, ANV_QUEUE_OVERRIDE=gc=2,c=1 would override the number of
 * advertised queues to be 2 queues with graphics+compute support, and 1 queue
 * with compute-only support.
 *
 * ANV_QUEUE_OVERRIDE=c=1 would override the number of advertised queues to
 * include 1 queue with compute-only support, but it will not change the
 * number of graphics+compute queues.
 *
 * ANV_QUEUE_OVERRIDE=gc=0,c=1 would override the number of advertised queues
 * to include 1 queue with compute-only support, and it would override the
 * number of graphics+compute queues to be 0.
 */
static void
anv_override_engine_counts(int *gc_count, int *g_count, int *c_count,
                           int *v_count, int *blit_count)
{
   int gc_override = -1;
   int g_override = -1;
   int c_override = -1;
   int v_override = -1;
   int blit_override = -1;
   const char *env_ = os_get_option("ANV_QUEUE_OVERRIDE");

   if (env_ == NULL)
      return;

   char *env = strdup(env_);
   char *save = NULL;
   char *next = strtok_r(env, ",", &save);
   while (next != NULL) {
      if (strncmp(next, "gc=", 3) == 0) {
         gc_override = strtol(next + 3, NULL, 0);
      } else if (strncmp(next, "g=", 2) == 0) {
         g_override = strtol(next + 2, NULL, 0);
      } else if (strncmp(next, "c=", 2) == 0) {
         c_override = strtol(next + 2, NULL, 0);
      } else if (strncmp(next, "v=", 2) == 0) {
         v_override = strtol(next + 2, NULL, 0);
      } else if (strncmp(next, "b=", 2) == 0) {
         blit_override = strtol(next + 2, NULL, 0);
      } else {
         mesa_logw("Ignoring unsupported ANV_QUEUE_OVERRIDE token: %s", next);
      }
      next = strtok_r(NULL, ",", &save);
   }
   free(env);
   if (gc_override >= 0)
      *gc_count = gc_override;
   if (g_override >= 0)
      *g_count = g_override;
   if (*g_count > 0 && *gc_count <= 0 && (gc_override >= 0 || g_override >= 0))
      mesa_logw("ANV_QUEUE_OVERRIDE: gc=0 with g > 0 violates the "
                "Vulkan specification");
   if (c_override >= 0)
      *c_count = c_override;
   if (v_override >= 0)
      *v_count = v_override;
   if (blit_override >= 0)
      *blit_count = blit_override;
}

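/* Build the list of advertised queue families: render (graphics+compute),
 * graphics-only, compute, video and copy, in that order, subject to the
 * ANV_QUEUE_OVERRIDE environment variable described above.
 */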
static void
anv_physical_device_init_queue_families(struct anv_physical_device *pdevice)
{
   uint32_t family_count = 0;
   VkQueueFlags sparse_flags = pdevice->sparse_type != ANV_SPARSE_TYPE_NOT_SUPPORTED ?
                               VK_QUEUE_SPARSE_BINDING_BIT : 0;
   VkQueueFlags protected_flag = pdevice->has_protected_contexts ?
                                 VK_QUEUE_PROTECTED_BIT : 0;

   if (pdevice->engine_info) {
      int gc_count =
         intel_engines_count(pdevice->engine_info,
                             INTEL_ENGINE_CLASS_RENDER);
      int v_count =
         intel_engines_count(pdevice->engine_info, INTEL_ENGINE_CLASS_VIDEO);
      int g_count = 0;
      int c_count = 0;
      /* The kernel not only needs vm_control, it also needs a new enough GuC
       * and an interface to tell us so. This is implemented in the common
       * layer by is_guc_semaphore_functional() and results in
       * devinfo->engine_class_supported_count being adjusted, which we read
       * below.
       */
      const bool kernel_supports_non_render_engines = pdevice->has_vm_control;
      /* For now we're choosing to not expose non-render engines on i915.ko
       * even when the Kernel allows it. We have data suggesting it's not an
       * obvious win in terms of performance.
       */
      const bool can_use_non_render_engines =
         kernel_supports_non_render_engines &&
         pdevice->info.kmd_type == INTEL_KMD_TYPE_XE;

      if (can_use_non_render_engines) {
         c_count = pdevice->info.engine_class_supported_count[INTEL_ENGINE_CLASS_COMPUTE];
      }
      enum intel_engine_class compute_class =
         c_count < 1 ? INTEL_ENGINE_CLASS_RENDER : INTEL_ENGINE_CLASS_COMPUTE;

      int blit_count = 0;
      if (pdevice->info.verx10 >= 125 && can_use_non_render_engines) {
         blit_count = pdevice->info.engine_class_supported_count[INTEL_ENGINE_CLASS_COPY];
      }

      anv_override_engine_counts(&gc_count, &g_count, &c_count, &v_count, &blit_count);

      if (gc_count > 0) {
         pdevice->queue.families[family_count++] = (struct anv_queue_family) {
            .queueFlags = VK_QUEUE_GRAPHICS_BIT |
                          VK_QUEUE_COMPUTE_BIT |
                          VK_QUEUE_TRANSFER_BIT |
                          sparse_flags |
                          protected_flag,
            .queueCount = gc_count,
            .engine_class = INTEL_ENGINE_CLASS_RENDER,
            .supports_perf = true,
         };
      }
      if (g_count > 0) {
         pdevice->queue.families[family_count++] = (struct anv_queue_family) {
            .queueFlags = VK_QUEUE_GRAPHICS_BIT |
                          VK_QUEUE_TRANSFER_BIT |
                          sparse_flags |
                          protected_flag,
            .queueCount = g_count,
            .engine_class = INTEL_ENGINE_CLASS_RENDER,
         };
      }
      if (c_count > 0) {
         pdevice->queue.families[family_count++] = (struct anv_queue_family) {
            .queueFlags = VK_QUEUE_COMPUTE_BIT |
                          VK_QUEUE_TRANSFER_BIT |
                          sparse_flags |
                          protected_flag,
            .queueCount = c_count,
            .engine_class = compute_class,
         };
      }
      if (v_count > 0 && (pdevice->video_decode_enabled || pdevice->video_encode_enabled)) {
         /* HEVC support on Gfx9 is only available on VCS0. So limit the
          * number of video queues to the first VCS engine instance.
          *
          * We should be able to query HEVC support from the kernel using the
          * engine query uAPI, but this appears to be broken:
          * https://gitlab.freedesktop.org/drm/intel/-/issues/8832
          *
          * When this bug is fixed we should be able to check HEVC support to
          * determine the correct number of queues.
          */
         /* TODO: enable protected content on video queue */
         pdevice->queue.families[family_count++] = (struct anv_queue_family) {
            .queueFlags = (pdevice->video_decode_enabled ? VK_QUEUE_VIDEO_DECODE_BIT_KHR : 0) |
                          (pdevice->video_encode_enabled ? VK_QUEUE_VIDEO_ENCODE_BIT_KHR : 0),
            .queueCount = pdevice->info.ver == 9 ? MIN2(1, v_count) : v_count,
            .engine_class = INTEL_ENGINE_CLASS_VIDEO,
         };
      }
      if (blit_count > 0) {
         pdevice->queue.families[family_count++] = (struct anv_queue_family) {
            .queueFlags = VK_QUEUE_TRANSFER_BIT |
                          protected_flag,
            .queueCount = blit_count,
            .engine_class = INTEL_ENGINE_CLASS_COPY,
         };
      }
   } else {
      /* Default to a single render queue */
      pdevice->queue.families[family_count++] = (struct anv_queue_family) {
         .queueFlags = VK_QUEUE_GRAPHICS_BIT |
                       VK_QUEUE_COMPUTE_BIT |
                       VK_QUEUE_TRANSFER_BIT |
                       sparse_flags,
         .queueCount = 1,
         .engine_class = INTEL_ENGINE_CLASS_RENDER,
      };
      family_count = 1;
   }
   assert(family_count <= ANV_MAX_QUEUE_FAMILIES);
   pdevice->queue.family_count = family_count;
}

static VkResult
anv_physical_device_get_parameters(struct anv_physical_device *device)
{
   switch (device->info.kmd_type) {
   case INTEL_KMD_TYPE_I915:
      return anv_i915_physical_device_get_parameters(device);
   case INTEL_KMD_TYPE_XE:
      return anv_xe_physical_device_get_parameters(device);
   default:
      unreachable("Missing");
      return VK_ERROR_UNKNOWN;
   }
}

VkResult
anv_physical_device_try_create(struct vk_instance *vk_instance,
                               struct _drmDevice *drm_device,
                               struct vk_physical_device **out)
{
   struct anv_instance *instance =
      container_of(vk_instance, struct anv_instance, vk);

   if (!(drm_device->available_nodes & (1 << DRM_NODE_RENDER)) ||
       drm_device->bustype != DRM_BUS_PCI ||
       drm_device->deviceinfo.pci->vendor_id != 0x8086)
      return VK_ERROR_INCOMPATIBLE_DRIVER;

   const char *primary_path = drm_device->nodes[DRM_NODE_PRIMARY];
   const char *path = drm_device->nodes[DRM_NODE_RENDER];
   VkResult result;
   int fd;
   int master_fd = -1;

   process_intel_debug_variable();

   fd = open(path, O_RDWR | O_CLOEXEC);
   if (fd < 0) {
      if (errno == ENOMEM) {
         return vk_errorf(instance, VK_ERROR_OUT_OF_HOST_MEMORY,
                          "Unable to open device %s: out of memory", path);
      }
      return vk_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER,
                       "Unable to open device %s: %m", path);
   }

   struct intel_device_info devinfo;
   if (!intel_get_device_info_from_fd(fd, &devinfo, 9, -1)) {
      result = VK_ERROR_INCOMPATIBLE_DRIVER;
      goto fail_fd;
   }

   if (devinfo.ver < 9) {
      /* Silently fail here, hasvk should pick up this device. */
      result = VK_ERROR_INCOMPATIBLE_DRIVER;
      goto fail_fd;
   } else if (devinfo.probe_forced) {
      /* If INTEL_FORCE_PROBE was used, then the user has opted-in for
       * unsupported device support. No need to print a warning message.
       */
   } else if (devinfo.ver > 20) {
      result = vk_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER,
                         "Vulkan not yet supported on %s", devinfo.name);
      goto fail_fd;
   }

   if (devinfo.ver == 20 && instance->disable_xe2_ccs)
      intel_debug |= DEBUG_NO_CCS;

   /* Disable Wa_16013994831 on Gfx12.0 because we found other cases where we
    * need to always disable preemption:
    * - https://gitlab.freedesktop.org/mesa/mesa/-/issues/5963
    * - https://gitlab.freedesktop.org/mesa/mesa/-/issues/5662
    */
   if (devinfo.verx10 == 120)
      BITSET_CLEAR(devinfo.workarounds, INTEL_WA_16013994831);

   if (!devinfo.has_context_isolation) {
      result = vk_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER,
                         "Vulkan requires context isolation for %s", devinfo.name);
      goto fail_fd;
   }

   struct anv_physical_device *device =
      vk_zalloc(&instance->vk.alloc, sizeof(*device), 8,
                VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
   if (device == NULL) {
      result = vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY);
      goto fail_fd;
   }

   struct vk_physical_device_dispatch_table dispatch_table;
   vk_physical_device_dispatch_table_from_entrypoints(
      &dispatch_table, &anv_physical_device_entrypoints, true);
   vk_physical_device_dispatch_table_from_entrypoints(
      &dispatch_table, &wsi_physical_device_entrypoints, false);

   result = vk_physical_device_init(&device->vk, &instance->vk,
                                    NULL, NULL, NULL, /* We set up extensions later */
                                    &dispatch_table);
   if (result != VK_SUCCESS) {
      vk_error(instance, result);
      goto fail_alloc;
   }
   device->instance = instance;

   assert(strlen(path) < ARRAY_SIZE(device->path));
   snprintf(device->path, ARRAY_SIZE(device->path), "%s", path);

   device->info = devinfo;

   device->local_fd = fd;
   result = anv_physical_device_get_parameters(device);
   if (result != VK_SUCCESS)
      goto fail_base;

   device->gtt_size = device->info.gtt_size ? device->info.gtt_size :
                                              device->info.aperture_bytes;

   if (device->gtt_size < (4ULL << 30 /* GiB */)) {
      result = vk_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER,
                         "GTT size too small: 0x%016"PRIx64, device->gtt_size);
      goto fail_base;
   }

   /* We currently only have the right bits for instructions in Gen12+. If the
    * kernel ever starts supporting that feature on previous generations,
    * we'll need to edit genxml prior to enabling here.
    */
   device->has_protected_contexts = device->info.ver >= 12 &&
      intel_gem_supports_protected_context(fd, device->info.kmd_type);

   /* Just pick one; they're all the same */
   device->has_astc_ldr =
      isl_format_supports_sampling(&device->info,
                                   ISL_FORMAT_ASTC_LDR_2D_4X4_FLT16);
   if (!device->has_astc_ldr &&
       driQueryOptionb(&device->instance->dri_options, "vk_require_astc"))
      device->emu_astc_ldr = true;
   if (devinfo.ver == 9 && !intel_device_info_is_9lp(&devinfo)) {
      device->flush_astc_ldr_void_extent_denorms =
         device->has_astc_ldr && !device->emu_astc_ldr;
   }
   device->disable_fcv = device->info.verx10 >= 125 ||
                         instance->disable_fcv;

   result = anv_physical_device_init_heaps(device, fd);
   if (result != VK_SUCCESS)
      goto fail_base;

   if (debug_get_bool_option("ANV_QUEUE_THREAD_DISABLE", false))
      device->has_exec_timeline = false;

   device->has_cooperative_matrix =
      device->info.cooperative_matrix_configurations[0].scope != INTEL_CMAT_SCOPE_NONE;

   unsigned st_idx = 0;

   device->sync_syncobj_type = vk_drm_syncobj_get_type(fd);
   if (!device->has_exec_timeline)
      device->sync_syncobj_type.features &= ~VK_SYNC_FEATURE_TIMELINE;
   device->sync_types[st_idx++] = &device->sync_syncobj_type;

   /* anv_bo_sync_type is only supported with i915 for now */
   if (device->info.kmd_type == INTEL_KMD_TYPE_I915) {
      if (!(device->sync_syncobj_type.features & VK_SYNC_FEATURE_CPU_WAIT))
         device->sync_types[st_idx++] = &anv_bo_sync_type;

      if (!(device->sync_syncobj_type.features & VK_SYNC_FEATURE_TIMELINE)) {
         device->sync_timeline_type = vk_sync_timeline_get_type(&anv_bo_sync_type);
         device->sync_types[st_idx++] = &device->sync_timeline_type.sync;
      }
   } else {
      assert(vk_sync_type_is_drm_syncobj(&device->sync_syncobj_type));
      assert(device->sync_syncobj_type.features & VK_SYNC_FEATURE_TIMELINE);
      assert(device->sync_syncobj_type.features & VK_SYNC_FEATURE_CPU_WAIT);
   }

   device->sync_types[st_idx++] = NULL;
   assert(st_idx <= ARRAY_SIZE(device->sync_types));
   device->vk.supported_sync_types = device->sync_types;

   device->vk.pipeline_cache_import_ops = anv_cache_import_ops;

   device->always_use_bindless =
      debug_get_bool_option("ANV_ALWAYS_BINDLESS", false);

   device->use_call_secondary =
      !debug_get_bool_option("ANV_DISABLE_SECONDARY_CMD_BUFFER_CALLS", false);

   device->video_decode_enabled = debug_get_bool_option("ANV_VIDEO_DECODE", false);
   device->video_encode_enabled = debug_get_bool_option("ANV_VIDEO_ENCODE", false);

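   /* Extended bindless surface offsets (ex_bso) are a Gfx12.5+ feature. */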
   device->uses_ex_bso = device->info.verx10 >= 125;

   /* For now always use indirect descriptors. We'll update this
    * to !uses_ex_bso when all the infrastructure is built up.
    */
   device->indirect_descriptors =
      !device->uses_ex_bso ||
      driQueryOptionb(&instance->dri_options, "force_indirect_descriptors");

   device->alloc_aux_tt_mem =
      device->info.has_aux_map && device->info.verx10 >= 125;
   /* Check if we can read the GPU timestamp register from the CPU */
   uint64_t u64_ignore;
   device->has_reg_timestamp = intel_gem_read_render_timestamp(fd,
                                                               device->info.kmd_type,
                                                               &u64_ignore);

   device->uses_relocs = device->info.kmd_type != INTEL_KMD_TYPE_XE;

   /* While xe.ko can use both vm_bind and TR-TT, i915.ko only has TR-TT. */
   if (debug_get_bool_option("ANV_SPARSE", true)) {
      if (device->info.kmd_type == INTEL_KMD_TYPE_XE) {
         if (debug_get_bool_option("ANV_SPARSE_USE_TRTT", false))
            device->sparse_type = ANV_SPARSE_TYPE_TRTT;
         else
            device->sparse_type = ANV_SPARSE_TYPE_VM_BIND;
      } else {
         if (device->info.ver >= 12 && device->has_exec_timeline)
            device->sparse_type = ANV_SPARSE_TYPE_TRTT;
      }
   }
   if (device->sparse_type == ANV_SPARSE_TYPE_NOT_SUPPORTED) {
      if (instance->has_fake_sparse)
         device->sparse_type = ANV_SPARSE_TYPE_FAKE;
   }

   device->always_flush_cache = INTEL_DEBUG(DEBUG_STALL) ||
      driQueryOptionb(&instance->dri_options, "always_flush_cache");

   device->compiler = brw_compiler_create(NULL, &device->info);
   if (device->compiler == NULL) {
      result = vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY);
      goto fail_base;
   }
   device->compiler->shader_debug_log = compiler_debug_log;
   device->compiler->shader_perf_log = compiler_perf_log;
   device->compiler->extended_bindless_surface_offset = device->uses_ex_bso;
   device->compiler->use_bindless_sampler_offset = false;
   device->compiler->spilling_rate =
      driQueryOptioni(&instance->dri_options, "shader_spilling_rate");

   isl_device_init(&device->isl_dev, &device->info);
   device->isl_dev.buffer_length_in_aux_addr =
      !intel_needs_workaround(device->isl_dev.info, 14019708328);
   device->isl_dev.sampler_route_to_lsc =
      driQueryOptionb(&instance->dri_options, "intel_sampler_route_to_lsc");

   result = anv_physical_device_init_uuids(device);
   if (result != VK_SUCCESS)
      goto fail_compiler;

   anv_physical_device_init_va_ranges(device);

   anv_physical_device_init_disk_cache(device);

   if (instance->vk.enabled_extensions.KHR_display) {
      master_fd = open(primary_path, O_RDWR | O_CLOEXEC);
      if (master_fd >= 0) {
         /* fail if we don't have permission to even render on this device */
         if (!intel_gem_can_render_on_fd(master_fd, device->info.kmd_type)) {
            close(master_fd);
            master_fd = -1;
         }
      }
   }
   device->master_fd = master_fd;

   device->engine_info = intel_engine_get_info(fd, device->info.kmd_type);
   intel_common_update_device_info(fd, &device->info);

   anv_physical_device_init_queue_families(device);

   anv_physical_device_init_perf(device, fd);

   /* Gather major/minor before WSI. */
   struct stat st;

   if (stat(primary_path, &st) == 0) {
      device->has_master = true;
      device->master_major = major(st.st_rdev);
      device->master_minor = minor(st.st_rdev);
   } else {
      device->has_master = false;
      device->master_major = 0;
      device->master_minor = 0;
   }

   if (stat(path, &st) == 0) {
      device->has_local = true;
      device->local_major = major(st.st_rdev);
      device->local_minor = minor(st.st_rdev);
   } else {
      device->has_local = false;
      device->local_major = 0;
      device->local_minor = 0;
   }

   get_device_extensions(device, &device->vk.supported_extensions);
   get_features(device, &device->vk.supported_features);
   get_properties(device, &device->vk.properties);

   result = anv_init_wsi(device);
   if (result != VK_SUCCESS)
      goto fail_perf;

   anv_measure_device_init(device);

   anv_genX(&device->info, init_physical_device_state)(device);

   *out = &device->vk;

   return VK_SUCCESS;

 fail_perf:
   intel_perf_free(device->perf);
   free(device->engine_info);
   anv_physical_device_free_disk_cache(device);
 fail_compiler:
   ralloc_free(device->compiler);
 fail_base:
   vk_physical_device_finish(&device->vk);
 fail_alloc:
   vk_free(&instance->vk.alloc, device);
 fail_fd:
   close(fd);
   if (master_fd != -1)
      close(master_fd);
   return result;
}

void
anv_physical_device_destroy(struct vk_physical_device *vk_device)
{
   struct anv_physical_device *device =
      container_of(vk_device, struct anv_physical_device, vk);

   anv_finish_wsi(device);
   anv_measure_device_destroy(device);
   free(device->engine_info);
   anv_physical_device_free_disk_cache(device);
   ralloc_free(device->compiler);
   intel_perf_free(device->perf);
   close(device->local_fd);
   if (device->master_fd >= 0)
      close(device->master_fd);
   vk_physical_device_finish(&device->vk);
   vk_free(&device->instance->vk.alloc, device);
}

static const VkQueueFamilyProperties
get_anv_queue_family_properties_template(const struct anv_physical_device *device)
{
   /* For Xe2+:
    *    Bspec 60411: Timestamp register can hold a 64-bit value
    *
    * Platforms < Xe2:
    *    Bspec 46111: Timestamp register can hold only a 36-bit value
    */
   const VkQueueFamilyProperties anv_queue_family_properties_template = {
      .timestampValidBits = device->info.ver >= 20 ? 64 : 36,
      .minImageTransferGranularity = { 1, 1, 1 },
   };

   return anv_queue_family_properties_template;
}

static VkQueueFamilyProperties
anv_device_physical_get_queue_properties(const struct anv_physical_device *device,
                                         uint32_t family_index)
{
   const struct anv_queue_family *family = &device->queue.families[family_index];
   VkQueueFamilyProperties properties =
      get_anv_queue_family_properties_template(device);

   properties.queueFlags = family->queueFlags;
   properties.queueCount = family->queueCount;
   return properties;
}

void anv_GetPhysicalDeviceQueueFamilyProperties2(
    VkPhysicalDevice                            physicalDevice,
    uint32_t*                                   pQueueFamilyPropertyCount,
    VkQueueFamilyProperties2*                   pQueueFamilyProperties)
{
   ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice);
   VK_OUTARRAY_MAKE_TYPED(VkQueueFamilyProperties2, out,
                          pQueueFamilyProperties, pQueueFamilyPropertyCount);

   for (uint32_t i = 0; i < pdevice->queue.family_count; i++) {
      struct anv_queue_family *queue_family = &pdevice->queue.families[i];
      vk_outarray_append_typed(VkQueueFamilyProperties2, &out, p) {
         p->queueFamilyProperties =
            anv_device_physical_get_queue_properties(pdevice, i);

         vk_foreach_struct(ext, p->pNext) {
            switch (ext->sType) {
            case VK_STRUCTURE_TYPE_QUEUE_FAMILY_GLOBAL_PRIORITY_PROPERTIES_KHR: {
               VkQueueFamilyGlobalPriorityPropertiesKHR *properties =
                  (VkQueueFamilyGlobalPriorityPropertiesKHR *)ext;

               /* Deliberately sorted low to high */
               VkQueueGlobalPriorityKHR all_priorities[] = {
                  VK_QUEUE_GLOBAL_PRIORITY_LOW_KHR,
                  VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR,
                  VK_QUEUE_GLOBAL_PRIORITY_HIGH_KHR,
                  VK_QUEUE_GLOBAL_PRIORITY_REALTIME_KHR,
               };

               uint32_t count = 0;
               for (unsigned j = 0; j < ARRAY_SIZE(all_priorities); j++) {
                  if (all_priorities[j] > pdevice->max_context_priority)
                     break;

                  properties->priorities[count++] = all_priorities[j];
               }
               properties->priorityCount = count;
               break;
            }
            case VK_STRUCTURE_TYPE_QUEUE_FAMILY_QUERY_RESULT_STATUS_PROPERTIES_KHR: {
               VkQueueFamilyQueryResultStatusPropertiesKHR *prop =
                  (VkQueueFamilyQueryResultStatusPropertiesKHR *)ext;
               prop->queryResultStatusSupport = VK_TRUE;
               break;
            }
            case VK_STRUCTURE_TYPE_QUEUE_FAMILY_VIDEO_PROPERTIES_KHR: {
               VkQueueFamilyVideoPropertiesKHR *prop =
                  (VkQueueFamilyVideoPropertiesKHR *)ext;
               if (queue_family->queueFlags & VK_QUEUE_VIDEO_DECODE_BIT_KHR) {
                  prop->videoCodecOperations = VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR |
                                               VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR;
               }

               if (queue_family->queueFlags & VK_QUEUE_VIDEO_ENCODE_BIT_KHR) {
                  prop->videoCodecOperations |= VK_VIDEO_CODEC_OPERATION_ENCODE_H264_BIT_KHR |
                                                VK_VIDEO_CODEC_OPERATION_ENCODE_H265_BIT_KHR;
               }
               break;
            }
            default:
               vk_debug_ignored_stype(ext->sType);
            }
         }
      }
   }
}

void anv_GetPhysicalDeviceMemoryProperties(
    VkPhysicalDevice                            physicalDevice,
    VkPhysicalDeviceMemoryProperties*           pMemoryProperties)
{
   ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice);

   pMemoryProperties->memoryTypeCount = physical_device->memory.type_count;
   for (uint32_t i = 0; i < physical_device->memory.type_count; i++) {
      pMemoryProperties->memoryTypes[i] = (VkMemoryType) {
         .propertyFlags = physical_device->memory.types[i].propertyFlags,
         .heapIndex = physical_device->memory.types[i].heapIndex,
      };
   }

   pMemoryProperties->memoryHeapCount = physical_device->memory.heap_count;
   for (uint32_t i = 0; i < physical_device->memory.heap_count; i++) {
      pMemoryProperties->memoryHeaps[i] = (VkMemoryHeap) {
         .size = physical_device->memory.heaps[i].size,
         .flags = physical_device->memory.heaps[i].flags,
      };
   }
}

static void
anv_get_memory_budget(VkPhysicalDevice physicalDevice,
                      VkPhysicalDeviceMemoryBudgetPropertiesEXT *memoryBudget)
{
   ANV_FROM_HANDLE(anv_physical_device, device, physicalDevice);

   if (!device->vk.supported_extensions.EXT_memory_budget)
      return;

   anv_update_meminfo(device, device->local_fd);

   VkDeviceSize total_sys_heaps_size = 0, total_vram_heaps_size = 0;
   for (size_t i = 0; i < device->memory.heap_count; i++) {
      if (device->memory.heaps[i].is_local_mem) {
         total_vram_heaps_size += device->memory.heaps[i].size;
      } else {
         total_sys_heaps_size += device->memory.heaps[i].size;
      }
   }

   for (size_t i = 0; i < device->memory.heap_count; i++) {
      VkDeviceSize heap_size = device->memory.heaps[i].size;
      VkDeviceSize heap_used = device->memory.heaps[i].used;
      VkDeviceSize heap_budget, total_heaps_size;
      uint64_t mem_available = 0;

      if (device->memory.heaps[i].is_local_mem) {
         total_heaps_size = total_vram_heaps_size;
         if (device->vram_non_mappable.size > 0 && i == 0) {
            mem_available = device->vram_non_mappable.available;
         } else {
            mem_available = device->vram_mappable.available;
         }
      } else {
         total_heaps_size = total_sys_heaps_size;
         mem_available = MIN2(device->sys.available, total_heaps_size);
      }

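      /* Share what is actually available across heaps of the same kind,
       * proportionally to each heap's size.
       */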
      double heap_proportion = (double) heap_size / total_heaps_size;
      VkDeviceSize available_prop = mem_available * heap_proportion;

      /* Let's not incite the app to starve the system: report at most 90% of
       * the available heap memory.
       */
      uint64_t heap_available = available_prop * 9 / 10;
      heap_budget = MIN2(heap_size, heap_used + heap_available);

      /* Round down to the nearest MB */
      heap_budget &= ~((1ull << 20) - 1);

      /* The heapBudget value must be non-zero for array elements less than
       * VkPhysicalDeviceMemoryProperties::memoryHeapCount. The heapBudget
       * value must be less than or equal to VkMemoryHeap::size for each heap.
       */
      assert(0 < heap_budget && heap_budget <= heap_size);

      memoryBudget->heapUsage[i] = heap_used;
      memoryBudget->heapBudget[i] = heap_budget;
   }

   /* The heapBudget and heapUsage values must be zero for array elements
    * greater than or equal to
    * VkPhysicalDeviceMemoryProperties::memoryHeapCount.
    */
   for (uint32_t i = device->memory.heap_count; i < VK_MAX_MEMORY_HEAPS; i++) {
      memoryBudget->heapBudget[i] = 0;
      memoryBudget->heapUsage[i] = 0;
   }
}

void anv_GetPhysicalDeviceMemoryProperties2(
    VkPhysicalDevice                            physicalDevice,
    VkPhysicalDeviceMemoryProperties2*          pMemoryProperties)
{
   anv_GetPhysicalDeviceMemoryProperties(physicalDevice,
                                         &pMemoryProperties->memoryProperties);

   vk_foreach_struct(ext, pMemoryProperties->pNext) {
      switch (ext->sType) {
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT:
         anv_get_memory_budget(physicalDevice, (void*)ext);
         break;
      default:
         vk_debug_ignored_stype(ext->sType);
         break;
      }
   }
}

void anv_GetPhysicalDeviceMultisamplePropertiesEXT(
    VkPhysicalDevice                            physicalDevice,
    VkSampleCountFlagBits                       samples,
    VkMultisamplePropertiesEXT*                 pMultisampleProperties)
{
   ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice);

   assert(pMultisampleProperties->sType ==
          VK_STRUCTURE_TYPE_MULTISAMPLE_PROPERTIES_EXT);

   VkExtent2D grid_size;
   if (samples & isl_device_get_sample_counts(&physical_device->isl_dev)) {
      grid_size.width = 1;
      grid_size.height = 1;
   } else {
      grid_size.width = 0;
      grid_size.height = 0;
   }
   pMultisampleProperties->maxSampleLocationGridSize = grid_size;

   vk_foreach_struct(ext, pMultisampleProperties->pNext)
      vk_debug_ignored_stype(ext->sType);
}

VkResult anv_GetPhysicalDeviceFragmentShadingRatesKHR(
    VkPhysicalDevice                            physicalDevice,
    uint32_t*                                   pFragmentShadingRateCount,
    VkPhysicalDeviceFragmentShadingRateKHR*     pFragmentShadingRates)
{
   ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice);
   VK_OUTARRAY_MAKE_TYPED(VkPhysicalDeviceFragmentShadingRateKHR, out,
                          pFragmentShadingRates, pFragmentShadingRateCount);

#define append_rate(_samples, _width, _height)                                      \
   do {                                                                             \
      vk_outarray_append_typed(VkPhysicalDeviceFragmentShadingRateKHR, &out, __r) { \
         __r->sampleCounts = _samples;                                              \
         __r->fragmentSize = (VkExtent2D) {                                         \
            .width = _width,                                                        \
            .height = _height,                                                      \
         };                                                                         \
      }                                                                             \
   } while (0)

   VkSampleCountFlags sample_counts =
      isl_device_get_sample_counts(&physical_device->isl_dev);

   /* BSpec 47003: There are a number of restrictions on the sample count
    * based off the coarse pixel size.
    */
   static const VkSampleCountFlags cp_size_sample_limits[] = {
      [1]  = ISL_SAMPLE_COUNT_16_BIT | ISL_SAMPLE_COUNT_8_BIT |
             ISL_SAMPLE_COUNT_4_BIT | ISL_SAMPLE_COUNT_2_BIT | ISL_SAMPLE_COUNT_1_BIT,
      [2]  = ISL_SAMPLE_COUNT_4_BIT | ISL_SAMPLE_COUNT_2_BIT | ISL_SAMPLE_COUNT_1_BIT,
      [4]  = ISL_SAMPLE_COUNT_4_BIT | ISL_SAMPLE_COUNT_2_BIT | ISL_SAMPLE_COUNT_1_BIT,
      [8]  = ISL_SAMPLE_COUNT_2_BIT | ISL_SAMPLE_COUNT_1_BIT,
      [16] = ISL_SAMPLE_COUNT_1_BIT,
   };

   for (uint32_t x = 4; x >= 1; x /= 2) {
      for (uint32_t y = 4; y >= 1; y /= 2) {
         if (physical_device->info.has_coarse_pixel_primitive_and_cb) {
            /* BSpec 47003:
             *    "CPsize 1x4 and 4x1 are not supported"
             */
            if ((x == 1 && y == 4) || (x == 4 && y == 1))
               continue;

            /* For size {1, 1}, the sample count must be ~0.
             *
             * 4x2 is also a special case.
             */
            if (x == 1 && y == 1)
               append_rate(~0, x, y);
            else if (x == 4 && y == 2)
               append_rate(ISL_SAMPLE_COUNT_1_BIT, x, y);
            else
               append_rate(cp_size_sample_limits[x * y], x, y);
         } else {
            /* For size {1, 1}, the sample count must be ~0 */
            if (x == 1 && y == 1)
               append_rate(~0, x, y);
            else
               append_rate(sample_counts, x, y);
         }
      }
   }

#undef append_rate

   return vk_outarray_status(&out);
}

static VkComponentTypeKHR
convert_component_type(enum intel_cooperative_matrix_component_type t)
{
   switch (t) {
   case INTEL_CMAT_FLOAT16: return VK_COMPONENT_TYPE_FLOAT16_KHR;
   case INTEL_CMAT_FLOAT32: return VK_COMPONENT_TYPE_FLOAT32_KHR;
   case INTEL_CMAT_SINT32: return VK_COMPONENT_TYPE_SINT32_KHR;
   case INTEL_CMAT_SINT8: return VK_COMPONENT_TYPE_SINT8_KHR;
   case INTEL_CMAT_UINT32: return VK_COMPONENT_TYPE_UINT32_KHR;
   case INTEL_CMAT_UINT8: return VK_COMPONENT_TYPE_UINT8_KHR;
   }
   unreachable("invalid cooperative matrix component type in configuration");
}

static VkScopeKHR
convert_scope(enum intel_cmat_scope scope)
{
   switch (scope) {
   case INTEL_CMAT_SCOPE_SUBGROUP: return VK_SCOPE_SUBGROUP_KHR;
   default:
      unreachable("invalid cooperative matrix scope in configuration");
   }
}

VkResult anv_GetPhysicalDeviceCooperativeMatrixPropertiesKHR(
    VkPhysicalDevice                            physicalDevice,
    uint32_t*                                   pPropertyCount,
    VkCooperativeMatrixPropertiesKHR*           pProperties)
{
   ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice);
   const struct intel_device_info *devinfo = &pdevice->info;

   assert(anv_has_cooperative_matrix(pdevice));

   VK_OUTARRAY_MAKE_TYPED(VkCooperativeMatrixPropertiesKHR, out, pProperties, pPropertyCount);

   for (int i = 0; i < ARRAY_SIZE(devinfo->cooperative_matrix_configurations); i++) {
      const struct intel_cooperative_matrix_configuration *cfg =
         &devinfo->cooperative_matrix_configurations[i];

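      /* The configuration list is terminated by an entry with scope
       * INTEL_CMAT_SCOPE_NONE.
       */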
      if (cfg->scope == INTEL_CMAT_SCOPE_NONE)
         break;

      vk_outarray_append_typed(VkCooperativeMatrixPropertiesKHR, &out, prop) {
         prop->sType = VK_STRUCTURE_TYPE_COOPERATIVE_MATRIX_PROPERTIES_KHR;

         prop->MSize = cfg->m;
         prop->NSize = cfg->n;
         prop->KSize = cfg->k;

         prop->AType = convert_component_type(cfg->a);
         prop->BType = convert_component_type(cfg->b);
         prop->CType = convert_component_type(cfg->c);
         prop->ResultType = convert_component_type(cfg->result);

         prop->saturatingAccumulation = VK_FALSE;
         prop->scope = convert_scope(cfg->scope);
      }

      /* VUID-RuntimeSpirv-saturatingAccumulation-08983 says:
       *
       *    For OpCooperativeMatrixMulAddKHR, the SaturatingAccumulation
       *    cooperative matrix operand must be present if and only if
       *    VkCooperativeMatrixPropertiesKHR::saturatingAccumulation is
       *    VK_TRUE.
       *
       * As a result, we have to advertise integer configs both with and
       * without this flag set.
       *
       * The DPAS instruction does not support the .sat modifier, so only
       * advertise the configurations when the DPAS would be lowered.
       *
       * FINISHME: It should be possible to do better than full lowering on
       * platforms that support DPAS. Emit a DPAS with a NULL accumulator
       * argument, then perform the correct sequence of saturating add
       * instructions.
       */
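      /* DPAS is lowered on platforms that lack the instruction
       * (verx10 < 125) or when lowering is forced with the
       * INTEL_LOWER_DPAS environment variable.
       */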
      if (cfg->a != INTEL_CMAT_FLOAT16 &&
          (devinfo->verx10 < 125 || debug_get_bool_option("INTEL_LOWER_DPAS", false))) {
         vk_outarray_append_typed(VkCooperativeMatrixPropertiesKHR, &out, prop) {
            prop->sType = VK_STRUCTURE_TYPE_COOPERATIVE_MATRIX_PROPERTIES_KHR;

            prop->MSize = cfg->m;
            prop->NSize = cfg->n;
            prop->KSize = cfg->k;

            prop->AType = convert_component_type(cfg->a);
            prop->BType = convert_component_type(cfg->b);
            prop->CType = convert_component_type(cfg->c);
            prop->ResultType = convert_component_type(cfg->result);

            prop->saturatingAccumulation = VK_TRUE;
            prop->scope = convert_scope(cfg->scope);
         }
      }
   }

   return vk_outarray_status(&out);
}

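/* Time domains that can be calibrated against each other via
 * vkGetCalibratedTimestampsKHR.
 */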
static const VkTimeDomainKHR anv_time_domains[] = {
   VK_TIME_DOMAIN_DEVICE_KHR,
   VK_TIME_DOMAIN_CLOCK_MONOTONIC_KHR,
#ifdef CLOCK_MONOTONIC_RAW
   VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_KHR,
#endif
};

VkResult anv_GetPhysicalDeviceCalibrateableTimeDomainsKHR(
    VkPhysicalDevice                            physicalDevice,
    uint32_t*                                   pTimeDomainCount,
    VkTimeDomainKHR*                            pTimeDomains)
{
   VK_OUTARRAY_MAKE_TYPED(VkTimeDomainKHR, out, pTimeDomains, pTimeDomainCount);

   for (int d = 0; d < ARRAY_SIZE(anv_time_domains); d++) {
      vk_outarray_append_typed(VkTimeDomainKHR, &out, i) {
         *i = anv_time_domains[d];
      }
   }

   return vk_outarray_status(&out);
}
