1 /*
2 * Copyright © 2022 Collabora Ltd. and Red Hat Inc.
3 * SPDX-License-Identifier: MIT
4 */
5 #include "nvk_physical_device.h"
6
7 #include "nak.h"
8 #include "nvk_buffer.h"
9 #include "nvk_descriptor_types.h"
10 #include "nvk_entrypoints.h"
11 #include "nvk_format.h"
12 #include "nvk_image.h"
13 #include "nvk_image_view.h"
14 #include "nvk_instance.h"
15 #include "nvk_sampler.h"
16 #include "nvk_shader.h"
17 #include "nvk_wsi.h"
18 #include "nvkmd/nvkmd.h"
19 #include "nvkmd/nouveau/nvkmd_nouveau.h"
20 #include "git_sha1.h"
21 #include "util/disk_cache.h"
22 #include "util/mesa-sha1.h"
23
24 #include "vk_device.h"
25 #include "vk_drm_syncobj.h"
26 #include "vk_shader_module.h"
27 #include "vulkan/wsi/wsi_common.h"
28
29 #include <sys/sysmacros.h>
30
31 #include "cl90c0.h"
32 #include "cl91c0.h"
33 #include "cla097.h"
34 #include "cla0c0.h"
35 #include "cla1c0.h"
36 #include "clb097.h"
37 #include "clb0c0.h"
38 #include "clb197.h"
39 #include "clb1c0.h"
40 #include "clc097.h"
41 #include "clc0c0.h"
42 #include "clc1c0.h"
43 #include "clc397.h"
44 #include "clc3c0.h"
45 #include "clc597.h"
46 #include "clc5c0.h"
47 #include "clc997.h"
48
49 static bool
nvk_use_nak(const struct nv_device_info * info)50 nvk_use_nak(const struct nv_device_info *info)
51 {
52 const VkShaderStageFlags vk10_stages =
53 VK_SHADER_STAGE_VERTEX_BIT |
54 VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT |
55 VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT |
56 VK_SHADER_STAGE_GEOMETRY_BIT |
57 VK_SHADER_STAGE_FRAGMENT_BIT |
58 VK_SHADER_STAGE_COMPUTE_BIT;
59
60 return !(vk10_stages & ~nvk_nak_stages(info));
61 }
62
63 static uint32_t
nvk_get_vk_version(const struct nv_device_info * info)64 nvk_get_vk_version(const struct nv_device_info *info)
65 {
66 /* Version override takes priority */
67 const uint32_t version_override = vk_get_version_override();
68 if (version_override)
69 return version_override;
70
71 /* If we're using codegen for anything, lock to version 1.0 */
72 if (!nvk_use_nak(info))
73 return VK_MAKE_VERSION(1, 0, VK_HEADER_VERSION);
74
75 return VK_MAKE_VERSION(1, 3, VK_HEADER_VERSION);
76 }
77
78 static void
nvk_get_device_extensions(const struct nvk_instance * instance,const struct nv_device_info * info,bool has_tiled_bos,struct vk_device_extension_table * ext)79 nvk_get_device_extensions(const struct nvk_instance *instance,
80 const struct nv_device_info *info,
81 bool has_tiled_bos,
82 struct vk_device_extension_table *ext)
83 {
84 *ext = (struct vk_device_extension_table) {
85 .KHR_8bit_storage = true,
86 .KHR_16bit_storage = true,
87 .KHR_bind_memory2 = true,
88 .KHR_buffer_device_address = true,
89 .KHR_calibrated_timestamps = true,
90 .KHR_compute_shader_derivatives = nvk_use_nak(info),
91 .KHR_copy_commands2 = true,
92 .KHR_create_renderpass2 = true,
93 .KHR_dedicated_allocation = true,
94 .KHR_depth_stencil_resolve = true,
95 .KHR_descriptor_update_template = true,
96 .KHR_device_group = true,
97 .KHR_draw_indirect_count = info->cls_eng3d >= TURING_A,
98 .KHR_driver_properties = true,
99 .KHR_dynamic_rendering = true,
100 .KHR_dynamic_rendering_local_read = true,
101 .KHR_external_fence = true,
102 .KHR_external_fence_fd = true,
103 .KHR_external_memory = true,
104 .KHR_external_memory_fd = true,
105 .KHR_external_semaphore = true,
106 .KHR_external_semaphore_fd = true,
107 .KHR_format_feature_flags2 = true,
108 .KHR_fragment_shader_barycentric = info->cls_eng3d >= TURING_A &&
109 (nvk_nak_stages(info) & VK_SHADER_STAGE_FRAGMENT_BIT) != 0,
110 .KHR_get_memory_requirements2 = true,
111 .KHR_image_format_list = true,
112 .KHR_imageless_framebuffer = true,
113 #ifdef NVK_USE_WSI_PLATFORM
114 .KHR_incremental_present = true,
115 #endif
116 .KHR_index_type_uint8 = true,
117 .KHR_line_rasterization = true,
118 .KHR_load_store_op_none = true,
119 .KHR_maintenance1 = true,
120 .KHR_maintenance2 = true,
121 .KHR_maintenance3 = true,
122 .KHR_maintenance4 = true,
123 .KHR_maintenance5 = true,
124 .KHR_maintenance6 = true,
125 .KHR_maintenance7 = true,
126 .KHR_map_memory2 = true,
127 .KHR_multiview = true,
128 .KHR_pipeline_executable_properties = true,
129 .KHR_pipeline_library = true,
130 #ifdef NVK_USE_WSI_PLATFORM
131 /* Hide these behind dri configs for now since we cannot implement it
132 * reliably on all surfaces yet. There is no surface capability query
133 * for present wait/id, but the feature is useful enough to hide behind
134 * an opt-in mechanism for now. If the instance only enables surface
135 * extensions that unconditionally support present wait, we can also
136 * expose the extension that way.
137 */
138 .KHR_present_id = driQueryOptionb(&instance->dri_options, "vk_khr_present_wait") ||
139 wsi_common_vk_instance_supports_present_wait(&instance->vk),
140 .KHR_present_wait = driQueryOptionb(&instance->dri_options, "vk_khr_present_wait") ||
141 wsi_common_vk_instance_supports_present_wait(&instance->vk),
142 #endif
143 .KHR_push_descriptor = true,
144 .KHR_relaxed_block_layout = true,
145 .KHR_sampler_mirror_clamp_to_edge = true,
146 .KHR_sampler_ycbcr_conversion = true,
147 .KHR_separate_depth_stencil_layouts = true,
148 .KHR_shader_atomic_int64 = info->cls_eng3d >= MAXWELL_A &&
149 nvk_use_nak(info),
150 .KHR_shader_clock = true,
151 .KHR_shader_draw_parameters = true,
152 .KHR_shader_expect_assume = true,
153 .KHR_shader_float_controls = true,
154 .KHR_shader_float16_int8 = true,
155 .KHR_shader_integer_dot_product = true,
156 .KHR_shader_maximal_reconvergence = true,
157 .KHR_shader_non_semantic_info = true,
158 .KHR_shader_relaxed_extended_instruction = true,
159 .KHR_shader_subgroup_extended_types = true,
160 .KHR_shader_subgroup_rotate = nvk_use_nak(info),
161 .KHR_shader_subgroup_uniform_control_flow = nvk_use_nak(info),
162 .KHR_shader_terminate_invocation =
163 (nvk_nak_stages(info) & VK_SHADER_STAGE_FRAGMENT_BIT) != 0,
164 .KHR_spirv_1_4 = true,
165 .KHR_storage_buffer_storage_class = true,
166 .KHR_timeline_semaphore = true,
167 #ifdef NVK_USE_WSI_PLATFORM
168 .KHR_swapchain = true,
169 .KHR_swapchain_mutable_format = true,
170 #endif
171 .KHR_synchronization2 = true,
172 .KHR_uniform_buffer_standard_layout = true,
173 .KHR_variable_pointers = true,
174 .KHR_vertex_attribute_divisor = true,
175 .KHR_vulkan_memory_model = nvk_use_nak(info),
176 .KHR_workgroup_memory_explicit_layout = true,
177 .KHR_zero_initialize_workgroup_memory = true,
178 .EXT_4444_formats = true,
179 .EXT_attachment_feedback_loop_layout = true,
180 .EXT_border_color_swizzle = true,
181 .EXT_buffer_device_address = true,
182 .EXT_calibrated_timestamps = true,
183 .EXT_conditional_rendering = true,
184 .EXT_conservative_rasterization = info->cls_eng3d >= MAXWELL_B,
185 .EXT_color_write_enable = true,
186 .EXT_custom_border_color = true,
187 .EXT_depth_bias_control = true,
188 .EXT_depth_clip_control = true,
189 .EXT_depth_clip_enable = true,
190 .EXT_depth_range_unrestricted = info->cls_eng3d >= VOLTA_A,
191 .EXT_descriptor_buffer = true,
192 .EXT_descriptor_indexing = true,
193 #ifdef VK_USE_PLATFORM_DISPLAY_KHR
194 .EXT_display_control = true,
195 #endif
196 .EXT_image_drm_format_modifier = has_tiled_bos,
197 .EXT_dynamic_rendering_unused_attachments = true,
198 .EXT_extended_dynamic_state = true,
199 .EXT_extended_dynamic_state2 = true,
200 .EXT_extended_dynamic_state3 = true,
201 .EXT_external_memory_dma_buf = true,
202 .EXT_graphics_pipeline_library = true,
203 .EXT_host_query_reset = true,
204 .EXT_image_2d_view_of_3d = true,
205 .EXT_image_robustness = true,
206 .EXT_image_sliced_view_of_3d = true,
207 .EXT_image_view_min_lod = true,
208 .EXT_index_type_uint8 = true,
209 .EXT_inline_uniform_block = true,
210 .EXT_legacy_vertex_attributes = true,
211 .EXT_line_rasterization = true,
212 .EXT_load_store_op_none = true,
213 .EXT_map_memory_placed = true,
214 .EXT_memory_budget = true,
215 .EXT_multi_draw = true,
216 .EXT_mutable_descriptor_type = true,
217 .EXT_nested_command_buffer = true,
218 .EXT_non_seamless_cube_map = true,
219 .EXT_pci_bus_info = info->type == NV_DEVICE_TYPE_DIS,
220 .EXT_pipeline_creation_cache_control = true,
221 .EXT_pipeline_creation_feedback = true,
222 .EXT_pipeline_robustness = true,
223 .EXT_physical_device_drm = true,
224 .EXT_post_depth_coverage = true,
225 .EXT_primitive_topology_list_restart = true,
226 .EXT_private_data = true,
227 .EXT_primitives_generated_query = true,
228 .EXT_provoking_vertex = true,
229 .EXT_queue_family_foreign = true,
230 .EXT_robustness2 = true,
231 .EXT_sample_locations = info->cls_eng3d >= MAXWELL_B,
232 .EXT_sampler_filter_minmax = info->cls_eng3d >= MAXWELL_B,
233 .EXT_scalar_block_layout = nvk_use_nak(info),
234 .EXT_separate_stencil_usage = true,
235 .EXT_shader_image_atomic_int64 = info->cls_eng3d >= MAXWELL_A &&
236 nvk_use_nak(info),
237 .EXT_shader_demote_to_helper_invocation = true,
238 .EXT_shader_module_identifier = true,
239 .EXT_shader_object = true,
240 .EXT_shader_replicated_composites = true,
241 .EXT_shader_subgroup_ballot = true,
242 .EXT_shader_subgroup_vote = true,
243 .EXT_shader_viewport_index_layer = info->cls_eng3d >= MAXWELL_B,
244 .EXT_subgroup_size_control = true,
245 #ifdef NVK_USE_WSI_PLATFORM
246 .EXT_swapchain_maintenance1 = true,
247 #endif
248 .EXT_texel_buffer_alignment = true,
249 .EXT_tooling_info = true,
250 .EXT_transform_feedback = true,
251 .EXT_vertex_attribute_divisor = true,
252 .EXT_vertex_input_dynamic_state = true,
253 .EXT_ycbcr_2plane_444_formats = true,
254 .EXT_ycbcr_image_arrays = true,
255 .GOOGLE_decorate_string = true,
256 .GOOGLE_hlsl_functionality1 = true,
257 .GOOGLE_user_type = true,
258 .NV_compute_shader_derivatives = nvk_use_nak(info),
259 .NV_shader_sm_builtins = true,
260 .VALVE_mutable_descriptor_type = true,
261 };
262 }
263
264 static void
nvk_get_device_features(const struct nv_device_info * info,const struct vk_device_extension_table * supported_extensions,struct vk_features * features)265 nvk_get_device_features(const struct nv_device_info *info,
266 const struct vk_device_extension_table *supported_extensions,
267 struct vk_features *features)
268 {
269 *features = (struct vk_features) {
270 /* Vulkan 1.0 */
271 .robustBufferAccess = true,
272 .fullDrawIndexUint32 = true,
273 .imageCubeArray = true,
274 .independentBlend = true,
275 .geometryShader = true,
276 .tessellationShader = true,
277 .sampleRateShading = true,
278 .dualSrcBlend = true,
279 .logicOp = true,
280 .multiDrawIndirect = true,
281 .drawIndirectFirstInstance = true,
282 .depthClamp = true,
283 .depthBiasClamp = true,
284 .fillModeNonSolid = true,
285 .depthBounds = true,
286 .wideLines = true,
287 .largePoints = true,
288 .alphaToOne = true,
289 .multiViewport = true,
290 .samplerAnisotropy = true,
291 .textureCompressionETC2 = false,
292 .textureCompressionBC = true,
293 .textureCompressionASTC_LDR = false,
294 .occlusionQueryPrecise = true,
295 .pipelineStatisticsQuery = true,
296 .vertexPipelineStoresAndAtomics = true,
297 .fragmentStoresAndAtomics = true,
298 .shaderTessellationAndGeometryPointSize = true,
299 .shaderImageGatherExtended = true,
300 .shaderStorageImageExtendedFormats = true,
301 .shaderStorageImageMultisample = true,
302 .shaderStorageImageReadWithoutFormat = info->cls_eng3d >= MAXWELL_A,
303 .shaderStorageImageWriteWithoutFormat = true,
304 .shaderUniformBufferArrayDynamicIndexing = true,
305 .shaderSampledImageArrayDynamicIndexing = true,
306 .shaderStorageBufferArrayDynamicIndexing = true,
307 .shaderStorageImageArrayDynamicIndexing = true,
308 .shaderClipDistance = true,
309 .shaderCullDistance = true,
310 .shaderFloat64 = true,
311 .shaderInt64 = true,
312 .shaderInt16 = true,
313 .shaderResourceResidency = info->cls_eng3d >= VOLTA_A,
314 .shaderResourceMinLod = info->cls_eng3d >= VOLTA_A,
315 .sparseBinding = true,
316 .sparseResidency2Samples = info->cls_eng3d >= MAXWELL_B,
317 .sparseResidency4Samples = info->cls_eng3d >= MAXWELL_B,
318 .sparseResidency8Samples = info->cls_eng3d >= MAXWELL_B,
319 .sparseResidencyAliased = info->cls_eng3d >= MAXWELL_B,
320 .sparseResidencyBuffer = info->cls_eng3d >= MAXWELL_B,
321 .sparseResidencyImage2D = info->cls_eng3d >= MAXWELL_B,
322 .sparseResidencyImage3D = info->cls_eng3d >= MAXWELL_B,
323 .variableMultisampleRate = true,
324 .inheritedQueries = true,
325
326 /* Vulkan 1.1 */
327 .storageBuffer16BitAccess = true,
328 .uniformAndStorageBuffer16BitAccess = true,
329 .storagePushConstant16 = true,
330 .multiview = true,
331 .multiviewGeometryShader = true,
332 .multiviewTessellationShader = true,
333 .variablePointersStorageBuffer = true,
334 .variablePointers = true,
335 .shaderDrawParameters = true,
336 .samplerYcbcrConversion = true,
337
338 /* Vulkan 1.2 */
339 .samplerMirrorClampToEdge = true,
340 .drawIndirectCount = info->cls_eng3d >= TURING_A,
341 .storageBuffer8BitAccess = true,
342 .uniformAndStorageBuffer8BitAccess = true,
343 .storagePushConstant8 = true,
344 .shaderBufferInt64Atomics = info->cls_eng3d >= MAXWELL_A &&
345 nvk_use_nak(info),
346 .shaderSharedInt64Atomics = false, /* TODO */
347 /* TODO: Fp16 is currently busted on Turing and Volta due to instruction
348 * scheduling issues. Re-enable it once those are sorted.
349 */
350 .shaderFloat16 = info->sm >= 80 && nvk_use_nak(info),
351 .shaderInt8 = true,
352 .descriptorIndexing = true,
353 .shaderInputAttachmentArrayDynamicIndexing = true,
354 .shaderUniformTexelBufferArrayDynamicIndexing = true,
355 .shaderStorageTexelBufferArrayDynamicIndexing = true,
356 .shaderUniformBufferArrayNonUniformIndexing = true,
357 .shaderSampledImageArrayNonUniformIndexing = true,
358 .shaderStorageBufferArrayNonUniformIndexing = true,
359 .shaderStorageImageArrayNonUniformIndexing = true,
360 .shaderInputAttachmentArrayNonUniformIndexing = true,
361 .shaderUniformTexelBufferArrayNonUniformIndexing = true,
362 .shaderStorageTexelBufferArrayNonUniformIndexing = true,
363 .descriptorBindingUniformBufferUpdateAfterBind = true,
364 .descriptorBindingSampledImageUpdateAfterBind = true,
365 .descriptorBindingStorageImageUpdateAfterBind = true,
366 .descriptorBindingStorageBufferUpdateAfterBind = true,
367 .descriptorBindingUniformTexelBufferUpdateAfterBind = true,
368 .descriptorBindingStorageTexelBufferUpdateAfterBind = true,
369 .descriptorBindingUpdateUnusedWhilePending = true,
370 .descriptorBindingPartiallyBound = true,
371 .descriptorBindingVariableDescriptorCount = true,
372 .runtimeDescriptorArray = true,
373 .samplerFilterMinmax = info->cls_eng3d >= MAXWELL_B,
374 .scalarBlockLayout = nvk_use_nak(info),
375 .imagelessFramebuffer = true,
376 .uniformBufferStandardLayout = true,
377 .shaderSubgroupExtendedTypes = true,
378 .separateDepthStencilLayouts = true,
379 .hostQueryReset = true,
380 .timelineSemaphore = true,
381 .bufferDeviceAddress = true,
382 .bufferDeviceAddressCaptureReplay = true,
383 .bufferDeviceAddressMultiDevice = false,
384 .vulkanMemoryModel = nvk_use_nak(info),
385 .vulkanMemoryModelDeviceScope = nvk_use_nak(info),
386 .vulkanMemoryModelAvailabilityVisibilityChains = nvk_use_nak(info),
387 .shaderOutputViewportIndex = info->cls_eng3d >= MAXWELL_B,
388 .shaderOutputLayer = info->cls_eng3d >= MAXWELL_B,
389 .subgroupBroadcastDynamicId = nvk_use_nak(info),
390
391 /* Vulkan 1.3 */
392 .robustImageAccess = true,
393 .inlineUniformBlock = true,
394 .descriptorBindingInlineUniformBlockUpdateAfterBind = true,
395 .pipelineCreationCacheControl = true,
396 .privateData = true,
397 .shaderDemoteToHelperInvocation = true,
398 .shaderTerminateInvocation = true,
399 .subgroupSizeControl = true,
400 .computeFullSubgroups = true,
401 .synchronization2 = true,
402 .shaderZeroInitializeWorkgroupMemory = true,
403 .dynamicRendering = true,
404 .shaderIntegerDotProduct = true,
405 .maintenance4 = true,
406
407 /* VK_KHR_compute_shader_derivatives */
408 .computeDerivativeGroupQuads = true,
409 .computeDerivativeGroupLinear = true,
410
411 /* VK_KHR_dynamic_rendering_local_read */
412 .dynamicRenderingLocalRead = true,
413
414 /* VK_KHR_fragment_shader_barycentric */
415 .fragmentShaderBarycentric = info->cls_eng3d >= TURING_A &&
416 (nvk_nak_stages(info) & VK_SHADER_STAGE_FRAGMENT_BIT) != 0,
417
418 /* VK_KHR_index_type_uint8 */
419 .indexTypeUint8 = true,
420
421 /* VK_KHR_line_rasterization */
422 .rectangularLines = true,
423 .bresenhamLines = true,
424 .smoothLines = true,
425 .stippledRectangularLines = true,
426 .stippledBresenhamLines = true,
427 .stippledSmoothLines = true,
428
429 /* VK_KHR_maintenance5 */
430 .maintenance5 = true,
431
432 /* VK_KHR_maintenance6 */
433 .maintenance6 = true,
434
435 /* VK_KHR_maintenance7 */
436 .maintenance7 = true,
437
438 /* VK_KHR_pipeline_executable_properties */
439 .pipelineExecutableInfo = true,
440
441 /* VK_KHR_present_id */
442 .presentId = supported_extensions->KHR_present_id,
443
444 /* VK_KHR_present_wait */
445 .presentWait = supported_extensions->KHR_present_wait,
446
447 /* VK_KHR_shader_clock */
448 .shaderSubgroupClock = true,
449 .shaderDeviceClock = true,
450
451 /* VK_KHR_shader_expect_assume */
452 .shaderExpectAssume = true,
453
454 /* VK_KHR_shader_maximal_reconvergence */
455 .shaderMaximalReconvergence = true,
456
457 /* VK_KHR_shader_subgroup_rotate */
458 .shaderSubgroupRotate = nvk_use_nak(info),
459 .shaderSubgroupRotateClustered = nvk_use_nak(info),
460
461 /* VK_KHR_vertex_attribute_divisor */
462 .vertexAttributeInstanceRateDivisor = true,
463 .vertexAttributeInstanceRateZeroDivisor = true,
464
465 /* VK_KHR_workgroup_memory_explicit_layout */
466 .workgroupMemoryExplicitLayout = true,
467 .workgroupMemoryExplicitLayoutScalarBlockLayout = true,
468 .workgroupMemoryExplicitLayout8BitAccess = nvk_use_nak(info),
469 .workgroupMemoryExplicitLayout16BitAccess = nvk_use_nak(info),
470
471 /* VK_EXT_4444_formats */
472 .formatA4R4G4B4 = true,
473 .formatA4B4G4R4 = true,
474
475 /* VK_EXT_attachment_feedback_loop_layout */
476 .attachmentFeedbackLoopLayout = true,
477
478 /* VK_EXT_border_color_swizzle */
479 .borderColorSwizzle = true,
480 .borderColorSwizzleFromImage = false,
481
482 /* VK_EXT_buffer_device_address */
483 .bufferDeviceAddressCaptureReplayEXT = true,
484
485 /* VK_EXT_color_write_enable */
486 .colorWriteEnable = true,
487
488 /* VK_EXT_conditional_rendering */
489 .conditionalRendering = true,
490 .inheritedConditionalRendering = true,
491
492 /* VK_EXT_custom_border_color */
493 .customBorderColors = true,
494 .customBorderColorWithoutFormat = true,
495
496 /* VK_EXT_depth_bias_control */
497 .depthBiasControl = true,
498 .leastRepresentableValueForceUnormRepresentation = true,
499 .floatRepresentation = false,
500 .depthBiasExact = true,
501
502 /* VK_EXT_depth_clip_control */
503 .depthClipControl = true,
504
505 /* VK_EXT_depth_clip_enable */
506 .depthClipEnable = true,
507
508 /* VK_EXT_descriptor_buffer */
509 .descriptorBuffer = true,
510 .descriptorBufferCaptureReplay = true,
511 .descriptorBufferImageLayoutIgnored = true,
512 .descriptorBufferPushDescriptors = false,
513
514 /* VK_EXT_dynamic_rendering_unused_attachments */
515 .dynamicRenderingUnusedAttachments = true,
516
517 /* VK_EXT_extended_dynamic_state */
518 .extendedDynamicState = true,
519
520 /* VK_EXT_extended_dynamic_state2 */
521 .extendedDynamicState2 = true,
522 .extendedDynamicState2LogicOp = true,
523 .extendedDynamicState2PatchControlPoints = true,
524
525 /* VK_EXT_extended_dynamic_state3 */
526 .extendedDynamicState3TessellationDomainOrigin = true,
527 .extendedDynamicState3DepthClampEnable = true,
528 .extendedDynamicState3PolygonMode = true,
529 .extendedDynamicState3RasterizationSamples = true,
530 .extendedDynamicState3SampleMask = true,
531 .extendedDynamicState3AlphaToCoverageEnable = true,
532 .extendedDynamicState3AlphaToOneEnable = true,
533 .extendedDynamicState3LogicOpEnable = true,
534 .extendedDynamicState3ColorBlendEnable = true,
535 .extendedDynamicState3ColorBlendEquation = true,
536 .extendedDynamicState3ColorWriteMask = true,
537 .extendedDynamicState3RasterizationStream = true,
538 .extendedDynamicState3ConservativeRasterizationMode = false,
539 .extendedDynamicState3ExtraPrimitiveOverestimationSize = false,
540 .extendedDynamicState3DepthClipEnable = true,
541 .extendedDynamicState3SampleLocationsEnable = info->cls_eng3d >= MAXWELL_B,
542 .extendedDynamicState3ColorBlendAdvanced = false,
543 .extendedDynamicState3ProvokingVertexMode = true,
544 .extendedDynamicState3LineRasterizationMode = true,
545 .extendedDynamicState3LineStippleEnable = true,
546 .extendedDynamicState3DepthClipNegativeOneToOne = true,
547 .extendedDynamicState3ViewportWScalingEnable = false,
548 .extendedDynamicState3ViewportSwizzle = false,
549 .extendedDynamicState3CoverageToColorEnable = false,
550 .extendedDynamicState3CoverageToColorLocation = false,
551 .extendedDynamicState3CoverageModulationMode = false,
552 .extendedDynamicState3CoverageModulationTableEnable = false,
553 .extendedDynamicState3CoverageModulationTable = false,
554 .extendedDynamicState3CoverageReductionMode = false,
555 .extendedDynamicState3RepresentativeFragmentTestEnable = false,
556 .extendedDynamicState3ShadingRateImageEnable = false,
557
558 /* VK_EXT_graphics_pipeline_library */
559 .graphicsPipelineLibrary = true,
560
561 /* VK_EXT_image_2d_view_of_3d */
562 .image2DViewOf3D = true,
563 .sampler2DViewOf3D = true,
564
565 /* VK_EXT_image_sliced_view_of_3d */
566 .imageSlicedViewOf3D = true,
567
568 #ifdef NVK_USE_WSI_PLATFORM
569 /* VK_EXT_swapchain_maintenance1 */
570 .swapchainMaintenance1 = true,
571 #endif
572
573 /* VK_EXT_image_view_min_lod */
574 .minLod = true,
575
576 /* VK_EXT_legacy_vertex_attributes */
577 .legacyVertexAttributes = true,
578
579 /* VK_EXT_map_memory_placed */
580 .memoryMapPlaced = true,
581 .memoryMapRangePlaced = false,
582 .memoryUnmapReserve = true,
583
584 /* VK_EXT_multi_draw */
585 .multiDraw = true,
586
587 /* VK_EXT_mutable_descriptor_type */
588 .mutableDescriptorType = true,
589
590 /* VK_EXT_nested_command_buffer */
591 .nestedCommandBuffer = true,
592 .nestedCommandBufferRendering = true,
593 .nestedCommandBufferSimultaneousUse = true,
594
595 /* VK_EXT_non_seamless_cube_map */
596 .nonSeamlessCubeMap = true,
597
598 /* VK_EXT_pipeline_robustness */
599 .pipelineRobustness = true,
600
601 /* VK_EXT_primitive_topology_list_restart */
602 .primitiveTopologyListRestart = true,
603 .primitiveTopologyPatchListRestart = true,
604
605 /* VK_EXT_primitives_generated_query */
606 .primitivesGeneratedQuery = true,
607 .primitivesGeneratedQueryWithNonZeroStreams = true,
608 .primitivesGeneratedQueryWithRasterizerDiscard = true,
609
610 /* VK_EXT_provoking_vertex */
611 .provokingVertexLast = true,
612 .transformFeedbackPreservesProvokingVertex = true,
613
614 /* VK_EXT_robustness2 */
615 .robustBufferAccess2 = true,
616 .robustImageAccess2 = true,
617 .nullDescriptor = true,
618
619 /* VK_EXT_shader_image_atomic_int64 */
620 .shaderImageInt64Atomics = info->cls_eng3d >= MAXWELL_A &&
621 nvk_use_nak(info),
622 .sparseImageInt64Atomics = info->cls_eng3d >= MAXWELL_A &&
623 nvk_use_nak(info),
624
625 /* VK_EXT_shader_module_identifier */
626 .shaderModuleIdentifier = true,
627
628 /* VK_EXT_shader_object */
629 .shaderObject = true,
630
631 /* VK_EXT_shader_replicated_composites */
632 .shaderReplicatedComposites = true,
633
634 /* VK_KHR_shader_subgroup_uniform_control_flow */
635 .shaderSubgroupUniformControlFlow = nvk_use_nak(info),
636
637 /* VK_EXT_texel_buffer_alignment */
638 .texelBufferAlignment = true,
639
640 /* VK_EXT_transform_feedback */
641 .transformFeedback = true,
642 .geometryStreams = true,
643
644 /* VK_EXT_vertex_input_dynamic_state */
645 .vertexInputDynamicState = true,
646
647 /* VK_EXT_ycbcr_2plane_444_formats */
648 .ycbcr2plane444Formats = true,
649
650 /* VK_EXT_ycbcr_image_arrays */
651 .ycbcrImageArrays = true,
652
653 /* VK_NV_shader_sm_builtins */
654 .shaderSMBuiltins = true,
655
656 /* VK_KHR_shader_relaxed_extended_instruction */
657 .shaderRelaxedExtendedInstruction = true,
658 };
659 }
660
661 static void
nvk_get_device_properties(const struct nvk_instance * instance,const struct nv_device_info * info,bool conformant,struct vk_properties * properties)662 nvk_get_device_properties(const struct nvk_instance *instance,
663 const struct nv_device_info *info,
664 bool conformant,
665 struct vk_properties *properties)
666 {
667 const VkSampleCountFlagBits sample_counts = VK_SAMPLE_COUNT_1_BIT |
668 VK_SAMPLE_COUNT_2_BIT |
669 VK_SAMPLE_COUNT_4_BIT |
670 VK_SAMPLE_COUNT_8_BIT;
671
672 assert(sample_counts <= (NVK_MAX_SAMPLES << 1) - 1);
673
674 uint64_t os_page_size = 4096;
675 os_get_page_size(&os_page_size);
676
677 *properties = (struct vk_properties) {
678 .apiVersion = nvk_get_vk_version(info),
679 .driverVersion = vk_get_driver_version(),
680 .vendorID = instance->force_vk_vendor != 0 ?
681 instance->force_vk_vendor : NVIDIA_VENDOR_ID,
682 .deviceID = info->device_id,
683 .deviceType = info->type == NV_DEVICE_TYPE_DIS ?
684 VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU :
685 VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU,
686
687 /* Vulkan 1.0 limits */
688 .maxImageDimension1D = nvk_image_max_dimension(info, VK_IMAGE_TYPE_1D),
689 .maxImageDimension2D = nvk_image_max_dimension(info, VK_IMAGE_TYPE_2D),
690 .maxImageDimension3D = nvk_image_max_dimension(info, VK_IMAGE_TYPE_3D),
691 .maxImageDimensionCube = 0x8000,
692 .maxImageArrayLayers = 2048,
693 .maxTexelBufferElements = 128 * 1024 * 1024,
694 .maxUniformBufferRange = 65536,
695 .maxStorageBufferRange = UINT32_MAX,
696 .maxPushConstantsSize = NVK_MAX_PUSH_SIZE,
697 .maxMemoryAllocationCount = 4096,
698 .maxSamplerAllocationCount = 4000,
699 .bufferImageGranularity = info->cls_eng3d >= MAXWELL_B ? 0x400 : 0x10000,
700 .sparseAddressSpaceSize = NVK_SPARSE_ADDR_SPACE_SIZE,
701 .maxBoundDescriptorSets = NVK_MAX_SETS,
702 .maxPerStageDescriptorSamplers = NVK_MAX_DESCRIPTORS,
703 .maxPerStageDescriptorUniformBuffers = NVK_MAX_DESCRIPTORS,
704 .maxPerStageDescriptorStorageBuffers = NVK_MAX_DESCRIPTORS,
705 .maxPerStageDescriptorSampledImages = NVK_MAX_DESCRIPTORS,
706 .maxPerStageDescriptorStorageImages = NVK_MAX_DESCRIPTORS,
707 .maxPerStageDescriptorInputAttachments = NVK_MAX_DESCRIPTORS,
708 .maxPerStageResources = UINT32_MAX,
709 .maxDescriptorSetSamplers = NVK_MAX_DESCRIPTORS,
710 .maxDescriptorSetUniformBuffers = NVK_MAX_DESCRIPTORS,
711 .maxDescriptorSetUniformBuffersDynamic = NVK_MAX_DYNAMIC_BUFFERS / 2,
712 .maxDescriptorSetStorageBuffers = NVK_MAX_DESCRIPTORS,
713 .maxDescriptorSetStorageBuffersDynamic = NVK_MAX_DYNAMIC_BUFFERS / 2,
714 .maxDescriptorSetSampledImages = NVK_MAX_DESCRIPTORS,
715 .maxDescriptorSetStorageImages = NVK_MAX_DESCRIPTORS,
716 .maxDescriptorSetInputAttachments = NVK_MAX_DESCRIPTORS,
717 .maxVertexInputAttributes = 32,
718 .maxVertexInputBindings = 32,
719 .maxVertexInputAttributeOffset = 2047,
720 .maxVertexInputBindingStride = 2048,
721 .maxVertexOutputComponents = 128,
722 .maxTessellationGenerationLevel = 64,
723 .maxTessellationPatchSize = 32,
724 .maxTessellationControlPerVertexInputComponents = 128,
725 .maxTessellationControlPerVertexOutputComponents = 128,
726 .maxTessellationControlPerPatchOutputComponents = 120,
727 .maxTessellationControlTotalOutputComponents = 4216,
728 .maxTessellationEvaluationInputComponents = 128,
729 .maxTessellationEvaluationOutputComponents = 128,
730 .maxGeometryShaderInvocations = 32,
731 .maxGeometryInputComponents = 128,
732 .maxGeometryOutputComponents = 128,
733 .maxGeometryOutputVertices = 1024,
734 .maxGeometryTotalOutputComponents = 1024,
735 .maxFragmentInputComponents = 128,
736 .maxFragmentOutputAttachments = NVK_MAX_RTS,
737 .maxFragmentDualSrcAttachments = 1,
738 .maxFragmentCombinedOutputResources = 16,
739 .maxComputeSharedMemorySize = NVK_MAX_SHARED_SIZE,
740 .maxComputeWorkGroupCount = {0x7fffffff, 65535, 65535},
741 .maxComputeWorkGroupInvocations = 1024,
742 .maxComputeWorkGroupSize = {1024, 1024, 64},
743 .subPixelPrecisionBits = 8,
744 .subTexelPrecisionBits = 8,
745 .mipmapPrecisionBits = 8,
746 .maxDrawIndexedIndexValue = UINT32_MAX,
747 .maxDrawIndirectCount = UINT32_MAX,
748 .maxSamplerLodBias = 15,
749 .maxSamplerAnisotropy = 16,
750 .maxViewports = NVK_MAX_VIEWPORTS,
751 .maxViewportDimensions = { 32768, 32768 },
752 .viewportBoundsRange = { -65536, 65536 },
753 .viewportSubPixelBits = 8,
754 .minMemoryMapAlignment = os_page_size,
755 .minTexelBufferOffsetAlignment = NVK_MIN_TEXEL_BUFFER_ALIGNMENT,
756 .minUniformBufferOffsetAlignment = nvk_min_cbuf_alignment(info),
757 .minStorageBufferOffsetAlignment = NVK_MIN_SSBO_ALIGNMENT,
758 .minTexelOffset = -8,
759 .maxTexelOffset = 7,
760 .minTexelGatherOffset = -32,
761 .maxTexelGatherOffset = 31,
762 .minInterpolationOffset = -0.5,
763 .maxInterpolationOffset = 0.4375,
764 .subPixelInterpolationOffsetBits = 4,
765 .maxFramebufferHeight = info->cls_eng3d >= PASCAL_A ? 0x8000 : 0x4000,
766 .maxFramebufferWidth = info->cls_eng3d >= PASCAL_A ? 0x8000 : 0x4000,
767 .maxFramebufferLayers = 2048,
768 .framebufferColorSampleCounts = sample_counts,
769 .framebufferDepthSampleCounts = sample_counts,
770 .framebufferNoAttachmentsSampleCounts = sample_counts,
771 .framebufferStencilSampleCounts = sample_counts,
772 .maxColorAttachments = NVK_MAX_RTS,
773 .sampledImageColorSampleCounts = sample_counts,
774 .sampledImageIntegerSampleCounts = sample_counts,
775 .sampledImageDepthSampleCounts = sample_counts,
776 .sampledImageStencilSampleCounts = sample_counts,
777 .storageImageSampleCounts = sample_counts,
778 .maxSampleMaskWords = 1,
779 .timestampComputeAndGraphics = true,
780 .timestampPeriod = 1,
781 .maxClipDistances = 8,
782 .maxCullDistances = 8,
783 .maxCombinedClipAndCullDistances = 8,
784 .discreteQueuePriorities = 2,
785 .pointSizeRange = { 1.0, 2047.94 },
786 .lineWidthRange = { 1, 64 },
787 .pointSizeGranularity = 0.0625,
788 .lineWidthGranularity = 0.0625,
789 .strictLines = true,
790 .standardSampleLocations = true,
791 .optimalBufferCopyOffsetAlignment = 1,
792 .optimalBufferCopyRowPitchAlignment = 1,
793 .nonCoherentAtomSize = 64,
794
795 /* Vulkan 1.0 sparse properties */
796 .sparseResidencyNonResidentStrict = true,
797 .sparseResidencyAlignedMipSize = info->cls_eng3d < MAXWELL_B, /* DXVK/vkd3d-proton requires this to be advertised as VK_FALSE for FL12 */
798 .sparseResidencyStandard2DBlockShape = true,
799 .sparseResidencyStandard2DMultisampleBlockShape = true,
800 .sparseResidencyStandard3DBlockShape = true,
801
802 /* Vulkan 1.1 properties */
803 .subgroupSize = 32,
804 .subgroupSupportedStages = nvk_nak_stages(info),
805 .subgroupSupportedOperations = VK_SUBGROUP_FEATURE_ARITHMETIC_BIT |
806 VK_SUBGROUP_FEATURE_BALLOT_BIT |
807 VK_SUBGROUP_FEATURE_BASIC_BIT |
808 VK_SUBGROUP_FEATURE_CLUSTERED_BIT |
809 VK_SUBGROUP_FEATURE_QUAD_BIT |
810 VK_SUBGROUP_FEATURE_ROTATE_BIT_KHR |
811 VK_SUBGROUP_FEATURE_ROTATE_CLUSTERED_BIT_KHR |
812 VK_SUBGROUP_FEATURE_SHUFFLE_BIT |
813 VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT |
814 VK_SUBGROUP_FEATURE_VOTE_BIT,
815 .subgroupQuadOperationsInAllStages = false,
816 .pointClippingBehavior = VK_POINT_CLIPPING_BEHAVIOR_USER_CLIP_PLANES_ONLY,
817 .maxMultiviewViewCount = NVK_MAX_MULTIVIEW_VIEW_COUNT,
818 .maxMultiviewInstanceIndex = UINT32_MAX,
819 .maxPerSetDescriptors = UINT32_MAX,
820 .maxMemoryAllocationSize = (1u << 31),
821
822 /* Vulkan 1.2 properties */
823 .supportedDepthResolveModes = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT |
824 VK_RESOLVE_MODE_AVERAGE_BIT |
825 VK_RESOLVE_MODE_MIN_BIT |
826 VK_RESOLVE_MODE_MAX_BIT,
827 .supportedStencilResolveModes = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT |
828 VK_RESOLVE_MODE_MIN_BIT |
829 VK_RESOLVE_MODE_MAX_BIT,
830 .independentResolveNone = true,
831 .independentResolve = true,
832 .driverID = VK_DRIVER_ID_MESA_NVK,
833 .conformanceVersion =
834 conformant ? (VkConformanceVersion) { 1, 3, 7, 3 }
835 : (VkConformanceVersion) { 0, 0, 0, 0 },
836 .denormBehaviorIndependence = VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL,
837 .roundingModeIndependence = VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL,
838 .shaderSignedZeroInfNanPreserveFloat16 = true,
839 .shaderSignedZeroInfNanPreserveFloat32 = true,
840 .shaderSignedZeroInfNanPreserveFloat64 = true,
841 .shaderDenormPreserveFloat16 = true,
842 .shaderDenormPreserveFloat32 = true,
843 .shaderDenormPreserveFloat64 = true,
844 .shaderDenormFlushToZeroFloat16 = false,
845 .shaderDenormFlushToZeroFloat32 = true,
846 .shaderDenormFlushToZeroFloat64 = false,
847 .shaderRoundingModeRTEFloat16 = true,
848 .shaderRoundingModeRTEFloat32 = true,
849 .shaderRoundingModeRTEFloat64 = true,
850 .shaderRoundingModeRTZFloat16 = false,
851 .shaderRoundingModeRTZFloat32 = true,
852 .shaderRoundingModeRTZFloat64 = true,
853 .maxUpdateAfterBindDescriptorsInAllPools = UINT32_MAX,
854 .shaderUniformBufferArrayNonUniformIndexingNative = false,
855 .shaderSampledImageArrayNonUniformIndexingNative = info->cls_eng3d >= TURING_A,
856 .shaderStorageBufferArrayNonUniformIndexingNative = true,
857 .shaderStorageImageArrayNonUniformIndexingNative = info->cls_eng3d >= TURING_A,
858 .shaderInputAttachmentArrayNonUniformIndexingNative = false,
859 .robustBufferAccessUpdateAfterBind = true,
860 .quadDivergentImplicitLod = info->cls_eng3d >= TURING_A,
861 .maxPerStageDescriptorUpdateAfterBindSamplers = NVK_MAX_DESCRIPTORS,
862 .maxPerStageDescriptorUpdateAfterBindUniformBuffers = NVK_MAX_DESCRIPTORS,
863 .maxPerStageDescriptorUpdateAfterBindStorageBuffers = NVK_MAX_DESCRIPTORS,
864 .maxPerStageDescriptorUpdateAfterBindSampledImages = NVK_MAX_DESCRIPTORS,
865 .maxPerStageDescriptorUpdateAfterBindStorageImages = NVK_MAX_DESCRIPTORS,
866 .maxPerStageDescriptorUpdateAfterBindInputAttachments = NVK_MAX_DESCRIPTORS,
867 .maxPerStageUpdateAfterBindResources = UINT32_MAX,
868 .maxDescriptorSetUpdateAfterBindSamplers = NVK_MAX_DESCRIPTORS,
869 .maxDescriptorSetUpdateAfterBindUniformBuffers = NVK_MAX_DESCRIPTORS,
870 .maxDescriptorSetUpdateAfterBindUniformBuffersDynamic = NVK_MAX_DYNAMIC_BUFFERS / 2,
871 .maxDescriptorSetUpdateAfterBindStorageBuffers = NVK_MAX_DESCRIPTORS,
872 .maxDescriptorSetUpdateAfterBindStorageBuffersDynamic = NVK_MAX_DYNAMIC_BUFFERS / 2,
873 .maxDescriptorSetUpdateAfterBindSampledImages = NVK_MAX_DESCRIPTORS,
874 .maxDescriptorSetUpdateAfterBindStorageImages = NVK_MAX_DESCRIPTORS,
875 .maxDescriptorSetUpdateAfterBindInputAttachments = NVK_MAX_DESCRIPTORS,
876 .filterMinmaxSingleComponentFormats = true,
877 .filterMinmaxImageComponentMapping = true,
878 .maxTimelineSemaphoreValueDifference = UINT64_MAX,
879 .framebufferIntegerColorSampleCounts = sample_counts,
880
881 /* Vulkan 1.3 properties */
882 .minSubgroupSize = 32,
883 .maxSubgroupSize = 32,
884 .maxComputeWorkgroupSubgroups = 1024 / 32,
885 .requiredSubgroupSizeStages = 0,
886 .maxInlineUniformBlockSize = 1 << 16,
887 .maxPerStageDescriptorInlineUniformBlocks = 32,
888 .maxPerStageDescriptorUpdateAfterBindInlineUniformBlocks = 32,
889 .maxDescriptorSetInlineUniformBlocks = 6 * 32,
890 .maxDescriptorSetUpdateAfterBindInlineUniformBlocks = 6 * 32,
891 .maxInlineUniformTotalSize = 1 << 16,
892 .integerDotProduct4x8BitPackedUnsignedAccelerated
893 = info->cls_eng3d >= VOLTA_A,
894 .integerDotProduct4x8BitPackedSignedAccelerated
895 = info->cls_eng3d >= VOLTA_A,
896 .integerDotProduct4x8BitPackedMixedSignednessAccelerated
897 = info->cls_eng3d >= VOLTA_A,
898 .storageTexelBufferOffsetAlignmentBytes = NVK_MIN_TEXEL_BUFFER_ALIGNMENT,
899 .storageTexelBufferOffsetSingleTexelAlignment = true,
900 .uniformTexelBufferOffsetAlignmentBytes = NVK_MIN_TEXEL_BUFFER_ALIGNMENT,
901 .uniformTexelBufferOffsetSingleTexelAlignment = true,
902 .maxBufferSize = NVK_MAX_BUFFER_SIZE,
903
904 /* VK_KHR_compute_shader_derivatives */
905 .meshAndTaskShaderDerivatives = false,
906
907 /* VK_KHR_push_descriptor */
908 .maxPushDescriptors = NVK_MAX_PUSH_DESCRIPTORS,
909
910 /* VK_EXT_conservative_rasterization */
911 .primitiveOverestimationSize = info->cls_eng3d >= VOLTA_A ? 1.0f / 512.0f : 0.0,
912 .maxExtraPrimitiveOverestimationSize = 0.75,
913 .extraPrimitiveOverestimationSizeGranularity = 0.25,
914 .primitiveUnderestimation = info->cls_eng3d >= VOLTA_A,
915 .conservativePointAndLineRasterization = true,
916 .degenerateLinesRasterized = info->cls_eng3d >= VOLTA_A,
917 .degenerateTrianglesRasterized = info->cls_eng3d >= PASCAL_A,
918 .fullyCoveredFragmentShaderInputVariable = false,
919 .conservativeRasterizationPostDepthCoverage = true,
920
921 /* VK_EXT_custom_border_color */
922 .maxCustomBorderColorSamplers = 4000,
923
924 /* VK_EXT_descriptor_buffer */
925 .combinedImageSamplerDescriptorSingleArray = true,
926 .bufferlessPushDescriptors = false,
927 .allowSamplerImageViewPostSubmitCreation = false,
928 .descriptorBufferOffsetAlignment = nvk_min_cbuf_alignment(info),
929 .maxDescriptorBufferBindings = 32,
930 .maxResourceDescriptorBufferBindings = 32,
931 .maxSamplerDescriptorBufferBindings = 32,
932 .maxEmbeddedImmutableSamplerBindings = 32,
933 .maxEmbeddedImmutableSamplers = 4000,
934 .bufferCaptureReplayDescriptorDataSize = 0,
935 .imageCaptureReplayDescriptorDataSize = 0,
936 .imageViewCaptureReplayDescriptorDataSize =
937 sizeof(struct nvk_image_view_capture),
938 .samplerCaptureReplayDescriptorDataSize =
939 sizeof(struct nvk_sampler_capture),
940 .accelerationStructureCaptureReplayDescriptorDataSize = 0, // todo
941 .samplerDescriptorSize = sizeof(struct nvk_sampled_image_descriptor),
942 .combinedImageSamplerDescriptorSize = sizeof(struct nvk_sampled_image_descriptor),
943 .sampledImageDescriptorSize = sizeof(struct nvk_sampled_image_descriptor),
944 .storageImageDescriptorSize = sizeof(struct nvk_storage_image_descriptor),
945 .uniformTexelBufferDescriptorSize = sizeof(struct nvk_edb_buffer_view_descriptor),
946 .robustUniformTexelBufferDescriptorSize = sizeof(struct nvk_edb_buffer_view_descriptor),
947 .storageTexelBufferDescriptorSize = sizeof(struct nvk_edb_buffer_view_descriptor),
948 .robustStorageTexelBufferDescriptorSize = sizeof(struct nvk_edb_buffer_view_descriptor),
949 .uniformBufferDescriptorSize = sizeof(union nvk_buffer_descriptor),
950 .robustUniformBufferDescriptorSize = sizeof(union nvk_buffer_descriptor),
951 .storageBufferDescriptorSize = sizeof(union nvk_buffer_descriptor),
952 .robustStorageBufferDescriptorSize = sizeof(union nvk_buffer_descriptor),
953 .inputAttachmentDescriptorSize = sizeof(struct nvk_sampled_image_descriptor),
954 .accelerationStructureDescriptorSize = 0,
955 .maxSamplerDescriptorBufferRange = UINT32_MAX,
956 .maxResourceDescriptorBufferRange = UINT32_MAX,
957 .samplerDescriptorBufferAddressSpaceSize = UINT32_MAX,
958 .resourceDescriptorBufferAddressSpaceSize = UINT32_MAX,
959 .descriptorBufferAddressSpaceSize = UINT32_MAX,
960
961 /* VK_EXT_extended_dynamic_state3 */
962 .dynamicPrimitiveTopologyUnrestricted = true,
963
964 /* VK_EXT_graphics_pipeline_library */
965 .graphicsPipelineLibraryFastLinking = true,
966 .graphicsPipelineLibraryIndependentInterpolationDecoration = true,
967
968 /* VK_KHR_line_rasterization */
969 .lineSubPixelPrecisionBits = 8,
970
971 /* VK_KHR_maintenance5 */
972 .earlyFragmentMultisampleCoverageAfterSampleCounting = true,
973 .earlyFragmentSampleMaskTestBeforeSampleCounting = true,
974 .depthStencilSwizzleOneSupport = true,
975 .polygonModePointSize = true,
976 .nonStrictSinglePixelWideLinesUseParallelogram = false,
977 .nonStrictWideLinesUseParallelogram = false,
978
979 /* VK_KHR_maintenance6 */
980 .blockTexelViewCompatibleMultipleLayers = true,
981 .maxCombinedImageSamplerDescriptorCount = 3,
982 .fragmentShadingRateClampCombinerInputs = false, /* TODO */
983
984 /* VK_KHR_maintenance7 */
985 .robustFragmentShadingRateAttachmentAccess = false,
986 .separateDepthStencilAttachmentAccess = false,
987 .maxDescriptorSetTotalUniformBuffersDynamic = NVK_MAX_DYNAMIC_BUFFERS / 2,
988 .maxDescriptorSetTotalStorageBuffersDynamic = NVK_MAX_DYNAMIC_BUFFERS / 2,
989 .maxDescriptorSetTotalBuffersDynamic = NVK_MAX_DYNAMIC_BUFFERS,
990 .maxDescriptorSetUpdateAfterBindTotalUniformBuffersDynamic = NVK_MAX_DYNAMIC_BUFFERS / 2,
991 .maxDescriptorSetUpdateAfterBindTotalStorageBuffersDynamic = NVK_MAX_DYNAMIC_BUFFERS / 2,
992 .maxDescriptorSetUpdateAfterBindTotalBuffersDynamic = NVK_MAX_DYNAMIC_BUFFERS,
993
994 /* VK_EXT_legacy_vertex_attributes */
995 .nativeUnalignedPerformance = true,
996
997 /* VK_EXT_map_memory_placed */
998 .minPlacedMemoryMapAlignment = os_page_size,
999
1000 /* VK_EXT_multi_draw */
1001 .maxMultiDrawCount = UINT32_MAX,
1002
1003 /* VK_EXT_nested_command_buffer */
1004 .maxCommandBufferNestingLevel = UINT32_MAX,
1005
1006 /* VK_EXT_pci_bus_info */
1007 .pciDomain = info->pci.domain,
1008 .pciBus = info->pci.bus,
1009 .pciDevice = info->pci.dev,
1010 .pciFunction = info->pci.func,
1011
1012 /* VK_EXT_pipeline_robustness */
1013 .defaultRobustnessStorageBuffers =
1014 VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_DISABLED_EXT,
1015 .defaultRobustnessUniformBuffers =
1016 VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_DISABLED_EXT,
1017 .defaultRobustnessVertexInputs =
1018 VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_ROBUST_BUFFER_ACCESS_2_EXT,
1019 .defaultRobustnessImages =
1020 VK_PIPELINE_ROBUSTNESS_IMAGE_BEHAVIOR_ROBUST_IMAGE_ACCESS_2_EXT,
1021
1022 /* VK_EXT_physical_device_drm gets populated later */
1023
1024 /* VK_EXT_provoking_vertex */
1025 .provokingVertexModePerPipeline = true,
1026 .transformFeedbackPreservesTriangleFanProvokingVertex = true,
1027
1028 /* VK_EXT_robustness2 */
1029 .robustStorageBufferAccessSizeAlignment = NVK_SSBO_BOUNDS_CHECK_ALIGNMENT,
1030 .robustUniformBufferAccessSizeAlignment = nvk_min_cbuf_alignment(info),
1031
1032 /* VK_EXT_sample_locations */
1033 .sampleLocationSampleCounts = sample_counts,
1034 .maxSampleLocationGridSize = (VkExtent2D){ 1, 1 },
1035 .sampleLocationCoordinateRange[0] = 0.0f,
1036 .sampleLocationCoordinateRange[1] = 0.9375f,
1037 .sampleLocationSubPixelBits = 4,
1038 .variableSampleLocations = true,
1039
1040 /* VK_EXT_shader_object */
1041 .shaderBinaryVersion = 0,
1042
1043 /* VK_EXT_transform_feedback */
1044 .maxTransformFeedbackStreams = 4,
1045 .maxTransformFeedbackBuffers = 4,
1046 .maxTransformFeedbackBufferSize = UINT32_MAX,
1047 .maxTransformFeedbackStreamDataSize = 2048,
1048 .maxTransformFeedbackBufferDataSize = 512,
1049 .maxTransformFeedbackBufferDataStride = 2048,
1050 .transformFeedbackQueries = true,
1051 .transformFeedbackStreamsLinesTriangles = false,
1052 .transformFeedbackRasterizationStreamSelect = true,
1053 .transformFeedbackDraw = true,
1054
1055 /* VK_KHR_vertex_attribute_divisor */
1056 .maxVertexAttribDivisor = UINT32_MAX,
1057 .supportsNonZeroFirstInstance = true,
1058
1059 /* VK_KHR_fragment_shader_barycentric */
1060 .triStripVertexOrderIndependentOfProvokingVertex = false,
1061
1062 /* VK_NV_shader_sm_builtins */
1063 .shaderSMCount = (uint32_t)info->tpc_count * info->mp_per_tpc,
1064 .shaderWarpsPerSM = info->max_warps_per_mp,
1065 };
1066
1067 /* Add the driver to the device name (like other Mesa drivers do) */
1068 if (!strcmp(info->device_name, info->chipset_name)) {
1069 snprintf(properties->deviceName, sizeof(properties->deviceName),
1070 "NVK %s", info->device_name);
1071 } else {
1072 snprintf(properties->deviceName, sizeof(properties->deviceName),
1073 "%s (NVK %s)", info->device_name, info->chipset_name);
1074 }
1075
1076 /* VK_EXT_shader_module_identifier */
1077 STATIC_ASSERT(sizeof(vk_shaderModuleIdentifierAlgorithmUUID) ==
1078 sizeof(properties->shaderModuleIdentifierAlgorithmUUID));
1079 memcpy(properties->shaderModuleIdentifierAlgorithmUUID,
1080 vk_shaderModuleIdentifierAlgorithmUUID,
1081 sizeof(properties->shaderModuleIdentifierAlgorithmUUID));
1082
1083 nv_device_uuid(info, properties->deviceUUID, VK_UUID_SIZE, true);
1084 STATIC_ASSERT(sizeof(instance->driver_build_sha) >= VK_UUID_SIZE);
1085 memcpy(properties->driverUUID, instance->driver_build_sha, VK_UUID_SIZE);
1086
1087 snprintf(properties->driverName, VK_MAX_DRIVER_NAME_SIZE, "NVK");
1088 snprintf(properties->driverInfo, VK_MAX_DRIVER_INFO_SIZE,
1089 "Mesa " PACKAGE_VERSION MESA_GIT_SHA1);
1090 }
1091
1092 static void
nvk_physical_device_init_pipeline_cache(struct nvk_physical_device * pdev)1093 nvk_physical_device_init_pipeline_cache(struct nvk_physical_device *pdev)
1094 {
1095 struct nvk_instance *instance = nvk_physical_device_instance(pdev);
1096
1097 struct mesa_sha1 sha_ctx;
1098 _mesa_sha1_init(&sha_ctx);
1099
1100 _mesa_sha1_update(&sha_ctx, instance->driver_build_sha,
1101 sizeof(instance->driver_build_sha));
1102
1103 const uint64_t compiler_flags = nvk_physical_device_compiler_flags(pdev);
1104 _mesa_sha1_update(&sha_ctx, &compiler_flags, sizeof(compiler_flags));
1105
1106 unsigned char sha[SHA1_DIGEST_LENGTH];
1107 _mesa_sha1_final(&sha_ctx, sha);
1108
1109 STATIC_ASSERT(SHA1_DIGEST_LENGTH >= VK_UUID_SIZE);
1110 memcpy(pdev->vk.properties.pipelineCacheUUID, sha, VK_UUID_SIZE);
1111 memcpy(pdev->vk.properties.shaderBinaryUUID, sha, VK_UUID_SIZE);
1112
1113 #ifdef ENABLE_SHADER_CACHE
1114 char renderer[10];
1115 ASSERTED int len = snprintf(renderer, sizeof(renderer), "nvk_%04x",
1116 pdev->info.chipset);
1117 assert(len == sizeof(renderer) - 2);
1118
1119 char timestamp[41];
1120 _mesa_sha1_format(timestamp, instance->driver_build_sha);
1121
1122 const uint64_t driver_flags = nvk_physical_device_compiler_flags(pdev);
1123 pdev->vk.disk_cache = disk_cache_create(renderer, timestamp, driver_flags);
1124 #endif
1125 }
1126
1127 static void
nvk_physical_device_free_disk_cache(struct nvk_physical_device * pdev)1128 nvk_physical_device_free_disk_cache(struct nvk_physical_device *pdev)
1129 {
1130 #ifdef ENABLE_SHADER_CACHE
1131 if (pdev->vk.disk_cache) {
1132 disk_cache_destroy(pdev->vk.disk_cache);
1133 pdev->vk.disk_cache = NULL;
1134 }
1135 #else
1136 assert(pdev->vk.disk_cache == NULL);
1137 #endif
1138 }
1139
/*
 * Size in bytes to advertise for the system-memory heap.
 *
 * Returns 3/4 of the total physical memory, rounded down to a multiple of
 * 1 MiB, or 0 when the total cannot be queried.
 */
static uint64_t
nvk_get_sysmem_heap_size(void)
{
   uint64_t total_B = 0;

   if (os_get_total_physical_memory(&total_B)) {
      /* Use 3/4 of total size to avoid swapping */
      const uint64_t heap_B = total_B * 3 / 4;
      return ROUND_DOWN_TO(heap_B, 1 << 20);
   }

   return 0;
}
1150
/*
 * Heap "available" callback for system memory.
 *
 * Returns 3/4 of the currently available system memory, rounded down to a
 * multiple of 1 MiB.  If the query fails, an error is logged against pdev
 * and 0 is returned.
 */
static uint64_t
nvk_get_sysmem_heap_available(struct nvk_physical_device *pdev)
{
   uint64_t avail_B = 0;

   if (os_get_available_system_memory(&avail_B)) {
      /* Use 3/4 of available to avoid swapping */
      const uint64_t usable_B = avail_B * 3 / 4;
      return ROUND_DOWN_TO(usable_B, 1 << 20);
   }

   vk_loge(VK_LOG_OBJS(pdev), "Failed to query available system memory");
   return 0;
}
1163
1164 static uint64_t
nvk_get_vram_heap_available(struct nvk_physical_device * pdev)1165 nvk_get_vram_heap_available(struct nvk_physical_device *pdev)
1166 {
1167 const uint64_t used = nvkmd_pdev_get_vram_used(pdev->nvkmd);
1168 if (used > pdev->info.vram_size_B)
1169 return 0;
1170
1171 return pdev->info.vram_size_B - used;
1172 }
1173
1174 VkResult
nvk_create_drm_physical_device(struct vk_instance * _instance,struct _drmDevice * drm_device,struct vk_physical_device ** pdev_out)1175 nvk_create_drm_physical_device(struct vk_instance *_instance,
1176 struct _drmDevice *drm_device,
1177 struct vk_physical_device **pdev_out)
1178 {
1179 struct nvk_instance *instance = (struct nvk_instance *)_instance;
1180 VkResult result;
1181
1182 struct nvkmd_pdev *nvkmd;
1183 result = nvkmd_try_create_pdev_for_drm(drm_device, &instance->vk.base,
1184 instance->debug_flags, &nvkmd);
1185 if (result != VK_SUCCESS)
1186 return result;
1187
1188 /* We don't support anything pre-Kepler */
1189 if (nvkmd->dev_info.cls_eng3d < KEPLER_A) {
1190 result = VK_ERROR_INCOMPATIBLE_DRIVER;
1191 goto fail_nvkmd;
1192 }
1193
1194 bool conformant =
1195 nvkmd->dev_info.type == NV_DEVICE_TYPE_DIS &&
1196 nvkmd->dev_info.cls_eng3d >= TURING_A &&
1197 nvkmd->dev_info.cls_eng3d <= ADA_A;
1198
1199 if (!conformant &&
1200 !debug_get_bool_option("NVK_I_WANT_A_BROKEN_VULKAN_DRIVER", false)) {
1201 #ifdef NDEBUG
1202 result = VK_ERROR_INCOMPATIBLE_DRIVER;
1203 #else
1204 result = vk_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER,
1205 "WARNING: NVK is not well-tested on %s, pass "
1206 "NVK_I_WANT_A_BROKEN_VULKAN_DRIVER=1 "
1207 "if you know what you're doing.",
1208 nvkmd->dev_info.device_name);
1209 #endif
1210 goto fail_nvkmd;
1211 }
1212
1213 if (!conformant)
1214 vk_warn_non_conformant_implementation("NVK");
1215
1216 struct nvk_physical_device *pdev =
1217 vk_zalloc(&instance->vk.alloc, sizeof(*pdev),
1218 8, VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
1219
1220 if (pdev == NULL) {
1221 result = vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY);
1222 goto fail_nvkmd;
1223 }
1224
1225 struct vk_physical_device_dispatch_table dispatch_table;
1226 vk_physical_device_dispatch_table_from_entrypoints(
1227 &dispatch_table, &nvk_physical_device_entrypoints, true);
1228 vk_physical_device_dispatch_table_from_entrypoints(
1229 &dispatch_table, &wsi_physical_device_entrypoints, false);
1230
1231 struct vk_device_extension_table supported_extensions;
1232 nvk_get_device_extensions(instance, &nvkmd->dev_info,
1233 nvkmd->kmd_info.has_alloc_tiled,
1234 &supported_extensions);
1235
1236 struct vk_features supported_features;
1237 nvk_get_device_features(&nvkmd->dev_info, &supported_extensions,
1238 &supported_features);
1239
1240 struct vk_properties properties;
1241 nvk_get_device_properties(instance, &nvkmd->dev_info, conformant,
1242 &properties);
1243
1244 if (nvkmd->drm.render_dev) {
1245 properties.drmHasRender = true;
1246 properties.drmRenderMajor = major(nvkmd->drm.render_dev);
1247 properties.drmRenderMinor = minor(nvkmd->drm.render_dev);
1248 }
1249
1250 if (nvkmd->drm.primary_dev) {
1251 properties.drmHasPrimary = true;
1252 properties.drmPrimaryMajor = major(nvkmd->drm.primary_dev);
1253 properties.drmPrimaryMinor = minor(nvkmd->drm.primary_dev);
1254 }
1255
1256 result = vk_physical_device_init(&pdev->vk, &instance->vk,
1257 &supported_extensions,
1258 &supported_features,
1259 &properties,
1260 &dispatch_table);
1261 if (result != VK_SUCCESS)
1262 goto fail_alloc;
1263
1264 pdev->nvkmd = nvkmd;
1265 pdev->info = nvkmd->dev_info;
1266 pdev->debug_flags = instance->debug_flags;
1267
1268 pdev->nak = nak_compiler_create(&pdev->info);
1269 if (pdev->nak == NULL) {
1270 result = vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY);
1271 goto fail_init;
1272 }
1273
1274 nvk_physical_device_init_pipeline_cache(pdev);
1275
1276 uint64_t sysmem_size_B = nvk_get_sysmem_heap_size();
1277 if (sysmem_size_B == 0) {
1278 result = vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
1279 "Failed to query total system memory");
1280 goto fail_disk_cache;
1281 }
1282
1283 if (pdev->info.vram_size_B > 0) {
1284 uint32_t vram_heap_idx = pdev->mem_heap_count++;
1285 uint32_t bar_heap_idx = vram_heap_idx;
1286 pdev->mem_heaps[vram_heap_idx] = (struct nvk_memory_heap) {
1287 .size = pdev->info.vram_size_B,
1288 .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
1289 };
1290
1291 if (pdev->info.bar_size_B > 0 &&
1292 pdev->info.bar_size_B < pdev->info.vram_size_B) {
1293 bar_heap_idx = pdev->mem_heap_count++;
1294 pdev->mem_heaps[bar_heap_idx] = (struct nvk_memory_heap) {
1295 .size = pdev->info.bar_size_B,
1296 .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
1297 };
1298 }
1299
1300 /* Only set available if we have the ioctl. */
1301 if (nvkmd->kmd_info.has_get_vram_used)
1302 pdev->mem_heaps[vram_heap_idx].available = nvk_get_vram_heap_available;
1303
1304 pdev->mem_types[pdev->mem_type_count++] = (VkMemoryType) {
1305 .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
1306 .heapIndex = vram_heap_idx,
1307 };
1308
1309 if (pdev->info.cls_eng3d >= MAXWELL_A) {
1310 pdev->mem_types[pdev->mem_type_count++] = (VkMemoryType) {
1311 .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
1312 VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
1313 VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
1314 .heapIndex = bar_heap_idx,
1315 };
1316 }
1317 }
1318
1319 uint32_t sysmem_heap_idx = pdev->mem_heap_count++;
1320 pdev->mem_heaps[sysmem_heap_idx] = (struct nvk_memory_heap) {
1321 .size = sysmem_size_B,
1322 /* If we don't have any VRAM (iGPU), claim sysmem as DEVICE_LOCAL */
1323 .flags = pdev->info.vram_size_B == 0
1324 ? VK_MEMORY_HEAP_DEVICE_LOCAL_BIT
1325 : 0,
1326 .available = nvk_get_sysmem_heap_available,
1327 };
1328
1329 pdev->mem_types[pdev->mem_type_count++] = (VkMemoryType) {
1330 /* TODO: What's the right thing to do here on Tegra? */
1331 .propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
1332 VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
1333 VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
1334 .heapIndex = sysmem_heap_idx,
1335 };
1336
1337 assert(pdev->mem_heap_count <= ARRAY_SIZE(pdev->mem_heaps));
1338 assert(pdev->mem_type_count <= ARRAY_SIZE(pdev->mem_types));
1339
1340 pdev->queue_families[pdev->queue_family_count++] = (struct nvk_queue_family) {
1341 .queue_flags = VK_QUEUE_GRAPHICS_BIT |
1342 VK_QUEUE_COMPUTE_BIT |
1343 VK_QUEUE_TRANSFER_BIT |
1344 VK_QUEUE_SPARSE_BINDING_BIT,
1345 .queue_count = 1,
1346 };
1347 assert(pdev->queue_family_count <= ARRAY_SIZE(pdev->queue_families));
1348
1349 pdev->vk.supported_sync_types = nvkmd->sync_types;
1350
1351 result = nvk_init_wsi(pdev);
1352 if (result != VK_SUCCESS)
1353 goto fail_disk_cache;
1354
1355 *pdev_out = &pdev->vk;
1356
1357 return VK_SUCCESS;
1358
1359 fail_disk_cache:
1360 nvk_physical_device_free_disk_cache(pdev);
1361 nak_compiler_destroy(pdev->nak);
1362 fail_init:
1363 vk_physical_device_finish(&pdev->vk);
1364 fail_alloc:
1365 vk_free(&instance->vk.alloc, pdev);
1366 fail_nvkmd:
1367 nvkmd_pdev_destroy(nvkmd);
1368 return result;
1369 }
1370
1371 void
nvk_physical_device_destroy(struct vk_physical_device * vk_pdev)1372 nvk_physical_device_destroy(struct vk_physical_device *vk_pdev)
1373 {
1374 struct nvk_physical_device *pdev =
1375 container_of(vk_pdev, struct nvk_physical_device, vk);
1376
1377 nvk_finish_wsi(pdev);
1378 nvk_physical_device_free_disk_cache(pdev);
1379 nak_compiler_destroy(pdev->nak);
1380 nvkmd_pdev_destroy(pdev->nvkmd);
1381 vk_physical_device_finish(&pdev->vk);
1382 vk_free(&pdev->vk.instance->alloc, pdev);
1383 }
1384
1385 VKAPI_ATTR void VKAPI_CALL
nvk_GetPhysicalDeviceMemoryProperties2(VkPhysicalDevice physicalDevice,VkPhysicalDeviceMemoryProperties2 * pMemoryProperties)1386 nvk_GetPhysicalDeviceMemoryProperties2(
1387 VkPhysicalDevice physicalDevice,
1388 VkPhysicalDeviceMemoryProperties2 *pMemoryProperties)
1389 {
1390 VK_FROM_HANDLE(nvk_physical_device, pdev, physicalDevice);
1391
1392 pMemoryProperties->memoryProperties.memoryHeapCount = pdev->mem_heap_count;
1393 for (int i = 0; i < pdev->mem_heap_count; i++) {
1394 pMemoryProperties->memoryProperties.memoryHeaps[i] = (VkMemoryHeap) {
1395 .size = pdev->mem_heaps[i].size,
1396 .flags = pdev->mem_heaps[i].flags,
1397 };
1398 }
1399
1400 pMemoryProperties->memoryProperties.memoryTypeCount = pdev->mem_type_count;
1401 for (int i = 0; i < pdev->mem_type_count; i++) {
1402 pMemoryProperties->memoryProperties.memoryTypes[i] = pdev->mem_types[i];
1403 }
1404
1405 vk_foreach_struct(ext, pMemoryProperties->pNext)
1406 {
1407 switch (ext->sType) {
1408 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT: {
1409 VkPhysicalDeviceMemoryBudgetPropertiesEXT *p = (void *)ext;
1410
1411 for (unsigned i = 0; i < pdev->mem_heap_count; i++) {
1412 const struct nvk_memory_heap *heap = &pdev->mem_heaps[i];
1413 uint64_t used = p_atomic_read(&heap->used);
1414
1415 /* From the Vulkan 1.3.278 spec:
1416 *
1417 * "heapUsage is an array of VK_MAX_MEMORY_HEAPS VkDeviceSize
1418 * values in which memory usages are returned, with one element
1419 * for each memory heap. A heap’s usage is an estimate of how
1420 * much memory the process is currently using in that heap."
1421 *
1422 * TODO: Include internal allocations?
1423 */
1424 p->heapUsage[i] = used;
1425
1426 uint64_t available = heap->size;
1427 if (heap->available)
1428 available = heap->available(pdev);
1429
1430 /* From the Vulkan 1.3.278 spec:
1431 *
1432 * "heapBudget is an array of VK_MAX_MEMORY_HEAPS VkDeviceSize
1433 * values in which memory budgets are returned, with one
1434 * element for each memory heap. A heap’s budget is a rough
1435 * estimate of how much memory the process can allocate from
1436 * that heap before allocations may fail or cause performance
1437 * degradation. The budget includes any currently allocated
1438 * device memory."
1439 *
1440 * and
1441 *
1442 * "The heapBudget value must be less than or equal to
1443 * VkMemoryHeap::size for each heap."
1444 *
1445 * available (queried above) is the total amount free memory
1446 * system-wide and does not include our allocations so we need
1447 * to add that in.
1448 */
1449 uint64_t budget = MIN2(available + used, heap->size);
1450
1451 /* Set the budget at 90% of available to avoid thrashing */
1452 p->heapBudget[i] = ROUND_DOWN_TO(budget * 9 / 10, 1 << 20);
1453 }
1454
1455 /* From the Vulkan 1.3.278 spec:
1456 *
1457 * "The heapBudget and heapUsage values must be zero for array
1458 * elements greater than or equal to
1459 * VkPhysicalDeviceMemoryProperties::memoryHeapCount. The
1460 * heapBudget value must be non-zero for array elements less than
1461 * VkPhysicalDeviceMemoryProperties::memoryHeapCount."
1462 */
1463 for (unsigned i = pdev->mem_heap_count; i < VK_MAX_MEMORY_HEAPS; i++) {
1464 p->heapBudget[i] = 0u;
1465 p->heapUsage[i] = 0u;
1466 }
1467 break;
1468 }
1469 default:
1470 vk_debug_ignored_stype(ext->sType);
1471 break;
1472 }
1473 }
1474 }
1475
1476 VKAPI_ATTR void VKAPI_CALL
nvk_GetPhysicalDeviceQueueFamilyProperties2(VkPhysicalDevice physicalDevice,uint32_t * pQueueFamilyPropertyCount,VkQueueFamilyProperties2 * pQueueFamilyProperties)1477 nvk_GetPhysicalDeviceQueueFamilyProperties2(
1478 VkPhysicalDevice physicalDevice,
1479 uint32_t *pQueueFamilyPropertyCount,
1480 VkQueueFamilyProperties2 *pQueueFamilyProperties)
1481 {
1482 VK_FROM_HANDLE(nvk_physical_device, pdev, physicalDevice);
1483 VK_OUTARRAY_MAKE_TYPED(VkQueueFamilyProperties2, out, pQueueFamilyProperties,
1484 pQueueFamilyPropertyCount);
1485
1486 for (uint8_t i = 0; i < pdev->queue_family_count; i++) {
1487 const struct nvk_queue_family *queue_family = &pdev->queue_families[i];
1488
1489 vk_outarray_append_typed(VkQueueFamilyProperties2, &out, p) {
1490 p->queueFamilyProperties.queueFlags = queue_family->queue_flags;
1491 p->queueFamilyProperties.queueCount = queue_family->queue_count;
1492 p->queueFamilyProperties.timestampValidBits = 64;
1493 p->queueFamilyProperties.minImageTransferGranularity =
1494 (VkExtent3D){1, 1, 1};
1495 }
1496 }
1497 }
1498
1499 static const VkTimeDomainKHR nvk_time_domains[] = {
1500 VK_TIME_DOMAIN_DEVICE_KHR,
1501 VK_TIME_DOMAIN_CLOCK_MONOTONIC_KHR,
1502 #ifdef CLOCK_MONOTONIC_RAW
1503 VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_KHR,
1504 #endif
1505 };
1506
1507 VKAPI_ATTR VkResult VKAPI_CALL
nvk_GetPhysicalDeviceCalibrateableTimeDomainsKHR(VkPhysicalDevice physicalDevice,uint32_t * pTimeDomainCount,VkTimeDomainKHR * pTimeDomains)1508 nvk_GetPhysicalDeviceCalibrateableTimeDomainsKHR(
1509 VkPhysicalDevice physicalDevice,
1510 uint32_t *pTimeDomainCount,
1511 VkTimeDomainKHR *pTimeDomains)
1512 {
1513 VK_OUTARRAY_MAKE_TYPED(VkTimeDomainKHR, out, pTimeDomains, pTimeDomainCount);
1514
1515 for (int d = 0; d < ARRAY_SIZE(nvk_time_domains); d++) {
1516 vk_outarray_append_typed(VkTimeDomainKHR, &out, i) {
1517 *i = nvk_time_domains[d];
1518 }
1519 }
1520
1521 return vk_outarray_status(&out);
1522 }
1523
1524
1525 VKAPI_ATTR void VKAPI_CALL
nvk_GetPhysicalDeviceMultisamplePropertiesEXT(VkPhysicalDevice physicalDevice,VkSampleCountFlagBits samples,VkMultisamplePropertiesEXT * pMultisampleProperties)1526 nvk_GetPhysicalDeviceMultisamplePropertiesEXT(
1527 VkPhysicalDevice physicalDevice,
1528 VkSampleCountFlagBits samples,
1529 VkMultisamplePropertiesEXT *pMultisampleProperties)
1530 {
1531 VK_FROM_HANDLE(nvk_physical_device, pdev, physicalDevice);
1532
1533 if (samples & pdev->vk.properties.sampleLocationSampleCounts) {
1534 pMultisampleProperties->maxSampleLocationGridSize = (VkExtent2D){1, 1};
1535 } else {
1536 pMultisampleProperties->maxSampleLocationGridSize = (VkExtent2D){0, 0};
1537 }
1538 }
1539