/*
 * Copyright © 2019 Raspberry Pi Ltd
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <assert.h>
#include <fcntl.h>
#include <stdbool.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/sysinfo.h>
#include <unistd.h>
#include <xf86drm.h>

#ifdef MAJOR_IN_MKDEV
#include <sys/mkdev.h>
#endif
#ifdef MAJOR_IN_SYSMACROS
#include <sys/sysmacros.h>
#endif

#include "v3dv_private.h"

#include "common/v3d_debug.h"

#include "compiler/v3d_compiler.h"

#include "drm-uapi/v3d_drm.h"
#include "vk_android.h"
#include "vk_drm_syncobj.h"
#include "vk_util.h"
#include "git_sha1.h"

#include "util/build_id.h"
#include "util/os_file.h"
#include "util/u_debug.h"
#include "util/format/u_format.h"

#if DETECT_OS_ANDROID
#include "vk_android.h"
#include <vndk/hardware_buffer.h>
#include "util/u_gralloc/u_gralloc.h"
#endif

#ifdef VK_USE_PLATFORM_XCB_KHR
#include <xcb/xcb.h>
#include <xcb/dri3.h>
#include <X11/Xlib-xcb.h>
#endif

#ifdef VK_USE_PLATFORM_WAYLAND_KHR
#include <wayland-client.h>
#include "wayland-drm-client-protocol.h"
#endif

#define V3DV_API_VERSION VK_MAKE_VERSION(1, 3, VK_HEADER_VERSION)

#ifdef ANDROID_STRICT
#if ANDROID_API_LEVEL <= 32
/* Android 12.1 and lower support only Vulkan API v1.1 */
#undef V3DV_API_VERSION
#define V3DV_API_VERSION VK_MAKE_VERSION(1, 1, VK_HEADER_VERSION)
#endif
#endif
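
/* For reference (Vulkan core macro semantics): VK_MAKE_VERSION(major, minor,
 * patch) packs the version as (major << 22) | (minor << 12) | patch, so the
 * patch field above carries VK_HEADER_VERSION and the value can be decoded
 * again with VK_VERSION_MAJOR()/VK_VERSION_MINOR()/VK_VERSION_PATCH().
 */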

VKAPI_ATTR VkResult VKAPI_CALL
v3dv_EnumerateInstanceVersion(uint32_t *pApiVersion)
{
   *pApiVersion = V3DV_API_VERSION;
   return VK_SUCCESS;
}

#if defined(VK_USE_PLATFORM_WIN32_KHR) || \
    defined(VK_USE_PLATFORM_WAYLAND_KHR) || \
    defined(VK_USE_PLATFORM_XCB_KHR) || \
    defined(VK_USE_PLATFORM_XLIB_KHR) || \
    defined(VK_USE_PLATFORM_DISPLAY_KHR)
#define V3DV_USE_WSI_PLATFORM
#endif

static const struct vk_instance_extension_table instance_extensions = {
   .KHR_device_group_creation = true,
#ifdef VK_USE_PLATFORM_DISPLAY_KHR
   .KHR_display = true,
   .KHR_get_display_properties2 = true,
   .EXT_direct_mode_display = true,
   .EXT_acquire_drm_display = true,
#endif
   .KHR_external_fence_capabilities = true,
   .KHR_external_memory_capabilities = true,
   .KHR_external_semaphore_capabilities = true,
   .KHR_get_physical_device_properties2 = true,
#ifdef V3DV_USE_WSI_PLATFORM
   .KHR_get_surface_capabilities2 = true,
   .KHR_surface = true,
   .KHR_surface_protected_capabilities = true,
   .EXT_surface_maintenance1 = true,
   .EXT_swapchain_colorspace = true,
#endif
#ifdef VK_USE_PLATFORM_WAYLAND_KHR
   .KHR_wayland_surface = true,
#endif
#ifdef VK_USE_PLATFORM_XCB_KHR
   .KHR_xcb_surface = true,
#endif
#ifdef VK_USE_PLATFORM_XLIB_KHR
   .KHR_xlib_surface = true,
#endif
#ifdef VK_USE_PLATFORM_XLIB_XRANDR_EXT
   .EXT_acquire_xlib_display = true,
#endif
#ifndef VK_USE_PLATFORM_WIN32_KHR
   .EXT_headless_surface = true,
#endif
   .EXT_debug_report = true,
   .EXT_debug_utils = true,
};

static void
get_device_extensions(const struct v3dv_physical_device *device,
                      struct vk_device_extension_table *ext)
{
   *ext = (struct vk_device_extension_table) {
      .KHR_8bit_storage = true,
      .KHR_16bit_storage = true,
      .KHR_bind_memory2 = true,
      .KHR_buffer_device_address = true,
      .KHR_copy_commands2 = true,
      .KHR_create_renderpass2 = true,
      .KHR_dedicated_allocation = true,
      .KHR_device_group = true,
      .KHR_driver_properties = true,
      .KHR_descriptor_update_template = true,
      .KHR_depth_stencil_resolve = true,
      .KHR_dynamic_rendering = true,
      .KHR_external_fence = true,
      .KHR_external_fence_fd = true,
      .KHR_external_memory = true,
      .KHR_external_memory_fd = true,
      .KHR_external_semaphore = true,
      .KHR_external_semaphore_fd = true,
      .KHR_format_feature_flags2 = true,
      .KHR_get_memory_requirements2 = true,
      .KHR_image_format_list = true,
      .KHR_imageless_framebuffer = true,
      .KHR_index_type_uint8 = true,
      .KHR_line_rasterization = true,
      .KHR_load_store_op_none = true,
      .KHR_performance_query = device->caps.perfmon,
      .KHR_relaxed_block_layout = true,
      .KHR_maintenance1 = true,
      .KHR_maintenance2 = true,
      .KHR_maintenance3 = true,
      .KHR_maintenance4 = true,
      .KHR_maintenance5 = true,
      .KHR_multiview = true,
      .KHR_pipeline_executable_properties = true,
      .KHR_separate_depth_stencil_layouts = true,
      .KHR_shader_expect_assume = true,
      .KHR_shader_float_controls = true,
      .KHR_shader_non_semantic_info = true,
      .KHR_shader_relaxed_extended_instruction = true,
      .KHR_sampler_mirror_clamp_to_edge = true,
      .KHR_sampler_ycbcr_conversion = true,
      .KHR_spirv_1_4 = true,
      .KHR_storage_buffer_storage_class = true,
      .KHR_timeline_semaphore = true,
      .KHR_uniform_buffer_standard_layout = true,
      .KHR_shader_integer_dot_product = true,
      .KHR_shader_terminate_invocation = true,
      .KHR_synchronization2 = true,
      .KHR_workgroup_memory_explicit_layout = true,
#ifdef V3DV_USE_WSI_PLATFORM
      .KHR_swapchain = true,
      .KHR_swapchain_mutable_format = true,
      .KHR_incremental_present = true,
#endif
      .KHR_variable_pointers = true,
      .KHR_vertex_attribute_divisor = true,
      .KHR_vulkan_memory_model = true,
      .KHR_zero_initialize_workgroup_memory = true,
      .EXT_4444_formats = true,
      .EXT_attachment_feedback_loop_layout = true,
      .EXT_border_color_swizzle = true,
      .EXT_color_write_enable = true,
      .EXT_custom_border_color = true,
      .EXT_depth_clamp_zero_one = device->devinfo.ver >= 71,
      .EXT_depth_clip_control = true,
      .EXT_depth_clip_enable = device->devinfo.ver >= 71,
      .EXT_load_store_op_none = true,
      .EXT_inline_uniform_block = true,
      .EXT_extended_dynamic_state = true,
      .EXT_extended_dynamic_state2 = true,
      .EXT_external_memory_dma_buf = true,
      .EXT_host_query_reset = true,
      .EXT_image_drm_format_modifier = true,
      .EXT_image_robustness = true,
      .EXT_index_type_uint8 = true,
      .EXT_line_rasterization = true,
      .EXT_memory_budget = true,
      .EXT_multi_draw = true,
      .EXT_physical_device_drm = true,
      .EXT_pipeline_creation_cache_control = true,
      .EXT_pipeline_creation_feedback = true,
      .EXT_pipeline_robustness = true,
      .EXT_primitive_topology_list_restart = true,
      .EXT_private_data = true,
      .EXT_provoking_vertex = true,
      .EXT_queue_family_foreign = true,
      .EXT_separate_stencil_usage = true,
      .EXT_shader_demote_to_helper_invocation = true,
      .EXT_shader_module_identifier = true,
      .EXT_subgroup_size_control = true,
#ifdef V3DV_USE_WSI_PLATFORM
      .EXT_swapchain_maintenance1 = true,
#endif
      .EXT_texel_buffer_alignment = true,
      .EXT_tooling_info = true,
      .EXT_vertex_attribute_divisor = true,
   };
#if DETECT_OS_ANDROID
   if (vk_android_get_ugralloc() != NULL) {
      ext->ANDROID_external_memory_android_hardware_buffer = true;
      ext->ANDROID_native_buffer = true;
   }
#endif
}

static void
get_features(const struct v3dv_physical_device *physical_device,
             struct vk_features *features)
{
   *features = (struct vk_features) {
      /* Vulkan 1.0 */
      .robustBufferAccess = true, /* This feature is mandatory */
      .fullDrawIndexUint32 = physical_device->devinfo.ver >= 71,
      .imageCubeArray = true,
      .independentBlend = true,
      .geometryShader = true,
      .tessellationShader = false,
      .sampleRateShading = true,
      .dualSrcBlend = false,
      .logicOp = true,
      .multiDrawIndirect = false,
      .drawIndirectFirstInstance = true,
      .depthClamp = physical_device->devinfo.ver >= 71,
      .depthClampZeroOne = physical_device->devinfo.ver >= 71,
      .depthBiasClamp = true,
      .fillModeNonSolid = true,
      .depthBounds = physical_device->devinfo.ver >= 71,
      .wideLines = true,
      .largePoints = true,
      .alphaToOne = true,
      .multiViewport = false,
      .samplerAnisotropy = true,
      .textureCompressionETC2 = true,
      .textureCompressionASTC_LDR = true,
      /* Note that textureCompressionBC requires that the driver support all
       * the BC formats. V3D 4.2 only supports BC1-3, so we can't claim
       * that we support it.
       */
      .textureCompressionBC = false,
      .occlusionQueryPrecise = true,
      .pipelineStatisticsQuery = false,
      .vertexPipelineStoresAndAtomics = true,
      .fragmentStoresAndAtomics = true,
      .shaderTessellationAndGeometryPointSize = true,
      .shaderImageGatherExtended = true,
      .shaderStorageImageExtendedFormats = true,
      .shaderStorageImageMultisample = false,
      .shaderStorageImageReadWithoutFormat = true,
      .shaderStorageImageWriteWithoutFormat = false,
      .shaderUniformBufferArrayDynamicIndexing = false,
      .shaderSampledImageArrayDynamicIndexing = false,
      .shaderStorageBufferArrayDynamicIndexing = false,
      .shaderStorageImageArrayDynamicIndexing = false,
      .shaderClipDistance = true,
      .shaderCullDistance = false,
      .shaderFloat64 = false,
      .shaderInt64 = false,
      .shaderInt16 = false,
      .shaderResourceResidency = false,
      .shaderResourceMinLod = false,
      .sparseBinding = false,
      .sparseResidencyBuffer = false,
      .sparseResidencyImage2D = false,
      .sparseResidencyImage3D = false,
      .sparseResidency2Samples = false,
      .sparseResidency4Samples = false,
      .sparseResidency8Samples = false,
      .sparseResidency16Samples = false,
      .sparseResidencyAliased = false,
      .variableMultisampleRate = false,
      .inheritedQueries = true,

      /* Vulkan 1.1 */
      .storageBuffer16BitAccess = true,
      .uniformAndStorageBuffer16BitAccess = true,
      .storagePushConstant16 = true,
      .storageInputOutput16 = false,
      .multiview = true,
      .multiviewGeometryShader = false,
      .multiviewTessellationShader = false,
      .variablePointersStorageBuffer = true,
      /* FIXME: this needs support for non-constant index on UBO/SSBO */
      .variablePointers = false,
      .protectedMemory = false,
      .samplerYcbcrConversion = true,
      .shaderDrawParameters = false,

      /* Vulkan 1.2 */
      .hostQueryReset = true,
      .uniformAndStorageBuffer8BitAccess = true,
      .uniformBufferStandardLayout = true,
      /* V3D 4.2 wraps TMU vector accesses to 16-byte boundaries, so loads and
       * stores of vectors that cross these boundaries would not work correctly
       * with scalarBlockLayout and would need to be split into smaller vectors
       * (and/or scalars) that don't cross these boundaries. For loads/stores
       * with dynamic offsets, where we can't identify whether the offset is
       * problematic, we would always have to scalarize. Overall, this would
       * not lead to the best performance, so let's just not support it.
       */
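      /* Example of the boundary issue described above: a 16-byte vec4 load
       * at byte offset 8 covers bytes 8..23 and crosses the 16-byte boundary
       * at byte 16, so it would have to be split into two smaller accesses.
       */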
      .scalarBlockLayout = physical_device->devinfo.ver >= 71,
      /* This tells applications two things:
       *
       * 1. Whether they can select just one aspect for barriers. For us,
       *    barriers decide if we need to split a job, and we don't care if
       *    it is only for one of the aspects of the image or both, so we
       *    don't really benefit from seeing barriers that select just one
       *    aspect.
       *
       * 2. Whether they can program different layouts for each aspect. We
       *    generally don't care about layouts, so again, we don't get any
       *    benefit from this to limit the scope of image layout transitions.
       *
       * Still, Vulkan 1.2 requires this feature to be supported so we
       * advertise it even though we don't really take advantage of it.
       */
      .separateDepthStencilLayouts = true,
      .storageBuffer8BitAccess = true,
      .storagePushConstant8 = true,
      .imagelessFramebuffer = true,
      .timelineSemaphore = true,

      .samplerMirrorClampToEdge = true,

      /* Extended subgroup types are mandatory in Vulkan 1.2; however, they
       * are only in effect if the implementation supports non-32-bit types,
       * which we don't, so in practice setting this to true doesn't have
       * any implications for us.
       */
      .shaderSubgroupExtendedTypes = true,
      .subgroupBroadcastDynamicId = true,

      .vulkanMemoryModel = true,
      .vulkanMemoryModelDeviceScope = true,
      .vulkanMemoryModelAvailabilityVisibilityChains = true,

      .bufferDeviceAddress = true,
      .bufferDeviceAddressCaptureReplay = false,
      .bufferDeviceAddressMultiDevice = false,

      /* Vulkan 1.3 */
      .inlineUniformBlock = true,
      /* Inline buffers work like push constants, so after they are bound
       * some of their contents may be copied into the uniform stream as soon
       * as the next draw/dispatch is recorded in the command buffer. This
       * means that if the client updates the buffer contents after binding
       * it to a command buffer, the next queue submit of that command buffer
       * may not use the latest update to the buffer contents, but the data
       * that was present in the buffer at the time it was bound to the
       * command buffer.
       */
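      /* Concrete sequence illustrating the above: bind the inline block,
       * record a draw, then update the block contents from the host; a later
       * vkQueueSubmit() of that command buffer may still see the contents
       * observed at bind time, which is why update-after-bind is off here.
       */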
      .descriptorBindingInlineUniformBlockUpdateAfterBind = false,
      .pipelineCreationCacheControl = true,
      .privateData = true,
      .maintenance4 = true,
      .shaderZeroInitializeWorkgroupMemory = true,
      .synchronization2 = true,
      .robustImageAccess = true,
      .shaderIntegerDotProduct = true,

      /* VK_EXT_4444_formats */
      .formatA4R4G4B4 = true,
      .formatA4B4G4R4 = true,

      /* VK_EXT_custom_border_color */
      .customBorderColors = true,
      .customBorderColorWithoutFormat = false,

      /* VK_EXT_index_type_uint8 */
      .indexTypeUint8 = true,

      /* VK_EXT_line_rasterization */
      .rectangularLines = true,
      .bresenhamLines = true,
      .smoothLines = true,
      .stippledRectangularLines = false,
      .stippledBresenhamLines = false,
      .stippledSmoothLines = false,

      /* VK_EXT_color_write_enable */
      .colorWriteEnable = true,

      /* VK_EXT_extended_dynamic_state */
      .extendedDynamicState = true,

      /* VK_EXT_extended_dynamic_state2 */
      .extendedDynamicState2 = true,
      /* We don't support extendedDynamicState2LogicOp as that would require
       * compiling shader variants after pipeline creation.
       */
      .extendedDynamicState2LogicOp = false,
      /* We don't support extendedDynamicState2PatchControlPoints as we don't
       * support tessellation shaders.
       */
      .extendedDynamicState2PatchControlPoints = false,

      /* VK_KHR_pipeline_executable_properties */
      .pipelineExecutableInfo = true,

      /* VK_EXT_provoking_vertex */
      .provokingVertexLast = true,
      /* FIXME: update when supporting EXT_transform_feedback */
      .transformFeedbackPreservesProvokingVertex = false,

      /* VK_EXT_vertex_attribute_divisor */
      .vertexAttributeInstanceRateDivisor = true,
      .vertexAttributeInstanceRateZeroDivisor = false,

      /* VK_KHR_performance_query */
      .performanceCounterQueryPools = physical_device->caps.perfmon,
      .performanceCounterMultipleQueryPools = false,

      /* VK_EXT_texel_buffer_alignment */
      .texelBufferAlignment = true,

      /* VK_KHR_workgroup_memory_explicit_layout */
      .workgroupMemoryExplicitLayout = true,
      .workgroupMemoryExplicitLayoutScalarBlockLayout = false,
      .workgroupMemoryExplicitLayout8BitAccess = true,
      .workgroupMemoryExplicitLayout16BitAccess = true,

      /* VK_EXT_border_color_swizzle */
      .borderColorSwizzle = true,
      .borderColorSwizzleFromImage = true,

      /* VK_EXT_shader_module_identifier */
      .shaderModuleIdentifier = true,

      /* VK_EXT_depth_clip_control */
      .depthClipControl = true,

      /* VK_EXT_depth_clip_enable */
      .depthClipEnable = physical_device->devinfo.ver >= 71,

      /* VK_EXT_attachment_feedback_loop_layout */
      .attachmentFeedbackLoopLayout = true,

      /* VK_EXT_primitive_topology_list_restart */
      .primitiveTopologyListRestart = true,
      /* FIXME: we don't support tessellation shaders yet */
      .primitiveTopologyPatchListRestart = false,

      /* VK_EXT_pipeline_robustness */
      .pipelineRobustness = true,

      /* VK_EXT_multi_draw */
      .multiDraw = true,

      /* VK_KHR_shader_terminate_invocation */
      .shaderTerminateInvocation = true,

      /* VK_EXT_shader_demote_to_helper_invocation */
      .shaderDemoteToHelperInvocation = true,

      /* VK_EXT_subgroup_size_control */
      .subgroupSizeControl = true,
      .computeFullSubgroups = true,

      /* VK_KHR_shader_expect_assume */
      .shaderExpectAssume = true,

      /* VK_KHR_dynamic_rendering */
      .dynamicRendering = true,

      /* VK_KHR_maintenance5 */
      .maintenance5 = true,

#ifdef V3DV_USE_WSI_PLATFORM
      /* VK_EXT_swapchain_maintenance1 */
      .swapchainMaintenance1 = true,
#endif

      /* VK_KHR_shader_relaxed_extended_instruction */
      .shaderRelaxedExtendedInstruction = true,
   };
}

VKAPI_ATTR VkResult VKAPI_CALL
v3dv_EnumerateInstanceExtensionProperties(const char *pLayerName,
                                          uint32_t *pPropertyCount,
                                          VkExtensionProperties *pProperties)
{
   /* We don't support any layers */
   if (pLayerName)
      return vk_error(NULL, VK_ERROR_LAYER_NOT_PRESENT);

   return vk_enumerate_instance_extension_properties(
      &instance_extensions, pPropertyCount, pProperties);
}

static VkResult enumerate_devices(struct vk_instance *vk_instance);

static void destroy_physical_device(struct vk_physical_device *device);

VKAPI_ATTR VkResult VKAPI_CALL
v3dv_CreateInstance(const VkInstanceCreateInfo *pCreateInfo,
                    const VkAllocationCallbacks *pAllocator,
                    VkInstance *pInstance)
{
   struct v3dv_instance *instance;
   VkResult result;

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO);

   if (pAllocator == NULL)
      pAllocator = vk_default_allocator();

   instance = vk_alloc(pAllocator, sizeof(*instance), 8,
                       VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
   if (!instance)
      return vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);

   struct vk_instance_dispatch_table dispatch_table;
   vk_instance_dispatch_table_from_entrypoints(
      &dispatch_table, &v3dv_instance_entrypoints, true);
   vk_instance_dispatch_table_from_entrypoints(
      &dispatch_table, &wsi_instance_entrypoints, false);

   result = vk_instance_init(&instance->vk,
                             &instance_extensions,
                             &dispatch_table,
                             pCreateInfo, pAllocator);

   if (result != VK_SUCCESS) {
      vk_free(pAllocator, instance);
      return vk_error(NULL, result);
   }

   v3d_process_debug_variable();

   instance->vk.physical_devices.enumerate = enumerate_devices;
   instance->vk.physical_devices.destroy = destroy_physical_device;

   /* We start with the default values for the pipeline_cache envvars.
    *
    * FIXME: with so many options now, perhaps we could use parse_debug_string
    */
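   /* Illustrative example: launching an application with
    *   V3DV_ENABLE_PIPELINE_CACHE=no-meta-cache
    * keeps the pipeline and default caches enabled but disables the cache
    * for meta operations, per the parsing below.
    */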
   instance->pipeline_cache_enabled = true;
   instance->default_pipeline_cache_enabled = true;
   instance->meta_cache_enabled = true;
   const char *pipeline_cache_str = getenv("V3DV_ENABLE_PIPELINE_CACHE");
   if (pipeline_cache_str != NULL) {
      if (strncmp(pipeline_cache_str, "full", 4) == 0) {
         /* nothing to do, just to filter correct values */
      } else if (strncmp(pipeline_cache_str, "no-default-cache", 16) == 0) {
         instance->default_pipeline_cache_enabled = false;
      } else if (strncmp(pipeline_cache_str, "no-meta-cache", 13) == 0) {
         instance->meta_cache_enabled = false;
      } else if (strncmp(pipeline_cache_str, "off", 3) == 0) {
         instance->pipeline_cache_enabled = false;
         instance->default_pipeline_cache_enabled = false;
         instance->meta_cache_enabled = false;
      } else {
         fprintf(stderr, "Invalid value for envvar V3DV_ENABLE_PIPELINE_CACHE. "
                 "Allowed values are: full, no-default-cache, no-meta-cache, off\n");
      }
   }

   if (instance->pipeline_cache_enabled == false) {
      fprintf(stderr, "WARNING: v3dv pipeline cache is disabled. Performance "
              "may be negatively affected\n");
   }
   if (instance->default_pipeline_cache_enabled == false) {
      fprintf(stderr, "WARNING: default v3dv pipeline cache is disabled. "
              "Performance may be negatively affected\n");
   }
   if (instance->meta_cache_enabled == false) {
      fprintf(stderr, "WARNING: the custom pipeline cache for meta operations "
              "is disabled. Performance may be negatively affected\n");
   }

   VG(VALGRIND_CREATE_MEMPOOL(instance, 0, false));

#if DETECT_OS_ANDROID
   struct u_gralloc *u_gralloc = vk_android_init_ugralloc();

   if (u_gralloc && u_gralloc_get_type(u_gralloc) == U_GRALLOC_TYPE_FALLBACK) {
      mesa_logw(
         "v3dv: Gralloc is not supported. Android extensions are disabled.");
      vk_android_destroy_ugralloc();
   }
#endif

   *pInstance = v3dv_instance_to_handle(instance);

   return VK_SUCCESS;
}

static void
v3dv_physical_device_free_disk_cache(struct v3dv_physical_device *device)
{
#ifdef ENABLE_SHADER_CACHE
   if (device->disk_cache)
      disk_cache_destroy(device->disk_cache);
#else
   assert(device->disk_cache == NULL);
#endif
}

static void
physical_device_finish(struct v3dv_physical_device *device)
{
   v3dv_wsi_finish(device);
   v3dv_physical_device_free_disk_cache(device);
   v3d_compiler_free(device->compiler);

   util_sparse_array_finish(&device->bo_map);

   close(device->render_fd);
   if (device->display_fd >= 0)
      close(device->display_fd);

   free(device->name);

#if USE_V3D_SIMULATOR
   v3d_simulator_destroy(device->sim_file);
#endif

   vk_physical_device_finish(&device->vk);
   mtx_destroy(&device->mutex);
}

static void
destroy_physical_device(struct vk_physical_device *device)
{
   physical_device_finish((struct v3dv_physical_device *)device);
   vk_free(&device->instance->alloc, device);
}

VKAPI_ATTR void VKAPI_CALL
v3dv_DestroyInstance(VkInstance _instance,
                     const VkAllocationCallbacks *pAllocator)
{
   V3DV_FROM_HANDLE(v3dv_instance, instance, _instance);

   if (!instance)
      return;

#if DETECT_OS_ANDROID
   vk_android_destroy_ugralloc();
#endif

   VG(VALGRIND_DESTROY_MEMPOOL(instance));

   vk_instance_finish(&instance->vk);
   vk_free(&instance->vk.alloc, instance);
}

static uint64_t
compute_heap_size()
{
#if !USE_V3D_SIMULATOR
   /* Query the total RAM from the system */
   struct sysinfo info;
   sysinfo(&info);

   uint64_t total_ram = (uint64_t)info.totalram * (uint64_t)info.mem_unit;
#else
   uint64_t total_ram = (uint64_t) v3d_simulator_get_mem_size();
#endif

   /* We don't want to burn too much RAM with the GPU. If the user has 4GB
    * or less, we use at most half. If they have more than 4GB, we limit it
    * to 3/4 with a maximum of 4GB, since the GPU cannot address more than
    * that.
    */
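   /* Worked example: with 2GB of total RAM this reports 1GB; with 8GB it
    * reports MIN2(4GB, 8GB * 3 / 4) = 4GB.
    */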
   const uint64_t MAX_HEAP_SIZE = 4ull * 1024ull * 1024ull * 1024ull;
   uint64_t available;
   if (total_ram <= MAX_HEAP_SIZE)
      available = total_ram / 2;
   else
      available = MIN2(MAX_HEAP_SIZE, total_ram * 3 / 4);

   return available;
}

static uint64_t
compute_memory_budget(struct v3dv_physical_device *device)
{
   uint64_t heap_size = device->memory.memoryHeaps[0].size;
   uint64_t heap_used = device->heap_used;
   uint64_t sys_available;
#if !USE_V3D_SIMULATOR
   ASSERTED bool has_available_memory =
      os_get_available_system_memory(&sys_available);
   assert(has_available_memory);
#else
   sys_available = (uint64_t) v3d_simulator_get_mem_free();
#endif

   /* Let's not incite the app to starve the system: report at most 90% of
    * available system memory.
    */
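   /* Worked example: heap_size = 4GB, heap_used = 1GB and sys_available =
    * 2GB gives MIN2(4GB, 1GB + 1.8GB) = 2.8GB of reported budget.
    */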
   uint64_t heap_available = sys_available * 9 / 10;
   return MIN2(heap_size, heap_used + heap_available);
}

static bool
v3d_has_feature(struct v3dv_physical_device *device, enum drm_v3d_param feature)
{
   struct drm_v3d_get_param p = {
      .param = feature,
   };
   if (v3dv_ioctl(device->render_fd, DRM_IOCTL_V3D_GET_PARAM, &p) != 0)
      return false;
   return p.value;
}

static bool
device_has_expected_features(struct v3dv_physical_device *device)
{
   return v3d_has_feature(device, DRM_V3D_PARAM_SUPPORTS_TFU) &&
          v3d_has_feature(device, DRM_V3D_PARAM_SUPPORTS_CSD) &&
          v3d_has_feature(device, DRM_V3D_PARAM_SUPPORTS_CACHE_FLUSH) &&
          device->caps.multisync;
}

static VkResult
init_uuids(struct v3dv_physical_device *device)
{
   const struct build_id_note *note =
      build_id_find_nhdr_for_addr(init_uuids);
   if (!note) {
      return vk_errorf(device->vk.instance,
                       VK_ERROR_INITIALIZATION_FAILED,
                       "Failed to find build-id");
   }

   unsigned build_id_len = build_id_length(note);
   if (build_id_len < 20) {
      return vk_errorf(device->vk.instance,
                       VK_ERROR_INITIALIZATION_FAILED,
                       "build-id too short. It needs to be a SHA");
   }

   memcpy(device->driver_build_sha1, build_id_data(note), 20);

   uint32_t vendor_id = v3dv_physical_device_vendor_id(device);
   uint32_t device_id = v3dv_physical_device_device_id(device);

   struct mesa_sha1 sha1_ctx;
   uint8_t sha1[20];
   STATIC_ASSERT(VK_UUID_SIZE <= sizeof(sha1));

   /* The pipeline cache UUID is used for determining when a pipeline cache is
    * invalid. It needs both a driver build and the PCI ID of the device.
    */
   _mesa_sha1_init(&sha1_ctx);
   _mesa_sha1_update(&sha1_ctx, build_id_data(note), build_id_len);
   _mesa_sha1_update(&sha1_ctx, &device_id, sizeof(device_id));
   _mesa_sha1_final(&sha1_ctx, sha1);
   memcpy(device->pipeline_cache_uuid, sha1, VK_UUID_SIZE);

   /* The driver UUID is used for determining sharability of images and memory
    * between two Vulkan instances in separate processes. People who want to
    * share memory need to also check the device UUID (below), so all this
    * needs to be is the build-id.
    */
   memcpy(device->driver_uuid, build_id_data(note), VK_UUID_SIZE);

   /* The device UUID uniquely identifies the given device within the machine.
    * Since we never have more than one device, this doesn't need to be a real
    * UUID.
    */
   _mesa_sha1_init(&sha1_ctx);
   _mesa_sha1_update(&sha1_ctx, &vendor_id, sizeof(vendor_id));
   _mesa_sha1_update(&sha1_ctx, &device_id, sizeof(device_id));
   _mesa_sha1_final(&sha1_ctx, sha1);
   memcpy(device->device_uuid, sha1, VK_UUID_SIZE);

   return VK_SUCCESS;
}

static void
v3dv_physical_device_init_disk_cache(struct v3dv_physical_device *device)
{
#ifdef ENABLE_SHADER_CACHE
   char timestamp[41];
   _mesa_sha1_format(timestamp, device->driver_build_sha1);

   assert(device->name);
   device->disk_cache = disk_cache_create(device->name, timestamp, v3d_mesa_debug);
#else
   device->disk_cache = NULL;
#endif
}

static void
get_device_properties(const struct v3dv_physical_device *device,
                      struct vk_properties *properties)
{
   STATIC_ASSERT(MAX_SAMPLED_IMAGES + MAX_STORAGE_IMAGES + MAX_INPUT_ATTACHMENTS
                 <= V3D_MAX_TEXTURE_SAMPLERS);
   STATIC_ASSERT(MAX_UNIFORM_BUFFERS >= MAX_DYNAMIC_UNIFORM_BUFFERS);
   STATIC_ASSERT(MAX_STORAGE_BUFFERS >= MAX_DYNAMIC_STORAGE_BUFFERS);

   const uint32_t page_size = 4096;
   const uint64_t mem_size = compute_heap_size();

   const uint32_t max_varying_components = 16 * 4;

   const uint32_t max_per_stage_resources = 128;

   const float v3d_point_line_granularity = 2.0f / (1 << V3D_COORD_SHIFT);
   const uint32_t max_fb_size = V3D_MAX_IMAGE_DIMENSION;

   const VkSampleCountFlags supported_sample_counts =
      VK_SAMPLE_COUNT_1_BIT | VK_SAMPLE_COUNT_4_BIT;

   const uint8_t max_rts = V3D_MAX_RENDER_TARGETS(device->devinfo.ver);

   struct timespec clock_res;
   clock_getres(CLOCK_MONOTONIC, &clock_res);
   const float timestamp_period =
      clock_res.tv_sec * 1000000000.0f + clock_res.tv_nsec;
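   /* e.g. a clock_getres() result of { 0, 1 } (1ns resolution) yields a
    * timestampPeriod of 1.0, i.e. one nanosecond per timestamp tick.
    */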

   /* We don't really have special restrictions for the maximum
    * descriptors per set, other than maybe not exceeding the limits
    * of addressable memory in a single allocation on either the host
    * or the GPU. This will be a much larger limit than any of the
    * per-stage limits already available in Vulkan though, so in practice,
    * it is not expected to limit anything beyond what is already
    * constrained through per-stage limits.
    */
   const uint32_t max_host_descriptors =
      (UINT32_MAX - sizeof(struct v3dv_descriptor_set)) /
      sizeof(struct v3dv_descriptor);
   const uint32_t max_gpu_descriptors =
      (UINT32_MAX / v3dv_X(device, max_descriptor_bo_size)());

   VkSubgroupFeatureFlags subgroup_ops = VK_SUBGROUP_FEATURE_BASIC_BIT;
   if (device->devinfo.ver >= 71) {
      subgroup_ops |= VK_SUBGROUP_FEATURE_BALLOT_BIT |
                      VK_SUBGROUP_FEATURE_SHUFFLE_BIT |
                      VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT |
                      VK_SUBGROUP_FEATURE_VOTE_BIT |
                      VK_SUBGROUP_FEATURE_QUAD_BIT;
   }

#if DETECT_OS_ANDROID
   /* Used to determine the sharedImage prop in
    * VkPhysicalDevicePresentationPropertiesANDROID
    */
   uint64_t front_rendering_usage = 0;
   struct u_gralloc *gralloc = u_gralloc_create(U_GRALLOC_TYPE_AUTO);
   if (gralloc != NULL) {
      u_gralloc_get_front_rendering_usage(gralloc, &front_rendering_usage);
      u_gralloc_destroy(&gralloc);
   }
   VkBool32 shared_image = front_rendering_usage ? VK_TRUE : VK_FALSE;
#endif

   /* FIXME: this will probably require an in-depth review */
   *properties = (struct vk_properties) {
      /* VkPhysicalDeviceProperties, limits and sparse props below */
      .apiVersion = V3DV_API_VERSION,
      .driverVersion = vk_get_driver_version(),
      .vendorID = v3dv_physical_device_vendor_id(device),
      .deviceID = v3dv_physical_device_device_id(device),
      .deviceType = VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU,

      /* Vulkan 1.0 limits */
      .maxImageDimension1D = V3D_MAX_IMAGE_DIMENSION,
      .maxImageDimension2D = V3D_MAX_IMAGE_DIMENSION,
      .maxImageDimension3D = V3D_MAX_IMAGE_DIMENSION,
      .maxImageDimensionCube = V3D_MAX_IMAGE_DIMENSION,
      .maxImageArrayLayers = V3D_MAX_ARRAY_LAYERS,
      .maxTexelBufferElements = (1ul << 28),
      .maxUniformBufferRange = V3D_MAX_BUFFER_RANGE,
      .maxStorageBufferRange = V3D_MAX_BUFFER_RANGE,
      .maxPushConstantsSize = MAX_PUSH_CONSTANTS_SIZE,
      .maxMemoryAllocationCount = mem_size / page_size,
      .maxSamplerAllocationCount = 64 * 1024,
      .bufferImageGranularity = V3D_NON_COHERENT_ATOM_SIZE,
      .sparseAddressSpaceSize = 0,
      .maxBoundDescriptorSets = MAX_SETS,
      .maxPerStageDescriptorSamplers = V3D_MAX_TEXTURE_SAMPLERS,
      .maxPerStageDescriptorUniformBuffers = MAX_UNIFORM_BUFFERS,
      .maxPerStageDescriptorStorageBuffers = MAX_STORAGE_BUFFERS,
      .maxPerStageDescriptorSampledImages = MAX_SAMPLED_IMAGES,
      .maxPerStageDescriptorStorageImages = MAX_STORAGE_IMAGES,
      .maxPerStageDescriptorInputAttachments = MAX_INPUT_ATTACHMENTS,
      .maxPerStageResources = max_per_stage_resources,

      .maxDescriptorSetSamplers =
         V3DV_SUPPORTED_SHADER_STAGES * V3D_MAX_TEXTURE_SAMPLERS,
      .maxDescriptorSetUniformBuffers =
         V3DV_SUPPORTED_SHADER_STAGES * MAX_UNIFORM_BUFFERS,
      .maxDescriptorSetUniformBuffersDynamic = MAX_DYNAMIC_UNIFORM_BUFFERS,
      .maxDescriptorSetStorageBuffers =
         V3DV_SUPPORTED_SHADER_STAGES * MAX_STORAGE_BUFFERS,
      .maxDescriptorSetStorageBuffersDynamic = MAX_DYNAMIC_STORAGE_BUFFERS,
      .maxDescriptorSetSampledImages =
         V3DV_SUPPORTED_SHADER_STAGES * MAX_SAMPLED_IMAGES,
      .maxDescriptorSetStorageImages =
         V3DV_SUPPORTED_SHADER_STAGES * MAX_STORAGE_IMAGES,
      .maxDescriptorSetInputAttachments = MAX_INPUT_ATTACHMENTS,

      /* Vertex limits */
      .maxVertexInputAttributes = MAX_VERTEX_ATTRIBS,
      .maxVertexInputBindings = MAX_VBS,
      .maxVertexInputAttributeOffset = 0xffffffff,
      .maxVertexInputBindingStride = MESA_VK_MAX_VERTEX_BINDING_STRIDE,
      .maxVertexOutputComponents = max_varying_components,

      /* Tessellation limits */
      .maxTessellationGenerationLevel = 0,
      .maxTessellationPatchSize = 0,
      .maxTessellationControlPerVertexInputComponents = 0,
      .maxTessellationControlPerVertexOutputComponents = 0,
      .maxTessellationControlPerPatchOutputComponents = 0,
      .maxTessellationControlTotalOutputComponents = 0,
      .maxTessellationEvaluationInputComponents = 0,
      .maxTessellationEvaluationOutputComponents = 0,

      /* Geometry limits */
      .maxGeometryShaderInvocations = 32,
      .maxGeometryInputComponents = 64,
      .maxGeometryOutputComponents = 64,
      .maxGeometryOutputVertices = 256,
      .maxGeometryTotalOutputComponents = 1024,

      /* Fragment limits */
      .maxFragmentInputComponents = max_varying_components,
      .maxFragmentOutputAttachments = 4,
      .maxFragmentDualSrcAttachments = 0,
      .maxFragmentCombinedOutputResources = max_rts +
                                            MAX_STORAGE_BUFFERS +
                                            MAX_STORAGE_IMAGES,

      /* Compute limits */
      .maxComputeSharedMemorySize = 16384,
      .maxComputeWorkGroupCount = { 65535, 65535, 65535 },
      .maxComputeWorkGroupInvocations = 256,
      .maxComputeWorkGroupSize = { 256, 256, 256 },

      .subPixelPrecisionBits = V3D_COORD_SHIFT,
      .subTexelPrecisionBits = 8,
      .mipmapPrecisionBits = 8,
      .maxDrawIndexedIndexValue = device->devinfo.ver >= 71 ?
                                  0xffffffff : 0x00ffffff,
      .maxDrawIndirectCount = 0x7fffffff,
      .maxSamplerLodBias = 14.0f,
      .maxSamplerAnisotropy = 16.0f,
      .maxViewports = MAX_VIEWPORTS,
      .maxViewportDimensions = { max_fb_size, max_fb_size },
      .viewportBoundsRange = { -2.0 * max_fb_size,
                               2.0 * max_fb_size - 1 },
      .viewportSubPixelBits = 0,
      .minMemoryMapAlignment = page_size,
      .minTexelBufferOffsetAlignment = V3D_TMU_TEXEL_ALIGN,
      .minUniformBufferOffsetAlignment = 32,
      .minStorageBufferOffsetAlignment = 32,
      .minTexelOffset = -8,
      .maxTexelOffset = 7,
      .minTexelGatherOffset = -8,
      .maxTexelGatherOffset = 7,
      .minInterpolationOffset = -0.5,
      .maxInterpolationOffset = 0.5,
      .subPixelInterpolationOffsetBits = V3D_COORD_SHIFT,
      .maxFramebufferWidth = max_fb_size,
      .maxFramebufferHeight = max_fb_size,
      .maxFramebufferLayers = 256,
      .framebufferColorSampleCounts = supported_sample_counts,
      .framebufferDepthSampleCounts = supported_sample_counts,
      .framebufferStencilSampleCounts = supported_sample_counts,
      .framebufferNoAttachmentsSampleCounts = supported_sample_counts,
      .maxColorAttachments = max_rts,
      .sampledImageColorSampleCounts = supported_sample_counts,
      .sampledImageIntegerSampleCounts = supported_sample_counts,
      .sampledImageDepthSampleCounts = supported_sample_counts,
      .sampledImageStencilSampleCounts = supported_sample_counts,
      .storageImageSampleCounts = VK_SAMPLE_COUNT_1_BIT,
      .maxSampleMaskWords = 1,
      .timestampComputeAndGraphics = true,
      .timestampPeriod = timestamp_period,
      .maxClipDistances = 8,
      .maxCullDistances = 0,
      .maxCombinedClipAndCullDistances = 8,
      .discreteQueuePriorities = 2,
      .pointSizeRange = { v3d_point_line_granularity,
                          V3D_MAX_POINT_SIZE },
      .lineWidthRange = { 1.0f, V3D_MAX_LINE_WIDTH },
      .pointSizeGranularity = v3d_point_line_granularity,
      .lineWidthGranularity = v3d_point_line_granularity,
      .strictLines = true,
      .standardSampleLocations = false,
      .optimalBufferCopyOffsetAlignment = 32,
      .optimalBufferCopyRowPitchAlignment = 32,
      .nonCoherentAtomSize = V3D_NON_COHERENT_ATOM_SIZE,

      /* Vulkan 1.0 sparse properties */
      .sparseResidencyStandard2DBlockShape = false,
      .sparseResidencyStandard2DMultisampleBlockShape = false,
      .sparseResidencyStandard3DBlockShape = false,
      .sparseResidencyAlignedMipSize = false,
      .sparseResidencyNonResidentStrict = false,

      /* Vulkan 1.1 properties */
      .deviceLUIDValid = false,
      .subgroupSize = V3D_CHANNELS,
      .subgroupSupportedStages = VK_SHADER_STAGE_COMPUTE_BIT |
                                 VK_SHADER_STAGE_FRAGMENT_BIT,
      .subgroupSupportedOperations = VK_SUBGROUP_FEATURE_BASIC_BIT,
      .subgroupQuadOperationsInAllStages = false,
      .pointClippingBehavior = VK_POINT_CLIPPING_BEHAVIOR_ALL_CLIP_PLANES,
      .maxMultiviewViewCount = MAX_MULTIVIEW_VIEW_COUNT,
      .maxMultiviewInstanceIndex = UINT32_MAX - 1,
      .protectedNoFault = false,
      .maxPerSetDescriptors = MIN2(max_host_descriptors, max_gpu_descriptors),
      /* Minimum required by the spec */
      .maxMemoryAllocationSize = MAX_MEMORY_ALLOCATION_SIZE,

      /* Vulkan 1.2 properties */
      .driverID = VK_DRIVER_ID_MESA_V3DV,
      .conformanceVersion = {
         .major = 1,
         .minor = 3,
         .subminor = 8,
         .patch = 3,
      },
      .supportedDepthResolveModes = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT,
      .supportedStencilResolveModes = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT,
      /* FIXME: if we want to support independentResolveNone then we would
       * need to honor attachment load operations on resolve attachments,
       * which we currently ignore because the resolve makes them irrelevant,
       * as it unconditionally writes all pixels in the render area. However,
       * with independentResolveNone, it is possible to have one aspect of a
       * D/S resolve attachment stay unresolved, in which case the attachment
       * load operation is relevant.
       *
       * NOTE: implementing attachment load for resolve attachments isn't
       * immediately trivial because these attachments are not part of the
       * framebuffer and therefore we can't use the same mechanism we use
       * for framebuffer attachments. Instead, we would probably have to
       * emit a meta operation for that right at the start of the render
       * pass (or subpass).
       */
      .independentResolveNone = false,
      .independentResolve = false,
      .maxTimelineSemaphoreValueDifference = UINT64_MAX,

      .denormBehaviorIndependence = VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL,
      .roundingModeIndependence = VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL,
      .shaderSignedZeroInfNanPreserveFloat16 = true,
      .shaderSignedZeroInfNanPreserveFloat32 = true,
      .shaderSignedZeroInfNanPreserveFloat64 = false,
      .shaderDenormPreserveFloat16 = true,
      .shaderDenormPreserveFloat32 = true,
      .shaderDenormPreserveFloat64 = false,
      .shaderDenormFlushToZeroFloat16 = false,
      .shaderDenormFlushToZeroFloat32 = false,
      .shaderDenormFlushToZeroFloat64 = false,
      .shaderRoundingModeRTEFloat16 = true,
      .shaderRoundingModeRTEFloat32 = true,
      .shaderRoundingModeRTEFloat64 = false,
      .shaderRoundingModeRTZFloat16 = false,
      .shaderRoundingModeRTZFloat32 = false,
      .shaderRoundingModeRTZFloat64 = false,

      .maxPerStageDescriptorUpdateAfterBindSamplers = V3D_MAX_TEXTURE_SAMPLERS,
      .maxPerStageDescriptorUpdateAfterBindUniformBuffers = MAX_UNIFORM_BUFFERS,
      .maxPerStageDescriptorUpdateAfterBindStorageBuffers = MAX_STORAGE_BUFFERS,
      .maxPerStageDescriptorUpdateAfterBindSampledImages = MAX_SAMPLED_IMAGES,
      .maxPerStageDescriptorUpdateAfterBindStorageImages = MAX_STORAGE_IMAGES,
      .maxPerStageDescriptorUpdateAfterBindInputAttachments = MAX_INPUT_ATTACHMENTS,
      .maxPerStageUpdateAfterBindResources = max_per_stage_resources,
      .maxDescriptorSetUpdateAfterBindSamplers =
         V3DV_SUPPORTED_SHADER_STAGES * V3D_MAX_TEXTURE_SAMPLERS,
      .maxDescriptorSetUpdateAfterBindUniformBuffers =
         V3DV_SUPPORTED_SHADER_STAGES * MAX_UNIFORM_BUFFERS,
      .maxDescriptorSetUpdateAfterBindUniformBuffersDynamic = MAX_DYNAMIC_UNIFORM_BUFFERS,
      .maxDescriptorSetUpdateAfterBindStorageBuffers =
         V3DV_SUPPORTED_SHADER_STAGES * MAX_STORAGE_BUFFERS,
      .maxDescriptorSetUpdateAfterBindStorageBuffersDynamic = MAX_DYNAMIC_STORAGE_BUFFERS,
      .maxDescriptorSetUpdateAfterBindSampledImages =
         V3DV_SUPPORTED_SHADER_STAGES * MAX_SAMPLED_IMAGES,
      .maxDescriptorSetUpdateAfterBindStorageImages =
         V3DV_SUPPORTED_SHADER_STAGES * MAX_STORAGE_IMAGES,
      .maxDescriptorSetUpdateAfterBindInputAttachments = MAX_INPUT_ATTACHMENTS,

      /* V3D doesn't support min/max filtering */
      .filterMinmaxSingleComponentFormats = false,
      .filterMinmaxImageComponentMapping = false,

      .framebufferIntegerColorSampleCounts =
         VK_SAMPLE_COUNT_1_BIT | VK_SAMPLE_COUNT_4_BIT,

      /* Vulkan 1.3 properties */
      .maxInlineUniformBlockSize = MAX_INLINE_UNIFORM_BLOCK_SIZE,
      .maxPerStageDescriptorInlineUniformBlocks = MAX_INLINE_UNIFORM_BUFFERS,
      .maxDescriptorSetInlineUniformBlocks = MAX_INLINE_UNIFORM_BUFFERS,
      .maxInlineUniformTotalSize =
         MAX_INLINE_UNIFORM_BUFFERS * MAX_INLINE_UNIFORM_BLOCK_SIZE,
      .maxPerStageDescriptorUpdateAfterBindInlineUniformBlocks =
         MAX_INLINE_UNIFORM_BUFFERS,
      .maxDescriptorSetUpdateAfterBindInlineUniformBlocks =
         MAX_INLINE_UNIFORM_BUFFERS,
      .maxBufferSize = V3D_MAX_BUFFER_RANGE,
      .storageTexelBufferOffsetAlignmentBytes = V3D_TMU_TEXEL_ALIGN,
      .storageTexelBufferOffsetSingleTexelAlignment = false,
      .uniformTexelBufferOffsetAlignmentBytes = V3D_TMU_TEXEL_ALIGN,
      .uniformTexelBufferOffsetSingleTexelAlignment = false,
      /* No native acceleration for integer dot product. We use NIR lowering. */
      .integerDotProduct8BitUnsignedAccelerated = false,
      .integerDotProduct8BitSignedAccelerated = false,
      .integerDotProduct8BitMixedSignednessAccelerated = false,
      .integerDotProduct4x8BitPackedUnsignedAccelerated = false,
      .integerDotProduct4x8BitPackedSignedAccelerated = false,
      .integerDotProduct4x8BitPackedMixedSignednessAccelerated = false,
      .integerDotProduct16BitUnsignedAccelerated = false,
      .integerDotProduct16BitSignedAccelerated = false,
      .integerDotProduct16BitMixedSignednessAccelerated = false,
      .integerDotProduct32BitUnsignedAccelerated = false,
      .integerDotProduct32BitSignedAccelerated = false,
      .integerDotProduct32BitMixedSignednessAccelerated = false,
      .integerDotProduct64BitUnsignedAccelerated = false,
      .integerDotProduct64BitSignedAccelerated = false,
      .integerDotProduct64BitMixedSignednessAccelerated = false,
      .integerDotProductAccumulatingSaturating8BitUnsignedAccelerated = false,
      .integerDotProductAccumulatingSaturating8BitSignedAccelerated = false,
      .integerDotProductAccumulatingSaturating8BitMixedSignednessAccelerated = false,
      .integerDotProductAccumulatingSaturating4x8BitPackedUnsignedAccelerated = false,
      .integerDotProductAccumulatingSaturating4x8BitPackedSignedAccelerated = false,
      .integerDotProductAccumulatingSaturating4x8BitPackedMixedSignednessAccelerated = false,
      .integerDotProductAccumulatingSaturating16BitUnsignedAccelerated = false,
      .integerDotProductAccumulatingSaturating16BitSignedAccelerated = false,
      .integerDotProductAccumulatingSaturating16BitMixedSignednessAccelerated = false,
      .integerDotProductAccumulatingSaturating32BitUnsignedAccelerated = false,
      .integerDotProductAccumulatingSaturating32BitSignedAccelerated = false,
      .integerDotProductAccumulatingSaturating32BitMixedSignednessAccelerated = false,
      .integerDotProductAccumulatingSaturating64BitUnsignedAccelerated = false,
      .integerDotProductAccumulatingSaturating64BitSignedAccelerated = false,
      .integerDotProductAccumulatingSaturating64BitMixedSignednessAccelerated = false,

      /* VkPhysicalDeviceCustomBorderColorPropertiesEXT */
      .maxCustomBorderColorSamplers = V3D_MAX_TEXTURE_SAMPLERS,

      /* VkPhysicalDeviceProvokingVertexPropertiesEXT */
      .provokingVertexModePerPipeline = true,
      /* FIXME: update when supporting EXT_transform_feedback */
      .transformFeedbackPreservesTriangleFanProvokingVertex = false,

      /* VkPhysicalDeviceVertexAttributeDivisorPropertiesEXT */
      .maxVertexAttribDivisor = V3D_MAX_VERTEX_ATTRIB_DIVISOR,
      .supportsNonZeroFirstInstance = true,

      /* VkPhysicalDevicePerformanceQueryPropertiesKHR */
      .allowCommandBufferQueryCopies = true,

#if DETECT_OS_ANDROID
      /* VkPhysicalDevicePresentationPropertiesANDROID */
      .sharedImage = shared_image,
#endif

      /* VkPhysicalDeviceDrmPropertiesEXT */
      .drmHasPrimary = device->has_primary,
      .drmPrimaryMajor = (int64_t) major(device->primary_devid),
      .drmPrimaryMinor = (int64_t) minor(device->primary_devid),
      .drmHasRender = device->has_render,
      .drmRenderMajor = (int64_t) major(device->render_devid),
      .drmRenderMinor = (int64_t) minor(device->render_devid),

      /* VkPhysicalDeviceLineRasterizationPropertiesEXT */
      .lineSubPixelPrecisionBits = V3D_COORD_SHIFT,

      /* VkPhysicalDevicePipelineRobustnessPropertiesEXT */
      .defaultRobustnessStorageBuffers =
         VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_DEVICE_DEFAULT_EXT,
      .defaultRobustnessUniformBuffers =
         VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_DEVICE_DEFAULT_EXT,
      .defaultRobustnessVertexInputs =
         VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_DEVICE_DEFAULT_EXT,
      .defaultRobustnessImages =
         VK_PIPELINE_ROBUSTNESS_IMAGE_BEHAVIOR_DEVICE_DEFAULT_EXT,

      /* VkPhysicalDeviceMultiDrawPropertiesEXT */
      .maxMultiDrawCount = 2048,

      /* VkPhysicalDevicePCIBusInfoPropertiesEXT is not supported
       * and is left unfilled
       */

      /* VK_EXT_subgroup_size_control */
      .minSubgroupSize = V3D_CHANNELS,
      .maxSubgroupSize = V3D_CHANNELS,
      .maxComputeWorkgroupSubgroups = 16, /* 256 / 16 */
      .requiredSubgroupSizeStages = VK_SHADER_STAGE_COMPUTE_BIT,

      .subgroupSupportedOperations = subgroup_ops,

      /* VK_KHR_maintenance5 */
      .earlyFragmentMultisampleCoverageAfterSampleCounting = true,
      .earlyFragmentSampleMaskTestBeforeSampleCounting = true,
      .depthStencilSwizzleOneSupport = true,
      .polygonModePointSize = true,
      .nonStrictSinglePixelWideLinesUseParallelogram = true,
      .nonStrictWideLinesUseParallelogram = true,
   };

   /* VkPhysicalDeviceShaderModuleIdentifierPropertiesEXT */
   STATIC_ASSERT(sizeof(vk_shaderModuleIdentifierAlgorithmUUID) ==
                 sizeof(properties->shaderModuleIdentifierAlgorithmUUID));
   memcpy(properties->shaderModuleIdentifierAlgorithmUUID,
          vk_shaderModuleIdentifierAlgorithmUUID,
          sizeof(properties->shaderModuleIdentifierAlgorithmUUID));

   /* VkPhysicalDeviceProperties */
   snprintf(properties->deviceName, sizeof(properties->deviceName),
            "%s", device->name);
   memcpy(properties->pipelineCacheUUID,
          device->pipeline_cache_uuid, VK_UUID_SIZE);

   /* Vulkan 1.1 properties */
   memcpy(properties->deviceUUID, device->device_uuid, VK_UUID_SIZE);
   memcpy(properties->driverUUID, device->driver_uuid, VK_UUID_SIZE);

   /* Vulkan 1.2 properties */
   memset(properties->driverName, 0, VK_MAX_DRIVER_NAME_SIZE);
   snprintf(properties->driverName, VK_MAX_DRIVER_NAME_SIZE, "V3DV Mesa");
   memset(properties->driverInfo, 0, VK_MAX_DRIVER_INFO_SIZE);
   snprintf(properties->driverInfo, VK_MAX_DRIVER_INFO_SIZE,
            "Mesa " PACKAGE_VERSION MESA_GIT_SHA1);
}

static VkResult
create_physical_device(struct v3dv_instance *instance,
                       drmDevicePtr gpu_device,
                       drmDevicePtr display_device)
{
   VkResult result = VK_SUCCESS;
   int32_t display_fd = -1;
   int32_t render_fd = -1;

   struct v3dv_physical_device *device =
      vk_zalloc(&instance->vk.alloc, sizeof(*device), 8,
                VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);

   if (!device)
      return vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY);

   struct vk_physical_device_dispatch_table dispatch_table;
   vk_physical_device_dispatch_table_from_entrypoints(
      &dispatch_table, &v3dv_physical_device_entrypoints, true);
   vk_physical_device_dispatch_table_from_entrypoints(
      &dispatch_table, &wsi_physical_device_entrypoints, false);

   result = vk_physical_device_init(&device->vk, &instance->vk, NULL, NULL,
                                    NULL, &dispatch_table);

   if (result != VK_SUCCESS)
      goto fail;

   assert(gpu_device);
   const char *path = gpu_device->nodes[DRM_NODE_RENDER];
   render_fd = open(path, O_RDWR | O_CLOEXEC);
   if (render_fd < 0) {
      fprintf(stderr, "Opening %s failed: %s\n", path, strerror(errno));
      result = VK_ERROR_INITIALIZATION_FAILED;
      goto fail;
   }

   /* If we are running on VK_KHR_display we need to acquire the master
    * display device now for the v3dv_wsi_init() call below. For anything else
    * we postpone that until a swapchain is created.
    */

   const char *primary_path;
#if !USE_V3D_SIMULATOR
   if (display_device)
      primary_path = display_device->nodes[DRM_NODE_PRIMARY];
   else
      primary_path = NULL;
#else
   primary_path = gpu_device->nodes[DRM_NODE_PRIMARY];
#endif

   struct stat primary_stat = {0}, render_stat = {0};

   device->has_primary = primary_path;
   if (device->has_primary) {
      if (stat(primary_path, &primary_stat) != 0) {
         result = vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
                            "failed to stat DRM primary node %s",
                            primary_path);
         goto fail;
      }

      device->primary_devid = primary_stat.st_rdev;
   }

   if (fstat(render_fd, &render_stat) != 0) {
      result = vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
                         "failed to stat DRM render node %s",
                         path);
      goto fail;
   }
   device->has_render = true;
   device->render_devid = render_stat.st_rdev;

#if USE_V3D_SIMULATOR
   device->device_id = gpu_device->deviceinfo.pci->device_id;
#endif

   if (instance->vk.enabled_extensions.KHR_display ||
       instance->vk.enabled_extensions.KHR_xcb_surface ||
       instance->vk.enabled_extensions.KHR_xlib_surface ||
       instance->vk.enabled_extensions.KHR_wayland_surface ||
       instance->vk.enabled_extensions.EXT_acquire_drm_display) {
#if !USE_V3D_SIMULATOR
      /* Open the primary node on the vc4 display device */
      assert(display_device);
      display_fd = open(primary_path, O_RDWR | O_CLOEXEC);
#else
      /* There is only one device with primary and render nodes.
       * Open its primary node.
       */
      display_fd = open(primary_path, O_RDWR | O_CLOEXEC);
#endif
   }

#if USE_V3D_SIMULATOR
   device->sim_file = v3d_simulator_init(render_fd);
#endif

   device->render_fd = render_fd;   /* The v3d render node */
   device->display_fd = display_fd; /* Master vc4 primary node */

   if (!v3d_get_device_info(device->render_fd, &device->devinfo, &v3dv_ioctl)) {
      result = vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
                         "Failed to get info from device.");
      goto fail;
   }

   if (device->devinfo.ver < 42) {
      result = vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
                         "Device version < 42.");
      goto fail;
   }

   device->caps.cpu_queue =
      v3d_has_feature(device, DRM_V3D_PARAM_SUPPORTS_CPU_QUEUE);

   device->caps.multisync =
      v3d_has_feature(device, DRM_V3D_PARAM_SUPPORTS_MULTISYNC_EXT);

   device->caps.perfmon =
      v3d_has_feature(device, DRM_V3D_PARAM_SUPPORTS_PERFMON);

   if (!device_has_expected_features(device)) {
      result = vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
                         "Kernel driver doesn't have required features.");
      goto fail;
   }

   result = init_uuids(device);
   if (result != VK_SUCCESS)
      goto fail;

   device->compiler = v3d_compiler_init(&device->devinfo,
                                        MAX_INLINE_UNIFORM_BUFFERS);
   device->next_program_id = 0;

   ASSERTED int len =
      asprintf(&device->name, "V3D %d.%d.%d.%d",
               device->devinfo.ver / 10,
               device->devinfo.ver % 10,
               device->devinfo.rev,
               device->devinfo.compat_rev);
   assert(len != -1);
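   /* e.g. devinfo.ver = 42, rev = 1, compat_rev = 0 would yield the name
    * "V3D 4.2.1.0" (illustrative values).
    */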

   v3dv_physical_device_init_disk_cache(device);

   /* Set up available memory heaps and types */
   VkPhysicalDeviceMemoryProperties *mem = &device->memory;
   mem->memoryHeapCount = 1;
   mem->memoryHeaps[0].size = compute_heap_size();
   mem->memoryHeaps[0].flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT;

   /* This is the only combination required by the spec */
   mem->memoryTypeCount = 1;
   mem->memoryTypes[0].propertyFlags =
      VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
      VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
      VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
   mem->memoryTypes[0].heapIndex = 0;

   /* Initialize sparse array for refcounting imported BOs */
   util_sparse_array_init(&device->bo_map, sizeof(struct v3dv_bo), 512);

   device->options.merge_jobs = !V3D_DBG(NO_MERGE_JOBS);

   device->drm_syncobj_type = vk_drm_syncobj_get_type(device->render_fd);

   /* We don't support timelines in the uAPI yet and we don't want them
    * getting suddenly turned on by vk_drm_syncobj_get_type() without us
    * adding v3dv code for them first.
    */
   device->drm_syncobj_type.features &= ~VK_SYNC_FEATURE_TIMELINE;

   /* Multiwait is required for emulated timeline semaphores and is supported
    * by the v3d kernel interface.
    */
   device->drm_syncobj_type.features |= VK_SYNC_FEATURE_GPU_MULTI_WAIT;

   device->sync_timeline_type =
      vk_sync_timeline_get_type(&device->drm_syncobj_type);

   device->sync_types[0] = &device->drm_syncobj_type;
   device->sync_types[1] = &device->sync_timeline_type.sync;
   device->sync_types[2] = NULL;
   device->vk.supported_sync_types = device->sync_types;

   get_device_extensions(device, &device->vk.supported_extensions);
   get_features(device, &device->vk.supported_features);
   get_device_properties(device, &device->vk.properties);

   result = v3dv_wsi_init(device);
   if (result != VK_SUCCESS) {
      vk_error(instance, result);
      goto fail;
   }

   mtx_init(&device->mutex, mtx_plain);

   list_addtail(&device->vk.link, &instance->vk.physical_devices.list);

   return VK_SUCCESS;

fail:
   vk_physical_device_finish(&device->vk);
   vk_free(&instance->vk.alloc, device);

   if (render_fd >= 0)
      close(render_fd);
   if (display_fd >= 0)
      close(display_fd);

   return result;
}
1481
1482 /* This driver hook is expected to return VK_SUCCESS (unless a memory
1483 * allocation error happened) if no compatible device is found. If a
1484 * compatible device is found, it may return an error code if device
1485 * initialization failed.
1486 */
1487 static VkResult
1488 enumerate_devices(struct vk_instance *vk_instance)
1489 {
1490 struct v3dv_instance *instance =
1491 container_of(vk_instance, struct v3dv_instance, vk);
1492
1493 /* FIXME: Check for more devices? */
1494 drmDevicePtr devices[8];
1495 int max_devices;
1496
1497 max_devices = drmGetDevices2(0, devices, ARRAY_SIZE(devices));
1498 if (max_devices < 1)
1499 return VK_SUCCESS;
1500
1501 VkResult result = VK_SUCCESS;
1502
1503 #if !USE_V3D_SIMULATOR
1504 int32_t v3d_idx = -1;
1505 int32_t vc4_idx = -1;
1506 #endif
1507 for (unsigned i = 0; i < (unsigned)max_devices; i++) {
1508 #if USE_V3D_SIMULATOR
1509 /* In the simulator, we look for an Intel/AMD render node */
1510 const int required_nodes = (1 << DRM_NODE_RENDER) | (1 << DRM_NODE_PRIMARY);
1511 if ((devices[i]->available_nodes & required_nodes) == required_nodes &&
1512 devices[i]->bustype == DRM_BUS_PCI &&
1513 (devices[i]->deviceinfo.pci->vendor_id == 0x8086 ||
1514 devices[i]->deviceinfo.pci->vendor_id == 0x1002)) {
1515 result = create_physical_device(instance, devices[i], NULL);
1516 if (result == VK_SUCCESS)
1517 break;
1518 }
1519 #else
1520 /* On actual hardware, we should have a gpu device (v3d) and a display
1521 * device (vc4). We will need to use the display device to allocate WSI
1522 * buffers and share them with the render node via prime, but that is a
1523 * privileged operation so we need to have an authenticated display fd,
1524 * and for that we need the display server to provide it (via DRI3),
1525 * so here we only check that the device is present but we don't try to
1526 * open it.
1527 */
1528 if (devices[i]->bustype != DRM_BUS_PLATFORM)
1529 continue;
1530
1531 if (devices[i]->available_nodes & 1 << DRM_NODE_RENDER) {
1532 char **compat = devices[i]->deviceinfo.platform->compatible;
1533 while (*compat) {
1534 if (strncmp(*compat, "brcm,2711-v3d", 13) == 0 ||
1535 strncmp(*compat, "brcm,2712-v3d", 13) == 0) {
1536 v3d_idx = i;
1537 break;
1538 }
1539 compat++;
1540 }
1541 } else if (devices[i]->available_nodes & 1 << DRM_NODE_PRIMARY) {
1542 char **compat = devices[i]->deviceinfo.platform->compatible;
1543 while (*compat) {
1544 if (strncmp(*compat, "brcm,bcm2712-vc6", 16) == 0 ||
1545 strncmp(*compat, "brcm,bcm2711-vc5", 16) == 0 ||
1546 strncmp(*compat, "brcm,bcm2835-vc4", 16) == 0) {
1547 vc4_idx = i;
1548 break;
1549 }
1550 compat++;
1551 }
1552 }
1553 #endif
1554 }
1555
1556 #if !USE_V3D_SIMULATOR
1557 if (v3d_idx != -1) {
1558 drmDevicePtr v3d_device = devices[v3d_idx];
1559 drmDevicePtr vc4_device = vc4_idx != -1 ? devices[vc4_idx] : NULL;
1560 result = create_physical_device(instance, v3d_device, vc4_device);
1561 }
1562 #endif
1563
1564 drmFreeDevices(devices, max_devices);
1565
1566 return result;
1567 }
1568
1569 uint32_t
1570 v3dv_physical_device_vendor_id(const struct v3dv_physical_device *dev)
1571 {
1572 return 0x14E4; /* Broadcom */
1573 }
1574
1575 uint32_t
1576 v3dv_physical_device_device_id(const struct v3dv_physical_device *dev)
1577 {
1578 #if USE_V3D_SIMULATOR
1579 return dev->device_id;
1580 #else
1581 switch (dev->devinfo.ver) {
1582 case 42:
1583 return 0xBE485FD3; /* Broadcom deviceID for 2711 */
1584 case 71:
1585 return 0x55701C33; /* Broadcom deviceID for 2712 */
1586 default:
1587 unreachable("Unsupported V3D version");
1588 }
1589 #endif
1590 }
1591
1592 /* We support exactly one queue family. */
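/* Note that while we expose a single Vulkan queue, internally jobs are
 * dispatched to the different hardware queues tracked by V3DV_QUEUE_COUNT
 * (e.g. CL, TFU, CSD).
 */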
1593 static const VkQueueFamilyProperties
1594 v3dv_queue_family_properties = {
1595 .queueFlags = VK_QUEUE_GRAPHICS_BIT |
1596 VK_QUEUE_COMPUTE_BIT |
1597 VK_QUEUE_TRANSFER_BIT,
1598 .queueCount = 1,
1599 .timestampValidBits = 64,
1600 .minImageTransferGranularity = { 1, 1, 1 },
1601 };
1602
1603 VKAPI_ATTR void VKAPI_CALL
1604 v3dv_GetPhysicalDeviceQueueFamilyProperties2(VkPhysicalDevice physicalDevice,
1605 uint32_t *pQueueFamilyPropertyCount,
1606 VkQueueFamilyProperties2 *pQueueFamilyProperties)
1607 {
1608 VK_OUTARRAY_MAKE_TYPED(VkQueueFamilyProperties2, out,
1609 pQueueFamilyProperties, pQueueFamilyPropertyCount);
1610
1611 vk_outarray_append_typed(VkQueueFamilyProperties2, &out, p) {
1612 p->queueFamilyProperties = v3dv_queue_family_properties;
1613
1614 vk_foreach_struct(s, p->pNext) {
1615 vk_debug_ignored_stype(s->sType);
1616 }
1617 }
1618 }
1619
1620 VKAPI_ATTR void VKAPI_CALL
1621 v3dv_GetPhysicalDeviceMemoryProperties(VkPhysicalDevice physicalDevice,
1622 VkPhysicalDeviceMemoryProperties *pMemoryProperties)
1623 {
1624 V3DV_FROM_HANDLE(v3dv_physical_device, device, physicalDevice);
1625 *pMemoryProperties = device->memory;
1626 }
1627
1628 VKAPI_ATTR void VKAPI_CALL
1629 v3dv_GetPhysicalDeviceMemoryProperties2(VkPhysicalDevice physicalDevice,
1630 VkPhysicalDeviceMemoryProperties2 *pMemoryProperties)
1631 {
1632 V3DV_FROM_HANDLE(v3dv_physical_device, device, physicalDevice);
1633
1634 v3dv_GetPhysicalDeviceMemoryProperties(physicalDevice,
1635 &pMemoryProperties->memoryProperties);
1636
1637 vk_foreach_struct(ext, pMemoryProperties->pNext) {
1638 switch (ext->sType) {
1639 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT: {
1640 VkPhysicalDeviceMemoryBudgetPropertiesEXT *p =
1641 (VkPhysicalDeviceMemoryBudgetPropertiesEXT *) ext;
1642 p->heapUsage[0] = device->heap_used;
1643 p->heapBudget[0] = compute_memory_budget(device);
1644
1645 /* The heapBudget and heapUsage values must be zero for array elements
1646 * greater than or equal to VkPhysicalDeviceMemoryProperties::memoryHeapCount
1647 */
1648 for (unsigned i = 1; i < VK_MAX_MEMORY_HEAPS; i++) {
1649 p->heapBudget[i] = 0u;
1650 p->heapUsage[i] = 0u;
1651 }
1652 break;
1653 }
1654 default:
1655 vk_debug_ignored_stype(ext->sType);
1656 break;
1657 }
1658 }
1659 }
1660
1661 VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL
1662 v3dv_GetInstanceProcAddr(VkInstance _instance,
1663 const char *pName)
1664 {
1665 V3DV_FROM_HANDLE(v3dv_instance, instance, _instance);
1666 return vk_instance_get_proc_addr(instance ? &instance->vk : NULL,
1667 &v3dv_instance_entrypoints,
1668 pName);
1669 }
1670
1671 /* With version 1+ of the loader interface the ICD should expose
1672 * vk_icdGetInstanceProcAddr to work around certain LD_PRELOAD issues seen in apps.
1673 */
1674 PUBLIC
1675 VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL
1676 vk_icdGetInstanceProcAddr(VkInstance instance,
1677 const char* pName)
1678 {
1679 return v3dv_GetInstanceProcAddr(instance, pName);
1680 }
1681
1682 VKAPI_ATTR VkResult VKAPI_CALL
1683 v3dv_EnumerateInstanceLayerProperties(uint32_t *pPropertyCount,
1684 VkLayerProperties *pProperties)
1685 {
1686 if (pProperties == NULL) {
1687 *pPropertyCount = 0;
1688 return VK_SUCCESS;
1689 }
1690
1691 return vk_error(NULL, VK_ERROR_LAYER_NOT_PRESENT);
1692 }
1693
1694 VKAPI_ATTR VkResult VKAPI_CALL
1695 v3dv_EnumerateDeviceLayerProperties(VkPhysicalDevice physicalDevice,
1696 uint32_t *pPropertyCount,
1697 VkLayerProperties *pProperties)
1698 {
1699 V3DV_FROM_HANDLE(v3dv_physical_device, physical_device, physicalDevice);
1700
1701 if (pProperties == NULL) {
1702 *pPropertyCount = 0;
1703 return VK_SUCCESS;
1704 }
1705
1706 return vk_error(physical_device, VK_ERROR_LAYER_NOT_PRESENT);
1707 }
1708
1709 static void
1710 destroy_queue_syncs(struct v3dv_queue *queue)
1711 {
1712 for (int i = 0; i < V3DV_QUEUE_COUNT; i++) {
1713 if (queue->last_job_syncs.syncs[i]) {
1714 drmSyncobjDestroy(queue->device->pdevice->render_fd,
1715 queue->last_job_syncs.syncs[i]);
1716 }
1717 }
1718 }
1719
1720 static VkResult
1721 queue_init(struct v3dv_device *device, struct v3dv_queue *queue,
1722 const VkDeviceQueueCreateInfo *create_info,
1723 uint32_t index_in_family)
1724 {
1725 VkResult result = vk_queue_init(&queue->vk, &device->vk, create_info,
1726 index_in_family);
1727 if (result != VK_SUCCESS)
1728 return result;
1729
1730 result = vk_queue_enable_submit_thread(&queue->vk);
1731 if (result != VK_SUCCESS)
1732 goto fail_submit_thread;
1733
1734 queue->device = device;
1735 queue->vk.driver_submit = v3dv_queue_driver_submit;
1736
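/* One syncobj per hardware queue tracks the last job submitted to it. We
 * create them already signaled so that waiting on a queue that has not
 * submitted anything yet completes immediately.
 */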
1737 for (int i = 0; i < V3DV_QUEUE_COUNT; i++) {
1738 queue->last_job_syncs.first[i] = true;
1739 int ret = drmSyncobjCreate(device->pdevice->render_fd,
1740 DRM_SYNCOBJ_CREATE_SIGNALED,
1741 &queue->last_job_syncs.syncs[i]);
1742 if (ret) {
1743 result = vk_errorf(device, VK_ERROR_INITIALIZATION_FAILED,
1744 "syncobj create failed: %m");
1745 goto fail_last_job_syncs;
1746 }
1747 }
1748
1749 queue->noop_job = NULL;
1750 return VK_SUCCESS;
1751
1752 fail_last_job_syncs:
1753 destroy_queue_syncs(queue);
1754 fail_submit_thread:
1755 vk_queue_finish(&queue->vk);
1756 return result;
1757 }
1758
1759 static void
1760 queue_finish(struct v3dv_queue *queue)
1761 {
1762 if (queue->noop_job)
1763 v3dv_job_destroy(queue->noop_job);
1764 destroy_queue_syncs(queue);
1765 vk_queue_finish(&queue->vk);
1766 }
1767
1768 static void
1769 init_device_meta(struct v3dv_device *device)
1770 {
1771 mtx_init(&device->meta.mtx, mtx_plain);
1772 v3dv_meta_clear_init(device);
1773 v3dv_meta_blit_init(device);
1774 v3dv_meta_texel_buffer_copy_init(device);
1775 }
1776
1777 static void
1778 destroy_device_meta(struct v3dv_device *device)
1779 {
1780 mtx_destroy(&device->meta.mtx);
1781 v3dv_meta_clear_finish(device);
1782 v3dv_meta_blit_finish(device);
1783 v3dv_meta_texel_buffer_copy_finish(device);
1784 }
1785
1786 VKAPI_ATTR VkResult VKAPI_CALL
1787 v3dv_CreateDevice(VkPhysicalDevice physicalDevice,
1788 const VkDeviceCreateInfo *pCreateInfo,
1789 const VkAllocationCallbacks *pAllocator,
1790 VkDevice *pDevice)
1791 {
1792 V3DV_FROM_HANDLE(v3dv_physical_device, physical_device, physicalDevice);
1793 struct v3dv_instance *instance = (struct v3dv_instance*) physical_device->vk.instance;
1794 VkResult result;
1795 struct v3dv_device *device;
1796
1797 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO);
1798
1799 /* Check requested queues (we only expose one queue) */
1800 assert(pCreateInfo->queueCreateInfoCount == 1);
1801 for (uint32_t i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {
1802 assert(pCreateInfo->pQueueCreateInfos[i].queueFamilyIndex == 0);
1803 assert(pCreateInfo->pQueueCreateInfos[i].queueCount == 1);
1804 if (pCreateInfo->pQueueCreateInfos[i].flags != 0)
1805 return vk_error(instance, VK_ERROR_INITIALIZATION_FAILED);
1806 }
1807
1808 device = vk_zalloc2(&physical_device->vk.instance->alloc, pAllocator,
1809 sizeof(*device), 8,
1810 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
1811 if (!device)
1812 return vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY);
1813
1814 struct vk_device_dispatch_table dispatch_table;
1815 vk_device_dispatch_table_from_entrypoints(&dispatch_table,
1816 &v3dv_device_entrypoints, true);
1817 vk_device_dispatch_table_from_entrypoints(&dispatch_table,
1818 &wsi_device_entrypoints, false);
1819 result = vk_device_init(&device->vk, &physical_device->vk,
1820 &dispatch_table, pCreateInfo, pAllocator);
1821 if (result != VK_SUCCESS) {
1822 vk_free(&device->vk.alloc, device);
1823 return vk_error(NULL, result);
1824 }
1825
1826 device->instance = instance;
1827 device->pdevice = physical_device;
1828
1829 mtx_init(&device->query_mutex, mtx_plain);
1830 cnd_init(&device->query_ended);
1831
1832 device->vk.command_buffer_ops = &v3dv_cmd_buffer_ops;
1833
1834 vk_device_set_drm_fd(&device->vk, physical_device->render_fd);
1835 vk_device_enable_threaded_submit(&device->vk);
1836
1837 result = queue_init(device, &device->queue,
1838 pCreateInfo->pQueueCreateInfos, 0);
1839 if (result != VK_SUCCESS)
1840 goto fail;
1841
1842 device->devinfo = physical_device->devinfo;
1843
1844 if (device->vk.enabled_features.robustBufferAccess)
1845 perf_debug("Device created with Robust Buffer Access enabled.\n");
1846
1847 if (device->vk.enabled_features.robustImageAccess)
1848 perf_debug("Device created with Robust Image Access enabled.\n");
1849
1851 #if MESA_DEBUG
1852 v3dv_X(device, device_check_prepacked_sizes)();
1853 #endif
1854 init_device_meta(device);
1855 v3dv_bo_cache_init(device);
1856 v3dv_pipeline_cache_init(&device->default_pipeline_cache, device, 0,
1857 device->instance->default_pipeline_cache_enabled);
1858 device->default_attribute_float =
1859 v3dv_X(device, create_default_attribute_values)(device, NULL);
1860
1861 device->device_address_mem_ctx = ralloc_context(NULL);
1862 util_dynarray_init(&device->device_address_bo_list,
1863 device->device_address_mem_ctx);
1864
1865 mtx_init(&device->events.lock, mtx_plain);
1866 result = v3dv_event_allocate_resources(device);
1867 if (result != VK_SUCCESS)
1868 goto fail;
1869
1870 if (list_is_empty(&device->events.free_list)) {
1871 result = vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
1872 goto fail;
1873 }
1874
1875 result = v3dv_query_allocate_resources(device);
1876 if (result != VK_SUCCESS)
1877 goto fail;
1878
1879 *pDevice = v3dv_device_to_handle(device);
1880
1881 return VK_SUCCESS;
1882
1883 fail:
1884 cnd_destroy(&device->query_ended);
1885 mtx_destroy(&device->query_mutex);
1886 queue_finish(&device->queue);
1887 destroy_device_meta(device);
1888 v3dv_pipeline_cache_finish(&device->default_pipeline_cache);
1889 v3dv_event_free_resources(device);
1890 v3dv_query_free_resources(device);
1891 vk_device_finish(&device->vk);
1892 vk_free(&device->vk.alloc, device);
1893
1894 return result;
1895 }
1896
1897 VKAPI_ATTR void VKAPI_CALL
1898 v3dv_DestroyDevice(VkDevice _device,
1899 const VkAllocationCallbacks *pAllocator)
1900 {
1901 V3DV_FROM_HANDLE(v3dv_device, device, _device);
1902
1903 device->vk.dispatch_table.DeviceWaitIdle(_device);
1904 queue_finish(&device->queue);
1905
1906 v3dv_event_free_resources(device);
1907 mtx_destroy(&device->events.lock);
1908
1909 v3dv_query_free_resources(device);
1910
1911 destroy_device_meta(device);
1912 v3dv_pipeline_cache_finish(&device->default_pipeline_cache);
1913
1914 if (device->default_attribute_float) {
1915 v3dv_bo_free(device, device->default_attribute_float);
1916 device->default_attribute_float = NULL;
1917 }
1918
1919 ralloc_free(device->device_address_mem_ctx);
1920
1921 /* The BO cache must be destroyed last, as any other object could be
1922 * freeing its private BOs.
1923 */
1924 v3dv_bo_cache_destroy(device);
1925
1926 cnd_destroy(&device->query_ended);
1927 mtx_destroy(&device->query_mutex);
1928
1929 vk_device_finish(&device->vk);
1930 vk_free2(&device->vk.alloc, pAllocator, device);
1931 }
1932
1933 static VkResult
1934 device_alloc(struct v3dv_device *device,
1935 struct v3dv_device_memory *mem,
1936 VkDeviceSize size)
1937 {
1938 /* Our kernel interface is 32-bit */
1939 assert(size <= UINT32_MAX);
1940
1941 mem->bo = v3dv_bo_alloc(device, size, "device_alloc", false);
1942 if (!mem->bo)
1943 return VK_ERROR_OUT_OF_DEVICE_MEMORY;
1944
1945 return VK_SUCCESS;
1946 }
1947
1948 static void
1949 device_free_wsi_dumb(int32_t display_fd, int32_t dumb_handle)
1950 {
1951 assert(display_fd != -1);
1952 if (dumb_handle < 0)
1953 return;
1954
1955 struct drm_mode_destroy_dumb destroy_dumb = {
1956 .handle = dumb_handle,
1957 };
1958 if (v3dv_ioctl(display_fd, DRM_IOCTL_MODE_DESTROY_DUMB, &destroy_dumb)) {
1959 fprintf(stderr, "destroy dumb object %d: %s\n", dumb_handle, strerror(errno));
1960 }
1961 }
1962
1963 static void
1964 device_free(struct v3dv_device *device, struct v3dv_device_memory *mem)
1965 {
1966 /* If this memory allocation was for WSI, then we need to use the
1967 * display device to free the allocated dumb BO.
1968 */
1969 if (mem->is_for_wsi) {
1970 device_free_wsi_dumb(device->pdevice->display_fd, mem->bo->dumb_handle);
1971 }
1972
1973 p_atomic_add(&device->pdevice->heap_used, -((int64_t)mem->bo->size));
1974
1975 v3dv_bo_free(device, mem->bo);
1976 }
1977
1978 static void
1979 device_unmap(struct v3dv_device *device, struct v3dv_device_memory *mem)
1980 {
1981 assert(mem && mem->bo->map && mem->bo->map_size > 0);
1982 v3dv_bo_unmap(device, mem->bo);
1983 }
1984
1985 static VkResult
1986 device_map(struct v3dv_device *device, struct v3dv_device_memory *mem)
1987 {
1988 assert(mem && mem->bo);
1989
1990 /* From the spec:
1991 *
1992 * "After a successful call to vkMapMemory the memory object memory is
1993 * considered to be currently host mapped. It is an application error to
1994 * call vkMapMemory on a memory object that is already host mapped."
1995 *
1996 * We are not concerned with this ourselves (validation layers should
1997 * catch these errors and warn users), however, the driver may internally
1998 * map things (for example for debug CLIF dumps or some CPU-side operations)
1999 * so by the time the user calls here the buffer might already be mapped
2000 * internally by the driver.
2001 */
2002 if (mem->bo->map) {
2003 assert(mem->bo->map_size == mem->bo->size);
2004 return VK_SUCCESS;
2005 }
2006
2007 bool ok = v3dv_bo_map(device, mem->bo, mem->bo->size);
2008 if (!ok)
2009 return VK_ERROR_MEMORY_MAP_FAILED;
2010
2011 return VK_SUCCESS;
2012 }
2013
2014 static VkResult
2015 device_import_bo(struct v3dv_device *device,
2016 const VkAllocationCallbacks *pAllocator,
2017 int fd, uint64_t size,
2018 struct v3dv_bo **bo)
2019 {
2020 *bo = NULL;
2021
2022 off_t real_size = lseek(fd, 0, SEEK_END);
2023 lseek(fd, 0, SEEK_SET);
2024 if (real_size < 0 || (uint64_t) real_size < size)
2025 return VK_ERROR_INVALID_EXTERNAL_HANDLE;
2026
2027 int render_fd = device->pdevice->render_fd;
2028 assert(render_fd >= 0);
2029
2030 int ret;
2031 uint32_t handle;
2032 ret = drmPrimeFDToHandle(render_fd, fd, &handle);
2033 if (ret)
2034 return VK_ERROR_INVALID_EXTERNAL_HANDLE;
2035
2036 struct drm_v3d_get_bo_offset get_offset = {
2037 .handle = handle,
2038 };
2039 ret = v3dv_ioctl(render_fd, DRM_IOCTL_V3D_GET_BO_OFFSET, &get_offset);
2040 if (ret)
2041 return VK_ERROR_INVALID_EXTERNAL_HANDLE;
2042 assert(get_offset.offset != 0);
2043
2044 *bo = v3dv_device_lookup_bo(device->pdevice, handle);
2045 assert(*bo);
2046
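/* The prime FD may resolve to a BO we have already imported: initialize it
 * on first import (refcnt == 0), otherwise just take another reference.
 */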
2047 if ((*bo)->refcnt == 0)
2048 v3dv_bo_init_import(*bo, handle, size, get_offset.offset, false);
2049 else
2050 p_atomic_inc(&(*bo)->refcnt);
2051
2052 return VK_SUCCESS;
2053 }
2054
2055 static VkResult
2056 device_alloc_for_wsi(struct v3dv_device *device,
2057 const VkAllocationCallbacks *pAllocator,
2058 struct v3dv_device_memory *mem,
2059 VkDeviceSize size)
2060 {
2061 /* In the simulator we can get away with a regular allocation since both
2062 * allocation and rendering happen in the same DRM render node. On actual
2063 * hardware we need to allocate our winsys BOs on the vc4 display device
2064 * and import them into v3d.
2065 */
2066 #if USE_V3D_SIMULATOR
2067 return device_alloc(device, mem, size);
2068 #else
2069 VkResult result;
2070 struct v3dv_physical_device *pdevice = device->pdevice;
2071 assert(pdevice->display_fd != -1);
2072
2073 mem->is_for_wsi = true;
2074
2075 int display_fd = pdevice->display_fd;
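/* Dumb BOs are allocated as width x height x bpp. With a width of 1024
 * pixels at 32bpp each row is exactly 4096 bytes (one page), so the height
 * is simply the number of pages required for the requested size.
 */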
2076 struct drm_mode_create_dumb create_dumb = {
2077 .width = 1024, /* one page */
2078 .height = align(size, 4096) / 4096,
2079 .bpp = util_format_get_blocksizebits(PIPE_FORMAT_RGBA8888_UNORM),
2080 };
2081
2082 int err;
2083 err = v3dv_ioctl(display_fd, DRM_IOCTL_MODE_CREATE_DUMB, &create_dumb);
2084 if (err < 0)
2085 goto fail_create;
2086
2087 int fd;
2088 err =
2089 drmPrimeHandleToFD(display_fd, create_dumb.handle, O_CLOEXEC, &fd);
2090 if (err < 0)
2091 goto fail_export;
2092
2093 result = device_import_bo(device, pAllocator, fd, size, &mem->bo);
2094 close(fd);
2095 if (result != VK_SUCCESS)
2096 goto fail_import;
2097
2098 mem->bo->dumb_handle = create_dumb.handle;
2099 return VK_SUCCESS;
2100
2101 fail_import:
2102 fail_export:
2103 device_free_wsi_dumb(display_fd, create_dumb.handle);
2104
2105 fail_create:
2106 return VK_ERROR_OUT_OF_DEVICE_MEMORY;
2107 #endif
2108 }
2109
2110 static void
2111 device_add_device_address_bo(struct v3dv_device *device,
2112 struct v3dv_bo *bo)
2113 {
2114 util_dynarray_append(&device->device_address_bo_list,
2115 struct v3dv_bo *,
2116 bo);
2117 }
2118
2119 static void
2120 device_remove_device_address_bo(struct v3dv_device *device,
2121 struct v3dv_bo *bo)
2122 {
2123 util_dynarray_delete_unordered(&device->device_address_bo_list,
2124 struct v3dv_bo *,
2125 bo);
2126 }
2127
2128 static void
2129 free_memory(struct v3dv_device *device,
2130 struct v3dv_device_memory *mem,
2131 const VkAllocationCallbacks *pAllocator)
2132 {
2133 if (mem == NULL)
2134 return;
2135
2136 if (mem->bo->map)
2137 device_unmap(device, mem);
2138
2139 if (mem->is_for_device_address)
2140 device_remove_device_address_bo(device, mem->bo);
2141
2142 device_free(device, mem);
2143
2144 vk_device_memory_destroy(&device->vk, pAllocator, &mem->vk);
2145 }
2146
2147 VKAPI_ATTR void VKAPI_CALL
2148 v3dv_FreeMemory(VkDevice _device,
2149 VkDeviceMemory _mem,
2150 const VkAllocationCallbacks *pAllocator)
2151 {
2152 V3DV_FROM_HANDLE(v3dv_device, device, _device);
2153 V3DV_FROM_HANDLE(v3dv_device_memory, mem, _mem);
2154 free_memory(device, mem, pAllocator);
2155 }
2156
2157 VKAPI_ATTR VkResult VKAPI_CALL
2158 v3dv_AllocateMemory(VkDevice _device,
2159 const VkMemoryAllocateInfo *pAllocateInfo,
2160 const VkAllocationCallbacks *pAllocator,
2161 VkDeviceMemory *pMem)
2162 {
2163 V3DV_FROM_HANDLE(v3dv_device, device, _device);
2164 struct v3dv_device_memory *mem;
2165 struct v3dv_physical_device *pdevice = device->pdevice;
2166
2167 assert(pAllocateInfo->sType == VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO);
2168
2169 /* We always allocate device memory in multiples of a page, so round up
2170 * requested size to that.
2171 */
2172 const VkDeviceSize alloc_size = align64(pAllocateInfo->allocationSize, 4096);
2173
2174 if (unlikely(alloc_size > MAX_MEMORY_ALLOCATION_SIZE))
2175 return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
2176
2177 uint64_t heap_used = p_atomic_read(&pdevice->heap_used);
2178 if (unlikely(heap_used + alloc_size > pdevice->memory.memoryHeaps[0].size))
2179 return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
2180
2181 mem = vk_device_memory_create(&device->vk, pAllocateInfo,
2182 pAllocator, sizeof(*mem));
2183 if (mem == NULL)
2184 return vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);
2185
2186 assert(pAllocateInfo->memoryTypeIndex < pdevice->memory.memoryTypeCount);
2187 mem->type = &pdevice->memory.memoryTypes[pAllocateInfo->memoryTypeIndex];
2188 mem->is_for_wsi = false;
2189
2190 const struct wsi_memory_allocate_info *wsi_info = NULL;
2191 const VkImportMemoryFdInfoKHR *fd_info = NULL;
2192 const VkMemoryAllocateFlagsInfo *flags_info = NULL;
2193 vk_foreach_struct_const(ext, pAllocateInfo->pNext) {
2194 switch ((unsigned)ext->sType) {
2195 case VK_STRUCTURE_TYPE_WSI_MEMORY_ALLOCATE_INFO_MESA:
2196 wsi_info = (void *)ext;
2197 break;
2198 case VK_STRUCTURE_TYPE_IMPORT_MEMORY_FD_INFO_KHR:
2199 fd_info = (void *)ext;
2200 break;
2201 case VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO:
2202 flags_info = (void *)ext;
2203 break;
2204 case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO:
2205 /* We don't have particular optimizations associated with memory
2206 * allocations that won't be suballocated to multiple resources.
2207 */
2208 break;
2209 case VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO:
2210 /* The mask of handle types specified here must be supported
2211 * according to VkExternalImageFormatProperties, so it must be
2212 * fd or dmabuf, which don't have special requirements for us.
2213 */
2214 break;
2215 default:
2216 vk_debug_ignored_stype(ext->sType);
2217 break;
2218 }
2219 }
2220
2221 VkResult result;
2222
2223 if (wsi_info) {
2224 result = device_alloc_for_wsi(device, pAllocator, mem, alloc_size);
2225 } else if (fd_info && fd_info->handleType) {
2226 assert(fd_info->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT ||
2227 fd_info->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
2228 result = device_import_bo(device, pAllocator,
2229 fd_info->fd, alloc_size, &mem->bo);
2230 if (result == VK_SUCCESS)
2231 close(fd_info->fd);
2232 } else if (mem->vk.ahardware_buffer) {
2233 #if DETECT_OS_ANDROID
2234 const native_handle_t *handle = AHardwareBuffer_getNativeHandle(mem->vk.ahardware_buffer);
2235 assert(handle->numFds > 0);
2236 size_t size = lseek(handle->data[0], 0, SEEK_END);
2237 result = device_import_bo(device, pAllocator,
2238 handle->data[0], size, &mem->bo);
2239 #else
2240 result = VK_ERROR_FEATURE_NOT_PRESENT;
2241 #endif
2242 } else {
2243 result = device_alloc(device, mem, alloc_size);
2244 }
2245
2246 if (result != VK_SUCCESS) {
2247 vk_device_memory_destroy(&device->vk, pAllocator, &mem->vk);
2248 return vk_error(device, result);
2249 }
2250
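/* Account for the allocation and re-check the heap limit: the check before
 * allocating is only a fast path and can race with concurrent allocations.
 */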
2251 heap_used = p_atomic_add_return(&pdevice->heap_used, mem->bo->size);
2252 if (heap_used > pdevice->memory.memoryHeaps[0].size) {
2253 free_memory(device, mem, pAllocator);
2254 return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
2255 }
2256
2257 /* If this memory can be used via VK_KHR_buffer_device_address then we
2258 * will need to manually add the BO to any job submit that makes use of
2259 * VK_KHR_buffer_device_address, since such jobs may produce buffer
2260 * load/store operations that may access any buffer memory allocated with
2261 * this flag and we don't have any means to tell which buffers will be
2262 * accessed through this mechanism since they don't even have to be bound
2263 * through descriptor state.
2264 */
2265 if (flags_info &&
2266 (flags_info->flags & VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT)) {
2267 mem->is_for_device_address = true;
2268 device_add_device_address_bo(device, mem->bo);
2269 }
2270
2271 *pMem = v3dv_device_memory_to_handle(mem);
2272 return result;
2273 }
2274
2275 VKAPI_ATTR VkResult VKAPI_CALL
2276 v3dv_MapMemory(VkDevice _device,
2277 VkDeviceMemory _memory,
2278 VkDeviceSize offset,
2279 VkDeviceSize size,
2280 VkMemoryMapFlags flags,
2281 void **ppData)
2282 {
2283 V3DV_FROM_HANDLE(v3dv_device, device, _device);
2284 V3DV_FROM_HANDLE(v3dv_device_memory, mem, _memory);
2285
2286 if (mem == NULL) {
2287 *ppData = NULL;
2288 return VK_SUCCESS;
2289 }
2290
2291 assert(offset < mem->bo->size);
2292
2293 /* Since the driver can also map BOs internally, and the ranges required
2294 * by the user and the driver might not be the same, we always map
2295 * the entire BO and then add the requested offset to the start address
2296 * of the mapped region.
2297 */
2298 VkResult result = device_map(device, mem);
2299 if (result != VK_SUCCESS)
2300 return vk_error(device, result);
2301
2302 *ppData = ((uint8_t *) mem->bo->map) + offset;
2303 return VK_SUCCESS;
2304 }
2305
2306 VKAPI_ATTR void VKAPI_CALL
2307 v3dv_UnmapMemory(VkDevice _device,
2308 VkDeviceMemory _memory)
2309 {
2310 V3DV_FROM_HANDLE(v3dv_device, device, _device);
2311 V3DV_FROM_HANDLE(v3dv_device_memory, mem, _memory);
2312
2313 if (mem == NULL)
2314 return;
2315
2316 device_unmap(device, mem);
2317 }
2318
2319 VKAPI_ATTR VkResult VKAPI_CALL
2320 v3dv_FlushMappedMemoryRanges(VkDevice _device,
2321 uint32_t memoryRangeCount,
2322 const VkMappedMemoryRange *pMemoryRanges)
2323 {
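/* Our only memory type is host-coherent, so mapped ranges never need
 * explicit flushing.
 */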
2324 return VK_SUCCESS;
2325 }
2326
2327 VKAPI_ATTR VkResult VKAPI_CALL
2328 v3dv_InvalidateMappedMemoryRanges(VkDevice _device,
2329 uint32_t memoryRangeCount,
2330 const VkMappedMemoryRange *pMemoryRanges)
2331 {
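/* Likewise, with host-coherent memory there is nothing to invalidate. */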
2332 return VK_SUCCESS;
2333 }
2334
2335 static void
2336 get_image_memory_requirements(struct v3dv_image *image,
2337 VkImageAspectFlagBits planeAspect,
2338 VkMemoryRequirements2 *pMemoryRequirements)
2339 {
2340 pMemoryRequirements->memoryRequirements = (VkMemoryRequirements) {
2341 .memoryTypeBits = 0x1,
2342 .alignment = image->planes[0].alignment,
2343 .size = image->non_disjoint_size
2344 };
2345
2346 if (planeAspect != VK_IMAGE_ASPECT_NONE) {
2347 assert(image->format->plane_count > 1);
2348 /* Disjoint images should have a 0 non_disjoint_size */
2349 assert(!pMemoryRequirements->memoryRequirements.size);
2350
2351 uint8_t plane = v3dv_image_aspect_to_plane(image, planeAspect);
2352
2353 VkMemoryRequirements *mem_reqs =
2354 &pMemoryRequirements->memoryRequirements;
2355 mem_reqs->alignment = image->planes[plane].alignment;
2356 mem_reqs->size = image->planes[plane].size;
2357 }
2358
2359 vk_foreach_struct(ext, pMemoryRequirements->pNext) {
2360 switch (ext->sType) {
2361 case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS: {
2362 VkMemoryDedicatedRequirements *req =
2363 (VkMemoryDedicatedRequirements *) ext;
2364 req->requiresDedicatedAllocation = image->vk.external_handle_types != 0;
2365 req->prefersDedicatedAllocation = image->vk.external_handle_types != 0;
2366 break;
2367 }
2368 default:
2369 vk_debug_ignored_stype(ext->sType);
2370 break;
2371 }
2372 }
2373 }
2374
2375 VKAPI_ATTR void VKAPI_CALL
2376 v3dv_GetImageMemoryRequirements2(VkDevice device,
2377 const VkImageMemoryRequirementsInfo2 *pInfo,
2378 VkMemoryRequirements2 *pMemoryRequirements)
2379 {
2380 V3DV_FROM_HANDLE(v3dv_image, image, pInfo->image);
2381
2382 VkImageAspectFlagBits planeAspect = VK_IMAGE_ASPECT_NONE;
2383 vk_foreach_struct_const(ext, pInfo->pNext) {
2384 switch (ext->sType) {
2385 case VK_STRUCTURE_TYPE_IMAGE_PLANE_MEMORY_REQUIREMENTS_INFO: {
2386 VkImagePlaneMemoryRequirementsInfo *req =
2387 (VkImagePlaneMemoryRequirementsInfo *) ext;
2388 planeAspect = req->planeAspect;
2389 break;
2390 }
2391 default:
2392 vk_debug_ignored_stype(ext->sType);
2393 break;
2394 }
2395 }
2396
2397 get_image_memory_requirements(image, planeAspect, pMemoryRequirements);
2398 }
2399
2400 VKAPI_ATTR void VKAPI_CALL
2401 v3dv_GetDeviceImageMemoryRequirements(
2402 VkDevice _device,
2403 const VkDeviceImageMemoryRequirements *pInfo,
2404 VkMemoryRequirements2 *pMemoryRequirements)
2405 {
2406 V3DV_FROM_HANDLE(v3dv_device, device, _device);
2407
2408 struct v3dv_image image = { 0 };
2409 vk_image_init(&device->vk, &image.vk, pInfo->pCreateInfo);
2410
2411 ASSERTED VkResult result =
2412 v3dv_image_init(device, pInfo->pCreateInfo, NULL, &image);
2413 assert(result == VK_SUCCESS);
2414
2415 /* From VkDeviceImageMemoryRequirements spec:
2416 *
2417 * " planeAspect is a VkImageAspectFlagBits value specifying the aspect
2418 * corresponding to the image plane to query. This parameter is ignored
2419 * unless pCreateInfo::tiling is
2420 * VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT, or pCreateInfo::flags has
2421 * VK_IMAGE_CREATE_DISJOINT_BIT set"
2422 *
2423 * We need to explicitly ignore planeAspect in the other cases, or the
2424 * asserts below could be triggered.
2425 */
2426 VkImageAspectFlagBits planeAspect =
2427 pInfo->pCreateInfo->tiling == VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT ||
2428 pInfo->pCreateInfo->flags & VK_IMAGE_CREATE_DISJOINT_BIT ?
2429 pInfo->planeAspect : 0;
2430
2431 get_image_memory_requirements(&image, planeAspect, pMemoryRequirements);
2432 }
2433
2434 static void
2435 bind_image_memory(const VkBindImageMemoryInfo *info)
2436 {
2437 V3DV_FROM_HANDLE(v3dv_image, image, info->image);
2438 V3DV_FROM_HANDLE(v3dv_device_memory, mem, info->memory);
2439
2440 /* Valid usage:
2441 *
2442 * "memoryOffset must be an integer multiple of the alignment member of
2443 * the VkMemoryRequirements structure returned from a call to
2444 * vkGetImageMemoryRequirements with image"
2445 */
2446 assert(info->memoryOffset < mem->bo->size);
2447
2448 uint64_t offset = info->memoryOffset;
2449 if (image->non_disjoint_size) {
2450 /* We only check for plane 0 as it is the only one that actually starts
2451 * at that offset
2452 */
2453 assert(offset % image->planes[0].alignment == 0);
2454 for (uint8_t plane = 0; plane < image->plane_count; plane++) {
2455 image->planes[plane].mem = mem;
2456 image->planes[plane].mem_offset = offset;
2457 }
2458 } else {
2459 const VkBindImagePlaneMemoryInfo *plane_mem_info =
2460 vk_find_struct_const(info->pNext, BIND_IMAGE_PLANE_MEMORY_INFO);
2461 assert(plane_mem_info);
2462
2463 /*
2464 * From VkBindImagePlaneMemoryInfo spec:
2465 *
2466 * "If the image’s tiling is VK_IMAGE_TILING_LINEAR or
2467 * VK_IMAGE_TILING_OPTIMAL, then planeAspect must be a single valid
2468 * format plane for the image"
2469 *
2470 * <skip>
2471 *
2472 * "If the image’s tiling is VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT,
2473 * then planeAspect must be a single valid memory plane for the
2474 * image"
2475 *
2476 * So planeAspect should only refer to one plane.
2477 */
2478 uint8_t plane = v3dv_plane_from_aspect(plane_mem_info->planeAspect);
2479 assert(offset % image->planes[plane].alignment == 0);
2480 image->planes[plane].mem = mem;
2481 image->planes[plane].mem_offset = offset;
2482 }
2483 }
2484
2485 VKAPI_ATTR VkResult VKAPI_CALL
2486 v3dv_BindImageMemory2(VkDevice _device,
2487 uint32_t bindInfoCount,
2488 const VkBindImageMemoryInfo *pBindInfos)
2489 {
2490 for (uint32_t i = 0; i < bindInfoCount; i++) {
2491 /* This section is removed by the optimizer for non-ANDROID builds */
2492 V3DV_FROM_HANDLE(v3dv_image, image, pBindInfos[i].image);
2493 if (vk_image_is_android_hardware_buffer(&image->vk)) {
2494 V3DV_FROM_HANDLE(v3dv_device, device, _device);
2495 V3DV_FROM_HANDLE(v3dv_device_memory, mem, pBindInfos[i].memory);
2496
2497 VkImageDrmFormatModifierExplicitCreateInfoEXT eci;
2498 VkSubresourceLayout a_plane_layouts[V3DV_MAX_PLANE_COUNT];
2499 VkResult result = vk_android_get_ahb_layout(mem->vk.ahardware_buffer,
2500 &eci, a_plane_layouts,
2501 V3DV_MAX_PLANE_COUNT);
2502 if (result != VK_SUCCESS)
2503 return result;
2504
2505 result = v3dv_update_image_layout(device, image,
2506 eci.drmFormatModifier,
2507 /* disjoint = */ false, &eci);
2508 if (result != VK_SUCCESS)
2509 return result;
2510 }
2511
2512 const VkBindImageMemorySwapchainInfoKHR *swapchain_info =
2513 vk_find_struct_const(pBindInfos[i].pNext,
2514 BIND_IMAGE_MEMORY_SWAPCHAIN_INFO_KHR);
2515 if (swapchain_info && swapchain_info->swapchain) {
2516 #if !DETECT_OS_ANDROID
2517 struct v3dv_image *swapchain_image =
2518 v3dv_wsi_get_image_from_swapchain(swapchain_info->swapchain,
2519 swapchain_info->imageIndex);
2520 /* Making the assumption that swapchain images are a single plane */
2521 assert(swapchain_image->plane_count == 1);
2522 VkBindImageMemoryInfo swapchain_bind = {
2523 .sType = VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_INFO,
2524 .image = pBindInfos[i].image,
2525 .memory = v3dv_device_memory_to_handle(swapchain_image->planes[0].mem),
2526 .memoryOffset = swapchain_image->planes[0].mem_offset,
2527 };
2528 bind_image_memory(&swapchain_bind);
2529 #endif
2530 } else {
2532 bind_image_memory(&pBindInfos[i]);
2533 }
2534 }
2535
2536 return VK_SUCCESS;
2537 }
2538
2539 void
2540 v3dv_buffer_init(struct v3dv_device *device,
2541 const VkBufferCreateInfo *pCreateInfo,
2542 struct v3dv_buffer *buffer,
2543 uint32_t alignment)
2544 {
2545 const VkBufferUsageFlags2CreateInfoKHR *flags2 =
2546 vk_find_struct_const(pCreateInfo->pNext,
2547 BUFFER_USAGE_FLAGS_2_CREATE_INFO_KHR);
2548 VkBufferUsageFlags2KHR usage;
2549 if (flags2)
2550 usage = flags2->usage;
2551 else
2552 usage = pCreateInfo->usage;
2553
2554 buffer->size = pCreateInfo->size;
2555 buffer->usage = usage;
2556 buffer->alignment = alignment;
2557 }
2558
2559 static void
2560 get_buffer_memory_requirements(struct v3dv_buffer *buffer,
2561 VkMemoryRequirements2 *pMemoryRequirements)
2562 {
2563 pMemoryRequirements->memoryRequirements = (VkMemoryRequirements) {
2564 .memoryTypeBits = 0x1,
2565 .alignment = buffer->alignment,
2566 .size = align64(buffer->size, buffer->alignment),
2567 };
2568
2569 /* UBO and SSBO may be read using ldunifa, which prefetches the next
2570 * 4 bytes after a read. If the buffer's size is exactly a multiple
2571 * of a page size and the shader reads the last 4 bytes with ldunifa
2572 * the prefetching would read out of bounds and cause an MMU error,
2573 * so we allocate extra space to avoid kernel error spamming.
2574 */
2575 bool can_ldunifa = buffer->usage &
2576 (VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
2577 VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT);
2578 if (can_ldunifa && (buffer->size % 4096 == 0))
2579 pMemoryRequirements->memoryRequirements.size += buffer->alignment;
2580
2581 vk_foreach_struct(ext, pMemoryRequirements->pNext) {
2582 switch (ext->sType) {
2583 case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS: {
2584 VkMemoryDedicatedRequirements *req =
2585 (VkMemoryDedicatedRequirements *) ext;
2586 req->requiresDedicatedAllocation = false;
2587 req->prefersDedicatedAllocation = false;
2588 break;
2589 }
2590 default:
2591 vk_debug_ignored_stype(ext->sType);
2592 break;
2593 }
2594 }
2595 }
2596
2597 VKAPI_ATTR void VKAPI_CALL
2598 v3dv_GetBufferMemoryRequirements2(VkDevice device,
2599 const VkBufferMemoryRequirementsInfo2 *pInfo,
2600 VkMemoryRequirements2 *pMemoryRequirements)
2601 {
2602 V3DV_FROM_HANDLE(v3dv_buffer, buffer, pInfo->buffer);
2603 get_buffer_memory_requirements(buffer, pMemoryRequirements);
2604 }
2605
2606 VKAPI_ATTR void VKAPI_CALL
2607 v3dv_GetDeviceBufferMemoryRequirements(
2608 VkDevice _device,
2609 const VkDeviceBufferMemoryRequirements *pInfo,
2610 VkMemoryRequirements2 *pMemoryRequirements)
2611 {
2612 V3DV_FROM_HANDLE(v3dv_device, device, _device);
2613
2614 struct v3dv_buffer buffer = { 0 };
2615 v3dv_buffer_init(device, pInfo->pCreateInfo, &buffer, V3D_NON_COHERENT_ATOM_SIZE);
2616 get_buffer_memory_requirements(&buffer, pMemoryRequirements);
2617 }
2618
2619 void
2620 v3dv_buffer_bind_memory(const VkBindBufferMemoryInfo *info)
2621 {
2622 V3DV_FROM_HANDLE(v3dv_buffer, buffer, info->buffer);
2623 V3DV_FROM_HANDLE(v3dv_device_memory, mem, info->memory);
2624
2625 /* Valid usage:
2626 *
2627 * "memoryOffset must be an integer multiple of the alignment member of
2628 * the VkMemoryRequirements structure returned from a call to
2629 * vkGetBufferMemoryRequirements with buffer"
2630 */
2631 assert(info->memoryOffset % buffer->alignment == 0);
2632 assert(info->memoryOffset < mem->bo->size);
2633
2634 buffer->mem = mem;
2635 buffer->mem_offset = info->memoryOffset;
2636 }
2637
2638
2639 VKAPI_ATTR VkResult VKAPI_CALL
2640 v3dv_BindBufferMemory2(VkDevice device,
2641 uint32_t bindInfoCount,
2642 const VkBindBufferMemoryInfo *pBindInfos)
2643 {
2644 for (uint32_t i = 0; i < bindInfoCount; i++)
2645 v3dv_buffer_bind_memory(&pBindInfos[i]);
2646
2647 return VK_SUCCESS;
2648 }
2649
2650 VKAPI_ATTR VkResult VKAPI_CALL
2651 v3dv_CreateBuffer(VkDevice _device,
2652 const VkBufferCreateInfo *pCreateInfo,
2653 const VkAllocationCallbacks *pAllocator,
2654 VkBuffer *pBuffer)
2655 {
2656 V3DV_FROM_HANDLE(v3dv_device, device, _device);
2657 struct v3dv_buffer *buffer;
2658
2659 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO);
2660 assert(pCreateInfo->usage != 0);
2661
2662 /* We don't support any flags for now */
2663 assert(pCreateInfo->flags == 0);
2664
2665 buffer = vk_object_zalloc(&device->vk, pAllocator, sizeof(*buffer),
2666 VK_OBJECT_TYPE_BUFFER);
2667 if (buffer == NULL)
2668 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
2669
2670 v3dv_buffer_init(device, pCreateInfo, buffer, V3D_NON_COHERENT_ATOM_SIZE);
2671
2672 /* Limit allocations to 32-bit */
2673 const VkDeviceSize aligned_size = align64(buffer->size, buffer->alignment);
2674 if (aligned_size > UINT32_MAX || aligned_size < buffer->size) {
2675 vk_object_free(&device->vk, pAllocator, buffer);
2676 return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
2677 }
2678
2679 *pBuffer = v3dv_buffer_to_handle(buffer);
2680
2681 return VK_SUCCESS;
2682 }
2683
2684 VKAPI_ATTR void VKAPI_CALL
2685 v3dv_DestroyBuffer(VkDevice _device,
2686 VkBuffer _buffer,
2687 const VkAllocationCallbacks *pAllocator)
2688 {
2689 V3DV_FROM_HANDLE(v3dv_device, device, _device);
2690 V3DV_FROM_HANDLE(v3dv_buffer, buffer, _buffer);
2691
2692 if (!buffer)
2693 return;
2694
2695 vk_object_free(&device->vk, pAllocator, buffer);
2696 }
2697
2698 VKAPI_ATTR VkResult VKAPI_CALL
2699 v3dv_CreateFramebuffer(VkDevice _device,
2700 const VkFramebufferCreateInfo *pCreateInfo,
2701 const VkAllocationCallbacks *pAllocator,
2702 VkFramebuffer *pFramebuffer)
2703 {
2704 V3DV_FROM_HANDLE(v3dv_device, device, _device);
2705 struct v3dv_framebuffer *framebuffer;
2706
2707 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO);
2708
2709 size_t size = sizeof(*framebuffer) +
2710 sizeof(struct v3dv_image_view *) * pCreateInfo->attachmentCount;
2711 framebuffer = vk_object_zalloc(&device->vk, pAllocator, size,
2712 VK_OBJECT_TYPE_FRAMEBUFFER);
2713 if (framebuffer == NULL)
2714 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
2715
2716 framebuffer->width = pCreateInfo->width;
2717 framebuffer->height = pCreateInfo->height;
2718 framebuffer->layers = pCreateInfo->layers;
2719 framebuffer->has_edge_padding = true;
2720
2721 const VkFramebufferAttachmentsCreateInfo *imageless =
2722 vk_find_struct_const(pCreateInfo->pNext,
2723 FRAMEBUFFER_ATTACHMENTS_CREATE_INFO);
2724
2725 framebuffer->attachment_count = pCreateInfo->attachmentCount;
2726 framebuffer->color_attachment_count = 0;
2727 for (uint32_t i = 0; i < framebuffer->attachment_count; i++) {
2728 if (!imageless) {
2729 framebuffer->attachments[i] =
2730 v3dv_image_view_from_handle(pCreateInfo->pAttachments[i]);
2731 if (framebuffer->attachments[i]->vk.aspects & VK_IMAGE_ASPECT_COLOR_BIT)
2732 framebuffer->color_attachment_count++;
2733 } else {
2734 assert(i < imageless->attachmentImageInfoCount);
2735 if (imageless->pAttachmentImageInfos[i].usage &
2736 VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) {
2737 framebuffer->color_attachment_count++;
2738 }
2739 }
2740 }
2741
2742 *pFramebuffer = v3dv_framebuffer_to_handle(framebuffer);
2743
2744 return VK_SUCCESS;
2745 }
2746
2747 VKAPI_ATTR void VKAPI_CALL
2748 v3dv_DestroyFramebuffer(VkDevice _device,
2749 VkFramebuffer _fb,
2750 const VkAllocationCallbacks *pAllocator)
2751 {
2752 V3DV_FROM_HANDLE(v3dv_device, device, _device);
2753 V3DV_FROM_HANDLE(v3dv_framebuffer, fb, _fb);
2754
2755 if (!fb)
2756 return;
2757
2758 vk_object_free(&device->vk, pAllocator, fb);
2759 }
2760
2761 void
2762 v3dv_setup_dynamic_framebuffer(struct v3dv_cmd_buffer *cmd_buffer,
2763 const VkRenderingInfoKHR *info)
2764 {
2765 struct v3dv_device *device = cmd_buffer->device;
2766
2767 /* Max framebuffer attachments is (max color RTs + 1 for D/S), multiplied
2768 * by two to account for MSAA resolve attachments.
2769 */
2770 const uint32_t max_attachments =
2771 2 * (V3D_MAX_RENDER_TARGETS(device->devinfo.ver) + 1);
2772 const uint32_t attachments_alloc_size =
2773 sizeof(struct v3dv_image_view *) * max_attachments;
2774
2775 /* We only allocate the dynamic framebuffer once; it stays valid for
2776 * the duration of the command buffer.
2777 */
2778 struct v3dv_framebuffer *fb = cmd_buffer->state.dynamic_framebuffer;
2779 if (!fb) {
2780 uint32_t alloc_size = sizeof(struct v3dv_framebuffer) +
2781 attachments_alloc_size;
2782 fb = vk_object_zalloc(&cmd_buffer->device->vk, NULL, alloc_size,
2783 VK_OBJECT_TYPE_FRAMEBUFFER);
2784 if (fb == NULL) {
2785 v3dv_flag_oom(cmd_buffer, NULL);
2786 return;
2787 }
2788 cmd_buffer->state.dynamic_framebuffer = fb;
2789 } else {
2790 memset(fb->attachments, 0, attachments_alloc_size);
2791 }
2792
2793 fb->width = info->renderArea.offset.x + info->renderArea.extent.width;
2794 fb->height = info->renderArea.offset.y + info->renderArea.extent.height;
2795
2796 /* From the Vulkan spec for VkFramebufferCreateInfo:
2797 *
2798 * "If the render pass uses multiview, then layers must be one (...)"
2799 */
2800 fb->layers = info->viewMask == 0 ? info->layerCount : 1;
2801
2802 struct v3dv_render_pass *pass = &cmd_buffer->state.dynamic_pass;
2803 assert(pass->subpass_count == 1 && pass->subpasses);
2804 assert(pass->subpasses[0].color_count == info->colorAttachmentCount);
2805 fb->color_attachment_count = info->colorAttachmentCount;
2806
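/* Pack attachments in the order the dynamic render pass expects them: each
 * color attachment followed by its resolve attachment (if any), then
 * depth/stencil and its resolve.
 */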
2807 uint32_t a = 0;
2808 for (int i = 0; i < info->colorAttachmentCount; i++) {
2809 if (info->pColorAttachments[i].imageView == VK_NULL_HANDLE)
2810 continue;
2811 fb->attachments[a++] =
2812 v3dv_image_view_from_handle(info->pColorAttachments[i].imageView);
2813 if (info->pColorAttachments[i].resolveMode != VK_RESOLVE_MODE_NONE) {
2814 fb->attachments[a++] =
2815 v3dv_image_view_from_handle(info->pColorAttachments[i].resolveImageView);
2816 }
2817 }
2818
2819 if ((info->pDepthAttachment && info->pDepthAttachment->imageView) ||
2820 (info->pStencilAttachment && info->pStencilAttachment->imageView)) {
2821 const struct VkRenderingAttachmentInfo *common_ds_info =
2822 (info->pDepthAttachment &&
2823 info->pDepthAttachment->imageView != VK_NULL_HANDLE) ?
2824 info->pDepthAttachment :
2825 info->pStencilAttachment;
2826
2827 fb->attachments[a++] =
2828 v3dv_image_view_from_handle(common_ds_info->imageView);
2829
2830 if (common_ds_info->resolveMode != VK_RESOLVE_MODE_NONE) {
2831 fb->attachments[a++] =
2832 v3dv_image_view_from_handle(common_ds_info->resolveImageView);
2833 }
2834 }
2835
2836 assert(a == pass->attachment_count);
2837 fb->attachment_count = a;
2838
2839 /* Dynamic rendering doesn't provide the size of the underlying framebuffer
2840 * so we estimate its size from the render area. This means it is possible
2841 * the underlying attachments are larger and thus we cannot assume we have
2842 * edge padding.
2843 */
2844 fb->has_edge_padding = false;
2845 }
2846
2847 void
2848 v3dv_destroy_dynamic_framebuffer(struct v3dv_cmd_buffer *cmd_buffer)
2849 {
2850 if (!cmd_buffer->state.dynamic_framebuffer)
2851 return;
2852
2853 VkDevice vk_device = v3dv_device_to_handle(cmd_buffer->device);
2854 VkFramebuffer vk_dynamic_fb =
2855 v3dv_framebuffer_to_handle(cmd_buffer->state.dynamic_framebuffer);
2856 v3dv_DestroyFramebuffer(vk_device, vk_dynamic_fb, NULL);
2857 cmd_buffer->state.dynamic_framebuffer = NULL;
2858 }
2859
2860 VKAPI_ATTR VkResult VKAPI_CALL
2861 v3dv_GetMemoryFdPropertiesKHR(VkDevice _device,
2862 VkExternalMemoryHandleTypeFlagBits handleType,
2863 int fd,
2864 VkMemoryFdPropertiesKHR *pMemoryFdProperties)
2865 {
2866 V3DV_FROM_HANDLE(v3dv_device, device, _device);
2867 struct v3dv_physical_device *pdevice = device->pdevice;
2868
2869 switch (handleType) {
2870 case VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT:
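/* We can import any dma-buf into our single memory type, so report all
 * memory types as supported.
 */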
2871 pMemoryFdProperties->memoryTypeBits =
2872 (1 << pdevice->memory.memoryTypeCount) - 1;
2873 return VK_SUCCESS;
2874 default:
2875 return vk_error(device, VK_ERROR_INVALID_EXTERNAL_HANDLE);
2876 }
2877 }
2878
2879 VKAPI_ATTR VkResult VKAPI_CALL
2880 v3dv_GetMemoryFdKHR(VkDevice _device,
2881 const VkMemoryGetFdInfoKHR *pGetFdInfo,
2882 int *pFd)
2883 {
2884 V3DV_FROM_HANDLE(v3dv_device, device, _device);
2885 V3DV_FROM_HANDLE(v3dv_device_memory, mem, pGetFdInfo->memory);
2886
2887 assert(pGetFdInfo->sType == VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR);
2888 assert(pGetFdInfo->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT ||
2889 pGetFdInfo->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
2890
2891 int fd, ret;
2892 ret = drmPrimeHandleToFD(device->pdevice->render_fd,
2893 mem->bo->handle,
2894 DRM_CLOEXEC, &fd);
2895 if (ret)
2896 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
2897
2898 *pFd = fd;
2899
2900 return VK_SUCCESS;
2901 }
2902
2903 VKAPI_ATTR VkResult VKAPI_CALL
2904 v3dv_CreateSampler(VkDevice _device,
2905 const VkSamplerCreateInfo *pCreateInfo,
2906 const VkAllocationCallbacks *pAllocator,
2907 VkSampler *pSampler)
2908 {
2909 V3DV_FROM_HANDLE(v3dv_device, device, _device);
2910 struct v3dv_sampler *sampler;
2911
2912 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO);
2913
2914 sampler = vk_object_zalloc(&device->vk, pAllocator, sizeof(*sampler),
2915 VK_OBJECT_TYPE_SAMPLER);
2916 if (!sampler)
2917 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
2918
2919 sampler->plane_count = 1;
2920
2921 sampler->compare_enable = pCreateInfo->compareEnable;
2922 sampler->unnormalized_coordinates = pCreateInfo->unnormalizedCoordinates;
2923
2924 const VkSamplerCustomBorderColorCreateInfoEXT *bc_info =
2925 vk_find_struct_const(pCreateInfo->pNext,
2926 SAMPLER_CUSTOM_BORDER_COLOR_CREATE_INFO_EXT);
2927
2928 const VkSamplerYcbcrConversionInfo *ycbcr_conv_info =
2929 vk_find_struct_const(pCreateInfo->pNext, SAMPLER_YCBCR_CONVERSION_INFO);
2930
2931 const struct vk_format_ycbcr_info *ycbcr_info = NULL;
2932
2933 if (ycbcr_conv_info) {
2934 VK_FROM_HANDLE(vk_ycbcr_conversion, conversion, ycbcr_conv_info->conversion);
2935 ycbcr_info = vk_format_get_ycbcr_info(conversion->state.format);
2936 if (ycbcr_info) {
2937 sampler->plane_count = ycbcr_info->n_planes;
2938 sampler->conversion = conversion;
2939 }
2940 }
2941
2942 v3dv_X(device, pack_sampler_state)(device, sampler, pCreateInfo, bc_info);
2943
2944 *pSampler = v3dv_sampler_to_handle(sampler);
2945
2946 return VK_SUCCESS;
2947 }
2948
2949 VKAPI_ATTR void VKAPI_CALL
2950 v3dv_DestroySampler(VkDevice _device,
2951 VkSampler _sampler,
2952 const VkAllocationCallbacks *pAllocator)
2953 {
2954 V3DV_FROM_HANDLE(v3dv_device, device, _device);
2955 V3DV_FROM_HANDLE(v3dv_sampler, sampler, _sampler);
2956
2957 if (!sampler)
2958 return;
2959
2960 vk_object_free(&device->vk, pAllocator, sampler);
2961 }
2962
2963 VKAPI_ATTR void VKAPI_CALL
2964 v3dv_GetDeviceMemoryCommitment(VkDevice device,
2965 VkDeviceMemory memory,
2966 VkDeviceSize *pCommittedMemoryInBytes)
2967 {
2968 *pCommittedMemoryInBytes = 0;
2969 }
2970
2971 VKAPI_ATTR void VKAPI_CALL
2972 v3dv_GetImageSparseMemoryRequirements(
2973 VkDevice device,
2974 VkImage image,
2975 uint32_t *pSparseMemoryRequirementCount,
2976 VkSparseImageMemoryRequirements *pSparseMemoryRequirements)
2977 {
2978 *pSparseMemoryRequirementCount = 0;
2979 }
2980
2981 VKAPI_ATTR void VKAPI_CALL
2982 v3dv_GetImageSparseMemoryRequirements2(
2983 VkDevice device,
2984 const VkImageSparseMemoryRequirementsInfo2 *pInfo,
2985 uint32_t *pSparseMemoryRequirementCount,
2986 VkSparseImageMemoryRequirements2 *pSparseMemoryRequirements)
2987 {
2988 *pSparseMemoryRequirementCount = 0;
2989 }
2990
2991 VKAPI_ATTR void VKAPI_CALL
2992 v3dv_GetDeviceImageSparseMemoryRequirements(
2993 VkDevice device,
2994 const VkDeviceImageMemoryRequirements *pInfo,
2995 uint32_t *pSparseMemoryRequirementCount,
2996 VkSparseImageMemoryRequirements2 *pSparseMemoryRequirements)
2997 {
2998 *pSparseMemoryRequirementCount = 0;
2999 }
3000
3001 VkDeviceAddress
3002 v3dv_GetBufferDeviceAddress(VkDevice device,
3003 const VkBufferDeviceAddressInfo *pInfo)
3004 {
3005 V3DV_FROM_HANDLE(v3dv_buffer, buffer, pInfo->buffer);
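/* The device address is the BO's offset in the V3D address space plus the
 * buffer's bind offset within the BO.
 */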
3006 return buffer->mem_offset + buffer->mem->bo->offset;
3007 }
3008
3009 uint64_t
3010 v3dv_GetBufferOpaqueCaptureAddress(VkDevice device,
3011 const VkBufferDeviceAddressInfo *pInfo)
3012 {
3013 /* Not implemented */
3014 return 0;
3015 }
3016
3017 uint64_t
3018 v3dv_GetDeviceMemoryOpaqueCaptureAddress(
3019 VkDevice device,
3020 const VkDeviceMemoryOpaqueCaptureAddressInfo *pInfo)
3021 {
3022 /* Not implemented */
3023 return 0;
3024 }
3025
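/* Builds a compute pipeline directly from a NIR shader by wrapping it in a
 * temporary vk_shader_module. A minimal usage sketch (build_internal_cs() is
 * hypothetical, real callers live elsewhere in the driver):
 *
 *    nir_shader *cs = build_internal_cs(device);
 *    VkPipeline pipeline;
 *    VkResult res = v3dv_create_compute_pipeline_from_nir(device, cs,
 *                                                         layout, &pipeline);
 *
 * The pipeline is created with the device's own allocator and must be
 * destroyed with it as well.
 */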
3026 VkResult
3027 v3dv_create_compute_pipeline_from_nir(struct v3dv_device *device,
3028 nir_shader *nir,
3029 VkPipelineLayout pipeline_layout,
3030 VkPipeline *pipeline)
3031 {
3032 struct vk_shader_module cs_m = vk_shader_module_from_nir(nir);
3033
3034 VkPipelineShaderStageCreateInfo set_event_cs_stage = {
3035 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
3036 .stage = VK_SHADER_STAGE_COMPUTE_BIT,
3037 .module = vk_shader_module_to_handle(&cs_m),
3038 .pName = "main",
3039 };
3040
3041 VkComputePipelineCreateInfo info = {
3042 .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
3043 .stage = set_event_cs_stage,
3044 .layout = pipeline_layout,
3045 };
3046
3047 VkResult result =
3048 v3dv_CreateComputePipelines(v3dv_device_to_handle(device), VK_NULL_HANDLE,
3049 1, &info, &device->vk.alloc, pipeline);
3050
3051 return result;
3052 }
3053