1 /*
2 * Copyright © 2016 Red Hat.
3 * Copyright © 2016 Bas Nieuwenhuizen
4 * SPDX-License-Identifier: MIT
5 *
6 * based in part on anv driver which is:
7 * Copyright © 2015 Intel Corporation
8 */
9
10 #include "tu_device.h"
11
12 #include "drm-uapi/drm_fourcc.h"
13 #include "fdl/freedreno_layout.h"
14 #include <fcntl.h>
15 #include <poll.h>
16
17 #include "git_sha1.h"
18 #include "util/u_debug.h"
19 #include "util/disk_cache.h"
20 #include "util/hex.h"
21 #include "util/driconf.h"
22 #include "util/os_misc.h"
23 #include "util/u_process.h"
24 #include "vk_android.h"
25 #include "vk_shader_module.h"
26 #include "vk_sampler.h"
27 #include "vk_util.h"
28
29 /* for fd_get_driver/device_uuid() */
30 #include "freedreno/common/freedreno_uuid.h"
31 #include "freedreno/common/freedreno_stompable_regs.h"
32
33 #include "tu_clear_blit.h"
34 #include "tu_cmd_buffer.h"
35 #include "tu_cs.h"
36 #include "tu_descriptor_set.h"
37 #include "tu_dynamic_rendering.h"
38 #include "tu_image.h"
39 #include "tu_pass.h"
40 #include "tu_query_pool.h"
41 #include "tu_rmv.h"
42 #include "tu_tracepoints.h"
43 #include "tu_wsi.h"
44
45 #if DETECT_OS_ANDROID
46 #include "util/u_gralloc/u_gralloc.h"
47 #include <vndk/hardware_buffer.h>
48 #endif
49
50 uint64_t os_page_size = 4096;
51
52 static int
tu_device_get_cache_uuid(struct tu_physical_device * device,void * uuid)53 tu_device_get_cache_uuid(struct tu_physical_device *device, void *uuid)
54 {
55 struct mesa_sha1 ctx;
56 unsigned char sha1[20];
57 /* Note: IR3_SHADER_DEBUG also affects compilation, but it's not
58 * initialized until after compiler creation so we have to add it to the
59 * shader hash instead, since the compiler is only created with the logical
60 * device.
61 */
62 uint64_t driver_flags = tu_env.debug & TU_DEBUG_NOMULTIPOS;
63 uint16_t family = fd_dev_gpu_id(&device->dev_id);
64
65 memset(uuid, 0, VK_UUID_SIZE);
66 _mesa_sha1_init(&ctx);
67
68 if (!disk_cache_get_function_identifier((void *)tu_device_get_cache_uuid, &ctx))
69 return -1;
70
71 _mesa_sha1_update(&ctx, &family, sizeof(family));
72 _mesa_sha1_update(&ctx, &driver_flags, sizeof(driver_flags));
73 _mesa_sha1_final(&ctx, sha1);
74
75 memcpy(uuid, sha1, VK_UUID_SIZE);
76 return 0;
77 }
78
79 #define TU_API_VERSION VK_MAKE_VERSION(1, 3, VK_HEADER_VERSION)
80
81 VKAPI_ATTR VkResult VKAPI_CALL
tu_EnumerateInstanceVersion(uint32_t * pApiVersion)82 tu_EnumerateInstanceVersion(uint32_t *pApiVersion)
83 {
84 *pApiVersion = TU_API_VERSION;
85 return VK_SUCCESS;
86 }
87
88 static const struct vk_instance_extension_table tu_instance_extensions_supported = { .table = {
89 .KHR_device_group_creation = true,
90 #ifdef VK_USE_PLATFORM_DISPLAY_KHR
91 .KHR_display = true,
92 #endif
93 .KHR_external_fence_capabilities = true,
94 .KHR_external_memory_capabilities = true,
95 .KHR_external_semaphore_capabilities = true,
96 #ifdef VK_USE_PLATFORM_DISPLAY_KHR
97 .KHR_get_display_properties2 = true,
98 #endif
99 .KHR_get_physical_device_properties2 = true,
100 #ifdef TU_USE_WSI_PLATFORM
101 .KHR_get_surface_capabilities2 = true,
102 .KHR_surface = true,
103 .KHR_surface_protected_capabilities = true,
104 #endif
105 #ifdef VK_USE_PLATFORM_WAYLAND_KHR
106 .KHR_wayland_surface = true,
107 #endif
108 #ifdef VK_USE_PLATFORM_XCB_KHR
109 .KHR_xcb_surface = true,
110 #endif
111 #ifdef VK_USE_PLATFORM_XLIB_KHR
112 .KHR_xlib_surface = true,
113 #endif
114 #ifdef VK_USE_PLATFORM_DISPLAY_KHR
115 .EXT_acquire_drm_display = true,
116 #endif
117 #ifdef VK_USE_PLATFORM_XLIB_XRANDR_EXT
118 .EXT_acquire_xlib_display = true,
119 #endif
120 .EXT_debug_report = true,
121 .EXT_debug_utils = true,
122 #ifdef VK_USE_PLATFORM_DISPLAY_KHR
123 .EXT_direct_mode_display = true,
124 .EXT_display_surface_counter = true,
125 #endif
126 #ifndef VK_USE_PLATFORM_WIN32_KHR
127 .EXT_headless_surface = true,
128 #endif
129 #ifdef TU_USE_WSI_PLATFORM
130 .EXT_surface_maintenance1 = true,
131 .EXT_swapchain_colorspace = true,
132 #endif
133 } };
134
135 static bool
is_kgsl(struct tu_instance * instance)136 is_kgsl(struct tu_instance *instance)
137 {
138 return strcmp(instance->knl->name, "kgsl") == 0;
139 }
140
141 static void
get_device_extensions(const struct tu_physical_device * device,struct vk_device_extension_table * ext)142 get_device_extensions(const struct tu_physical_device *device,
143 struct vk_device_extension_table *ext)
144 {
145 *ext = (struct vk_device_extension_table) { .table = {
146 .KHR_8bit_storage = device->info->a7xx.storage_8bit,
147 .KHR_16bit_storage = device->info->a6xx.storage_16bit,
148 .KHR_bind_memory2 = true,
149 .KHR_buffer_device_address = true,
150 .KHR_copy_commands2 = true,
151 .KHR_create_renderpass2 = true,
152 .KHR_dedicated_allocation = true,
153 .KHR_depth_stencil_resolve = true,
154 .KHR_descriptor_update_template = true,
155 .KHR_device_group = true,
156 .KHR_draw_indirect_count = true,
157 .KHR_driver_properties = true,
158 .KHR_dynamic_rendering = true,
159 .KHR_external_fence = true,
160 .KHR_external_fence_fd = true,
161 .KHR_external_memory = true,
162 .KHR_external_memory_fd = true,
163 .KHR_external_semaphore = true,
164 .KHR_external_semaphore_fd = true,
165 .KHR_format_feature_flags2 = true,
166 .KHR_get_memory_requirements2 = true,
167 .KHR_global_priority = true,
168 .KHR_image_format_list = true,
169 .KHR_imageless_framebuffer = true,
170 #ifdef TU_USE_WSI_PLATFORM
171 .KHR_incremental_present = true,
172 #endif
173 .KHR_index_type_uint8 = true,
174 .KHR_line_rasterization = true,
175 .KHR_load_store_op_none = true,
176 .KHR_maintenance1 = true,
177 .KHR_maintenance2 = true,
178 .KHR_maintenance3 = true,
179 .KHR_maintenance4 = true,
180 .KHR_maintenance5 = true,
181 .KHR_maintenance6 = true,
182 .KHR_map_memory2 = true,
183 .KHR_multiview = TU_DEBUG(NOCONFORM) ? true : device->info->a6xx.has_hw_multiview,
184 .KHR_performance_query = TU_DEBUG(PERFC),
185 .KHR_pipeline_executable_properties = true,
186 .KHR_pipeline_library = true,
187 #ifdef TU_USE_WSI_PLATFORM
188 /* Hide these behind dri configs for now since we cannot implement it reliably on
189 * all surfaces yet. There is no surface capability query for present wait/id,
190 * but the feature is useful enough to hide behind an opt-in mechanism for now.
191 * If the instance only enables surface extensions that unconditionally support present wait,
192 * we can also expose the extension that way. */
193 .KHR_present_id = (driQueryOptionb(&device->instance->dri_options, "vk_khr_present_wait") ||
194 wsi_common_vk_instance_supports_present_wait(&device->instance->vk)),
195 .KHR_present_wait = (driQueryOptionb(&device->instance->dri_options, "vk_khr_present_wait") ||
196 wsi_common_vk_instance_supports_present_wait(&device->instance->vk)),
197 #endif
198 .KHR_push_descriptor = true,
199 .KHR_relaxed_block_layout = true,
200 .KHR_sampler_mirror_clamp_to_edge = true,
201 .KHR_sampler_ycbcr_conversion = true,
202 .KHR_separate_depth_stencil_layouts = true,
203 .KHR_shader_draw_parameters = true,
204 .KHR_shader_expect_assume = true,
205 .KHR_shader_float16_int8 = true,
206 .KHR_shader_float_controls = true,
207 .KHR_shader_float_controls2 = true,
208 .KHR_shader_integer_dot_product = true,
209 .KHR_shader_non_semantic_info = true,
210 .KHR_shader_relaxed_extended_instruction = true,
211 .KHR_shader_subgroup_extended_types = true,
212 .KHR_shader_subgroup_uniform_control_flow = true,
213 .KHR_shader_terminate_invocation = true,
214 .KHR_spirv_1_4 = true,
215 .KHR_storage_buffer_storage_class = true,
216 #ifdef TU_USE_WSI_PLATFORM
217 .KHR_swapchain = true,
218 .KHR_swapchain_mutable_format = true,
219 #endif
220 .KHR_synchronization2 = true,
221 .KHR_timeline_semaphore = true,
222 .KHR_uniform_buffer_standard_layout = true,
223 .KHR_variable_pointers = true,
224 .KHR_vertex_attribute_divisor = true,
225 .KHR_vulkan_memory_model = true,
226 .KHR_workgroup_memory_explicit_layout = true,
227 .KHR_zero_initialize_workgroup_memory = true,
228
229 .EXT_4444_formats = true,
230 .EXT_attachment_feedback_loop_dynamic_state = true,
231 .EXT_attachment_feedback_loop_layout = true,
232 .EXT_border_color_swizzle = true,
233 .EXT_color_write_enable = true,
234 .EXT_conditional_rendering = true,
235 .EXT_custom_border_color = true,
236 .EXT_depth_clamp_zero_one = true,
237 .EXT_depth_clip_control = true,
238 .EXT_depth_clip_enable = true,
239 .EXT_descriptor_buffer = true,
240 .EXT_descriptor_indexing = true,
241 .EXT_device_address_binding_report = true,
242 #ifdef VK_USE_PLATFORM_DISPLAY_KHR
243 .EXT_display_control = true,
244 #endif
245 .EXT_extended_dynamic_state = true,
246 .EXT_extended_dynamic_state2 = true,
247 .EXT_extended_dynamic_state3 = true,
248 .EXT_external_memory_dma_buf = true,
249 .EXT_filter_cubic = device->info->a6xx.has_tex_filter_cubic,
250 .EXT_fragment_density_map = true,
251 .EXT_global_priority = true,
252 .EXT_global_priority_query = true,
253 .EXT_graphics_pipeline_library = true,
254 .EXT_host_query_reset = true,
255 .EXT_image_2d_view_of_3d = true,
256 .EXT_image_drm_format_modifier = true,
257 .EXT_image_robustness = true,
258 .EXT_image_view_min_lod = true,
259 .EXT_index_type_uint8 = true,
260 .EXT_inline_uniform_block = true,
261 .EXT_legacy_dithering = true,
262 .EXT_legacy_vertex_attributes = true,
263 .EXT_line_rasterization = true,
264 .EXT_load_store_op_none = true,
265 .EXT_map_memory_placed = true,
266 .EXT_memory_budget = true,
267 .EXT_multi_draw = true,
268 .EXT_mutable_descriptor_type = true,
269 .EXT_nested_command_buffer = true,
270 .EXT_non_seamless_cube_map = true,
271 .EXT_physical_device_drm = !is_kgsl(device->instance),
272 .EXT_pipeline_creation_cache_control = true,
273 .EXT_pipeline_creation_feedback = true,
274 .EXT_post_depth_coverage = true,
275 .EXT_primitive_topology_list_restart = true,
276 .EXT_primitives_generated_query = true,
277 .EXT_private_data = true,
278 .EXT_provoking_vertex = true,
279 .EXT_queue_family_foreign = true,
280 .EXT_rasterization_order_attachment_access = true,
281 .EXT_robustness2 = true,
282 .EXT_sample_locations = device->info->a6xx.has_sample_locations,
283 .EXT_sampler_filter_minmax = device->info->a6xx.has_sampler_minmax,
284 .EXT_scalar_block_layout = true,
285 .EXT_separate_stencil_usage = true,
286 .EXT_shader_demote_to_helper_invocation = true,
287 .EXT_shader_module_identifier = true,
288 .EXT_shader_replicated_composites = true,
289 .EXT_shader_stencil_export = true,
290 .EXT_shader_viewport_index_layer = TU_DEBUG(NOCONFORM) ? true : device->info->a6xx.has_hw_multiview,
291 .EXT_subgroup_size_control = true,
292 #ifdef TU_USE_WSI_PLATFORM
293 .EXT_swapchain_maintenance1 = true,
294 #endif
295 .EXT_texel_buffer_alignment = true,
296 .EXT_tooling_info = true,
297 .EXT_transform_feedback = true,
298 .EXT_vertex_attribute_divisor = true,
299 .EXT_vertex_input_dynamic_state = true,
300
301 /* For Graphics Flight Recorder (GFR) */
302 .AMD_buffer_marker = true,
303 .ARM_rasterization_order_attachment_access = true,
304 .GOOGLE_decorate_string = true,
305 .GOOGLE_hlsl_functionality1 = true,
306 .GOOGLE_user_type = true,
307 .IMG_filter_cubic = device->info->a6xx.has_tex_filter_cubic,
308 .VALVE_mutable_descriptor_type = true,
309 } };
310
311 #if DETECT_OS_ANDROID
312 if (vk_android_get_ugralloc() != NULL) {
313 ext->ANDROID_external_memory_android_hardware_buffer = true,
314 ext->ANDROID_native_buffer = true;
315 }
316 #endif
317 }
318
319 static void
tu_get_features(struct tu_physical_device * pdevice,struct vk_features * features)320 tu_get_features(struct tu_physical_device *pdevice,
321 struct vk_features *features)
322 {
323 *features = (struct vk_features) { false };
324
325 /* Vulkan 1.0 */
326 features->robustBufferAccess = true;
327 features->fullDrawIndexUint32 = true;
328 features->imageCubeArray = true;
329 features->independentBlend = true;
330 features->geometryShader = true;
331 features->tessellationShader = true;
332 features->sampleRateShading = true;
333 features->dualSrcBlend = true;
334 features->logicOp = true;
335 features->multiDrawIndirect = true;
336 features->drawIndirectFirstInstance = true;
337 features->depthClamp = true;
338 features->depthBiasClamp = true;
339 features->fillModeNonSolid = true;
340 features->depthBounds = true;
341 features->wideLines = pdevice->info->a6xx.line_width_max > 1.0;
342 features->largePoints = true;
343 features->alphaToOne = true;
344 features->multiViewport = true;
345 features->samplerAnisotropy = true;
346 features->textureCompressionETC2 = true;
347 features->textureCompressionASTC_LDR = true;
348 features->textureCompressionBC = true;
349 features->occlusionQueryPrecise = true;
350 features->pipelineStatisticsQuery = true;
351 features->vertexPipelineStoresAndAtomics = true;
352 features->fragmentStoresAndAtomics = true;
353 features->shaderTessellationAndGeometryPointSize = true;
354 features->shaderImageGatherExtended = true;
355 features->shaderStorageImageExtendedFormats = true;
356 features->shaderStorageImageMultisample = false;
357 features->shaderStorageImageReadWithoutFormat = true;
358 features->shaderStorageImageWriteWithoutFormat = true;
359 features->shaderUniformBufferArrayDynamicIndexing = true;
360 features->shaderSampledImageArrayDynamicIndexing = true;
361 features->shaderStorageBufferArrayDynamicIndexing = true;
362 features->shaderStorageImageArrayDynamicIndexing = true;
363 features->shaderClipDistance = true;
364 features->shaderCullDistance = true;
365 features->shaderFloat64 = false;
366 features->shaderInt64 = false;
367 features->shaderInt16 = true;
368 features->sparseBinding = false;
369 features->variableMultisampleRate = true;
370 features->inheritedQueries = true;
371
372 /* Vulkan 1.1 */
373 features->storageBuffer16BitAccess = pdevice->info->a6xx.storage_16bit;
374 features->uniformAndStorageBuffer16BitAccess = false;
375 features->storagePushConstant16 = false;
376 features->storageInputOutput16 = false;
377 features->multiview = true;
378 features->multiviewGeometryShader = false;
379 features->multiviewTessellationShader = false;
380 features->variablePointersStorageBuffer = true;
381 features->variablePointers = true;
382 features->protectedMemory = false;
383 features->samplerYcbcrConversion = true;
384 features->shaderDrawParameters = true;
385
386 /* Vulkan 1.2 */
387 features->samplerMirrorClampToEdge = true;
388 features->drawIndirectCount = true;
389 features->storageBuffer8BitAccess = pdevice->info->a7xx.storage_8bit;
390 features->uniformAndStorageBuffer8BitAccess = false;
391 features->storagePushConstant8 = false;
392 features->shaderBufferInt64Atomics = false;
393 features->shaderSharedInt64Atomics = false;
394 features->shaderFloat16 = true;
395 features->shaderInt8 = true;
396
397 features->descriptorIndexing = true;
398 features->shaderInputAttachmentArrayDynamicIndexing = false;
399 features->shaderUniformTexelBufferArrayDynamicIndexing = true;
400 features->shaderStorageTexelBufferArrayDynamicIndexing = true;
401 features->shaderUniformBufferArrayNonUniformIndexing = true;
402 features->shaderSampledImageArrayNonUniformIndexing = true;
403 features->shaderStorageBufferArrayNonUniformIndexing = true;
404 features->shaderStorageImageArrayNonUniformIndexing = true;
405 features->shaderInputAttachmentArrayNonUniformIndexing = false;
406 features->shaderUniformTexelBufferArrayNonUniformIndexing = true;
407 features->shaderStorageTexelBufferArrayNonUniformIndexing = true;
408 features->descriptorBindingUniformBufferUpdateAfterBind = true;
409 features->descriptorBindingSampledImageUpdateAfterBind = true;
410 features->descriptorBindingStorageImageUpdateAfterBind = true;
411 features->descriptorBindingStorageBufferUpdateAfterBind = true;
412 features->descriptorBindingUniformTexelBufferUpdateAfterBind = true;
413 features->descriptorBindingStorageTexelBufferUpdateAfterBind = true;
414 features->descriptorBindingUpdateUnusedWhilePending = true;
415 features->descriptorBindingPartiallyBound = true;
416 features->descriptorBindingVariableDescriptorCount = true;
417 features->runtimeDescriptorArray = true;
418
419 features->samplerFilterMinmax =
420 pdevice->info->a6xx.has_sampler_minmax;
421 features->scalarBlockLayout = true;
422 features->imagelessFramebuffer = true;
423 features->uniformBufferStandardLayout = true;
424 features->shaderSubgroupExtendedTypes = true;
425 features->separateDepthStencilLayouts = true;
426 features->hostQueryReset = true;
427 features->timelineSemaphore = true;
428 features->bufferDeviceAddress = true;
429 features->bufferDeviceAddressCaptureReplay = pdevice->has_set_iova;
430 features->bufferDeviceAddressMultiDevice = false;
431 features->vulkanMemoryModel = true;
432 features->vulkanMemoryModelDeviceScope = true;
433 features->vulkanMemoryModelAvailabilityVisibilityChains = true;
434 features->shaderOutputViewportIndex = true;
435 features->shaderOutputLayer = true;
436 features->subgroupBroadcastDynamicId = true;
437
438 /* Vulkan 1.3 */
439 features->robustImageAccess = true;
440 features->inlineUniformBlock = true;
441 features->descriptorBindingInlineUniformBlockUpdateAfterBind = true;
442 features->pipelineCreationCacheControl = true;
443 features->privateData = true;
444 features->shaderDemoteToHelperInvocation = true;
445 features->shaderTerminateInvocation = true;
446 features->subgroupSizeControl = true;
447 features->computeFullSubgroups = true;
448 features->synchronization2 = true;
449 features->textureCompressionASTC_HDR = false;
450 features->shaderZeroInitializeWorkgroupMemory = true;
451 features->dynamicRendering = true;
452 features->shaderIntegerDotProduct = true;
453 features->maintenance4 = true;
454
455 /* VK_KHR_index_type_uint8 */
456 features->indexTypeUint8 = true;
457
458 /* VK_KHR_line_rasterization */
459 features->rectangularLines = true;
460 features->bresenhamLines = true;
461 features->smoothLines = false;
462 features->stippledRectangularLines = false;
463 features->stippledBresenhamLines = false;
464 features->stippledSmoothLines = false;
465
466 /* VK_KHR_maintenance5 */
467 features->maintenance5 = true;
468
469 /* VK_KHR_maintenance6 */
470 features->maintenance6 = true;
471
472 /* VK_KHR_performance_query */
473 features->performanceCounterQueryPools = true;
474 features->performanceCounterMultipleQueryPools = false;
475
476 /* VK_KHR_pipeline_executable_properties */
477 features->pipelineExecutableInfo = true;
478
479 /* VK_KHR_present_id */
480 features->presentId = pdevice->vk.supported_extensions.KHR_present_id;
481
482 /* VK_KHR_present_wait */
483 features->presentWait = pdevice->vk.supported_extensions.KHR_present_wait;
484
485 /* VK_KHR_shader_expect_assume */
486 features->shaderExpectAssume = true;
487
488 /* VK_KHR_shader_float_controls2 */
489 features->shaderFloatControls2 = true;
490
491 /* VK_KHR_shader_subgroup_uniform_control_flow */
492 features->shaderSubgroupUniformControlFlow = true;
493
494 /* VK_KHR_vertex_attribute_divisor */
495 features->vertexAttributeInstanceRateDivisor = true;
496 features->vertexAttributeInstanceRateZeroDivisor = true;
497
498 /* VK_KHR_workgroup_memory_explicit_layout */
499 features->workgroupMemoryExplicitLayout = true;
500 features->workgroupMemoryExplicitLayoutScalarBlockLayout = true;
501 features->workgroupMemoryExplicitLayout8BitAccess = true;
502 features->workgroupMemoryExplicitLayout16BitAccess = true;
503
504 /* VK_EXT_4444_formats */
505 features->formatA4R4G4B4 = true;
506 features->formatA4B4G4R4 = true;
507
508 /* VK_EXT_attachment_feedback_loop_dynamic_state */
509 features->attachmentFeedbackLoopDynamicState = true;
510
511 /* VK_EXT_attachment_feedback_loop_layout */
512 features->attachmentFeedbackLoopLayout = true;
513
514 /* VK_EXT_border_color_swizzle */
515 features->borderColorSwizzle = true;
516 features->borderColorSwizzleFromImage = true;
517
518 /* VK_EXT_color_write_enable */
519 features->colorWriteEnable = true;
520
521 /* VK_EXT_conditional_rendering */
522 features->conditionalRendering = true;
523 features->inheritedConditionalRendering = true;
524
525 /* VK_EXT_custom_border_color */
526 features->customBorderColors = true;
527 features->customBorderColorWithoutFormat = true;
528
529 /* VK_EXT_depth_clamp_zero_one */
530 features->depthClampZeroOne = true;
531
532 /* VK_EXT_depth_clip_control */
533 features->depthClipControl = true;
534
535 /* VK_EXT_depth_clip_enable */
536 features->depthClipEnable = true;
537
538 /* VK_EXT_descriptor_buffer */
539 features->descriptorBuffer = true;
540 features->descriptorBufferCaptureReplay = pdevice->has_set_iova;
541 features->descriptorBufferImageLayoutIgnored = true;
542 features->descriptorBufferPushDescriptors = true;
543
544 /* VK_EXT_device_address_binding_report */
545 features->reportAddressBinding = true;
546
547 /* VK_EXT_extended_dynamic_state */
548 features->extendedDynamicState = true;
549
550 /* VK_EXT_extended_dynamic_state2 */
551 features->extendedDynamicState2 = true;
552 features->extendedDynamicState2LogicOp = true;
553 features->extendedDynamicState2PatchControlPoints = true;
554
555 /* VK_EXT_extended_dynamic_state3 */
556 features->extendedDynamicState3PolygonMode = true;
557 features->extendedDynamicState3TessellationDomainOrigin = true;
558 features->extendedDynamicState3DepthClampEnable = true;
559 features->extendedDynamicState3DepthClipEnable = true;
560 features->extendedDynamicState3LogicOpEnable = true;
561 features->extendedDynamicState3SampleMask = true;
562 features->extendedDynamicState3RasterizationSamples = true;
563 features->extendedDynamicState3AlphaToCoverageEnable = true;
564 features->extendedDynamicState3AlphaToOneEnable = true;
565 features->extendedDynamicState3DepthClipNegativeOneToOne = true;
566 features->extendedDynamicState3RasterizationStream = true;
567 features->extendedDynamicState3ConservativeRasterizationMode = false;
568 features->extendedDynamicState3ExtraPrimitiveOverestimationSize = false;
569 features->extendedDynamicState3LineRasterizationMode = true;
570 features->extendedDynamicState3LineStippleEnable = false;
571 features->extendedDynamicState3ProvokingVertexMode = true;
572 features->extendedDynamicState3SampleLocationsEnable =
573 pdevice->info->a6xx.has_sample_locations;
574 features->extendedDynamicState3ColorBlendEnable = true;
575 features->extendedDynamicState3ColorBlendEquation = true;
576 features->extendedDynamicState3ColorWriteMask = true;
577 features->extendedDynamicState3ViewportWScalingEnable = false;
578 features->extendedDynamicState3ViewportSwizzle = false;
579 features->extendedDynamicState3ShadingRateImageEnable = false;
580 features->extendedDynamicState3CoverageToColorEnable = false;
581 features->extendedDynamicState3CoverageToColorLocation = false;
582 features->extendedDynamicState3CoverageModulationMode = false;
583 features->extendedDynamicState3CoverageModulationTableEnable = false;
584 features->extendedDynamicState3CoverageModulationTable = false;
585 features->extendedDynamicState3CoverageReductionMode = false;
586 features->extendedDynamicState3RepresentativeFragmentTestEnable = false;
587 features->extendedDynamicState3ColorBlendAdvanced = false;
588
589 /* VK_EXT_fragment_density_map */
590 features->fragmentDensityMap = true;
591 features->fragmentDensityMapDynamic = false;
592 features->fragmentDensityMapNonSubsampledImages = true;
593
594 /* VK_EXT_global_priority_query */
595 features->globalPriorityQuery = true;
596
597 /* VK_EXT_graphics_pipeline_library */
598 features->graphicsPipelineLibrary = true;
599
600 /* VK_EXT_image_2d_view_of_3d */
601 features->image2DViewOf3D = true;
602 features->sampler2DViewOf3D = true;
603
604 /* VK_EXT_image_view_min_lod */
605 features->minLod = true;
606
607 /* VK_EXT_legacy_vertex_attributes */
608 features->legacyVertexAttributes = true;
609
610 /* VK_EXT_legacy_dithering */
611 features->legacyDithering = true;
612
613 /* VK_EXT_map_memory_placed */
614 features->memoryMapPlaced = true;
615 features->memoryMapRangePlaced = false;
616 features->memoryUnmapReserve = true;
617
618 /* VK_EXT_multi_draw */
619 features->multiDraw = true;
620
621 /* VK_EXT_mutable_descriptor_type */
622 features->mutableDescriptorType = true;
623
624 /* VK_EXT_nested_command_buffer */
625 features->nestedCommandBuffer = true;
626 features->nestedCommandBufferRendering = true;
627 features->nestedCommandBufferSimultaneousUse = true;
628
629 /* VK_EXT_non_seamless_cube_map */
630 features->nonSeamlessCubeMap = true;
631
632 /* VK_EXT_primitive_topology_list_restart */
633 features->primitiveTopologyListRestart = true;
634 features->primitiveTopologyPatchListRestart = false;
635
636 /* VK_EXT_primitives_generated_query */
637 features->primitivesGeneratedQuery = true;
638 features->primitivesGeneratedQueryWithRasterizerDiscard = false;
639 features->primitivesGeneratedQueryWithNonZeroStreams = false;
640
641 /* VK_EXT_provoking_vertex */
642 features->provokingVertexLast = true;
643
644 /* VK_EXT_rasterization_order_attachment_access */
645 features->rasterizationOrderColorAttachmentAccess = true;
646 features->rasterizationOrderDepthAttachmentAccess = true;
647 features->rasterizationOrderStencilAttachmentAccess = true;
648
649 /* VK_EXT_robustness2 */
650 features->robustBufferAccess2 = true;
651 features->robustImageAccess2 = true;
652 features->nullDescriptor = true;
653
654 /* VK_EXT_shader_module_identifier */
655 features->shaderModuleIdentifier = true;
656
657 /* VK_EXT_shader_replicated_composites */
658 features->shaderReplicatedComposites = true;
659
660 #ifdef TU_USE_WSI_PLATFORM
661 /* VK_EXT_swapchain_maintenance1 */
662 features->swapchainMaintenance1 = true;
663 #endif
664
665 /* VK_EXT_texel_buffer_alignment */
666 features->texelBufferAlignment = true;
667
668 /* VK_EXT_transform_feedback */
669 features->transformFeedback = true;
670 features->geometryStreams = true;
671
672 /* VK_EXT_vertex_input_dynamic_state */
673 features->vertexInputDynamicState = true;
674
675 /* VK_KHR_shader_relaxed_extended_instruction */
676 features->shaderRelaxedExtendedInstruction = true;
677 }
678
679 static void
tu_get_physical_device_properties_1_1(struct tu_physical_device * pdevice,struct vk_properties * p)680 tu_get_physical_device_properties_1_1(struct tu_physical_device *pdevice,
681 struct vk_properties *p)
682 {
683 memcpy(p->deviceUUID, pdevice->device_uuid, VK_UUID_SIZE);
684 memcpy(p->driverUUID, pdevice->driver_uuid, VK_UUID_SIZE);
685 memset(p->deviceLUID, 0, VK_LUID_SIZE);
686 p->deviceNodeMask = 0;
687 p->deviceLUIDValid = false;
688
689 p->subgroupSize = pdevice->info->a6xx.supports_double_threadsize ?
690 pdevice->info->threadsize_base * 2 : pdevice->info->threadsize_base;
691 p->subgroupSupportedStages = VK_SHADER_STAGE_COMPUTE_BIT;
692 p->subgroupSupportedOperations = VK_SUBGROUP_FEATURE_BASIC_BIT |
693 VK_SUBGROUP_FEATURE_VOTE_BIT |
694 VK_SUBGROUP_FEATURE_BALLOT_BIT |
695 VK_SUBGROUP_FEATURE_SHUFFLE_BIT |
696 VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT |
697 VK_SUBGROUP_FEATURE_ARITHMETIC_BIT;
698 if (pdevice->info->a6xx.has_getfiberid) {
699 p->subgroupSupportedStages |= VK_SHADER_STAGE_ALL_GRAPHICS;
700 p->subgroupSupportedOperations |= VK_SUBGROUP_FEATURE_QUAD_BIT;
701 }
702
703 p->subgroupQuadOperationsInAllStages = false;
704
705 p->pointClippingBehavior = VK_POINT_CLIPPING_BEHAVIOR_ALL_CLIP_PLANES;
706 p->maxMultiviewViewCount =
707 (pdevice->info->a6xx.has_hw_multiview || TU_DEBUG(NOCONFORM)) ? MAX_VIEWPORTS : 1;
708 p->maxMultiviewInstanceIndex = INT_MAX;
709 p->protectedNoFault = false;
710 /* Our largest descriptors are 2 texture descriptors, or a texture and
711 * sampler descriptor.
712 */
713 p->maxPerSetDescriptors = MAX_SET_SIZE / (2 * A6XX_TEX_CONST_DWORDS * 4);
714 /* Our buffer size fields allow only this much */
715 p->maxMemoryAllocationSize = 0xFFFFFFFFull;
716
717 }
718
719
720 static const size_t max_descriptor_set_size = MAX_SET_SIZE / (4 * A6XX_TEX_CONST_DWORDS);
721 static const VkSampleCountFlags sample_counts =
722 VK_SAMPLE_COUNT_1_BIT | VK_SAMPLE_COUNT_2_BIT | VK_SAMPLE_COUNT_4_BIT;
723
724 static void
tu_get_physical_device_properties_1_2(struct tu_physical_device * pdevice,struct vk_properties * p)725 tu_get_physical_device_properties_1_2(struct tu_physical_device *pdevice,
726 struct vk_properties *p)
727 {
728 p->driverID = VK_DRIVER_ID_MESA_TURNIP;
729 memset(p->driverName, 0, sizeof(p->driverName));
730 snprintf(p->driverName, VK_MAX_DRIVER_NAME_SIZE,
731 "turnip Mesa driver");
732 memset(p->driverInfo, 0, sizeof(p->driverInfo));
733 snprintf(p->driverInfo, VK_MAX_DRIVER_INFO_SIZE,
734 "Mesa " PACKAGE_VERSION MESA_GIT_SHA1);
735 p->conformanceVersion = (VkConformanceVersion) {
736 .major = 1,
737 .minor = 2,
738 .subminor = 7,
739 .patch = 1,
740 };
741
742 p->denormBehaviorIndependence =
743 VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL;
744 p->roundingModeIndependence =
745 VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL;
746
747 p->shaderDenormFlushToZeroFloat16 = true;
748 p->shaderDenormPreserveFloat16 = false;
749 p->shaderRoundingModeRTEFloat16 = true;
750 p->shaderRoundingModeRTZFloat16 = false;
751 p->shaderSignedZeroInfNanPreserveFloat16 = true;
752
753 p->shaderDenormFlushToZeroFloat32 = true;
754 p->shaderDenormPreserveFloat32 = false;
755 p->shaderRoundingModeRTEFloat32 = true;
756 p->shaderRoundingModeRTZFloat32 = false;
757 p->shaderSignedZeroInfNanPreserveFloat32 = true;
758
759 p->shaderDenormFlushToZeroFloat64 = false;
760 p->shaderDenormPreserveFloat64 = false;
761 p->shaderRoundingModeRTEFloat64 = false;
762 p->shaderRoundingModeRTZFloat64 = false;
763 p->shaderSignedZeroInfNanPreserveFloat64 = false;
764
765 p->shaderUniformBufferArrayNonUniformIndexingNative = true;
766 p->shaderSampledImageArrayNonUniformIndexingNative = true;
767 p->shaderStorageBufferArrayNonUniformIndexingNative = true;
768 p->shaderStorageImageArrayNonUniformIndexingNative = true;
769 p->shaderInputAttachmentArrayNonUniformIndexingNative = false;
770 p->robustBufferAccessUpdateAfterBind = false;
771 p->quadDivergentImplicitLod = false;
772
773 p->maxUpdateAfterBindDescriptorsInAllPools = max_descriptor_set_size;
774 p->maxPerStageDescriptorUpdateAfterBindSamplers = max_descriptor_set_size;
775 p->maxPerStageDescriptorUpdateAfterBindUniformBuffers = max_descriptor_set_size;
776 p->maxPerStageDescriptorUpdateAfterBindStorageBuffers = max_descriptor_set_size;
777 p->maxPerStageDescriptorUpdateAfterBindSampledImages = max_descriptor_set_size;
778 p->maxPerStageDescriptorUpdateAfterBindStorageImages = max_descriptor_set_size;
779 p->maxPerStageDescriptorUpdateAfterBindInputAttachments = MAX_RTS;
780 p->maxPerStageUpdateAfterBindResources = max_descriptor_set_size;
781 p->maxDescriptorSetUpdateAfterBindSamplers = max_descriptor_set_size;
782 p->maxDescriptorSetUpdateAfterBindUniformBuffers = max_descriptor_set_size;
783 p->maxDescriptorSetUpdateAfterBindUniformBuffersDynamic = MAX_DYNAMIC_UNIFORM_BUFFERS;
784 p->maxDescriptorSetUpdateAfterBindStorageBuffers = max_descriptor_set_size;
785 p->maxDescriptorSetUpdateAfterBindStorageBuffersDynamic = MAX_DYNAMIC_STORAGE_BUFFERS;
786 p->maxDescriptorSetUpdateAfterBindSampledImages = max_descriptor_set_size;
787 p->maxDescriptorSetUpdateAfterBindStorageImages = max_descriptor_set_size;
788 p->maxDescriptorSetUpdateAfterBindInputAttachments = MAX_RTS;
789
790 p->supportedDepthResolveModes = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT;
791 p->supportedStencilResolveModes = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT;
792 p->independentResolveNone = false;
793 p->independentResolve = false;
794
795 p->filterMinmaxSingleComponentFormats = true;
796 p->filterMinmaxImageComponentMapping = true;
797
798 p->maxTimelineSemaphoreValueDifference = UINT64_MAX;
799
800 p->framebufferIntegerColorSampleCounts = sample_counts;
801 }
802
803 static void
tu_get_physical_device_properties_1_3(struct tu_physical_device * pdevice,struct vk_properties * p)804 tu_get_physical_device_properties_1_3(struct tu_physical_device *pdevice,
805 struct vk_properties *p)
806 {
807 p->minSubgroupSize = pdevice->info->threadsize_base;
808 p->maxSubgroupSize = pdevice->info->a6xx.supports_double_threadsize ?
809 pdevice->info->threadsize_base * 2 : pdevice->info->threadsize_base;
810 p->maxComputeWorkgroupSubgroups = pdevice->info->max_waves;
811 p->requiredSubgroupSizeStages = VK_SHADER_STAGE_ALL;
812
813 p->maxInlineUniformBlockSize = MAX_INLINE_UBO_RANGE;
814 p->maxPerStageDescriptorInlineUniformBlocks = MAX_INLINE_UBOS;
815 p->maxPerStageDescriptorUpdateAfterBindInlineUniformBlocks = MAX_INLINE_UBOS;
816 p->maxDescriptorSetInlineUniformBlocks = MAX_INLINE_UBOS;
817 p->maxDescriptorSetUpdateAfterBindInlineUniformBlocks = MAX_INLINE_UBOS;
818 p->maxInlineUniformTotalSize = MAX_INLINE_UBOS * MAX_INLINE_UBO_RANGE;
819
820 p->integerDotProduct8BitUnsignedAccelerated = false;
821 p->integerDotProduct8BitSignedAccelerated = false;
822 p->integerDotProduct8BitMixedSignednessAccelerated = false;
823 p->integerDotProduct4x8BitPackedUnsignedAccelerated =
824 pdevice->info->a6xx.has_dp2acc;
825 /* TODO: we should be able to emulate 4x8BitPackedSigned fast enough */
826 p->integerDotProduct4x8BitPackedSignedAccelerated = false;
827 p->integerDotProduct4x8BitPackedMixedSignednessAccelerated =
828 pdevice->info->a6xx.has_dp2acc;
829 p->integerDotProduct16BitUnsignedAccelerated = false;
830 p->integerDotProduct16BitSignedAccelerated = false;
831 p->integerDotProduct16BitMixedSignednessAccelerated = false;
832 p->integerDotProduct32BitUnsignedAccelerated = false;
833 p->integerDotProduct32BitSignedAccelerated = false;
834 p->integerDotProduct32BitMixedSignednessAccelerated = false;
835 p->integerDotProduct64BitUnsignedAccelerated = false;
836 p->integerDotProduct64BitSignedAccelerated = false;
837 p->integerDotProduct64BitMixedSignednessAccelerated = false;
838 p->integerDotProductAccumulatingSaturating8BitUnsignedAccelerated = false;
839 p->integerDotProductAccumulatingSaturating8BitSignedAccelerated = false;
840 p->integerDotProductAccumulatingSaturating8BitMixedSignednessAccelerated = false;
841 p->integerDotProductAccumulatingSaturating4x8BitPackedUnsignedAccelerated =
842 pdevice->info->a6xx.has_dp2acc;
843 /* TODO: we should be able to emulate Saturating4x8BitPackedSigned fast enough */
844 p->integerDotProductAccumulatingSaturating4x8BitPackedSignedAccelerated = false;
845 p->integerDotProductAccumulatingSaturating4x8BitPackedMixedSignednessAccelerated =
846 pdevice->info->a6xx.has_dp2acc;
847 p->integerDotProductAccumulatingSaturating16BitUnsignedAccelerated = false;
848 p->integerDotProductAccumulatingSaturating16BitSignedAccelerated = false;
849 p->integerDotProductAccumulatingSaturating16BitMixedSignednessAccelerated = false;
850 p->integerDotProductAccumulatingSaturating32BitUnsignedAccelerated = false;
851 p->integerDotProductAccumulatingSaturating32BitSignedAccelerated = false;
852 p->integerDotProductAccumulatingSaturating32BitMixedSignednessAccelerated = false;
853 p->integerDotProductAccumulatingSaturating64BitUnsignedAccelerated = false;
854 p->integerDotProductAccumulatingSaturating64BitSignedAccelerated = false;
855 p->integerDotProductAccumulatingSaturating64BitMixedSignednessAccelerated = false;
856
857 p->storageTexelBufferOffsetAlignmentBytes = 64;
858 p->storageTexelBufferOffsetSingleTexelAlignment = true;
859 p->uniformTexelBufferOffsetAlignmentBytes = 64;
860 p->uniformTexelBufferOffsetSingleTexelAlignment = true;
861
862 /* The address space is 4GB for current kernels, so there's no point
863 * allowing a larger buffer. Our buffer sizes are 64-bit though, so
864 * GetBufferDeviceRequirements won't fall over if someone actually creates
865 * a 4GB buffer.
866 */
867 p->maxBufferSize = 1ull << 32;
868 }
869
870 static void
tu_get_properties(struct tu_physical_device * pdevice,struct vk_properties * props)871 tu_get_properties(struct tu_physical_device *pdevice,
872 struct vk_properties *props)
873 {
874 /* Limits */
875 props->maxImageDimension1D = (1 << 14);
876 props->maxImageDimension2D = (1 << 14);
877 props->maxImageDimension3D = (1 << 11);
878 props->maxImageDimensionCube = (1 << 14);
879 props->maxImageArrayLayers = (1 << 11);
880 props->maxTexelBufferElements = 128 * 1024 * 1024;
881 props->maxUniformBufferRange = MAX_UNIFORM_BUFFER_RANGE;
882 props->maxStorageBufferRange = MAX_STORAGE_BUFFER_RANGE;
883 props->maxPushConstantsSize = MAX_PUSH_CONSTANTS_SIZE;
884 props->maxMemoryAllocationCount = UINT32_MAX;
885 props->maxSamplerAllocationCount = 64 * 1024;
886 props->bufferImageGranularity = 64; /* A cache line */
887 props->sparseAddressSpaceSize = 0;
888 props->maxBoundDescriptorSets = pdevice->usable_sets;
889 props->maxPerStageDescriptorSamplers = max_descriptor_set_size;
890 props->maxPerStageDescriptorUniformBuffers = max_descriptor_set_size;
891 props->maxPerStageDescriptorStorageBuffers = max_descriptor_set_size;
892 props->maxPerStageDescriptorSampledImages = max_descriptor_set_size;
893 props->maxPerStageDescriptorStorageImages = max_descriptor_set_size;
894 props->maxPerStageDescriptorInputAttachments = MAX_RTS;
895 props->maxPerStageResources = max_descriptor_set_size;
896 props->maxDescriptorSetSamplers = max_descriptor_set_size;
897 props->maxDescriptorSetUniformBuffers = max_descriptor_set_size;
898 props->maxDescriptorSetUniformBuffersDynamic = MAX_DYNAMIC_UNIFORM_BUFFERS;
899 props->maxDescriptorSetStorageBuffers = max_descriptor_set_size;
900 props->maxDescriptorSetStorageBuffersDynamic = MAX_DYNAMIC_STORAGE_BUFFERS;
901 props->maxDescriptorSetSampledImages = max_descriptor_set_size;
902 props->maxDescriptorSetStorageImages = max_descriptor_set_size;
903 props->maxDescriptorSetInputAttachments = MAX_RTS;
904 props->maxVertexInputAttributes = pdevice->info->a6xx.vs_max_inputs_count;
905 props->maxVertexInputBindings = pdevice->info->a6xx.vs_max_inputs_count;
906 props->maxVertexInputAttributeOffset = 4095;
907 props->maxVertexInputBindingStride = 2048;
908 props->maxVertexOutputComponents = 128;
909 props->maxTessellationGenerationLevel = 64;
910 props->maxTessellationPatchSize = 32;
911 props->maxTessellationControlPerVertexInputComponents = 128;
912 props->maxTessellationControlPerVertexOutputComponents = 128;
913 props->maxTessellationControlPerPatchOutputComponents = 120;
914 props->maxTessellationControlTotalOutputComponents = 4096;
915 props->maxTessellationEvaluationInputComponents = 128;
916 props->maxTessellationEvaluationOutputComponents = 128;
917 props->maxGeometryShaderInvocations = 32;
918 props->maxGeometryInputComponents = 64;
919 props->maxGeometryOutputComponents = 128;
920 props->maxGeometryOutputVertices = 256;
921 props->maxGeometryTotalOutputComponents = 1024;
922 props->maxFragmentInputComponents = 124;
923 props->maxFragmentOutputAttachments = 8;
924 props->maxFragmentDualSrcAttachments = 1;
925 props->maxFragmentCombinedOutputResources = MAX_RTS + max_descriptor_set_size * 2;
926 props->maxComputeSharedMemorySize = pdevice->info->cs_shared_mem_size;
927 props->maxComputeWorkGroupCount[0] =
928 props->maxComputeWorkGroupCount[1] =
929 props->maxComputeWorkGroupCount[2] = 65535;
930 props->maxComputeWorkGroupInvocations = pdevice->info->a6xx.supports_double_threadsize ?
931 pdevice->info->threadsize_base * 2 * pdevice->info->max_waves :
932 pdevice->info->threadsize_base * pdevice->info->max_waves;
933 props->maxComputeWorkGroupSize[0] =
934 props->maxComputeWorkGroupSize[1] =
935 props->maxComputeWorkGroupSize[2] = 1024;
936 props->subPixelPrecisionBits = 8;
937 props->subTexelPrecisionBits = 8;
938 props->mipmapPrecisionBits = 8;
939 props->maxDrawIndexedIndexValue = UINT32_MAX;
940 props->maxDrawIndirectCount = UINT32_MAX;
941 props->maxSamplerLodBias = 4095.0 / 256.0; /* [-16, 15.99609375] */
942 props->maxSamplerAnisotropy = 16;
943 props->maxViewports =
944 (pdevice->info->a6xx.has_hw_multiview || TU_DEBUG(NOCONFORM)) ? MAX_VIEWPORTS : 1;
945 props->maxViewportDimensions[0] =
946 props->maxViewportDimensions[1] = MAX_VIEWPORT_SIZE;
947 props->viewportBoundsRange[0] = INT16_MIN;
948 props->viewportBoundsRange[1] = INT16_MAX;
949 props->viewportSubPixelBits = 8;
950 props->minMemoryMapAlignment = 4096; /* A page */
951 props->minTexelBufferOffsetAlignment = 64;
952 props->minUniformBufferOffsetAlignment = 64;
953 props->minStorageBufferOffsetAlignment = 4;
954 props->minTexelOffset = -16;
955 props->maxTexelOffset = 15;
956 props->minTexelGatherOffset = -32;
957 props->maxTexelGatherOffset = 31;
958 props->minInterpolationOffset = -0.5;
959 props->maxInterpolationOffset = 0.4375;
960 props->subPixelInterpolationOffsetBits = 4;
961 props->maxFramebufferWidth = (1 << 14);
962 props->maxFramebufferHeight = (1 << 14);
963 props->maxFramebufferLayers = (1 << 10);
964 props->framebufferColorSampleCounts = sample_counts;
965 props->framebufferDepthSampleCounts = sample_counts;
966 props->framebufferStencilSampleCounts = sample_counts;
967 props->framebufferNoAttachmentsSampleCounts = sample_counts;
968 props->maxColorAttachments = MAX_RTS;
969 props->sampledImageColorSampleCounts = sample_counts;
970 props->sampledImageIntegerSampleCounts = sample_counts;
971 props->sampledImageDepthSampleCounts = sample_counts;
972 props->sampledImageStencilSampleCounts = sample_counts;
973 props->storageImageSampleCounts = VK_SAMPLE_COUNT_1_BIT;
974 props->maxSampleMaskWords = 1;
975 props->timestampComputeAndGraphics = true;
976 props->timestampPeriod = 1000000000.0 / 19200000.0; /* CP_ALWAYS_ON_COUNTER is fixed 19.2MHz */
977 props->maxClipDistances = 8;
978 props->maxCullDistances = 8;
979 props->maxCombinedClipAndCullDistances = 8;
980 props->discreteQueuePriorities = 2;
981 props->pointSizeRange[0] = 1;
982 props->pointSizeRange[1] = 4092;
983 props->lineWidthRange[0] = pdevice->info->a6xx.line_width_min;
984 props->lineWidthRange[1] = pdevice->info->a6xx.line_width_max;
985 props->pointSizeGranularity = 0.0625;
986 props->lineWidthGranularity =
987 pdevice->info->a6xx.line_width_max == 1.0 ? 0.0 : 0.5;
988 props->strictLines = true;
989 props->standardSampleLocations = true;
990 props->optimalBufferCopyOffsetAlignment = 128;
991 props->optimalBufferCopyRowPitchAlignment = 128;
992 props->nonCoherentAtomSize = 64;
993
994 props->apiVersion =
995 (pdevice->info->a6xx.has_hw_multiview || TU_DEBUG(NOCONFORM)) ?
996 TU_API_VERSION : VK_MAKE_VERSION(1, 0, VK_HEADER_VERSION);
997 props->driverVersion = vk_get_driver_version();
998 props->vendorID = 0x5143;
999 props->deviceID = pdevice->dev_id.chip_id;
1000 props->deviceType = VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU;
1001
1002 /* sparse properties */
1003 props->sparseResidencyStandard2DBlockShape = { 0 };
1004 props->sparseResidencyStandard2DMultisampleBlockShape = { 0 };
1005 props->sparseResidencyStandard3DBlockShape = { 0 };
1006 props->sparseResidencyAlignedMipSize = { 0 };
1007 props->sparseResidencyNonResidentStrict = { 0 };
1008
1009 strcpy(props->deviceName, pdevice->name);
1010 memcpy(props->pipelineCacheUUID, pdevice->cache_uuid, VK_UUID_SIZE);
1011
1012 tu_get_physical_device_properties_1_1(pdevice, props);
1013 tu_get_physical_device_properties_1_2(pdevice, props);
1014 tu_get_physical_device_properties_1_3(pdevice, props);
1015
1016 /* VK_KHR_push_descriptor */
1017 props->maxPushDescriptors = MAX_PUSH_DESCRIPTORS;
1018
1019 /* VK_EXT_transform_feedback */
1020 props->maxTransformFeedbackStreams = IR3_MAX_SO_STREAMS;
1021 props->maxTransformFeedbackBuffers = IR3_MAX_SO_BUFFERS;
1022 props->maxTransformFeedbackBufferSize = UINT32_MAX;
1023 props->maxTransformFeedbackStreamDataSize = 512;
1024 props->maxTransformFeedbackBufferDataSize = 512;
1025 props->maxTransformFeedbackBufferDataStride = 512;
1026 props->transformFeedbackQueries = true;
1027 props->transformFeedbackStreamsLinesTriangles = true;
1028 props->transformFeedbackRasterizationStreamSelect = true;
1029 props->transformFeedbackDraw = true;
1030
1031 /* VK_EXT_sample_locations */
1032 props->sampleLocationSampleCounts =
1033 pdevice->vk.supported_extensions.EXT_sample_locations ? sample_counts : 0;
1034 props->maxSampleLocationGridSize = (VkExtent2D) { 1 , 1 };
1035 props->sampleLocationCoordinateRange[0] = SAMPLE_LOCATION_MIN;
1036 props->sampleLocationCoordinateRange[1] = SAMPLE_LOCATION_MAX;
1037 props->sampleLocationSubPixelBits = 4;
1038 props->variableSampleLocations = true;
1039
1040 /* VK_KHR_vertex_attribute_divisor */
1041 props->maxVertexAttribDivisor = UINT32_MAX;
1042 props->supportsNonZeroFirstInstance = true;
1043
1044 /* VK_EXT_custom_border_color */
1045 props->maxCustomBorderColorSamplers = TU_BORDER_COLOR_COUNT;
1046
1047 /* VK_KHR_performance_query */
1048 props->allowCommandBufferQueryCopies = false;
1049
1050 /* VK_EXT_robustness2 */
1051 /* see write_buffer_descriptor() */
1052 props->robustStorageBufferAccessSizeAlignment = 4;
1053 /* see write_ubo_descriptor() */
1054 props->robustUniformBufferAccessSizeAlignment = 16;
1055
1056 /* VK_EXT_provoking_vertex */
1057 props->provokingVertexModePerPipeline = true;
1058 props->transformFeedbackPreservesTriangleFanProvokingVertex = false;
1059
1060 /* VK_KHR_line_rasterization */
1061 props->lineSubPixelPrecisionBits = 8;
1062
1063 /* VK_EXT_physical_device_drm */
1064 props->drmHasPrimary = pdevice->has_master;
1065 props->drmPrimaryMajor = pdevice->master_major;
1066 props->drmPrimaryMinor = pdevice->master_minor;
1067
1068 props->drmHasRender = pdevice->has_local;
1069 props->drmRenderMajor = pdevice->local_major;
1070 props->drmRenderMinor = pdevice->local_minor;
1071
1072 /* VK_EXT_shader_module_identifier */
1073 STATIC_ASSERT(sizeof(vk_shaderModuleIdentifierAlgorithmUUID) ==
1074 sizeof(props->shaderModuleIdentifierAlgorithmUUID));
1075 memcpy(props->shaderModuleIdentifierAlgorithmUUID,
1076 vk_shaderModuleIdentifierAlgorithmUUID,
1077 sizeof(props->shaderModuleIdentifierAlgorithmUUID));
1078
1079 /* VK_EXT_map_memory_placed */
1080 os_get_page_size(&os_page_size);
1081 props->minPlacedMemoryMapAlignment = os_page_size;
1082
1083 /* VK_EXT_multi_draw */
1084 props->maxMultiDrawCount = 2048;
1085
1086 /* VK_EXT_nested_command_buffer */
1087 props->maxCommandBufferNestingLevel = UINT32_MAX;
1088
1089 /* VK_EXT_graphics_pipeline_library */
1090 props->graphicsPipelineLibraryFastLinking = true;
1091 props->graphicsPipelineLibraryIndependentInterpolationDecoration = true;
1092
1093 /* VK_EXT_extended_dynamic_state3 */
1094 props->dynamicPrimitiveTopologyUnrestricted = true;
1095
1096 /* VK_EXT_descriptor_buffer */
1097 props->combinedImageSamplerDescriptorSingleArray = true;
1098 props->bufferlessPushDescriptors = true;
1099 props->allowSamplerImageViewPostSubmitCreation = true;
1100 props->descriptorBufferOffsetAlignment = A6XX_TEX_CONST_DWORDS * 4;
1101 props->maxDescriptorBufferBindings = pdevice->usable_sets;
1102 props->maxResourceDescriptorBufferBindings = pdevice->usable_sets;
1103 props->maxSamplerDescriptorBufferBindings = pdevice->usable_sets;
1104 props->maxEmbeddedImmutableSamplerBindings = pdevice->usable_sets;
1105 props->maxEmbeddedImmutableSamplers = max_descriptor_set_size;
1106 props->bufferCaptureReplayDescriptorDataSize = 0;
1107 props->imageCaptureReplayDescriptorDataSize = 0;
1108 props->imageViewCaptureReplayDescriptorDataSize = 0;
1109 props->samplerCaptureReplayDescriptorDataSize = 0;
1110 props->accelerationStructureCaptureReplayDescriptorDataSize = 0;
1111 /* Note: these sizes must match descriptor_size() */
1112 props->samplerDescriptorSize = A6XX_TEX_CONST_DWORDS * 4;
1113 props->combinedImageSamplerDescriptorSize = 2 * A6XX_TEX_CONST_DWORDS * 4;
1114 props->sampledImageDescriptorSize = A6XX_TEX_CONST_DWORDS * 4;
1115 props->storageImageDescriptorSize = A6XX_TEX_CONST_DWORDS * 4;
1116 props->uniformTexelBufferDescriptorSize = A6XX_TEX_CONST_DWORDS * 4;
1117 props->robustUniformTexelBufferDescriptorSize = A6XX_TEX_CONST_DWORDS * 4;
1118 props->storageTexelBufferDescriptorSize = A6XX_TEX_CONST_DWORDS * 4;
1119 props->robustStorageTexelBufferDescriptorSize = A6XX_TEX_CONST_DWORDS * 4;
1120 props->uniformBufferDescriptorSize = A6XX_TEX_CONST_DWORDS * 4;
1121 props->robustUniformBufferDescriptorSize = A6XX_TEX_CONST_DWORDS * 4;
1122 props->storageBufferDescriptorSize = A6XX_TEX_CONST_DWORDS * 4 * (1 +
1123 COND(pdevice->info->a6xx.storage_16bit && !pdevice->info->a6xx.has_isam_v, 1) +
1124 COND(pdevice->info->a7xx.storage_8bit, 1));
1125 props->robustStorageBufferDescriptorSize =
1126 props->storageBufferDescriptorSize;
1127 props->inputAttachmentDescriptorSize = TU_DEBUG(DYNAMIC) ?
1128 A6XX_TEX_CONST_DWORDS * 4 : 0;
1129 props->maxSamplerDescriptorBufferRange = ~0ull;
1130 props->maxResourceDescriptorBufferRange = ~0ull;
1131 props->samplerDescriptorBufferAddressSpaceSize = ~0ull;
1132 props->resourceDescriptorBufferAddressSpaceSize = ~0ull;
1133 props->descriptorBufferAddressSpaceSize = ~0ull;
1134 props->combinedImageSamplerDensityMapDescriptorSize = 2 * A6XX_TEX_CONST_DWORDS * 4;
1135
1136 /* VK_EXT_legacy_vertex_attributes */
1137 props->nativeUnalignedPerformance = true;
1138
1139 /* VK_EXT_fragment_density_map*/
1140 props->minFragmentDensityTexelSize = (VkExtent2D) { MIN_FDM_TEXEL_SIZE, MIN_FDM_TEXEL_SIZE };
1141 props->maxFragmentDensityTexelSize = (VkExtent2D) { MAX_FDM_TEXEL_SIZE, MAX_FDM_TEXEL_SIZE };
1142 props->fragmentDensityInvocations = false;
1143
1144 /* VK_KHR_maintenance5 */
1145 props->earlyFragmentMultisampleCoverageAfterSampleCounting = true;
1146 props->earlyFragmentSampleMaskTestBeforeSampleCounting = true;
1147 props->depthStencilSwizzleOneSupport = true;
1148 props->polygonModePointSize = true;
1149 props->nonStrictWideLinesUseParallelogram = false;
1150 props->nonStrictSinglePixelWideLinesUseParallelogram = false;
1151
1152 /* VK_KHR_maintenance6 */
1153 props->blockTexelViewCompatibleMultipleLayers = true;
1154 props->maxCombinedImageSamplerDescriptorCount = 1;
1155 props->fragmentShadingRateClampCombinerInputs = false; /* TODO */
1156 }
1157
1158 static const struct vk_pipeline_cache_object_ops *const cache_import_ops[] = {
1159 &tu_shader_ops,
1160 &tu_nir_shaders_ops,
1161 NULL,
1162 };
1163
1164 VkResult
tu_physical_device_init(struct tu_physical_device * device,struct tu_instance * instance)1165 tu_physical_device_init(struct tu_physical_device *device,
1166 struct tu_instance *instance)
1167 {
1168 VkResult result = VK_SUCCESS;
1169
1170 const char *fd_name = fd_dev_name(&device->dev_id);
1171 if (!fd_name) {
1172 return vk_startup_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER,
1173 "device (chip_id = %" PRIX64
1174 ", gpu_id = %u) is unsupported",
1175 device->dev_id.chip_id, device->dev_id.gpu_id);
1176 }
1177
1178 if (strncmp(fd_name, "FD", 2) == 0) {
1179 device->name = vk_asprintf(&instance->vk.alloc,
1180 VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE,
1181 "Turnip Adreno (TM) %s", &fd_name[2]);
1182 } else {
1183 device->name = vk_strdup(&instance->vk.alloc, fd_name,
1184 VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
1185
1186 }
1187 if (!device->name) {
1188 return vk_startup_errorf(instance, VK_ERROR_OUT_OF_HOST_MEMORY,
1189 "device name alloc fail");
1190 }
1191
1192 const struct fd_dev_info info = fd_dev_info(&device->dev_id);
1193 if (!info.chip) {
1194 result = vk_startup_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
1195 "device %s is unsupported", device->name);
1196 goto fail_free_name;
1197 }
1198 switch (fd_dev_gen(&device->dev_id)) {
1199 case 6:
1200 case 7: {
1201 device->dev_info = info;
1202 device->info = &device->dev_info;
1203 uint32_t depth_cache_size =
1204 device->info->num_ccu * device->info->a6xx.sysmem_per_ccu_depth_cache_size;
1205 uint32_t color_cache_size =
1206 (device->info->num_ccu *
1207 device->info->a6xx.sysmem_per_ccu_color_cache_size);
1208 uint32_t color_cache_size_gmem =
1209 color_cache_size /
1210 (1 << device->info->a6xx.gmem_ccu_color_cache_fraction);
1211
1212 device->ccu_depth_offset_bypass = 0;
1213 device->ccu_offset_bypass =
1214 device->ccu_depth_offset_bypass + depth_cache_size;
1215
1216 if (device->info->a7xx.has_gmem_vpc_attr_buf) {
1217 device->vpc_attr_buf_size_bypass =
1218 device->info->a7xx.sysmem_vpc_attr_buf_size;
1219 device->vpc_attr_buf_offset_bypass =
1220 device->ccu_offset_bypass + color_cache_size;
1221
1222 device->vpc_attr_buf_size_gmem =
1223 device->info->a7xx.gmem_vpc_attr_buf_size;
1224 device->vpc_attr_buf_offset_gmem =
1225 device->gmem_size -
1226 (device->vpc_attr_buf_size_gmem * device->info->num_ccu);
1227
1228 device->ccu_offset_gmem =
1229 device->vpc_attr_buf_offset_gmem - color_cache_size_gmem;
1230
1231 device->usable_gmem_size_gmem = device->vpc_attr_buf_offset_gmem;
1232 } else {
1233 device->ccu_offset_gmem = device->gmem_size - color_cache_size_gmem;
1234 device->usable_gmem_size_gmem = device->gmem_size;
1235 }
1236
1237 if (instance->reserve_descriptor_set) {
1238 device->usable_sets = device->reserved_set_idx = device->info->a6xx.max_sets - 1;
1239 } else {
1240 device->usable_sets = device->info->a6xx.max_sets;
1241 device->reserved_set_idx = -1;
1242 }
1243 break;
1244 }
1245 default:
1246 result = vk_startup_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
1247 "device %s is unsupported", device->name);
1248 goto fail_free_name;
1249 }
1250 if (tu_device_get_cache_uuid(device, device->cache_uuid)) {
1251 result = vk_startup_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
1252 "cannot generate UUID");
1253 goto fail_free_name;
1254 }
1255
1256 device->level1_dcache_size = tu_get_l1_dcache_size();
1257 device->has_cached_non_coherent_memory =
1258 device->level1_dcache_size > 0 && !DETECT_ARCH_ARM;
1259
1260 device->memory.type_count = 1;
1261 device->memory.types[0] =
1262 VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
1263 VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
1264 VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
1265
1266 if (device->has_cached_coherent_memory) {
1267 device->memory.types[device->memory.type_count] =
1268 VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
1269 VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
1270 VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
1271 VK_MEMORY_PROPERTY_HOST_CACHED_BIT;
1272 device->memory.type_count++;
1273 }
1274
1275 if (device->has_cached_non_coherent_memory) {
1276 device->memory.types[device->memory.type_count] =
1277 VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
1278 VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
1279 VK_MEMORY_PROPERTY_HOST_CACHED_BIT;
1280 device->memory.type_count++;
1281 }
1282
1283 fd_get_driver_uuid(device->driver_uuid);
1284 fd_get_device_uuid(device->device_uuid, &device->dev_id);
1285
1286 struct vk_physical_device_dispatch_table dispatch_table;
1287 vk_physical_device_dispatch_table_from_entrypoints(
1288 &dispatch_table, &tu_physical_device_entrypoints, true);
1289 vk_physical_device_dispatch_table_from_entrypoints(
1290 &dispatch_table, &wsi_physical_device_entrypoints, false);
1291
1292 result = vk_physical_device_init(&device->vk, &instance->vk,
1293 NULL, NULL, NULL, /* We set up extensions later */
1294 &dispatch_table);
1295 if (result != VK_SUCCESS)
1296 goto fail_free_name;
1297
1298 get_device_extensions(device, &device->vk.supported_extensions);
1299 tu_get_features(device, &device->vk.supported_features);
1300 tu_get_properties(device, &device->vk.properties);
1301
1302 device->vk.supported_sync_types = device->sync_types;
1303
1304 #ifdef TU_USE_WSI_PLATFORM
1305 result = tu_wsi_init(device);
1306 if (result != VK_SUCCESS) {
1307 vk_startup_errorf(instance, result, "WSI init failure");
1308 vk_physical_device_finish(&device->vk);
1309 goto fail_free_name;
1310 }
1311 #endif
1312
1313 /* The gpu id is already embedded in the uuid so we just pass "tu"
1314 * when creating the cache.
1315 */
1316 char buf[VK_UUID_SIZE * 2 + 1];
1317 mesa_bytes_to_hex(buf, device->cache_uuid, VK_UUID_SIZE);
1318 device->vk.disk_cache = disk_cache_create(device->name, buf, 0);
1319
1320 device->vk.pipeline_cache_import_ops = cache_import_ops;
1321
1322 return VK_SUCCESS;
1323
1324 fail_free_name:
1325 vk_free(&instance->vk.alloc, (void *)device->name);
1326 return result;
1327 }
1328
1329 static void
tu_physical_device_finish(struct tu_physical_device * device)1330 tu_physical_device_finish(struct tu_physical_device *device)
1331 {
1332 #ifdef TU_USE_WSI_PLATFORM
1333 tu_wsi_finish(device);
1334 #endif
1335
1336 close(device->local_fd);
1337 if (device->master_fd != -1)
1338 close(device->master_fd);
1339
1340 if (device->kgsl_dma_fd != -1)
1341 close(device->kgsl_dma_fd);
1342
1343 disk_cache_destroy(device->vk.disk_cache);
1344 vk_free(&device->instance->vk.alloc, (void *)device->name);
1345
1346 vk_physical_device_finish(&device->vk);
1347 }
1348
1349 static void
tu_destroy_physical_device(struct vk_physical_device * device)1350 tu_destroy_physical_device(struct vk_physical_device *device)
1351 {
1352 tu_physical_device_finish((struct tu_physical_device *) device);
1353 vk_free(&device->instance->alloc, device);
1354 }
1355
1356 static const driOptionDescription tu_dri_options[] = {
1357 DRI_CONF_SECTION_PERFORMANCE
1358 DRI_CONF_VK_X11_OVERRIDE_MIN_IMAGE_COUNT(0)
1359 DRI_CONF_VK_KHR_PRESENT_WAIT(false)
1360 DRI_CONF_VK_X11_STRICT_IMAGE_COUNT(false)
1361 DRI_CONF_VK_X11_ENSURE_MIN_IMAGE_COUNT(false)
1362 DRI_CONF_VK_XWAYLAND_WAIT_READY(false)
1363 DRI_CONF_SECTION_END
1364
1365 DRI_CONF_SECTION_DEBUG
1366 DRI_CONF_VK_WSI_FORCE_BGRA8_UNORM_FIRST(false)
1367 DRI_CONF_VK_WSI_FORCE_SWAPCHAIN_TO_CURRENT_EXTENT(false)
1368 DRI_CONF_VK_X11_IGNORE_SUBOPTIMAL(false)
1369 DRI_CONF_VK_DONT_CARE_AS_LOAD(false)
1370 DRI_CONF_SECTION_END
1371
1372 DRI_CONF_SECTION_MISCELLANEOUS
1373 DRI_CONF_DISABLE_CONSERVATIVE_LRZ(false)
1374 DRI_CONF_TU_DONT_RESERVE_DESCRIPTOR_SET(false)
1375 DRI_CONF_TU_ALLOW_OOB_INDIRECT_UBO_LOADS(false)
1376 DRI_CONF_TU_DISABLE_D24S8_BORDER_COLOR_WORKAROUND(false)
1377 DRI_CONF_SECTION_END
1378 };
1379
1380 static void
1381 tu_init_dri_options(struct tu_instance *instance)
1382 {
1383 driParseOptionInfo(&instance->available_dri_options, tu_dri_options,
1384 ARRAY_SIZE(tu_dri_options));
1385 driParseConfigFiles(&instance->dri_options, &instance->available_dri_options, 0, "turnip", NULL, NULL,
1386 instance->vk.app_info.app_name, instance->vk.app_info.app_version,
1387 instance->vk.app_info.engine_name, instance->vk.app_info.engine_version);
1388
1389 instance->dont_care_as_load =
1390 driQueryOptionb(&instance->dri_options, "vk_dont_care_as_load");
1391 instance->conservative_lrz =
1392 !driQueryOptionb(&instance->dri_options, "disable_conservative_lrz");
1393 instance->reserve_descriptor_set =
1394 !driQueryOptionb(&instance->dri_options, "tu_dont_reserve_descriptor_set");
1395 instance->allow_oob_indirect_ubo_loads =
1396 driQueryOptionb(&instance->dri_options, "tu_allow_oob_indirect_ubo_loads");
1397 instance->disable_d24s8_border_color_workaround =
1398 driQueryOptionb(&instance->dri_options, "tu_disable_d24s8_border_color_workaround");
1399 }
1400
1401 static uint32_t instance_count = 0;
1402
1403 VKAPI_ATTR VkResult VKAPI_CALL
1404 tu_CreateInstance(const VkInstanceCreateInfo *pCreateInfo,
1405 const VkAllocationCallbacks *pAllocator,
1406 VkInstance *pInstance)
1407 {
1408 struct tu_instance *instance;
1409 VkResult result;
1410
1411 tu_env_init();
1412
1413 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO);
1414
1415 if (pAllocator == NULL)
1416 pAllocator = vk_default_allocator();
1417
1418 instance = (struct tu_instance *) vk_zalloc(
1419 pAllocator, sizeof(*instance), 8, VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
1420
1421 if (!instance)
1422 return vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);
1423
1424 struct vk_instance_dispatch_table dispatch_table;
1425 vk_instance_dispatch_table_from_entrypoints(
1426 &dispatch_table, &tu_instance_entrypoints, true);
1427 vk_instance_dispatch_table_from_entrypoints(
1428 &dispatch_table, &wsi_instance_entrypoints, false);
1429
1430 result = vk_instance_init(&instance->vk,
1431 &tu_instance_extensions_supported,
1432 &dispatch_table,
1433 pCreateInfo, pAllocator);
1434 if (result != VK_SUCCESS) {
1435 vk_free(pAllocator, instance);
1436 return vk_error(NULL, result);
1437 }
1438
1439 instance->vk.physical_devices.try_create_for_drm =
1440 tu_physical_device_try_create;
1441 instance->vk.physical_devices.enumerate = tu_enumerate_devices;
1442 instance->vk.physical_devices.destroy = tu_destroy_physical_device;
1443
1444 instance->instance_idx = p_atomic_fetch_add(&instance_count, 1);
1445 if (TU_DEBUG(STARTUP))
1446 mesa_logi("Created an instance");
1447
1448 VG(VALGRIND_CREATE_MEMPOOL(instance, 0, false));
1449
1450 tu_init_dri_options(instance);
1451
1452 *pInstance = tu_instance_to_handle(instance);
1453
1454 #ifdef HAVE_PERFETTO
1455 tu_perfetto_init();
1456 #endif
1457
1458 util_gpuvis_init();
1459
1460 return VK_SUCCESS;
1461 }
1462
1463 VKAPI_ATTR void VKAPI_CALL
1464 tu_DestroyInstance(VkInstance _instance,
1465 const VkAllocationCallbacks *pAllocator)
1466 {
1467 VK_FROM_HANDLE(tu_instance, instance, _instance);
1468
1469 if (!instance)
1470 return;
1471
1472 VG(VALGRIND_DESTROY_MEMPOOL(instance));
1473
1474 driDestroyOptionCache(&instance->dri_options);
1475 driDestroyOptionInfo(&instance->available_dri_options);
1476
1477 vk_instance_finish(&instance->vk);
1478 vk_free(&instance->vk.alloc, instance);
1479 }
1480
1481 static const VkQueueFamilyProperties tu_queue_family_properties = {
1482 .queueFlags =
1483 VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT | VK_QUEUE_TRANSFER_BIT,
1484 .queueCount = 1,
1485 .timestampValidBits = 48,
1486 .minImageTransferGranularity = { 1, 1, 1 },
1487 };
1488
1489 static void
1490 tu_physical_device_get_global_priority_properties(const struct tu_physical_device *pdevice,
1491 VkQueueFamilyGlobalPriorityPropertiesKHR *props)
1492 {
1493 props->priorityCount = MIN2(pdevice->submitqueue_priority_count, 3);
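/* At most three levels (LOW/MEDIUM/HIGH) are reported regardless of how many
 * kernel submitqueue priorities exist; REALTIME is never reported here.
 */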
1494 switch (props->priorityCount) {
1495 case 1:
1496 props->priorities[0] = VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR;
1497 break;
1498 case 2:
1499 props->priorities[0] = VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR;
1500 props->priorities[1] = VK_QUEUE_GLOBAL_PRIORITY_HIGH_KHR;
1501 break;
1502 case 3:
1503 props->priorities[0] = VK_QUEUE_GLOBAL_PRIORITY_LOW_KHR;
1504 props->priorities[1] = VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR;
1505 props->priorities[2] = VK_QUEUE_GLOBAL_PRIORITY_HIGH_KHR;
1506 break;
1507 default:
1508 unreachable("unexpected priority count");
1509 break;
1510 }
1511 }
1512
1513 static int
1514 tu_physical_device_get_submitqueue_priority(const struct tu_physical_device *pdevice,
1515 VkQueueGlobalPriorityKHR global_priority,
1516 bool global_priority_query)
1517 {
1518 if (global_priority_query) {
1519 VkQueueFamilyGlobalPriorityPropertiesKHR props;
1520 tu_physical_device_get_global_priority_properties(pdevice, &props);
1521
1522 bool valid = false;
1523 for (uint32_t i = 0; i < props.priorityCount; i++) {
1524 if (props.priorities[i] == global_priority) {
1525 valid = true;
1526 break;
1527 }
1528 }
1529
1530 if (!valid)
1531 return -1;
1532 }
1533
1534 /* Valid values are from 0 to (pdevice->submitqueue_priority_count - 1),
1535 * with 0 being the highest priority. This matches what freedreno does.
1536 */
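/* Worked example (illustrative): with submitqueue_priority_count == 3,
 * MEDIUM maps to 1, priorities below MEDIUM map to 2 (lowest) and
 * priorities above MEDIUM map to 0 (highest).
 */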
1537 int priority;
1538 if (global_priority == VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR)
1539 priority = pdevice->submitqueue_priority_count / 2;
1540 else if (global_priority < VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR)
1541 priority = pdevice->submitqueue_priority_count - 1;
1542 else
1543 priority = 0;
1544
1545 return priority;
1546 }
1547
1548 VKAPI_ATTR void VKAPI_CALL
1549 tu_GetPhysicalDeviceQueueFamilyProperties2(
1550 VkPhysicalDevice physicalDevice,
1551 uint32_t *pQueueFamilyPropertyCount,
1552 VkQueueFamilyProperties2 *pQueueFamilyProperties)
1553 {
1554 VK_FROM_HANDLE(tu_physical_device, pdevice, physicalDevice);
1555
1556 VK_OUTARRAY_MAKE_TYPED(VkQueueFamilyProperties2, out,
1557 pQueueFamilyProperties, pQueueFamilyPropertyCount);
1558
1559 vk_outarray_append_typed(VkQueueFamilyProperties2, &out, p)
1560 {
1561 p->queueFamilyProperties = tu_queue_family_properties;
1562
1563 vk_foreach_struct(ext, p->pNext) {
1564 switch (ext->sType) {
1565 case VK_STRUCTURE_TYPE_QUEUE_FAMILY_GLOBAL_PRIORITY_PROPERTIES_KHR: {
1566 VkQueueFamilyGlobalPriorityPropertiesKHR *props =
1567 (VkQueueFamilyGlobalPriorityPropertiesKHR *) ext;
1568 tu_physical_device_get_global_priority_properties(pdevice, props);
1569 break;
1570 }
1571 default:
1572 break;
1573 }
1574 }
1575 }
1576 }
1577
1578 uint64_t
1579 tu_get_system_heap_size(struct tu_physical_device *physical_device)
1580 {
1581 uint64_t total_ram = 0;
1582 ASSERTED bool has_physical_memory =
1583 os_get_total_physical_memory(&total_ram);
1584 assert(has_physical_memory);
1585
1586    /* We don't want to burn too much RAM with the GPU. If the user has 4GiB
1587     * or less, we use at most half. If they have more than 4GiB, we use 3/4.
1588     */
1589 uint64_t available_ram;
1590 if (total_ram <= 4ull * 1024ull * 1024ull * 1024ull)
1591 available_ram = total_ram / 2;
1592 else
1593 available_ram = total_ram * 3 / 4;
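/* Illustrative numbers: 2GiB of RAM yields a 1GiB heap, while 16GiB of RAM
 * yields a 12GiB heap (possibly clamped further by va_size below).
 */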
1594
1595 if (physical_device->va_size)
1596 available_ram = MIN2(available_ram, physical_device->va_size);
1597
1598 return available_ram;
1599 }
1600
1601 static VkDeviceSize
1602 tu_get_budget_memory(struct tu_physical_device *physical_device)
1603 {
1604 uint64_t heap_size = physical_device->heap.size;
1605 uint64_t heap_used = physical_device->heap.used;
1606 uint64_t sys_available;
1607 ASSERTED bool has_available_memory =
1608 os_get_available_system_memory(&sys_available);
1609 assert(has_available_memory);
1610
1611 if (physical_device->va_size)
1612 sys_available = MIN2(sys_available, physical_device->va_size);
1613
1614 /*
1615 * Let's not incite the app to starve the system: report at most 90% of
1616 * available system memory.
1617 */
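/* Illustrative numbers: with heap_size = 12GiB, heap_used = 1GiB and
 * sys_available = 4GiB, the reported budget is
 * MIN2(12GiB, 1GiB + 3.6GiB) = 4.6GiB.
 */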
1618 uint64_t heap_available = sys_available * 9 / 10;
1619 return MIN2(heap_size, heap_used + heap_available);
1620 }
1621
1622 VKAPI_ATTR void VKAPI_CALL
1623 tu_GetPhysicalDeviceMemoryProperties2(VkPhysicalDevice pdev,
1624 VkPhysicalDeviceMemoryProperties2 *props2)
1625 {
1626 VK_FROM_HANDLE(tu_physical_device, physical_device, pdev);
1627
1628 VkPhysicalDeviceMemoryProperties *props = &props2->memoryProperties;
1629 props->memoryHeapCount = 1;
1630 props->memoryHeaps[0].size = physical_device->heap.size;
1631 props->memoryHeaps[0].flags = physical_device->heap.flags;
1632
1633 props->memoryTypeCount = physical_device->memory.type_count;
1634 for (uint32_t i = 0; i < physical_device->memory.type_count; i++) {
1635 props->memoryTypes[i] = (VkMemoryType) {
1636 .propertyFlags = physical_device->memory.types[i],
1637 .heapIndex = 0,
1638 };
1639 }
1640
1641 vk_foreach_struct(ext, props2->pNext)
1642 {
1643 switch (ext->sType) {
1644 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT: {
1645 VkPhysicalDeviceMemoryBudgetPropertiesEXT *memory_budget_props =
1646 (VkPhysicalDeviceMemoryBudgetPropertiesEXT *) ext;
1647 memory_budget_props->heapUsage[0] = physical_device->heap.used;
1648 memory_budget_props->heapBudget[0] = tu_get_budget_memory(physical_device);
1649
1650 /* The heapBudget and heapUsage values must be zero for array elements
1651 * greater than or equal to VkPhysicalDeviceMemoryProperties::memoryHeapCount
1652 */
1653 for (unsigned i = 1; i < VK_MAX_MEMORY_HEAPS; i++) {
1654 memory_budget_props->heapBudget[i] = 0u;
1655 memory_budget_props->heapUsage[i] = 0u;
1656 }
1657 break;
1658 }
1659 default:
1660 break;
1661 }
1662 }
1663 }
1664
1665 static VkResult
1666 tu_queue_init(struct tu_device *device,
1667 struct tu_queue *queue,
1668 int idx,
1669 const VkDeviceQueueCreateInfo *create_info,
1670 bool global_priority_query)
1671 {
1672 const VkDeviceQueueGlobalPriorityCreateInfoKHR *priority_info =
1673 vk_find_struct_const(create_info->pNext,
1674 DEVICE_QUEUE_GLOBAL_PRIORITY_CREATE_INFO_KHR);
1675 const enum VkQueueGlobalPriorityKHR global_priority = priority_info ?
1676 priority_info->globalPriority : VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR;
1677
1678 const int priority = tu_physical_device_get_submitqueue_priority(
1679 device->physical_device, global_priority, global_priority_query);
1680 if (priority < 0) {
1681 return vk_startup_errorf(device->instance, VK_ERROR_INITIALIZATION_FAILED,
1682 "invalid global priority");
1683 }
1684
1685 VkResult result = vk_queue_init(&queue->vk, &device->vk, create_info, idx);
1686 if (result != VK_SUCCESS)
1687 return result;
1688
1689 queue->device = device;
1690 queue->priority = priority;
1691 queue->vk.driver_submit = tu_queue_submit;
1692
1693 int ret = tu_drm_submitqueue_new(device, priority, &queue->msm_queue_id);
1694 if (ret)
1695 return vk_startup_errorf(device->instance, VK_ERROR_INITIALIZATION_FAILED,
1696 "submitqueue create failed");
1697
1698 queue->fence = -1;
1699
1700 return VK_SUCCESS;
1701 }
1702
1703 static void
1704 tu_queue_finish(struct tu_queue *queue)
1705 {
1706 vk_queue_finish(&queue->vk);
1707 tu_drm_submitqueue_close(queue->device, queue->msm_queue_id);
1708 }
1709
1710 uint64_t
1711 tu_device_ticks_to_ns(struct tu_device *dev, uint64_t ts)
1712 {
1713    /* This is based on the 19.2 MHz always-on RBBM timer.
1714     *
1715     * TODO: we should probably query this value from the kernel.
1716     */
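/* With integer division 1000000000 / 19200000 == 52, i.e. ~52 ns per tick
 * (the exact period is ~52.08 ns, so the conversion is about 0.16% off).
 */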
1717 return ts * (1000000000 / 19200000);
1718 }
1719
1720 struct u_trace_context *
1721 tu_device_get_u_trace(struct tu_device *device)
1722 {
1723 return &device->trace_context;
1724 }
1725
1726 static void*
1727 tu_trace_create_buffer(struct u_trace_context *utctx, uint64_t size_B)
1728 {
1729 struct tu_device *device =
1730 container_of(utctx, struct tu_device, trace_context);
1731
1732 struct tu_bo *bo;
1733 tu_bo_init_new(device, NULL, &bo, size_B, TU_BO_ALLOC_INTERNAL_RESOURCE, "trace");
1734 tu_bo_map(device, bo, NULL);
1735
1736 return bo;
1737 }
1738
1739 static void
1740 tu_trace_destroy_buffer(struct u_trace_context *utctx, void *timestamps)
1741 {
1742 struct tu_device *device =
1743 container_of(utctx, struct tu_device, trace_context);
1744 struct tu_bo *bo = (struct tu_bo *) timestamps;
1745
1746 tu_bo_finish(device, bo);
1747 }
1748
1749 template <chip CHIP>
1750 static void
1751 tu_trace_record_ts(struct u_trace *ut, void *cs, void *timestamps,
1752 uint64_t offset_B, uint32_t)
1753 {
1754 struct tu_bo *bo = (struct tu_bo *) timestamps;
1755 struct tu_cs *ts_cs = (struct tu_cs *) cs;
1756
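/* Both paths ask the CP to write an always-on-timer timestamp to
 * bo->iova + offset_B; only the packet encoding differs between a6xx
 * (CP_EVENT_WRITE) and a7xx (CP_EVENT_WRITE7).
 */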
1757 if (CHIP == A6XX) {
1758 tu_cs_emit_pkt7(ts_cs, CP_EVENT_WRITE, 4);
1759 tu_cs_emit(ts_cs, CP_EVENT_WRITE_0_EVENT(RB_DONE_TS) |
1760 CP_EVENT_WRITE_0_TIMESTAMP);
1761 tu_cs_emit_qw(ts_cs, bo->iova + offset_B);
1762 tu_cs_emit(ts_cs, 0x00000000);
1763 } else {
1764 tu_cs_emit_pkt7(ts_cs, CP_EVENT_WRITE7, 3);
1765 tu_cs_emit(ts_cs, CP_EVENT_WRITE7_0(.event = RB_DONE_TS,
1766 .write_src = EV_WRITE_ALWAYSON,
1767 .write_dst = EV_DST_RAM,
1768 .write_enabled = true)
1769 .value);
1770 tu_cs_emit_qw(ts_cs, bo->iova + offset_B);
1771 }
1772 }
1773
1774 static uint64_t
1775 tu_trace_read_ts(struct u_trace_context *utctx,
1776 void *timestamps, uint64_t offset_B, void *flush_data)
1777 {
1778 struct tu_device *device =
1779 container_of(utctx, struct tu_device, trace_context);
1780 struct tu_bo *bo = (struct tu_bo *) timestamps;
1781 struct tu_u_trace_submission_data *submission_data =
1782 (struct tu_u_trace_submission_data *) flush_data;
1783
1784 /* Only need to stall on results for the first entry: */
1785 if (offset_B == 0) {
1786 tu_device_wait_u_trace(device, submission_data->syncobj);
1787 }
1788
1789 if (tu_bo_map(device, bo, NULL) != VK_SUCCESS) {
1790 return U_TRACE_NO_TIMESTAMP;
1791 }
1792
1793 uint64_t *ts = (uint64_t *) ((char *)bo->map + offset_B);
1794
1795 /* Don't translate the no-timestamp marker: */
1796 if (*ts == U_TRACE_NO_TIMESTAMP)
1797 return U_TRACE_NO_TIMESTAMP;
1798
1799 return tu_device_ticks_to_ns(device, *ts);
1800 }
1801
1802 static void
1803 tu_trace_delete_flush_data(struct u_trace_context *utctx, void *flush_data)
1804 {
1805 struct tu_device *device =
1806 container_of(utctx, struct tu_device, trace_context);
1807 struct tu_u_trace_submission_data *submission_data =
1808 (struct tu_u_trace_submission_data *) flush_data;
1809
1810 tu_u_trace_submission_data_finish(device, submission_data);
1811 }
1812
1813 void
1814 tu_copy_buffer(struct u_trace_context *utctx, void *cmdstream,
1815 void *ts_from, uint64_t from_offset_B,
1816 void *ts_to, uint64_t to_offset_B,
1817 uint64_t size_B)
1818 {
1819 struct tu_cs *cs = (struct tu_cs *) cmdstream;
1820 struct tu_bo *bo_from = (struct tu_bo *) ts_from;
1821 struct tu_bo *bo_to = (struct tu_bo *) ts_to;
1822
1823 tu_cs_emit_pkt7(cs, CP_MEMCPY, 5);
1824 tu_cs_emit(cs, size_B / sizeof(uint32_t));
1825 tu_cs_emit_qw(cs, bo_from->iova + from_offset_B);
1826 tu_cs_emit_qw(cs, bo_to->iova + to_offset_B);
1827 }
1828
1829 static void
1830 tu_trace_capture_data(struct u_trace *ut,
1831 void *cs,
1832 void *dst_buffer,
1833 uint64_t dst_offset_B,
1834 void *src_buffer,
1835 uint64_t src_offset_B,
1836 uint32_t size_B)
1837 {
1838 if (src_buffer)
1839 tu_copy_buffer(ut->utctx, cs, src_buffer, src_offset_B, dst_buffer,
1840 dst_offset_B, size_B);
1841 }
1842
1843 static const void *
1844 tu_trace_get_data(struct u_trace_context *utctx,
1845 void *buffer,
1846 uint64_t offset_B,
1847 uint32_t size_B)
1848 {
1849 struct tu_bo *bo = (struct tu_bo *) buffer;
1850 return (char *) bo->map + offset_B;
1851 }
1852
1853 /* Special helpers instead of u_trace_begin_iterator()/u_trace_end_iterator()
1854 * that ignore tracepoints at the beginning/end that are part of a
1855 * suspend/resume chain.
1856 */
1857 static struct u_trace_iterator
1858 tu_cmd_begin_iterator(struct tu_cmd_buffer *cmdbuf)
1859 {
1860 switch (cmdbuf->state.suspend_resume) {
1861 case SR_IN_PRE_CHAIN:
1862 return cmdbuf->trace_renderpass_end;
1863 case SR_AFTER_PRE_CHAIN:
1864 case SR_IN_CHAIN_AFTER_PRE_CHAIN:
1865 return cmdbuf->pre_chain.trace_renderpass_end;
1866 default:
1867 return u_trace_begin_iterator(&cmdbuf->trace);
1868 }
1869 }
1870
1871 static struct u_trace_iterator
1872 tu_cmd_end_iterator(struct tu_cmd_buffer *cmdbuf)
1873 {
1874 switch (cmdbuf->state.suspend_resume) {
1875 case SR_IN_PRE_CHAIN:
1876 return cmdbuf->trace_renderpass_end;
1877 case SR_IN_CHAIN:
1878 case SR_IN_CHAIN_AFTER_PRE_CHAIN:
1879 return cmdbuf->trace_renderpass_start;
1880 default:
1881 return u_trace_end_iterator(&cmdbuf->trace);
1882 }
1883 }
1884 VkResult
1885 tu_create_copy_timestamp_cs(struct tu_cmd_buffer *cmdbuf, struct tu_cs **cs,
1886 struct u_trace **trace_copy)
1887 {
1888 *cs = (struct tu_cs *) vk_zalloc(&cmdbuf->device->vk.alloc,
1889 sizeof(struct tu_cs), 8,
1890 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
1891
1892 if (*cs == NULL) {
1893 return VK_ERROR_OUT_OF_HOST_MEMORY;
1894 }
1895
1896 tu_cs_init(*cs, cmdbuf->device, TU_CS_MODE_GROW,
1897 list_length(&cmdbuf->trace.trace_chunks) * 6 * 2 + 3, "trace copy timestamp cs");
1898
1899 tu_cs_begin(*cs);
1900
1901 tu_cs_emit_wfi(*cs);
1902 tu_cs_emit_pkt7(*cs, CP_WAIT_FOR_ME, 0);
1903
1904 *trace_copy = (struct u_trace *) vk_zalloc(
1905 &cmdbuf->device->vk.alloc, sizeof(struct u_trace), 8,
1906 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
1907
1908 if (*trace_copy == NULL) {
1909 return VK_ERROR_OUT_OF_HOST_MEMORY;
1910 }
1911
1912 u_trace_init(*trace_copy, cmdbuf->trace.utctx);
1913 u_trace_clone_append(tu_cmd_begin_iterator(cmdbuf),
1914 tu_cmd_end_iterator(cmdbuf),
1915 *trace_copy, *cs,
1916 tu_copy_buffer);
1917
1918 tu_cs_emit_wfi(*cs);
1919
1920 tu_cs_end(*cs);
1921
1922 return VK_SUCCESS;
1923 }
1924
1925 VkResult
1926 tu_u_trace_submission_data_create(
1927 struct tu_device *device,
1928 struct tu_cmd_buffer **cmd_buffers,
1929 uint32_t cmd_buffer_count,
1930 struct tu_u_trace_submission_data **submission_data)
1931 {
1932 *submission_data = (struct tu_u_trace_submission_data *)
1933 vk_zalloc(&device->vk.alloc,
1934 sizeof(struct tu_u_trace_submission_data), 8,
1935 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
1936
1937 if (!(*submission_data)) {
1938 return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
1939 }
1940
1941 struct tu_u_trace_submission_data *data = *submission_data;
1942
1943 data->cmd_trace_data = (struct tu_u_trace_cmd_data *) vk_zalloc(
1944 &device->vk.alloc,
1945 cmd_buffer_count * sizeof(struct tu_u_trace_cmd_data), 8,
1946 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
1947
1948 if (!data->cmd_trace_data) {
1949 goto fail;
1950 }
1951
1952 data->cmd_buffer_count = cmd_buffer_count;
1953 data->last_buffer_with_tracepoints = -1;
1954
1955 for (uint32_t i = 0; i < cmd_buffer_count; ++i) {
1956 struct tu_cmd_buffer *cmdbuf = cmd_buffers[i];
1957
1958 if (!u_trace_has_points(&cmdbuf->trace))
1959 continue;
1960
1961 data->last_buffer_with_tracepoints = i;
1962
1963 if (!(cmdbuf->usage_flags & VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT)) {
1964 /* A single command buffer could be submitted several times, but the
1965 * timestamp iova addresses are already baked in and trace points are
1966 * single-use. Therefore we have to copy the trace points and create a
1967 * new timestamp buffer on every submit of a reusable command buffer.
1968 */
1969 if (tu_create_copy_timestamp_cs(cmdbuf,
1970 &data->cmd_trace_data[i].timestamp_copy_cs,
1971 &data->cmd_trace_data[i].trace) != VK_SUCCESS) {
1972 goto fail;
1973 }
1974
1975 assert(data->cmd_trace_data[i].timestamp_copy_cs->entry_count == 1);
1976 } else {
1977 data->cmd_trace_data[i].trace = &cmdbuf->trace;
1978 }
1979 }
1980
1981 assert(data->last_buffer_with_tracepoints != -1);
1982
1983 return VK_SUCCESS;
1984
1985 fail:
1986 tu_u_trace_submission_data_finish(device, data);
1987 *submission_data = NULL;
1988
1989 return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
1990 }
1991
1992 void
1993 tu_u_trace_submission_data_finish(
1994 struct tu_device *device,
1995 struct tu_u_trace_submission_data *submission_data)
1996 {
1997 for (uint32_t i = 0; i < submission_data->cmd_buffer_count; ++i) {
1998 /* Only free the trace if we had to create a copy of it */
1999 struct tu_u_trace_cmd_data *cmd_data = &submission_data->cmd_trace_data[i];
2000 if (cmd_data->timestamp_copy_cs) {
2001 tu_cs_finish(cmd_data->timestamp_copy_cs);
2002 vk_free(&device->vk.alloc, cmd_data->timestamp_copy_cs);
2003
2004 u_trace_fini(cmd_data->trace);
2005 vk_free(&device->vk.alloc, cmd_data->trace);
2006 }
2007 }
2008
2009 if (submission_data->kgsl_timestamp_bo.bo) {
2010 mtx_lock(&device->kgsl_profiling_mutex);
2011 tu_suballoc_bo_free(&device->kgsl_profiling_suballoc,
2012 &submission_data->kgsl_timestamp_bo);
2013 mtx_unlock(&device->kgsl_profiling_mutex);
2014 }
2015
2016 vk_free(&device->vk.alloc, submission_data->cmd_trace_data);
2017 vk_free(&device->vk.alloc, submission_data->syncobj);
2018 vk_free(&device->vk.alloc, submission_data);
2019 }
2020
2021 enum tu_reg_stomper_flags
2022 {
2023 TU_DEBUG_REG_STOMP_INVERSE = 1 << 0,
2024 TU_DEBUG_REG_STOMP_CMDBUF = 1 << 1,
2025 TU_DEBUG_REG_STOMP_RENDERPASS = 1 << 2,
2026 };
2027
2028 /* See freedreno.rst for usage tips */
2029 static const struct debug_named_value tu_reg_stomper_options[] = {
2030 { "inverse", TU_DEBUG_REG_STOMP_INVERSE,
2031 "By default the range specifies the regs to stomp, with 'inverse' it "
2032 "specifies the regs NOT to stomp" },
2033 { "cmdbuf", TU_DEBUG_REG_STOMP_CMDBUF,
2034 "Stomp regs at the start of a cmdbuf" },
2035 { "renderpass", TU_DEBUG_REG_STOMP_RENDERPASS,
2036 "Stomp regs before a renderpass" },
2037 { NULL, 0 }
2038 };
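/* Illustrative invocation (values made up for the example):
 *   TU_DEBUG_STALE_REGS_RANGE=8600,8800 TU_DEBUG_STALE_REGS_FLAGS=cmdbuf,inverse
 * would stomp every stompable command-stream register *outside* 0x8600..0x8800
 * at the start of each command buffer.
 */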
2039
2040 template <chip CHIP>
2041 static inline void
2042 tu_cs_dbg_stomp_regs(struct tu_cs *cs,
2043 bool is_rp_blit,
2044 uint32_t first_reg,
2045 uint32_t last_reg,
2046 bool inverse)
2047 {
2048 const uint16_t *regs = NULL;
2049 size_t count = 0;
2050
2051 if (is_rp_blit) {
2052 regs = &RP_BLIT_REGS<CHIP>[0];
2053 count = ARRAY_SIZE(RP_BLIT_REGS<CHIP>);
2054 } else {
2055 regs = &CMD_REGS<CHIP>[0];
2056 count = ARRAY_SIZE(CMD_REGS<CHIP>);
2057 }
2058
2059 for (size_t i = 0; i < count; i++) {
2060 if (inverse) {
2061 if (regs[i] >= first_reg && regs[i] <= last_reg)
2062 continue;
2063 } else {
2064 if (regs[i] < first_reg || regs[i] > last_reg)
2065 continue;
2066 }
2067
2068 if (fd_reg_stomp_allowed(CHIP, regs[i]))
2069 tu_cs_emit_write_reg(cs, regs[i], 0xffffffff);
2070 }
2071 }
2072
2073 static void
2074 tu_init_dbg_reg_stomper(struct tu_device *device)
2075 {
2076 const char *stale_reg_range_str =
2077 os_get_option("TU_DEBUG_STALE_REGS_RANGE");
2078 if (!stale_reg_range_str)
2079 return;
2080
2081 uint32_t first_reg, last_reg;
2082
2083 if (sscanf(stale_reg_range_str, "%x,%x", &first_reg, &last_reg) != 2) {
2084 mesa_loge("Incorrect TU_DEBUG_STALE_REGS_RANGE");
2085 return;
2086 }
2087
2088 uint64_t debug_flags = debug_get_flags_option("TU_DEBUG_STALE_REGS_FLAGS",
2089 tu_reg_stomper_options,
2090 TU_DEBUG_REG_STOMP_CMDBUF);
2091
2092 struct tu_cs *cmdbuf_cs = (struct tu_cs *) calloc(1, sizeof(struct tu_cs));
2093 tu_cs_init(cmdbuf_cs, device, TU_CS_MODE_GROW, 4096,
2094 "cmdbuf reg stomp cs");
2095 tu_cs_begin(cmdbuf_cs);
2096
2097 struct tu_cs *rp_cs = (struct tu_cs *) calloc(1, sizeof(struct tu_cs));
2098 tu_cs_init(rp_cs, device, TU_CS_MODE_GROW, 4096, "rp reg stomp cs");
2099 tu_cs_begin(rp_cs);
2100
2101 bool inverse = debug_flags & TU_DEBUG_REG_STOMP_INVERSE;
2102 TU_CALLX(device, tu_cs_dbg_stomp_regs)(cmdbuf_cs, false, first_reg, last_reg, inverse);
2103 TU_CALLX(device, tu_cs_dbg_stomp_regs)(rp_cs, true, first_reg, last_reg, inverse);
2104
2105 tu_cs_end(cmdbuf_cs);
2106 tu_cs_end(rp_cs);
2107
2108 device->dbg_cmdbuf_stomp_cs = cmdbuf_cs;
2109 device->dbg_renderpass_stomp_cs = rp_cs;
2110 }
2111
2112 /* It is unknown what this workaround is for and what it fixes. */
2113 static VkResult
2114 tu_init_cmdbuf_start_a725_quirk(struct tu_device *device)
2115 {
2116 struct tu_cs *cs;
2117
2118 if (!(device->cmdbuf_start_a725_quirk_cs =
2119 (struct tu_cs *) calloc(1, sizeof(struct tu_cs)))) {
2120 return vk_startup_errorf(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY,
2121 "OOM");
2122 }
2123
2124 if (!(device->cmdbuf_start_a725_quirk_entry =
2125 (struct tu_cs_entry *) calloc(1, sizeof(struct tu_cs_entry)))) {
2126 free(device->cmdbuf_start_a725_quirk_cs);
2127 return vk_startup_errorf(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY,
2128 "OOM");
2129 }
2130
2131 cs = device->cmdbuf_start_a725_quirk_cs;
2132 tu_cs_init(cs, device, TU_CS_MODE_SUB_STREAM, 57, "a725 workaround cs");
2133
2134 struct tu_cs shader_cs;
2135 tu_cs_begin_sub_stream(cs, 10, &shader_cs);
2136
2137 uint32_t raw_shader[] = {
2138 0x00040000, 0x40600000, // mul.f hr0.x, hr0.x, hr1.x
2139 0x00050001, 0x40600001, // mul.f hr0.y, hr0.y, hr1.y
2140 0x00060002, 0x40600002, // mul.f hr0.z, hr0.z, hr1.z
2141 0x00070003, 0x40600003, // mul.f hr0.w, hr0.w, hr1.w
2142 0x00000000, 0x03000000, // end
2143 };
2144
2145 tu_cs_emit_array(&shader_cs, raw_shader, ARRAY_SIZE(raw_shader));
2146 struct tu_cs_entry shader_entry = tu_cs_end_sub_stream(cs, &shader_cs);
2147 uint64_t shader_iova = shader_entry.bo->iova + shader_entry.offset;
2148
2149 struct tu_cs sub_cs;
2150 tu_cs_begin_sub_stream(cs, 47, &sub_cs);
2151
2152 tu_cs_emit_regs(&sub_cs, HLSQ_INVALIDATE_CMD(A7XX,
2153 .vs_state = true, .hs_state = true, .ds_state = true,
2154 .gs_state = true, .fs_state = true, .gfx_ibo = true,
2155 .cs_bindless = 0xff, .gfx_bindless = 0xff));
2156 tu_cs_emit_regs(&sub_cs, HLSQ_CS_CNTL(A7XX,
2157 .constlen = 4,
2158 .enabled = true));
2159 tu_cs_emit_regs(&sub_cs, A6XX_SP_CS_CONFIG(.enabled = true));
2160 tu_cs_emit_regs(&sub_cs, A6XX_SP_CS_CTRL_REG0(
2161 .threadmode = MULTI,
2162 .threadsize = THREAD128,
2163 .mergedregs = true));
2164 tu_cs_emit_regs(&sub_cs, A6XX_SP_CS_UNKNOWN_A9B1(.shared_size = 1));
2165 tu_cs_emit_regs(&sub_cs, HLSQ_CS_KERNEL_GROUP_X(A7XX, 1),
2166 HLSQ_CS_KERNEL_GROUP_Y(A7XX, 1),
2167 HLSQ_CS_KERNEL_GROUP_Z(A7XX, 1));
2168 tu_cs_emit_regs(&sub_cs, A6XX_SP_CS_INSTRLEN(.sp_cs_instrlen = 1));
2169 tu_cs_emit_regs(&sub_cs, A6XX_SP_CS_TEX_COUNT(0));
2170 tu_cs_emit_regs(&sub_cs, A6XX_SP_CS_IBO_COUNT(0));
2171 tu_cs_emit_regs(&sub_cs, HLSQ_CS_CNTL_1(A7XX,
2172 .linearlocalidregid = regid(63, 0),
2173 .threadsize = THREAD128,
2174 .workgrouprastorderzfirsten = true,
2175 .wgtilewidth = 4,
2176 .wgtileheight = 17));
2177 tu_cs_emit_regs(&sub_cs, A6XX_SP_CS_CNTL_0(
2178 .wgidconstid = regid(51, 3),
2179 .wgsizeconstid = regid(48, 0),
2180 .wgoffsetconstid = regid(63, 0),
2181 .localidregid = regid(63, 0)));
2182 tu_cs_emit_regs(&sub_cs, SP_CS_CNTL_1(A7XX,
2183 .linearlocalidregid = regid(63, 0),
2184 .threadsize = THREAD128,
2185 .workitemrastorder = WORKITEMRASTORDER_TILED));
2186 tu_cs_emit_regs(&sub_cs, A7XX_SP_CS_UNKNOWN_A9BE(0));
2187
2188 tu_cs_emit_regs(&sub_cs,
2189 HLSQ_CS_NDRANGE_0(A7XX, .kerneldim = 3,
2190 .localsizex = 255,
2191 .localsizey = 1,
2192 .localsizez = 1),
2193 HLSQ_CS_NDRANGE_1(A7XX, .globalsize_x = 3072),
2194 HLSQ_CS_NDRANGE_2(A7XX, .globaloff_x = 0),
2195 HLSQ_CS_NDRANGE_3(A7XX, .globalsize_y = 1),
2196 HLSQ_CS_NDRANGE_4(A7XX, .globaloff_y = 0),
2197 HLSQ_CS_NDRANGE_5(A7XX, .globalsize_z = 1),
2198 HLSQ_CS_NDRANGE_6(A7XX, .globaloff_z = 0));
2199 tu_cs_emit_regs(&sub_cs, A7XX_HLSQ_CS_LOCAL_SIZE(
2200 .localsizex = 255,
2201 .localsizey = 0,
2202 .localsizez = 0));
2203 tu_cs_emit_pkt4(&sub_cs, REG_A6XX_SP_CS_OBJ_FIRST_EXEC_OFFSET, 3);
2204 tu_cs_emit(&sub_cs, 0);
2205 tu_cs_emit_qw(&sub_cs, shader_iova);
2206
2207 tu_cs_emit_pkt7(&sub_cs, CP_EXEC_CS, 4);
2208 tu_cs_emit(&sub_cs, 0x00000000);
2209 tu_cs_emit(&sub_cs, CP_EXEC_CS_1_NGROUPS_X(12));
2210 tu_cs_emit(&sub_cs, CP_EXEC_CS_2_NGROUPS_Y(1));
2211 tu_cs_emit(&sub_cs, CP_EXEC_CS_3_NGROUPS_Z(1));
2212
2213 *device->cmdbuf_start_a725_quirk_entry = tu_cs_end_sub_stream(cs, &sub_cs);
2214
2215 return VK_SUCCESS;
2216 }
2217
2218 VKAPI_ATTR VkResult VKAPI_CALL
2219 tu_CreateDevice(VkPhysicalDevice physicalDevice,
2220 const VkDeviceCreateInfo *pCreateInfo,
2221 const VkAllocationCallbacks *pAllocator,
2222 VkDevice *pDevice)
2223 {
2224 VK_FROM_HANDLE(tu_physical_device, physical_device, physicalDevice);
2225 VkResult result;
2226 struct tu_device *device;
2227 bool border_color_without_format = false;
2228
2229 vk_foreach_struct_const (ext, pCreateInfo->pNext) {
2230 switch (ext->sType) {
2231 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CUSTOM_BORDER_COLOR_FEATURES_EXT:
2232 border_color_without_format =
2233 ((const VkPhysicalDeviceCustomBorderColorFeaturesEXT *) ext)
2234 ->customBorderColorWithoutFormat;
2235 break;
2236 default:
2237 break;
2238 }
2239 }
2240
2241 device = (struct tu_device *) vk_zalloc2(
2242 &physical_device->instance->vk.alloc, pAllocator, sizeof(*device), 8,
2243 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
2244 if (!device)
2245 return vk_startup_errorf(physical_device->instance, VK_ERROR_OUT_OF_HOST_MEMORY, "OOM");
2246
2247 struct vk_device_dispatch_table dispatch_table;
2248 bool override_initial_entrypoints = true;
2249
2250 if (physical_device->instance->vk.trace_mode & VK_TRACE_MODE_RMV) {
2251 vk_device_dispatch_table_from_entrypoints(
2252 &dispatch_table, &tu_rmv_device_entrypoints, true);
2253 override_initial_entrypoints = false;
2254 }
2255
2256 vk_device_dispatch_table_from_entrypoints(
2257 &dispatch_table, &tu_device_entrypoints, override_initial_entrypoints);
2258
2259 switch (fd_dev_gen(&physical_device->dev_id)) {
2260 case 6:
2261 vk_device_dispatch_table_from_entrypoints(
2262 &dispatch_table, &tu_device_entrypoints_a6xx, false);
2263 break;
2264 case 7:
2265 vk_device_dispatch_table_from_entrypoints(
2266 &dispatch_table, &tu_device_entrypoints_a7xx, false);
2267 }
2268
2269 vk_device_dispatch_table_from_entrypoints(
2270 &dispatch_table, &wsi_device_entrypoints, false);
2271
2272 const struct vk_device_entrypoint_table *knl_device_entrypoints =
2273 physical_device->instance->knl->device_entrypoints;
2274 if (knl_device_entrypoints) {
2275 vk_device_dispatch_table_from_entrypoints(
2276 &dispatch_table, knl_device_entrypoints, false);
2277 }
2278
2279 result = vk_device_init(&device->vk, &physical_device->vk,
2280 &dispatch_table, pCreateInfo, pAllocator);
2281 if (result != VK_SUCCESS) {
2282 vk_free(&device->vk.alloc, device);
2283 return vk_startup_errorf(physical_device->instance, result,
2284 "vk_device_init failed");
2285 }
2286
2287 device->instance = physical_device->instance;
2288 device->physical_device = physical_device;
2289 device->device_idx = device->physical_device->device_count++;
2290
2291 result = tu_drm_device_init(device);
2292 if (result != VK_SUCCESS) {
2293 vk_free(&device->vk.alloc, device);
2294 return result;
2295 }
2296
2297 device->vk.command_buffer_ops = &tu_cmd_buffer_ops;
2298 device->vk.check_status = tu_device_check_status;
2299
2300 mtx_init(&device->bo_mutex, mtx_plain);
2301 mtx_init(&device->pipeline_mutex, mtx_plain);
2302 mtx_init(&device->autotune_mutex, mtx_plain);
2303 mtx_init(&device->kgsl_profiling_mutex, mtx_plain);
2304 u_rwlock_init(&device->dma_bo_lock);
2305 pthread_mutex_init(&device->submit_mutex, NULL);
2306
2307 if (physical_device->has_set_iova) {
2308 mtx_init(&device->vma_mutex, mtx_plain);
2309 util_vma_heap_init(&device->vma, physical_device->va_start,
2310 ROUND_DOWN_TO(physical_device->va_size, os_page_size));
2311 }
2312
2313 if (TU_DEBUG(BOS))
2314 device->bo_sizes = _mesa_hash_table_create(NULL, _mesa_hash_string, _mesa_key_string_equal);
2315
2316 if (physical_device->instance->vk.trace_mode & VK_TRACE_MODE_RMV)
2317 tu_memory_trace_init(device);
2318
2319 /* kgsl is not a drm device: */
2320 if (!is_kgsl(physical_device->instance))
2321 vk_device_set_drm_fd(&device->vk, device->fd);
2322
2323 struct tu6_global *global = NULL;
2324 uint32_t global_size = sizeof(struct tu6_global);
2325 struct vk_pipeline_cache_create_info pcc_info = { };
2326
2327 for (unsigned i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {
2328 const VkDeviceQueueCreateInfo *queue_create =
2329 &pCreateInfo->pQueueCreateInfos[i];
2330 uint32_t qfi = queue_create->queueFamilyIndex;
2331 device->queues[qfi] = (struct tu_queue *) vk_alloc(
2332 &device->vk.alloc,
2333 queue_create->queueCount * sizeof(struct tu_queue), 8,
2334 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
2335 if (!device->queues[qfi]) {
2336 result = vk_startup_errorf(physical_device->instance,
2337 VK_ERROR_OUT_OF_HOST_MEMORY,
2338 "OOM");
2339 goto fail_queues;
2340 }
2341
2342 memset(device->queues[qfi], 0,
2343 queue_create->queueCount * sizeof(struct tu_queue));
2344
2345 device->queue_count[qfi] = queue_create->queueCount;
2346
2347 for (unsigned q = 0; q < queue_create->queueCount; q++) {
2348 result = tu_queue_init(device, &device->queues[qfi][q], q, queue_create,
2349 device->vk.enabled_features.globalPriorityQuery);
2350 if (result != VK_SUCCESS) {
2351 device->queue_count[qfi] = q;
2352 goto fail_queues;
2353 }
2354 }
2355 }
2356
2357 {
2358 struct ir3_compiler_options ir3_options = {
2359 .robust_buffer_access2 = device->vk.enabled_features.robustBufferAccess2,
2360 .push_ubo_with_preamble = true,
2361 .disable_cache = true,
2362 .bindless_fb_read_descriptor = -1,
2363 .bindless_fb_read_slot = -1,
2364 .storage_16bit = physical_device->info->a6xx.storage_16bit,
2365 .storage_8bit = physical_device->info->a7xx.storage_8bit,
2366 .shared_push_consts = !TU_DEBUG(PUSH_CONSTS_PER_STAGE),
2367 };
2368 device->compiler = ir3_compiler_create(
2369 NULL, &physical_device->dev_id, physical_device->info, &ir3_options);
2370 }
2371 if (!device->compiler) {
2372 result = vk_startup_errorf(physical_device->instance,
2373 VK_ERROR_INITIALIZATION_FAILED,
2374 "failed to initialize ir3 compiler");
2375 goto fail_queues;
2376 }
2377
2378 /* Initialize sparse array for refcounting imported BOs */
2379 util_sparse_array_init(&device->bo_map, sizeof(struct tu_bo), 512);
2380
2381 if (physical_device->has_set_iova) {
2382 STATIC_ASSERT(TU_MAX_QUEUE_FAMILIES == 1);
2383 if (!u_vector_init(&device->zombie_vmas, 64,
2384 sizeof(struct tu_zombie_vma))) {
2385 result = vk_startup_errorf(physical_device->instance,
2386 VK_ERROR_INITIALIZATION_FAILED,
2387 "zombie_vmas create failed");
2388 goto fail_free_zombie_vma;
2389 }
2390 }
2391
2392 /* Initial sizes; these will increase if there is overflow. */
2393 device->vsc_draw_strm_pitch = 0x1000 + VSC_PAD;
2394 device->vsc_prim_strm_pitch = 0x4000 + VSC_PAD;
2395
2396 if (device->vk.enabled_features.customBorderColors)
2397 global_size += TU_BORDER_COLOR_COUNT * sizeof(struct bcolor_entry);
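/* The resulting global BO layout is struct tu6_global followed by
 * TU_BORDER_COLOR_COUNT bcolor_entry slots for custom border colors
 * (only present when the feature is enabled).
 */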
2398
2399 tu_bo_suballocator_init(
2400 &device->pipeline_suballoc, device, 128 * 1024,
2401 (enum tu_bo_alloc_flags) (TU_BO_ALLOC_GPU_READ_ONLY |
2402 TU_BO_ALLOC_ALLOW_DUMP |
2403 TU_BO_ALLOC_INTERNAL_RESOURCE),
2404 "pipeline_suballoc");
2405 tu_bo_suballocator_init(&device->autotune_suballoc, device,
2406 128 * 1024, TU_BO_ALLOC_INTERNAL_RESOURCE,
2407 "autotune_suballoc");
2408 if (is_kgsl(physical_device->instance)) {
2409 tu_bo_suballocator_init(&device->kgsl_profiling_suballoc, device,
2410 128 * 1024, TU_BO_ALLOC_INTERNAL_RESOURCE,
2411 "kgsl_profiling_suballoc");
2412 }
2413
2414 result = tu_bo_init_new(
2415 device, NULL, &device->global_bo, global_size,
2416 (enum tu_bo_alloc_flags) (TU_BO_ALLOC_ALLOW_DUMP |
2417 TU_BO_ALLOC_INTERNAL_RESOURCE),
2418 "global");
2419 if (result != VK_SUCCESS) {
2420 vk_startup_errorf(device->instance, result, "BO init");
2421 goto fail_global_bo;
2422 }
2423
2424 result = tu_bo_map(device, device->global_bo, NULL);
2425 if (result != VK_SUCCESS) {
2426 vk_startup_errorf(device->instance, result, "BO map");
2427 goto fail_global_bo_map;
2428 }
2429
2430 global = (struct tu6_global *)device->global_bo->map;
2431 device->global_bo_map = global;
2432 tu_init_clear_blit_shaders(device);
2433
2434 result = tu_init_empty_shaders(device);
2435 if (result != VK_SUCCESS) {
2436 vk_startup_errorf(device->instance, result, "empty shaders");
2437 goto fail_empty_shaders;
2438 }
2439
2440 global->predicate = 0;
2441 global->vtx_stats_query_not_running = 1;
2442 global->dbg_one = (uint32_t)-1;
2443 global->dbg_gmem_total_loads = 0;
2444 global->dbg_gmem_taken_loads = 0;
2445 global->dbg_gmem_total_stores = 0;
2446 global->dbg_gmem_taken_stores = 0;
2447 for (int i = 0; i < TU_BORDER_COLOR_BUILTIN; i++) {
2448 VkClearColorValue border_color = vk_border_color_value((VkBorderColor) i);
2449 tu6_pack_border_color(&global->bcolor_builtin[i], &border_color,
2450 vk_border_color_is_int((VkBorderColor) i));
2451 }
2452
2453 /* initialize to ones so ffs can be used to find unused slots */
2454 BITSET_ONES(device->custom_border_color);
2455
2456 result = tu_init_dynamic_rendering(device);
2457 if (result != VK_SUCCESS) {
2458 vk_startup_errorf(device->instance, result, "dynamic rendering");
2459 goto fail_dynamic_rendering;
2460 }
2461
2462 device->mem_cache = vk_pipeline_cache_create(&device->vk, &pcc_info,
2463 NULL);
2464 if (!device->mem_cache) {
2465 result = VK_ERROR_OUT_OF_HOST_MEMORY;
2466 vk_startup_errorf(device->instance, result, "create pipeline cache failed");
2467 goto fail_pipeline_cache;
2468 }
2469
2470 if (device->vk.enabled_features.performanceCounterQueryPools) {
2471    /* Prepare 32 command streams, one per possible pass index, each setting
2472     * PERF_CNTRS_REG to its pass bit (1 << i). One of these will be picked up
2473     * at cmd submit time when the perf query is executed.
2474     */
2475 struct tu_cs *cs;
2476
2477 if (!(device->perfcntrs_pass_cs =
2478 (struct tu_cs *) calloc(1, sizeof(struct tu_cs)))) {
2479 result = vk_startup_errorf(device->instance,
2480 VK_ERROR_OUT_OF_HOST_MEMORY, "OOM");
2481 goto fail_perfcntrs_pass_alloc;
2482 }
2483
2484 device->perfcntrs_pass_cs_entries =
2485 (struct tu_cs_entry *) calloc(32, sizeof(struct tu_cs_entry));
2486 if (!device->perfcntrs_pass_cs_entries) {
2487 result = vk_startup_errorf(device->instance,
2488 VK_ERROR_OUT_OF_HOST_MEMORY, "OOM");
2489 goto fail_perfcntrs_pass_entries_alloc;
2490 }
2491
2492 cs = device->perfcntrs_pass_cs;
2493 tu_cs_init(cs, device, TU_CS_MODE_SUB_STREAM, 96, "perfcntrs cs");
2494
2495 for (unsigned i = 0; i < 32; i++) {
2496 struct tu_cs sub_cs;
2497
2498 result = tu_cs_begin_sub_stream(cs, 3, &sub_cs);
2499 if (result != VK_SUCCESS) {
2500 vk_startup_errorf(device->instance, result,
2501 "failed to allocate commands streams");
2502 goto fail_prepare_perfcntrs_pass_cs;
2503 }
2504
2505 tu_cs_emit_regs(&sub_cs, A6XX_CP_SCRATCH_REG(PERF_CNTRS_REG, 1 << i));
2506 tu_cs_emit_pkt7(&sub_cs, CP_WAIT_FOR_ME, 0);
2507
2508 device->perfcntrs_pass_cs_entries[i] = tu_cs_end_sub_stream(cs, &sub_cs);
2509 }
2510 }
2511
2512 if (physical_device->info->a7xx.cmdbuf_start_a725_quirk) {
2513 result = tu_init_cmdbuf_start_a725_quirk(device);
2514 if (result != VK_SUCCESS)
2515 goto fail_a725_workaround;
2516 }
2517
2518 tu_init_dbg_reg_stomper(device);
2519
2520 /* Initialize a condition variable for timeline semaphore */
2521 pthread_condattr_t condattr;
2522 if (pthread_condattr_init(&condattr) != 0) {
2523 result = vk_startup_errorf(physical_device->instance,
2524 VK_ERROR_INITIALIZATION_FAILED,
2525 "pthread condattr init");
2526 goto fail_timeline_cond;
2527 }
2528 if (pthread_condattr_setclock(&condattr, CLOCK_MONOTONIC) != 0) {
2529 pthread_condattr_destroy(&condattr);
2530 result = vk_startup_errorf(physical_device->instance,
2531 VK_ERROR_INITIALIZATION_FAILED,
2532 "pthread condattr clock setup");
2533 goto fail_timeline_cond;
2534 }
2535 if (pthread_cond_init(&device->timeline_cond, &condattr) != 0) {
2536 pthread_condattr_destroy(&condattr);
2537 result = vk_startup_errorf(physical_device->instance,
2538 VK_ERROR_INITIALIZATION_FAILED,
2539 "pthread cond init");
2540 goto fail_timeline_cond;
2541 }
2542 pthread_condattr_destroy(&condattr);
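/* Using CLOCK_MONOTONIC here means timed waits on the timeline condition
 * variable are unaffected by wall-clock adjustments.
 */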
2543
2544 result = tu_autotune_init(&device->autotune, device);
2545 if (result != VK_SUCCESS) {
2546 goto fail_timeline_cond;
2547 }
2548
2549 for (unsigned i = 0; i < ARRAY_SIZE(device->scratch_bos); i++)
2550 mtx_init(&device->scratch_bos[i].construct_mtx, mtx_plain);
2551
2552 mtx_init(&device->fiber_pvtmem_bo.mtx, mtx_plain);
2553 mtx_init(&device->wave_pvtmem_bo.mtx, mtx_plain);
2554
2555 mtx_init(&device->mutex, mtx_plain);
2556
2557 device->use_z24uint_s8uint =
2558 physical_device->info->a6xx.has_z24uint_s8uint &&
2559 (!border_color_without_format ||
2560 physical_device->instance->disable_d24s8_border_color_workaround);
2561 device->use_lrz = !TU_DEBUG(NOLRZ);
2562
2563 tu_gpu_tracepoint_config_variable();
2564
2565 device->submit_count = 0;
2566 u_trace_context_init(&device->trace_context, device,
2567 sizeof(uint64_t),
2568 12,
2569 tu_trace_create_buffer,
2570 tu_trace_destroy_buffer,
2571 TU_CALLX(device, tu_trace_record_ts),
2572 tu_trace_read_ts,
2573 tu_trace_capture_data,
2574 tu_trace_get_data,
2575 tu_trace_delete_flush_data);
2576
2577 tu_breadcrumbs_init(device);
2578
2579 if (FD_RD_DUMP(ENABLE)) {
2580 struct vk_app_info *app_info = &device->instance->vk.app_info;
2581 const char *app_name_str = app_info->app_name ?
2582 app_info->app_name : util_get_process_name();
2583 const char *engine_name_str = app_info->engine_name ?
2584 app_info->engine_name : "unknown-engine";
2585
2586 char app_name[64];
2587 snprintf(app_name, sizeof(app_name), "%s", app_name_str);
2588
2589 char engine_name[32];
2590 snprintf(engine_name, sizeof(engine_name), "%s", engine_name_str);
2591
2592 char output_name[128];
2593 snprintf(output_name, sizeof(output_name), "tu_%s.%s_instance%u_device%u",
2594 app_name, engine_name, device->instance->instance_idx,
2595 device->device_idx);
2596
2597 fd_rd_output_init(&device->rd_output, output_name);
2598 }
2599
2600 *pDevice = tu_device_to_handle(device);
2601 return VK_SUCCESS;
2602
2603 fail_timeline_cond:
2604 if (device->cmdbuf_start_a725_quirk_entry) {
2605 free(device->cmdbuf_start_a725_quirk_entry);
2606 tu_cs_finish(device->cmdbuf_start_a725_quirk_cs);
2607 free(device->cmdbuf_start_a725_quirk_cs);
2608 }
2609 fail_a725_workaround:
2610 fail_prepare_perfcntrs_pass_cs:
2611 free(device->perfcntrs_pass_cs_entries);
2612 tu_cs_finish(device->perfcntrs_pass_cs);
2613 fail_perfcntrs_pass_entries_alloc:
2614 free(device->perfcntrs_pass_cs);
2615 fail_perfcntrs_pass_alloc:
2616 vk_pipeline_cache_destroy(device->mem_cache, &device->vk.alloc);
2617 fail_pipeline_cache:
2618 tu_destroy_dynamic_rendering(device);
2619 fail_dynamic_rendering:
2620 tu_destroy_empty_shaders(device);
2621 fail_empty_shaders:
2622 tu_destroy_clear_blit_shaders(device);
2623 fail_global_bo_map:
2624 TU_RMV(resource_destroy, device, device->global_bo);
2625 tu_bo_finish(device, device->global_bo);
2626 vk_free(&device->vk.alloc, device->bo_list);
2627 fail_global_bo:
2628 ir3_compiler_destroy(device->compiler);
2629 util_sparse_array_finish(&device->bo_map);
2630 if (physical_device->has_set_iova)
2631 util_vma_heap_finish(&device->vma);
2632 fail_free_zombie_vma:
2633 u_vector_finish(&device->zombie_vmas);
2634 fail_queues:
2635 for (unsigned i = 0; i < TU_MAX_QUEUE_FAMILIES; i++) {
2636 for (unsigned q = 0; q < device->queue_count[i]; q++)
2637 tu_queue_finish(&device->queues[i][q]);
2638 if (device->queues[i])
2639 vk_free(&device->vk.alloc, device->queues[i]);
2640 }
2641
2642 u_rwlock_destroy(&device->dma_bo_lock);
2643 tu_drm_device_finish(device);
2644 vk_device_finish(&device->vk);
2645 vk_free(&device->vk.alloc, device);
2646 return result;
2647 }
2648
2649 VKAPI_ATTR void VKAPI_CALL
2650 tu_DestroyDevice(VkDevice _device, const VkAllocationCallbacks *pAllocator)
2651 {
2652 VK_FROM_HANDLE(tu_device, device, _device);
2653
2654 if (!device)
2655 return;
2656
2657 tu_memory_trace_finish(device);
2658
2659 if (FD_RD_DUMP(ENABLE))
2660 fd_rd_output_fini(&device->rd_output);
2661
2662 tu_breadcrumbs_finish(device);
2663
2664 u_trace_context_fini(&device->trace_context);
2665
2666 for (unsigned i = 0; i < ARRAY_SIZE(device->scratch_bos); i++) {
2667 if (device->scratch_bos[i].initialized)
2668 tu_bo_finish(device, device->scratch_bos[i].bo);
2669 }
2670
2671 if (device->fiber_pvtmem_bo.bo)
2672 tu_bo_finish(device, device->fiber_pvtmem_bo.bo);
2673
2674 if (device->wave_pvtmem_bo.bo)
2675 tu_bo_finish(device, device->wave_pvtmem_bo.bo);
2676
2677 tu_destroy_clear_blit_shaders(device);
2678
2679 tu_destroy_empty_shaders(device);
2680
2681 tu_destroy_dynamic_rendering(device);
2682
2683 ir3_compiler_destroy(device->compiler);
2684
2685 vk_pipeline_cache_destroy(device->mem_cache, &device->vk.alloc);
2686
2687 if (device->perfcntrs_pass_cs) {
2688 free(device->perfcntrs_pass_cs_entries);
2689 tu_cs_finish(device->perfcntrs_pass_cs);
2690 free(device->perfcntrs_pass_cs);
2691 }
2692
2693 if (device->dbg_cmdbuf_stomp_cs) {
2694 tu_cs_finish(device->dbg_cmdbuf_stomp_cs);
2695 free(device->dbg_cmdbuf_stomp_cs);
2696 }
2697
2698 if (device->dbg_renderpass_stomp_cs) {
2699 tu_cs_finish(device->dbg_renderpass_stomp_cs);
2700 free(device->dbg_renderpass_stomp_cs);
2701 }
2702
2703 if (device->cmdbuf_start_a725_quirk_entry) {
2704 free(device->cmdbuf_start_a725_quirk_entry);
2705 tu_cs_finish(device->cmdbuf_start_a725_quirk_cs);
2706 free(device->cmdbuf_start_a725_quirk_cs);
2707 }
2708
2709 tu_autotune_fini(&device->autotune, device);
2710
2711 tu_bo_suballocator_finish(&device->pipeline_suballoc);
2712 tu_bo_suballocator_finish(&device->autotune_suballoc);
2713 tu_bo_suballocator_finish(&device->kgsl_profiling_suballoc);
2714
2715 tu_bo_finish(device, device->global_bo);
2716
2717 for (unsigned i = 0; i < TU_MAX_QUEUE_FAMILIES; i++) {
2718 for (unsigned q = 0; q < device->queue_count[i]; q++)
2719 tu_queue_finish(&device->queues[i][q]);
2720 if (device->queue_count[i])
2721 vk_free(&device->vk.alloc, device->queues[i]);
2722 }
2723
2724 tu_drm_device_finish(device);
2725
2726 if (device->physical_device->has_set_iova)
2727 util_vma_heap_finish(&device->vma);
2728
2729 util_sparse_array_finish(&device->bo_map);
2730 u_rwlock_destroy(&device->dma_bo_lock);
2731
2732 u_vector_finish(&device->zombie_vmas);
2733
2734 pthread_cond_destroy(&device->timeline_cond);
2735 _mesa_hash_table_destroy(device->bo_sizes, NULL);
2736 vk_free(&device->vk.alloc, device->bo_list);
2737 vk_device_finish(&device->vk);
2738 vk_free(&device->vk.alloc, device);
2739 }
2740
2741 VkResult
2742 tu_get_scratch_bo(struct tu_device *dev, uint64_t size, struct tu_bo **bo)
2743 {
2744 unsigned size_log2 = MAX2(util_logbase2_ceil64(size), MIN_SCRATCH_BO_SIZE_LOG2);
2745 unsigned index = size_log2 - MIN_SCRATCH_BO_SIZE_LOG2;
2746 assert(index < ARRAY_SIZE(dev->scratch_bos));
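/* Scratch BOs are bucketed by power-of-two size starting at
 * 1 << MIN_SCRATCH_BO_SIZE_LOG2; a larger already-initialized BO can service
 * a smaller request, hence the upward scan below.
 */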
2747
2748 for (unsigned i = index; i < ARRAY_SIZE(dev->scratch_bos); i++) {
2749 if (p_atomic_read(&dev->scratch_bos[i].initialized)) {
2750 /* Fast path: just return the already-allocated BO. */
2751 *bo = dev->scratch_bos[i].bo;
2752 return VK_SUCCESS;
2753 }
2754 }
2755
2756    /* Slow path: actually allocate the BO. Allocation is slow, so we take a
2757     * per-size lock so that other threads requesting the same size wait
2758     * (rather than busy-wait or allocate twice) while it finishes.
2759     */
2760 mtx_lock(&dev->scratch_bos[index].construct_mtx);
2761
2762 /* Another thread may have allocated it already while we were waiting on
2763 * the lock. We need to check this in order to avoid double-allocating.
2764 */
2765 if (dev->scratch_bos[index].initialized) {
2766 mtx_unlock(&dev->scratch_bos[index].construct_mtx);
2767 *bo = dev->scratch_bos[index].bo;
2768 return VK_SUCCESS;
2769 }
2770
2771 unsigned bo_size = 1ull << size_log2;
2772 VkResult result = tu_bo_init_new(dev, NULL, &dev->scratch_bos[index].bo, bo_size,
2773 TU_BO_ALLOC_INTERNAL_RESOURCE, "scratch");
2774 if (result != VK_SUCCESS) {
2775 mtx_unlock(&dev->scratch_bos[index].construct_mtx);
2776 return result;
2777 }
2778
2779 p_atomic_set(&dev->scratch_bos[index].initialized, true);
2780
2781 mtx_unlock(&dev->scratch_bos[index].construct_mtx);
2782
2783 *bo = dev->scratch_bos[index].bo;
2784 return VK_SUCCESS;
2785 }
2786
2787 VKAPI_ATTR VkResult VKAPI_CALL
2788 tu_EnumerateInstanceLayerProperties(uint32_t *pPropertyCount,
2789 VkLayerProperties *pProperties)
2790 {
2791 *pPropertyCount = 0;
2792 return VK_SUCCESS;
2793 }
2794
2795 VKAPI_ATTR VkResult VKAPI_CALL
2796 tu_EnumerateInstanceExtensionProperties(const char *pLayerName,
2797 uint32_t *pPropertyCount,
2798 VkExtensionProperties *pProperties)
2799 {
2800 if (pLayerName)
2801 return vk_error(NULL, VK_ERROR_LAYER_NOT_PRESENT);
2802
2803 return vk_enumerate_instance_extension_properties(
2804 &tu_instance_extensions_supported, pPropertyCount, pProperties);
2805 }
2806
2807 VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL
2808 tu_GetInstanceProcAddr(VkInstance _instance, const char *pName)
2809 {
2810 VK_FROM_HANDLE(tu_instance, instance, _instance);
2811 return vk_instance_get_proc_addr(instance != NULL ? &instance->vk : NULL,
2812 &tu_instance_entrypoints,
2813 pName);
2814 }
2815
2816 /* The loader wants us to expose a second GetInstanceProcAddr function
2817 * to work around certain LD_PRELOAD issues seen in apps.
2818 */
2819 PUBLIC
2820 VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL
2821 vk_icdGetInstanceProcAddr(VkInstance instance, const char *pName)
2822 {
2823 return tu_GetInstanceProcAddr(instance, pName);
2824 }
2825
2826 VKAPI_ATTR VkResult VKAPI_CALL
2827 tu_AllocateMemory(VkDevice _device,
2828 const VkMemoryAllocateInfo *pAllocateInfo,
2829 const VkAllocationCallbacks *pAllocator,
2830 VkDeviceMemory *pMem)
2831 {
2832 VK_FROM_HANDLE(tu_device, device, _device);
2833 struct tu_device_memory *mem;
2834 VkResult result;
2835
2836 assert(pAllocateInfo->sType == VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO);
2837
2838 struct tu_memory_heap *mem_heap = &device->physical_device->heap;
2839 uint64_t mem_heap_used = p_atomic_read(&mem_heap->used);
2840 if (mem_heap_used > mem_heap->size)
2841 return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
2842
2843 mem = (struct tu_device_memory *) vk_device_memory_create(
2844 &device->vk, pAllocateInfo, pAllocator, sizeof(*mem));
2845 if (mem == NULL)
2846 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
2847
2848 if (pAllocateInfo->allocationSize == 0 && !mem->vk.ahardware_buffer) {
2849 vk_device_memory_destroy(&device->vk, pAllocator, &mem->vk);
2850 /* Apparently, this is allowed */
2851 *pMem = VK_NULL_HANDLE;
2852 return VK_SUCCESS;
2853 }
2854
2855 const VkImportMemoryFdInfoKHR *fd_info =
2856 vk_find_struct_const(pAllocateInfo->pNext, IMPORT_MEMORY_FD_INFO_KHR);
2857
2858 if (fd_info && fd_info->handleType) {
2859 assert(fd_info->handleType ==
2860 VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT ||
2861 fd_info->handleType ==
2862 VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
2863
2864 /*
2865 * TODO: Importing the same fd twice gives us the same handle without
2866 * reference counting. We need to maintain a per-instance handle-to-bo
2867 * table and add a reference count to tu_bo.
2868 */
2869 result = tu_bo_init_dmabuf(device, &mem->bo,
2870 pAllocateInfo->allocationSize, fd_info->fd);
2871 if (result == VK_SUCCESS) {
2872 /* take ownership and close the fd */
2873 close(fd_info->fd);
2874 }
2875 } else if (mem->vk.ahardware_buffer) {
2876 #if DETECT_OS_ANDROID
2877 const native_handle_t *handle = AHardwareBuffer_getNativeHandle(mem->vk.ahardware_buffer);
2878 assert(handle->numFds > 0);
2879 size_t size = lseek(handle->data[0], 0, SEEK_END);
2880 result = tu_bo_init_dmabuf(device, &mem->bo, size, handle->data[0]);
2881 #else
2882 result = VK_ERROR_FEATURE_NOT_PRESENT;
2883 #endif
2884 } else {
2885 uint64_t client_address = 0;
2886 BITMASK_ENUM(tu_bo_alloc_flags) alloc_flags = TU_BO_ALLOC_NO_FLAGS;
2887
2888 const VkMemoryOpaqueCaptureAddressAllocateInfo *replay_info =
2889 vk_find_struct_const(pAllocateInfo->pNext,
2890 MEMORY_OPAQUE_CAPTURE_ADDRESS_ALLOCATE_INFO);
2891 if (replay_info && replay_info->opaqueCaptureAddress) {
2892 client_address = replay_info->opaqueCaptureAddress;
2893 alloc_flags |= TU_BO_ALLOC_REPLAYABLE;
2894 }
2895
2896 const VkMemoryAllocateFlagsInfo *flags_info = vk_find_struct_const(
2897 pAllocateInfo->pNext, MEMORY_ALLOCATE_FLAGS_INFO);
2898 if (flags_info &&
2899 (flags_info->flags &
2900 VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_CAPTURE_REPLAY_BIT)) {
2901 alloc_flags |= TU_BO_ALLOC_REPLAYABLE;
2902 }
2903
2904 const VkExportMemoryAllocateInfo *export_info =
2905 vk_find_struct_const(pAllocateInfo->pNext, EXPORT_MEMORY_ALLOCATE_INFO);
2906 if (export_info && (export_info->handleTypes &
2907 (VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT |
2908 VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT)))
2909 alloc_flags |= TU_BO_ALLOC_SHAREABLE;
2910
2911
2912 char name[64] = "vkAllocateMemory()";
2913 if (device->bo_sizes)
2914 snprintf(name, ARRAY_SIZE(name), "vkAllocateMemory(%ldkb)",
2915 (long)DIV_ROUND_UP(pAllocateInfo->allocationSize, 1024));
2916 VkMemoryPropertyFlags mem_property =
2917 device->physical_device->memory.types[pAllocateInfo->memoryTypeIndex];
2918 result = tu_bo_init_new_explicit_iova(
2919 device, &mem->vk.base, &mem->bo, pAllocateInfo->allocationSize,
2920 client_address, mem_property, alloc_flags, name);
2921 }
2922
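/* Heap usage is accounted optimistically: charge the BO size after a
 * successful allocation and roll the charge back (and free the BO) if it
 * pushes usage past the advertised heap size. */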
2923 if (result == VK_SUCCESS) {
2924 mem_heap_used = p_atomic_add_return(&mem_heap->used, mem->bo->size);
2925 if (mem_heap_used > mem_heap->size) {
2926 p_atomic_add(&mem_heap->used, -mem->bo->size);
2927 tu_bo_finish(device, mem->bo);
2928 result = vk_errorf(device, VK_ERROR_OUT_OF_DEVICE_MEMORY,
2929 "Out of heap memory");
2930 }
2931 }
2932
2933 if (result != VK_SUCCESS) {
2934 vk_device_memory_destroy(&device->vk, pAllocator, &mem->vk);
2935 return result;
2936 }
2937
2938 /* Track in the device whether our BO list contains any implicit-sync BOs, so
2939 * we can suppress implicit sync on non-WSI usage.
2940 */
2941 const struct wsi_memory_allocate_info *wsi_info =
2942 vk_find_struct_const(pAllocateInfo->pNext, WSI_MEMORY_ALLOCATE_INFO_MESA);
2943 if (wsi_info && wsi_info->implicit_sync) {
2944 mtx_lock(&device->bo_mutex);
2945 if (!mem->bo->implicit_sync) {
2946 mem->bo->implicit_sync = true;
2947 device->implicit_sync_bo_count++;
2948 }
2949 mtx_unlock(&device->bo_mutex);
2950 }
2951
2952 const VkMemoryDedicatedAllocateInfo *dedicate_info =
2953 vk_find_struct_const(pAllocateInfo->pNext, MEMORY_DEDICATED_ALLOCATE_INFO);
2954 if (dedicate_info) {
2955 mem->image = tu_image_from_handle(dedicate_info->image);
2956 } else {
2957 mem->image = NULL;
2958 }
2959
2960 TU_RMV(heap_create, device, pAllocateInfo, mem);
2961
2962 *pMem = tu_device_memory_to_handle(mem);
2963
2964 return VK_SUCCESS;
2965 }
2966
2967 VKAPI_ATTR void VKAPI_CALL
2968 tu_FreeMemory(VkDevice _device,
2969 VkDeviceMemory _mem,
2970 const VkAllocationCallbacks *pAllocator)
2971 {
2972 VK_FROM_HANDLE(tu_device, device, _device);
2973 VK_FROM_HANDLE(tu_device_memory, mem, _mem);
2974
2975 if (mem == NULL)
2976 return;
2977
2978 TU_RMV(resource_destroy, device, mem);
2979
2980 p_atomic_add(&device->physical_device->heap.used, -mem->bo->size);
2981 tu_bo_finish(device, mem->bo);
2982 vk_device_memory_destroy(&device->vk, pAllocator, &mem->vk);
2983 }
2984
2985 VKAPI_ATTR VkResult VKAPI_CALL
2986 tu_MapMemory2KHR(VkDevice _device, const VkMemoryMapInfoKHR *pMemoryMapInfo, void **ppData)
2987 {
2988 VK_FROM_HANDLE(tu_device, device, _device);
2989 VK_FROM_HANDLE(tu_device_memory, mem, pMemoryMapInfo->memory);
2990 VkResult result;
2991
2992 if (mem == NULL) {
2993 *ppData = NULL;
2994 return VK_SUCCESS;
2995 }
2996
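/* With VK_EXT_map_memory_placed the application supplies the CPU virtual
 * address to map at; pass it through so the BO is mapped at that address
 * instead of letting the kernel choose one. */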
2997 void *placed_addr = NULL;
2998 if (pMemoryMapInfo->flags & VK_MEMORY_MAP_PLACED_BIT_EXT) {
2999 const VkMemoryMapPlacedInfoEXT *placed_info =
3000 vk_find_struct_const(pMemoryMapInfo->pNext, MEMORY_MAP_PLACED_INFO_EXT);
3001 assert(placed_info != NULL);
3002 placed_addr = placed_info->pPlacedAddress;
3003 }
3004
3005 result = tu_bo_map(device, mem->bo, placed_addr);
3006 if (result != VK_SUCCESS)
3007 return result;
3008
3009 *ppData = (char *) mem->bo->map + pMemoryMapInfo->offset;
3010 return VK_SUCCESS;
3011 }
3012
3013 VKAPI_ATTR VkResult VKAPI_CALL
3014 tu_UnmapMemory2KHR(VkDevice _device, const VkMemoryUnmapInfoKHR *pMemoryUnmapInfo)
3015 {
3016 VK_FROM_HANDLE(tu_device, device, _device);
3017 VK_FROM_HANDLE(tu_device_memory, mem, pMemoryUnmapInfo->memory);
3018
3019 if (mem == NULL)
3020 return VK_SUCCESS;
3021
3022 return tu_bo_unmap(device, mem->bo, pMemoryUnmapInfo->flags & VK_MEMORY_UNMAP_RESERVE_BIT_EXT);
3023 }
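
/* Clean or invalidate the CPU data cache over the given mapped ranges:
 * TU_MEM_SYNC_CACHE_TO_GPU cleans CPU writes so the GPU observes them,
 * TU_MEM_SYNC_CACHE_FROM_GPU invalidates so the CPU re-reads GPU writes.
 * Only cached, non-coherent memory types need this.
 */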
3024 static VkResult
3025 sync_cache(VkDevice _device,
3026 enum tu_mem_sync_op op,
3027 uint32_t count,
3028 const VkMappedMemoryRange *ranges)
3029 {
3030 VK_FROM_HANDLE(tu_device, device, _device);
3031
3032 if (!device->physical_device->has_cached_non_coherent_memory) {
3033 tu_finishme(
3034 "data cache clean and invalidation are unsupported on this arch!");
3035 return VK_SUCCESS;
3036 }
3037
3038 for (uint32_t i = 0; i < count; i++) {
3039 VK_FROM_HANDLE(tu_device_memory, mem, ranges[i].memory);
3040 tu_bo_sync_cache(device, mem->bo, ranges[i].offset, ranges[i].size, op);
3041 }
3042
3043 return VK_SUCCESS;
3044 }
3045
3046 VkResult
3047 tu_FlushMappedMemoryRanges(VkDevice _device,
3048 uint32_t memoryRangeCount,
3049 const VkMappedMemoryRange *pMemoryRanges)
3050 {
3051 return sync_cache(_device, TU_MEM_SYNC_CACHE_TO_GPU, memoryRangeCount,
3052 pMemoryRanges);
3053 }
3054
3055 VkResult
3056 tu_InvalidateMappedMemoryRanges(VkDevice _device,
3057 uint32_t memoryRangeCount,
3058 const VkMappedMemoryRange *pMemoryRanges)
3059 {
3060 return sync_cache(_device, TU_MEM_SYNC_CACHE_FROM_GPU, memoryRangeCount,
3061 pMemoryRanges);
3062 }
3063
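/* No lazily-allocated memory types are advertised, so the committed size is
 * always reported as zero. */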
3064 VKAPI_ATTR void VKAPI_CALL
3065 tu_GetDeviceMemoryCommitment(VkDevice device,
3066 VkDeviceMemory memory,
3067 VkDeviceSize *pCommittedMemoryInBytes)
3068 {
3069 *pCommittedMemoryInBytes = 0;
3070 }
3071
3072 VKAPI_ATTR VkResult VKAPI_CALL
3073 tu_CreateFramebuffer(VkDevice _device,
3074 const VkFramebufferCreateInfo *pCreateInfo,
3075 const VkAllocationCallbacks *pAllocator,
3076 VkFramebuffer *pFramebuffer)
3077 {
3078 VK_FROM_HANDLE(tu_device, device, _device);
3079
3080 if (TU_DEBUG(DYNAMIC))
3081 return vk_common_CreateFramebuffer(_device, pCreateInfo, pAllocator,
3082 pFramebuffer);
3083
3084 VK_FROM_HANDLE(tu_render_pass, pass, pCreateInfo->renderPass);
3085 struct tu_framebuffer *framebuffer;
3086
3087 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO);
3088
3089 bool imageless = pCreateInfo->flags & VK_FRAMEBUFFER_CREATE_IMAGELESS_BIT;
3090
3091 size_t size = sizeof(*framebuffer);
3092 if (!imageless)
3093 size += sizeof(struct tu_attachment_info) * pCreateInfo->attachmentCount;
3094 framebuffer = (struct tu_framebuffer *) vk_object_alloc(
3095 &device->vk, pAllocator, size, VK_OBJECT_TYPE_FRAMEBUFFER);
3096 if (framebuffer == NULL)
3097 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
3098
3099 framebuffer->attachment_count = pCreateInfo->attachmentCount;
3100 framebuffer->width = pCreateInfo->width;
3101 framebuffer->height = pCreateInfo->height;
3102 framebuffer->layers = pCreateInfo->layers;
3103
3104 if (!imageless) {
3105 for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) {
3106 VkImageView _iview = pCreateInfo->pAttachments[i];
3107 struct tu_image_view *iview = tu_image_view_from_handle(_iview);
3108 framebuffer->attachments[i].attachment = iview;
3109 }
3110 }
3111
3112 tu_framebuffer_tiling_config(framebuffer, device, pass);
3113
3114 *pFramebuffer = tu_framebuffer_to_handle(framebuffer);
3115 return VK_SUCCESS;
3116 }
3117
3118 void
3119 tu_setup_dynamic_framebuffer(struct tu_cmd_buffer *cmd_buffer,
3120 const VkRenderingInfo *pRenderingInfo)
3121 {
3122 struct tu_render_pass *pass = &cmd_buffer->dynamic_pass;
3123 struct tu_framebuffer *framebuffer = &cmd_buffer->dynamic_framebuffer;
3124
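/* The tiling configuration below is derived from the framebuffer extent, so
 * it has to cover the whole render area including its offset, not just the
 * extent. */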
3125 framebuffer->attachment_count = pass->attachment_count;
3126 framebuffer->width = pRenderingInfo->renderArea.offset.x +
3127 pRenderingInfo->renderArea.extent.width;
3128 framebuffer->height = pRenderingInfo->renderArea.offset.y +
3129 pRenderingInfo->renderArea.extent.height;
3130 framebuffer->layers = pRenderingInfo->layerCount;
3131
3132 tu_framebuffer_tiling_config(framebuffer, cmd_buffer->device, pass);
3133 }
3134
3135 VKAPI_ATTR void VKAPI_CALL
3136 tu_DestroyFramebuffer(VkDevice _device,
3137 VkFramebuffer _fb,
3138 const VkAllocationCallbacks *pAllocator)
3139 {
3140 VK_FROM_HANDLE(tu_device, device, _device);
3141
3142 if (TU_DEBUG(DYNAMIC)) {
3143 vk_common_DestroyFramebuffer(_device, _fb, pAllocator);
3144 return;
3145 }
3146
3147 VK_FROM_HANDLE(tu_framebuffer, fb, _fb);
3148
3149 if (!fb)
3150 return;
3151
3152 vk_object_free(&device->vk, pAllocator, fb);
3153 }
3154
3155 VKAPI_ATTR VkResult VKAPI_CALL
3156 tu_GetMemoryFdKHR(VkDevice _device,
3157 const VkMemoryGetFdInfoKHR *pGetFdInfo,
3158 int *pFd)
3159 {
3160 VK_FROM_HANDLE(tu_device, device, _device);
3161 VK_FROM_HANDLE(tu_device_memory, memory, pGetFdInfo->memory);
3162
3163 assert(pGetFdInfo->sType == VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR);
3164
3165 /* Only opaque fd and dma-buf handle types are supported at the moment. */
3166 assert(pGetFdInfo->handleType ==
3167 VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT ||
3168 pGetFdInfo->handleType ==
3169 VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
3170
3171 int prime_fd = tu_bo_export_dmabuf(device, memory->bo);
3172 if (prime_fd < 0)
3173 return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
3174
3175 *pFd = prime_fd;
3176
3177 if (memory->image) {
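/* Record the image's tiling as a DRM format modifier in the BO metadata so
 * that the consumer of the exported dma-buf can pick a matching layout. */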
3178 struct fdl_layout *l = &memory->image->layout[0];
3179 uint64_t modifier;
3180 if (l->ubwc) {
3181 modifier = DRM_FORMAT_MOD_QCOM_COMPRESSED;
3182 } else if (l->tile_mode == 2) {
3183 modifier = DRM_FORMAT_MOD_QCOM_TILED2;
3184 } else if (l->tile_mode == 3) {
3185 modifier = DRM_FORMAT_MOD_QCOM_TILED3;
3186 } else {
3187 assert(!l->tile_mode);
3188 modifier = DRM_FORMAT_MOD_LINEAR;
3189 }
3190 struct fdl_metadata metadata = {
3191 .modifier = modifier,
3192 };
3193 tu_bo_set_metadata(device, memory->bo, &metadata, sizeof(metadata));
3194 }
3195
3196 return VK_SUCCESS;
3197 }
3198
3199 VKAPI_ATTR VkResult VKAPI_CALL
3200 tu_GetMemoryFdPropertiesKHR(VkDevice _device,
3201 VkExternalMemoryHandleTypeFlagBits handleType,
3202 int fd,
3203 VkMemoryFdPropertiesKHR *pMemoryFdProperties)
3204 {
3205 VK_FROM_HANDLE(tu_device, device, _device);
3206 assert(handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
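/* An imported dma-buf can be placed in any of the memory types we expose. */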
3207 pMemoryFdProperties->memoryTypeBits =
3208 (1 << device->physical_device->memory.type_count) - 1;
3209 return VK_SUCCESS;
3210 }
3211
3212 VKAPI_ATTR void VKAPI_CALL
3213 tu_GetPhysicalDeviceMultisamplePropertiesEXT(
3214 VkPhysicalDevice physicalDevice,
3215 VkSampleCountFlagBits samples,
3216 VkMultisamplePropertiesEXT* pMultisampleProperties)
3217 {
3218 VK_FROM_HANDLE(tu_physical_device, pdevice, physicalDevice);
3219
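/* Programmable sample locations use a 1x1 pixel grid and are only supported
 * for sample counts up to 4. */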
3220 if (samples <= VK_SAMPLE_COUNT_4_BIT && pdevice->vk.supported_extensions.EXT_sample_locations)
3221 pMultisampleProperties->maxSampleLocationGridSize = (VkExtent2D){ 1, 1 };
3222 else
3223 pMultisampleProperties->maxSampleLocationGridSize = (VkExtent2D){ 0, 0 };
3224 }
3225
3226 uint64_t tu_GetDeviceMemoryOpaqueCaptureAddress(
3227 VkDevice device,
3228 const VkDeviceMemoryOpaqueCaptureAddressInfo* pInfo)
3229 {
3230 VK_FROM_HANDLE(tu_device_memory, mem, pInfo->memory);
3231 return mem->bo->iova;
3232 }
3233
3234 struct tu_debug_bos_entry {
3235 uint32_t count;
3236 uint64_t size;
3237 const char *name;
3238 };
3239
3240 const char *
3241 tu_debug_bos_add(struct tu_device *dev, uint64_t size, const char *name)
3242 {
3243 assert(name);
3244
3245 if (likely(!dev->bo_sizes))
3246 return NULL;
3247
3248 mtx_lock(&dev->bo_mutex);
3249 struct hash_entry *entry = _mesa_hash_table_search(dev->bo_sizes, name);
3250 struct tu_debug_bos_entry *debug_bos;
3251
3252 if (!entry) {
3253 debug_bos = (struct tu_debug_bos_entry *) calloc(
3254 1, sizeof(struct tu_debug_bos_entry));
3255 debug_bos->name = strdup(name);
3256 _mesa_hash_table_insert(dev->bo_sizes, debug_bos->name, debug_bos);
3257 } else {
3258 debug_bos = (struct tu_debug_bos_entry *) entry->data;
3259 }
3260
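/* Sizes are accounted at 4 KiB granularity to approximate the kernel's
 * page-aligned BO allocations. */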
3261 debug_bos->count++;
3262 debug_bos->size += align(size, 4096);
3263 mtx_unlock(&dev->bo_mutex);
3264
3265 return debug_bos->name;
3266 }
3267
3268 void
3269 tu_debug_bos_del(struct tu_device *dev, struct tu_bo *bo)
3270 {
3271 if (likely(!dev->bo_sizes) || !bo->name)
3272 return;
3273
3274 mtx_lock(&dev->bo_mutex);
3275 struct hash_entry *entry =
3276 _mesa_hash_table_search(dev->bo_sizes, bo->name);
3277 /* If we're finishing the BO, it should have been added already */
3278 assert(entry);
3279
3280 struct tu_debug_bos_entry *debug_bos =
3281 (struct tu_debug_bos_entry *) entry->data;
3282 debug_bos->count--;
3283 debug_bos->size -= align(bo->size, 4096);
3284 if (!debug_bos->count) {
3285 _mesa_hash_table_remove(dev->bo_sizes, entry);
3286 free((void *) debug_bos->name);
3287 free(debug_bos);
3288 }
3289 mtx_unlock(&dev->bo_mutex);
3290 }
3291
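/* qsort comparator: ascending by allocation count, so the most frequently
 * allocated names are printed last. */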
3292 static int debug_bos_count_compare(const void *in_a, const void *in_b)
3293 {
3294 struct tu_debug_bos_entry *a = *(struct tu_debug_bos_entry **)in_a;
3295 struct tu_debug_bos_entry *b = *(struct tu_debug_bos_entry **)in_b;
3296 return a->count - b->count;
3297 }
3298
3299 void
3300 tu_debug_bos_print_stats(struct tu_device *dev)
3301 {
3302 if (likely(!dev->bo_sizes))
3303 return;
3304
3305 mtx_lock(&dev->bo_mutex);
3306
3307 /* Put the HT's sizes data in an array so we can sort by number of allocations. */
3308 struct util_dynarray dyn;
3309 util_dynarray_init(&dyn, NULL);
3310
3311 uint32_t size = 0;
3312 uint32_t count = 0;
3313 hash_table_foreach(dev->bo_sizes, entry)
3314 {
3315 struct tu_debug_bos_entry *debug_bos =
3316 (struct tu_debug_bos_entry *) entry->data;
3317 util_dynarray_append(&dyn, struct tu_debug_bos_entry *, debug_bos);
3318 size += debug_bos->size / 1024;
3319 count += debug_bos->count;
3320 }
3321
3322 qsort(dyn.data,
3323 util_dynarray_num_elements(&dyn, struct tu_debug_bos_entry *),
3324 sizeof(struct tu_debug_bos_entry *), debug_bos_count_compare);
3325
3326 util_dynarray_foreach(&dyn, struct tu_debug_bos_entry *, entryp)
3327 {
3328 struct tu_debug_bos_entry *debug_bos = *entryp;
3329 mesa_logi("%30s: %4d bos, %lld kb\n", debug_bos->name, debug_bos->count,
3330 (long long) (debug_bos->size / 1024));
3331 }
3332
3333 mesa_logi("submitted %d bos (%d MB)\n", count, DIV_ROUND_UP(size, 1024));
3334
3335 util_dynarray_fini(&dyn);
3336
3337 mtx_unlock(&dev->bo_mutex);
3338 }
3339
3340 void
3341 tu_CmdBeginDebugUtilsLabelEXT(VkCommandBuffer _commandBuffer,
3342 const VkDebugUtilsLabelEXT *pLabelInfo)
3343 {
3344 VK_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, _commandBuffer);
3345
3346 vk_common_CmdBeginDebugUtilsLabelEXT(_commandBuffer, pLabelInfo);
3347
3348 /* Note that the spec says:
3349 *
3350 * "An application may open a debug label region in one command buffer and
3351 * close it in another, or otherwise split debug label regions across
3352 * multiple command buffers or multiple queue submissions. When viewed
3353 * from the linear series of submissions to a single queue, the calls to
3354 * vkCmdBeginDebugUtilsLabelEXT and vkCmdEndDebugUtilsLabelEXT must be
3355 * matched and balanced."
3356 *
3357 * But if you're beginning labeling during a renderpass and ending outside
3358 * it, or vice versa, these trace ranges in perfetto will be unbalanced. I
3359 * expect that u_trace and perfetto will do something like take just one of
3360 * the begins/ends, or drop the event entirely, but not crash. Similarly,
3361 * I think we'll have problems if the tracepoints are split across cmd
3362 * buffers. Still, getting the simple case of cmd buffer annotation into
3363 * perfetto should prove useful.
3364 */
3365 const char *label = pLabelInfo->pLabelName;
3366 if (cmd_buffer->state.pass) {
3367 trace_start_cmd_buffer_annotation_rp(
3368 &cmd_buffer->trace, &cmd_buffer->draw_cs, strlen(label), label);
3369 } else {
3370 trace_start_cmd_buffer_annotation(&cmd_buffer->trace, &cmd_buffer->cs,
3371 strlen(label), label);
3372 }
3373 }
3374
3375 void
3376 tu_CmdEndDebugUtilsLabelEXT(VkCommandBuffer _commandBuffer)
3377 {
3378 VK_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, _commandBuffer);
3379
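/* Only close a trace range if a label region was actually opened in this
 * command buffer; an unbalanced end (see the note in
 * tu_CmdBeginDebugUtilsLabelEXT) is ignored here. */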
3380 if (cmd_buffer->vk.labels.size > 0) {
3381 if (cmd_buffer->state.pass) {
3382 trace_end_cmd_buffer_annotation_rp(&cmd_buffer->trace,
3383 &cmd_buffer->draw_cs);
3384 } else {
3385 trace_end_cmd_buffer_annotation(&cmd_buffer->trace, &cmd_buffer->cs);
3386 }
3387 }
3388
3389 vk_common_CmdEndDebugUtilsLabelEXT(_commandBuffer);
3390 }
3391