xref: /aosp_15_r20/external/mesa3d/src/freedreno/vulkan/tu_device.cc (revision 6104692788411f58d303aa86923a9ff6ecaded22)
/*
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 * SPDX-License-Identifier: MIT
 *
 * based in part on anv driver which is:
 * Copyright © 2015 Intel Corporation
 */

#include "tu_device.h"

#include "drm-uapi/drm_fourcc.h"
#include "fdl/freedreno_layout.h"
#include <fcntl.h>
#include <poll.h>

#include "git_sha1.h"
#include "util/u_debug.h"
#include "util/disk_cache.h"
#include "util/hex.h"
#include "util/driconf.h"
#include "util/os_misc.h"
#include "util/u_process.h"
#include "vk_android.h"
#include "vk_shader_module.h"
#include "vk_sampler.h"
#include "vk_util.h"

/* for fd_get_driver/device_uuid() */
#include "freedreno/common/freedreno_uuid.h"
#include "freedreno/common/freedreno_stompable_regs.h"

#include "tu_clear_blit.h"
#include "tu_cmd_buffer.h"
#include "tu_cs.h"
#include "tu_descriptor_set.h"
#include "tu_dynamic_rendering.h"
#include "tu_image.h"
#include "tu_pass.h"
#include "tu_query_pool.h"
#include "tu_rmv.h"
#include "tu_tracepoints.h"
#include "tu_wsi.h"

#if DETECT_OS_ANDROID
#include "util/u_gralloc/u_gralloc.h"
#include <vndk/hardware_buffer.h>
#endif

uint64_t os_page_size = 4096;

static int
tu_device_get_cache_uuid(struct tu_physical_device *device, void *uuid)
{
   struct mesa_sha1 ctx;
   unsigned char sha1[20];
   /* Note: IR3_SHADER_DEBUG also affects compilation, but it's not
    * initialized until after compiler creation so we have to add it to the
    * shader hash instead, since the compiler is only created with the logical
    * device.
    */
   uint64_t driver_flags = tu_env.debug & TU_DEBUG_NOMULTIPOS;
   uint16_t family = fd_dev_gpu_id(&device->dev_id);

   memset(uuid, 0, VK_UUID_SIZE);
   _mesa_sha1_init(&ctx);

   if (!disk_cache_get_function_identifier((void *)tu_device_get_cache_uuid, &ctx))
      return -1;

   _mesa_sha1_update(&ctx, &family, sizeof(family));
   _mesa_sha1_update(&ctx, &driver_flags, sizeof(driver_flags));
   _mesa_sha1_final(&ctx, sha1);

   memcpy(uuid, sha1, VK_UUID_SIZE);
   return 0;
}
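/* The 16 bytes produced here end up as
 * VkPhysicalDeviceProperties::pipelineCacheUUID (via device->cache_uuid
 * below). A minimal sketch of the application-side counterpart, kept out of
 * the build: it validates a serialized pipeline cache blob against the
 * current device using only the spec-defined VkPipelineCacheHeaderVersionOne
 * layout. The helper name is hypothetical and not part of this driver.
 */
#if 0
#include <string.h>

static bool
cache_blob_matches_device(const void *blob, size_t size,
                          const VkPhysicalDeviceProperties *props)
{
   VkPipelineCacheHeaderVersionOne header;
   if (size < sizeof(header))
      return false;
   memcpy(&header, blob, sizeof(header));
   /* tu_device_get_cache_uuid() hashes the build identifier, GPU family and
    * cache-relevant debug flags, so this compare fails exactly when reusing
    * the blob would be unsafe (different driver build, GPU, or flags).
    */
   return header.headerVersion == VK_PIPELINE_CACHE_HEADER_VERSION_ONE &&
          memcmp(header.pipelineCacheUUID, props->pipelineCacheUUID,
                 VK_UUID_SIZE) == 0;
}
#endif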

#define TU_API_VERSION VK_MAKE_VERSION(1, 3, VK_HEADER_VERSION)
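/* VK_MAKE_VERSION packs this as (major << 22) | (minor << 12) | patch; for
 * example, if VK_HEADER_VERSION were 290, the reported version 1.3.290 would
 * be (1 << 22) | (3 << 12) | 290 = 0x403122. */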

VKAPI_ATTR VkResult VKAPI_CALL
tu_EnumerateInstanceVersion(uint32_t *pApiVersion)
{
    *pApiVersion = TU_API_VERSION;
    return VK_SUCCESS;
}

static const struct vk_instance_extension_table tu_instance_extensions_supported = { .table = {
   .KHR_device_group_creation           = true,
#ifdef VK_USE_PLATFORM_DISPLAY_KHR
   .KHR_display                         = true,
#endif
   .KHR_external_fence_capabilities     = true,
   .KHR_external_memory_capabilities    = true,
   .KHR_external_semaphore_capabilities = true,
#ifdef VK_USE_PLATFORM_DISPLAY_KHR
   .KHR_get_display_properties2         = true,
#endif
   .KHR_get_physical_device_properties2 = true,
#ifdef TU_USE_WSI_PLATFORM
   .KHR_get_surface_capabilities2       = true,
   .KHR_surface                         = true,
   .KHR_surface_protected_capabilities  = true,
#endif
#ifdef VK_USE_PLATFORM_WAYLAND_KHR
   .KHR_wayland_surface                 = true,
#endif
#ifdef VK_USE_PLATFORM_XCB_KHR
   .KHR_xcb_surface                     = true,
#endif
#ifdef VK_USE_PLATFORM_XLIB_KHR
   .KHR_xlib_surface                    = true,
#endif
#ifdef VK_USE_PLATFORM_DISPLAY_KHR
   .EXT_acquire_drm_display             = true,
#endif
#ifdef VK_USE_PLATFORM_XLIB_XRANDR_EXT
   .EXT_acquire_xlib_display            = true,
#endif
   .EXT_debug_report                    = true,
   .EXT_debug_utils                     = true,
#ifdef VK_USE_PLATFORM_DISPLAY_KHR
   .EXT_direct_mode_display             = true,
   .EXT_display_surface_counter         = true,
#endif
#ifndef VK_USE_PLATFORM_WIN32_KHR
   .EXT_headless_surface                = true,
#endif
#ifdef TU_USE_WSI_PLATFORM
   .EXT_surface_maintenance1            = true,
   .EXT_swapchain_colorspace            = true,
#endif
} };

static bool
is_kgsl(struct tu_instance *instance)
{
   return strcmp(instance->knl->name, "kgsl") == 0;
}

static void
get_device_extensions(const struct tu_physical_device *device,
                      struct vk_device_extension_table *ext)
{
   *ext = (struct vk_device_extension_table) { .table = {
      .KHR_8bit_storage = device->info->a7xx.storage_8bit,
      .KHR_16bit_storage = device->info->a6xx.storage_16bit,
      .KHR_bind_memory2 = true,
      .KHR_buffer_device_address = true,
      .KHR_copy_commands2 = true,
      .KHR_create_renderpass2 = true,
      .KHR_dedicated_allocation = true,
      .KHR_depth_stencil_resolve = true,
      .KHR_descriptor_update_template = true,
      .KHR_device_group = true,
      .KHR_draw_indirect_count = true,
      .KHR_driver_properties = true,
      .KHR_dynamic_rendering = true,
      .KHR_external_fence = true,
      .KHR_external_fence_fd = true,
      .KHR_external_memory = true,
      .KHR_external_memory_fd = true,
      .KHR_external_semaphore = true,
      .KHR_external_semaphore_fd = true,
      .KHR_format_feature_flags2 = true,
      .KHR_get_memory_requirements2 = true,
      .KHR_global_priority = true,
      .KHR_image_format_list = true,
      .KHR_imageless_framebuffer = true,
#ifdef TU_USE_WSI_PLATFORM
      .KHR_incremental_present = true,
#endif
      .KHR_index_type_uint8 = true,
      .KHR_line_rasterization = true,
      .KHR_load_store_op_none = true,
      .KHR_maintenance1 = true,
      .KHR_maintenance2 = true,
      .KHR_maintenance3 = true,
      .KHR_maintenance4 = true,
      .KHR_maintenance5 = true,
      .KHR_maintenance6 = true,
      .KHR_map_memory2 = true,
      .KHR_multiview = TU_DEBUG(NOCONFORM) ? true : device->info->a6xx.has_hw_multiview,
      .KHR_performance_query = TU_DEBUG(PERFC),
      .KHR_pipeline_executable_properties = true,
      .KHR_pipeline_library = true,
#ifdef TU_USE_WSI_PLATFORM
      /* Hide these behind dri configs for now since we cannot implement it reliably on
       * all surfaces yet. There is no surface capability query for present wait/id,
       * but the feature is useful enough to hide behind an opt-in mechanism for now.
       * If the instance only enables surface extensions that unconditionally support present wait,
       * we can also expose the extension that way. */
      .KHR_present_id = (driQueryOptionb(&device->instance->dri_options, "vk_khr_present_wait") ||
                         wsi_common_vk_instance_supports_present_wait(&device->instance->vk)),
      .KHR_present_wait = (driQueryOptionb(&device->instance->dri_options, "vk_khr_present_wait") ||
                           wsi_common_vk_instance_supports_present_wait(&device->instance->vk)),
#endif
      .KHR_push_descriptor = true,
      .KHR_relaxed_block_layout = true,
      .KHR_sampler_mirror_clamp_to_edge = true,
      .KHR_sampler_ycbcr_conversion = true,
      .KHR_separate_depth_stencil_layouts = true,
      .KHR_shader_draw_parameters = true,
      .KHR_shader_expect_assume = true,
      .KHR_shader_float16_int8 = true,
      .KHR_shader_float_controls = true,
      .KHR_shader_float_controls2 = true,
      .KHR_shader_integer_dot_product = true,
      .KHR_shader_non_semantic_info = true,
      .KHR_shader_relaxed_extended_instruction = true,
      .KHR_shader_subgroup_extended_types = true,
      .KHR_shader_subgroup_uniform_control_flow = true,
      .KHR_shader_terminate_invocation = true,
      .KHR_spirv_1_4 = true,
      .KHR_storage_buffer_storage_class = true,
#ifdef TU_USE_WSI_PLATFORM
      .KHR_swapchain = true,
      .KHR_swapchain_mutable_format = true,
#endif
      .KHR_synchronization2 = true,
      .KHR_timeline_semaphore = true,
      .KHR_uniform_buffer_standard_layout = true,
      .KHR_variable_pointers = true,
      .KHR_vertex_attribute_divisor = true,
      .KHR_vulkan_memory_model = true,
      .KHR_workgroup_memory_explicit_layout = true,
      .KHR_zero_initialize_workgroup_memory = true,

      .EXT_4444_formats = true,
      .EXT_attachment_feedback_loop_dynamic_state = true,
      .EXT_attachment_feedback_loop_layout = true,
      .EXT_border_color_swizzle = true,
      .EXT_color_write_enable = true,
      .EXT_conditional_rendering = true,
      .EXT_custom_border_color = true,
      .EXT_depth_clamp_zero_one = true,
      .EXT_depth_clip_control = true,
      .EXT_depth_clip_enable = true,
      .EXT_descriptor_buffer = true,
      .EXT_descriptor_indexing = true,
      .EXT_device_address_binding_report = true,
#ifdef VK_USE_PLATFORM_DISPLAY_KHR
      .EXT_display_control = true,
#endif
      .EXT_extended_dynamic_state = true,
      .EXT_extended_dynamic_state2 = true,
      .EXT_extended_dynamic_state3 = true,
      .EXT_external_memory_dma_buf = true,
      .EXT_filter_cubic = device->info->a6xx.has_tex_filter_cubic,
      .EXT_fragment_density_map = true,
      .EXT_global_priority = true,
      .EXT_global_priority_query = true,
      .EXT_graphics_pipeline_library = true,
      .EXT_host_query_reset = true,
      .EXT_image_2d_view_of_3d = true,
      .EXT_image_drm_format_modifier = true,
      .EXT_image_robustness = true,
      .EXT_image_view_min_lod = true,
      .EXT_index_type_uint8 = true,
      .EXT_inline_uniform_block = true,
      .EXT_legacy_dithering = true,
      .EXT_legacy_vertex_attributes = true,
      .EXT_line_rasterization = true,
      .EXT_load_store_op_none = true,
      .EXT_map_memory_placed = true,
      .EXT_memory_budget = true,
      .EXT_multi_draw = true,
      .EXT_mutable_descriptor_type = true,
      .EXT_nested_command_buffer = true,
      .EXT_non_seamless_cube_map = true,
      .EXT_physical_device_drm = !is_kgsl(device->instance),
      .EXT_pipeline_creation_cache_control = true,
      .EXT_pipeline_creation_feedback = true,
      .EXT_post_depth_coverage = true,
      .EXT_primitive_topology_list_restart = true,
      .EXT_primitives_generated_query = true,
      .EXT_private_data = true,
      .EXT_provoking_vertex = true,
      .EXT_queue_family_foreign = true,
      .EXT_rasterization_order_attachment_access = true,
      .EXT_robustness2 = true,
      .EXT_sample_locations = device->info->a6xx.has_sample_locations,
      .EXT_sampler_filter_minmax = device->info->a6xx.has_sampler_minmax,
      .EXT_scalar_block_layout = true,
      .EXT_separate_stencil_usage = true,
      .EXT_shader_demote_to_helper_invocation = true,
      .EXT_shader_module_identifier = true,
      .EXT_shader_replicated_composites = true,
      .EXT_shader_stencil_export = true,
      .EXT_shader_viewport_index_layer = TU_DEBUG(NOCONFORM) ? true : device->info->a6xx.has_hw_multiview,
      .EXT_subgroup_size_control = true,
#ifdef TU_USE_WSI_PLATFORM
      .EXT_swapchain_maintenance1 = true,
#endif
      .EXT_texel_buffer_alignment = true,
      .EXT_tooling_info = true,
      .EXT_transform_feedback = true,
      .EXT_vertex_attribute_divisor = true,
      .EXT_vertex_input_dynamic_state = true,

      /* For Graphics Flight Recorder (GFR) */
      .AMD_buffer_marker = true,
      .ARM_rasterization_order_attachment_access = true,
      .GOOGLE_decorate_string = true,
      .GOOGLE_hlsl_functionality1 = true,
      .GOOGLE_user_type = true,
      .IMG_filter_cubic = device->info->a6xx.has_tex_filter_cubic,
      .VALVE_mutable_descriptor_type = true,
   } };

#if DETECT_OS_ANDROID
   if (vk_android_get_ugralloc() != NULL) {
      ext->ANDROID_external_memory_android_hardware_buffer = true;
      ext->ANDROID_native_buffer = true;
   }
#endif
}

static void
tu_get_features(struct tu_physical_device *pdevice,
                struct vk_features *features)
{
   *features = (struct vk_features) { false };

   /* Vulkan 1.0 */
   features->robustBufferAccess = true;
   features->fullDrawIndexUint32 = true;
   features->imageCubeArray = true;
   features->independentBlend = true;
   features->geometryShader = true;
   features->tessellationShader = true;
   features->sampleRateShading = true;
   features->dualSrcBlend = true;
   features->logicOp = true;
   features->multiDrawIndirect = true;
   features->drawIndirectFirstInstance = true;
   features->depthClamp = true;
   features->depthBiasClamp = true;
   features->fillModeNonSolid = true;
   features->depthBounds = true;
   features->wideLines = pdevice->info->a6xx.line_width_max > 1.0;
   features->largePoints = true;
   features->alphaToOne = true;
   features->multiViewport = true;
   features->samplerAnisotropy = true;
   features->textureCompressionETC2 = true;
   features->textureCompressionASTC_LDR = true;
   features->textureCompressionBC = true;
   features->occlusionQueryPrecise = true;
   features->pipelineStatisticsQuery = true;
   features->vertexPipelineStoresAndAtomics = true;
   features->fragmentStoresAndAtomics = true;
   features->shaderTessellationAndGeometryPointSize = true;
   features->shaderImageGatherExtended = true;
   features->shaderStorageImageExtendedFormats = true;
   features->shaderStorageImageMultisample = false;
   features->shaderStorageImageReadWithoutFormat = true;
   features->shaderStorageImageWriteWithoutFormat = true;
   features->shaderUniformBufferArrayDynamicIndexing = true;
   features->shaderSampledImageArrayDynamicIndexing = true;
   features->shaderStorageBufferArrayDynamicIndexing = true;
   features->shaderStorageImageArrayDynamicIndexing = true;
   features->shaderClipDistance = true;
   features->shaderCullDistance = true;
   features->shaderFloat64 = false;
   features->shaderInt64 = false;
   features->shaderInt16 = true;
   features->sparseBinding = false;
   features->variableMultisampleRate = true;
   features->inheritedQueries = true;

   /* Vulkan 1.1 */
   features->storageBuffer16BitAccess            = pdevice->info->a6xx.storage_16bit;
   features->uniformAndStorageBuffer16BitAccess  = false;
   features->storagePushConstant16               = false;
   features->storageInputOutput16                = false;
   features->multiview                           = true;
   features->multiviewGeometryShader             = false;
   features->multiviewTessellationShader         = false;
   features->variablePointersStorageBuffer       = true;
   features->variablePointers                    = true;
   features->protectedMemory                     = false;
   features->samplerYcbcrConversion              = true;
   features->shaderDrawParameters                = true;

   /* Vulkan 1.2 */
   features->samplerMirrorClampToEdge            = true;
   features->drawIndirectCount                   = true;
   features->storageBuffer8BitAccess             = pdevice->info->a7xx.storage_8bit;
   features->uniformAndStorageBuffer8BitAccess   = false;
   features->storagePushConstant8                = false;
   features->shaderBufferInt64Atomics            = false;
   features->shaderSharedInt64Atomics            = false;
   features->shaderFloat16                       = true;
   features->shaderInt8                          = true;

   features->descriptorIndexing                                 = true;
   features->shaderInputAttachmentArrayDynamicIndexing          = false;
   features->shaderUniformTexelBufferArrayDynamicIndexing       = true;
   features->shaderStorageTexelBufferArrayDynamicIndexing       = true;
   features->shaderUniformBufferArrayNonUniformIndexing         = true;
   features->shaderSampledImageArrayNonUniformIndexing          = true;
   features->shaderStorageBufferArrayNonUniformIndexing         = true;
   features->shaderStorageImageArrayNonUniformIndexing          = true;
   features->shaderInputAttachmentArrayNonUniformIndexing       = false;
   features->shaderUniformTexelBufferArrayNonUniformIndexing    = true;
   features->shaderStorageTexelBufferArrayNonUniformIndexing    = true;
   features->descriptorBindingUniformBufferUpdateAfterBind      = true;
   features->descriptorBindingSampledImageUpdateAfterBind       = true;
   features->descriptorBindingStorageImageUpdateAfterBind       = true;
   features->descriptorBindingStorageBufferUpdateAfterBind      = true;
   features->descriptorBindingUniformTexelBufferUpdateAfterBind = true;
   features->descriptorBindingStorageTexelBufferUpdateAfterBind = true;
   features->descriptorBindingUpdateUnusedWhilePending          = true;
   features->descriptorBindingPartiallyBound                    = true;
   features->descriptorBindingVariableDescriptorCount           = true;
   features->runtimeDescriptorArray                             = true;

   features->samplerFilterMinmax                 =
      pdevice->info->a6xx.has_sampler_minmax;
   features->scalarBlockLayout                   = true;
   features->imagelessFramebuffer                = true;
   features->uniformBufferStandardLayout         = true;
   features->shaderSubgroupExtendedTypes         = true;
   features->separateDepthStencilLayouts         = true;
   features->hostQueryReset                      = true;
   features->timelineSemaphore                   = true;
   features->bufferDeviceAddress                 = true;
   features->bufferDeviceAddressCaptureReplay    = pdevice->has_set_iova;
   features->bufferDeviceAddressMultiDevice      = false;
   features->vulkanMemoryModel                   = true;
   features->vulkanMemoryModelDeviceScope        = true;
   features->vulkanMemoryModelAvailabilityVisibilityChains = true;
   features->shaderOutputViewportIndex           = true;
   features->shaderOutputLayer                   = true;
   features->subgroupBroadcastDynamicId          = true;

   /* Vulkan 1.3 */
   features->robustImageAccess                   = true;
   features->inlineUniformBlock                  = true;
   features->descriptorBindingInlineUniformBlockUpdateAfterBind = true;
   features->pipelineCreationCacheControl        = true;
   features->privateData                         = true;
   features->shaderDemoteToHelperInvocation      = true;
   features->shaderTerminateInvocation           = true;
   features->subgroupSizeControl                 = true;
   features->computeFullSubgroups                = true;
   features->synchronization2                    = true;
   features->textureCompressionASTC_HDR          = false;
   features->shaderZeroInitializeWorkgroupMemory = true;
   features->dynamicRendering                    = true;
   features->shaderIntegerDotProduct             = true;
   features->maintenance4                        = true;

   /* VK_KHR_index_type_uint8 */
   features->indexTypeUint8 = true;

   /* VK_KHR_line_rasterization */
   features->rectangularLines = true;
   features->bresenhamLines = true;
   features->smoothLines = false;
   features->stippledRectangularLines = false;
   features->stippledBresenhamLines = false;
   features->stippledSmoothLines = false;

   /* VK_KHR_maintenance5 */
   features->maintenance5 = true;

   /* VK_KHR_maintenance6 */
   features->maintenance6 = true;

   /* VK_KHR_performance_query */
   features->performanceCounterQueryPools = true;
   features->performanceCounterMultipleQueryPools = false;

   /* VK_KHR_pipeline_executable_properties */
   features->pipelineExecutableInfo = true;

   /* VK_KHR_present_id */
   features->presentId = pdevice->vk.supported_extensions.KHR_present_id;

   /* VK_KHR_present_wait */
   features->presentWait = pdevice->vk.supported_extensions.KHR_present_wait;

   /* VK_KHR_shader_expect_assume */
   features->shaderExpectAssume = true;

   /* VK_KHR_shader_float_controls2 */
   features->shaderFloatControls2 = true;

   /* VK_KHR_shader_subgroup_uniform_control_flow */
   features->shaderSubgroupUniformControlFlow = true;

   /* VK_KHR_vertex_attribute_divisor */
   features->vertexAttributeInstanceRateDivisor = true;
   features->vertexAttributeInstanceRateZeroDivisor = true;

   /* VK_KHR_workgroup_memory_explicit_layout */
   features->workgroupMemoryExplicitLayout = true;
   features->workgroupMemoryExplicitLayoutScalarBlockLayout = true;
   features->workgroupMemoryExplicitLayout8BitAccess = true;
   features->workgroupMemoryExplicitLayout16BitAccess = true;

   /* VK_EXT_4444_formats */
   features->formatA4R4G4B4 = true;
   features->formatA4B4G4R4 = true;

   /* VK_EXT_attachment_feedback_loop_dynamic_state */
   features->attachmentFeedbackLoopDynamicState = true;

   /* VK_EXT_attachment_feedback_loop_layout */
   features->attachmentFeedbackLoopLayout = true;

   /* VK_EXT_border_color_swizzle */
   features->borderColorSwizzle = true;
   features->borderColorSwizzleFromImage = true;

   /* VK_EXT_color_write_enable */
   features->colorWriteEnable = true;

   /* VK_EXT_conditional_rendering */
   features->conditionalRendering = true;
   features->inheritedConditionalRendering = true;

   /* VK_EXT_custom_border_color */
   features->customBorderColors = true;
   features->customBorderColorWithoutFormat = true;

   /* VK_EXT_depth_clamp_zero_one */
   features->depthClampZeroOne = true;

   /* VK_EXT_depth_clip_control */
   features->depthClipControl = true;

   /* VK_EXT_depth_clip_enable */
   features->depthClipEnable = true;

   /* VK_EXT_descriptor_buffer */
   features->descriptorBuffer = true;
   features->descriptorBufferCaptureReplay = pdevice->has_set_iova;
   features->descriptorBufferImageLayoutIgnored = true;
   features->descriptorBufferPushDescriptors = true;

   /* VK_EXT_device_address_binding_report */
   features->reportAddressBinding = true;

   /* VK_EXT_extended_dynamic_state */
   features->extendedDynamicState = true;

   /* VK_EXT_extended_dynamic_state2 */
   features->extendedDynamicState2 = true;
   features->extendedDynamicState2LogicOp = true;
   features->extendedDynamicState2PatchControlPoints = true;

   /* VK_EXT_extended_dynamic_state3 */
   features->extendedDynamicState3PolygonMode = true;
   features->extendedDynamicState3TessellationDomainOrigin = true;
   features->extendedDynamicState3DepthClampEnable = true;
   features->extendedDynamicState3DepthClipEnable = true;
   features->extendedDynamicState3LogicOpEnable = true;
   features->extendedDynamicState3SampleMask = true;
   features->extendedDynamicState3RasterizationSamples = true;
   features->extendedDynamicState3AlphaToCoverageEnable = true;
   features->extendedDynamicState3AlphaToOneEnable = true;
   features->extendedDynamicState3DepthClipNegativeOneToOne = true;
   features->extendedDynamicState3RasterizationStream = true;
   features->extendedDynamicState3ConservativeRasterizationMode = false;
   features->extendedDynamicState3ExtraPrimitiveOverestimationSize = false;
   features->extendedDynamicState3LineRasterizationMode = true;
   features->extendedDynamicState3LineStippleEnable = false;
   features->extendedDynamicState3ProvokingVertexMode = true;
   features->extendedDynamicState3SampleLocationsEnable =
      pdevice->info->a6xx.has_sample_locations;
   features->extendedDynamicState3ColorBlendEnable = true;
   features->extendedDynamicState3ColorBlendEquation = true;
   features->extendedDynamicState3ColorWriteMask = true;
   features->extendedDynamicState3ViewportWScalingEnable = false;
   features->extendedDynamicState3ViewportSwizzle = false;
   features->extendedDynamicState3ShadingRateImageEnable = false;
   features->extendedDynamicState3CoverageToColorEnable = false;
   features->extendedDynamicState3CoverageToColorLocation = false;
   features->extendedDynamicState3CoverageModulationMode = false;
   features->extendedDynamicState3CoverageModulationTableEnable = false;
   features->extendedDynamicState3CoverageModulationTable = false;
   features->extendedDynamicState3CoverageReductionMode = false;
   features->extendedDynamicState3RepresentativeFragmentTestEnable = false;
   features->extendedDynamicState3ColorBlendAdvanced = false;

   /* VK_EXT_fragment_density_map */
   features->fragmentDensityMap = true;
   features->fragmentDensityMapDynamic = false;
   features->fragmentDensityMapNonSubsampledImages = true;

   /* VK_EXT_global_priority_query */
   features->globalPriorityQuery = true;

   /* VK_EXT_graphics_pipeline_library */
   features->graphicsPipelineLibrary = true;

   /* VK_EXT_image_2d_view_of_3d */
   features->image2DViewOf3D = true;
   features->sampler2DViewOf3D = true;

   /* VK_EXT_image_view_min_lod */
   features->minLod = true;

   /* VK_EXT_legacy_vertex_attributes */
   features->legacyVertexAttributes = true;

   /* VK_EXT_legacy_dithering */
   features->legacyDithering = true;

   /* VK_EXT_map_memory_placed */
   features->memoryMapPlaced = true;
   features->memoryMapRangePlaced = false;
   features->memoryUnmapReserve = true;

   /* VK_EXT_multi_draw */
   features->multiDraw = true;

   /* VK_EXT_mutable_descriptor_type */
   features->mutableDescriptorType = true;

   /* VK_EXT_nested_command_buffer */
   features->nestedCommandBuffer = true;
   features->nestedCommandBufferRendering = true;
   features->nestedCommandBufferSimultaneousUse = true;

   /* VK_EXT_non_seamless_cube_map */
   features->nonSeamlessCubeMap = true;

   /* VK_EXT_primitive_topology_list_restart */
   features->primitiveTopologyListRestart = true;
   features->primitiveTopologyPatchListRestart = false;

   /* VK_EXT_primitives_generated_query */
   features->primitivesGeneratedQuery = true;
   features->primitivesGeneratedQueryWithRasterizerDiscard = false;
   features->primitivesGeneratedQueryWithNonZeroStreams = false;

   /* VK_EXT_provoking_vertex */
   features->provokingVertexLast = true;

   /* VK_EXT_rasterization_order_attachment_access */
   features->rasterizationOrderColorAttachmentAccess = true;
   features->rasterizationOrderDepthAttachmentAccess = true;
   features->rasterizationOrderStencilAttachmentAccess = true;

   /* VK_EXT_robustness2 */
   features->robustBufferAccess2 = true;
   features->robustImageAccess2 = true;
   features->nullDescriptor = true;

   /* VK_EXT_shader_module_identifier */
   features->shaderModuleIdentifier = true;

   /* VK_EXT_shader_replicated_composites */
   features->shaderReplicatedComposites = true;

#ifdef TU_USE_WSI_PLATFORM
   /* VK_EXT_swapchain_maintenance1 */
   features->swapchainMaintenance1 = true;
#endif

   /* VK_EXT_texel_buffer_alignment */
   features->texelBufferAlignment = true;

   /* VK_EXT_transform_feedback */
   features->transformFeedback = true;
   features->geometryStreams = true;

   /* VK_EXT_vertex_input_dynamic_state */
   features->vertexInputDynamicState = true;

   /* VK_KHR_shader_relaxed_extended_instruction */
   features->shaderRelaxedExtendedInstruction = true;
}

static void
tu_get_physical_device_properties_1_1(struct tu_physical_device *pdevice,
                                      struct vk_properties *p)
{
   memcpy(p->deviceUUID, pdevice->device_uuid, VK_UUID_SIZE);
   memcpy(p->driverUUID, pdevice->driver_uuid, VK_UUID_SIZE);
   memset(p->deviceLUID, 0, VK_LUID_SIZE);
   p->deviceNodeMask = 0;
   p->deviceLUIDValid = false;

   p->subgroupSize = pdevice->info->a6xx.supports_double_threadsize ?
      pdevice->info->threadsize_base * 2 : pdevice->info->threadsize_base;
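   /* E.g. with threadsize_base = 64 (the a6xx wave size), this reports a
    * subgroup size of 128 on parts that can run waves in double threadsize
    * mode, and 64 otherwise. */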
   p->subgroupSupportedStages = VK_SHADER_STAGE_COMPUTE_BIT;
   p->subgroupSupportedOperations = VK_SUBGROUP_FEATURE_BASIC_BIT |
                                    VK_SUBGROUP_FEATURE_VOTE_BIT |
                                    VK_SUBGROUP_FEATURE_BALLOT_BIT |
                                    VK_SUBGROUP_FEATURE_SHUFFLE_BIT |
                                    VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT |
                                    VK_SUBGROUP_FEATURE_ARITHMETIC_BIT;
   if (pdevice->info->a6xx.has_getfiberid) {
      p->subgroupSupportedStages |= VK_SHADER_STAGE_ALL_GRAPHICS;
      p->subgroupSupportedOperations |= VK_SUBGROUP_FEATURE_QUAD_BIT;
   }

   p->subgroupQuadOperationsInAllStages = false;

   p->pointClippingBehavior = VK_POINT_CLIPPING_BEHAVIOR_ALL_CLIP_PLANES;
   p->maxMultiviewViewCount =
      (pdevice->info->a6xx.has_hw_multiview || TU_DEBUG(NOCONFORM)) ? MAX_VIEWPORTS : 1;
   p->maxMultiviewInstanceIndex = INT_MAX;
   p->protectedNoFault = false;
   /* Our largest descriptors are 2 texture descriptors, or a texture and
    * sampler descriptor.
    */
   p->maxPerSetDescriptors = MAX_SET_SIZE / (2 * A6XX_TEX_CONST_DWORDS * 4);
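   /* With A6XX_TEX_CONST_DWORDS = 16, each texture/sampler descriptor is
    * 16 dwords = 64 bytes, so the densest pair costs 128 bytes and this
    * limit works out to MAX_SET_SIZE / 128. */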
   /* Our buffer size fields allow only this much */
   p->maxMemoryAllocationSize = 0xFFFFFFFFull;

}


static const size_t max_descriptor_set_size = MAX_SET_SIZE / (4 * A6XX_TEX_CONST_DWORDS);
static const VkSampleCountFlags sample_counts =
   VK_SAMPLE_COUNT_1_BIT | VK_SAMPLE_COUNT_2_BIT | VK_SAMPLE_COUNT_4_BIT;

static void
tu_get_physical_device_properties_1_2(struct tu_physical_device *pdevice,
                                      struct vk_properties *p)
{
   p->driverID = VK_DRIVER_ID_MESA_TURNIP;
   memset(p->driverName, 0, sizeof(p->driverName));
   snprintf(p->driverName, VK_MAX_DRIVER_NAME_SIZE,
            "turnip Mesa driver");
   memset(p->driverInfo, 0, sizeof(p->driverInfo));
   snprintf(p->driverInfo, VK_MAX_DRIVER_INFO_SIZE,
            "Mesa " PACKAGE_VERSION MESA_GIT_SHA1);
   p->conformanceVersion = (VkConformanceVersion) {
      .major = 1,
      .minor = 2,
      .subminor = 7,
      .patch = 1,
   };

   p->denormBehaviorIndependence =
      VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL;
   p->roundingModeIndependence =
      VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL;

   p->shaderDenormFlushToZeroFloat16         = true;
   p->shaderDenormPreserveFloat16            = false;
   p->shaderRoundingModeRTEFloat16           = true;
   p->shaderRoundingModeRTZFloat16           = false;
   p->shaderSignedZeroInfNanPreserveFloat16  = true;

   p->shaderDenormFlushToZeroFloat32         = true;
   p->shaderDenormPreserveFloat32            = false;
   p->shaderRoundingModeRTEFloat32           = true;
   p->shaderRoundingModeRTZFloat32           = false;
   p->shaderSignedZeroInfNanPreserveFloat32  = true;

   p->shaderDenormFlushToZeroFloat64         = false;
   p->shaderDenormPreserveFloat64            = false;
   p->shaderRoundingModeRTEFloat64           = false;
   p->shaderRoundingModeRTZFloat64           = false;
   p->shaderSignedZeroInfNanPreserveFloat64  = false;

   p->shaderUniformBufferArrayNonUniformIndexingNative   = true;
   p->shaderSampledImageArrayNonUniformIndexingNative    = true;
   p->shaderStorageBufferArrayNonUniformIndexingNative   = true;
   p->shaderStorageImageArrayNonUniformIndexingNative    = true;
   p->shaderInputAttachmentArrayNonUniformIndexingNative = false;
   p->robustBufferAccessUpdateAfterBind                  = false;
   p->quadDivergentImplicitLod                           = false;

   p->maxUpdateAfterBindDescriptorsInAllPools            = max_descriptor_set_size;
   p->maxPerStageDescriptorUpdateAfterBindSamplers       = max_descriptor_set_size;
   p->maxPerStageDescriptorUpdateAfterBindUniformBuffers = max_descriptor_set_size;
   p->maxPerStageDescriptorUpdateAfterBindStorageBuffers = max_descriptor_set_size;
   p->maxPerStageDescriptorUpdateAfterBindSampledImages  = max_descriptor_set_size;
   p->maxPerStageDescriptorUpdateAfterBindStorageImages  = max_descriptor_set_size;
   p->maxPerStageDescriptorUpdateAfterBindInputAttachments = MAX_RTS;
   p->maxPerStageUpdateAfterBindResources                = max_descriptor_set_size;
   p->maxDescriptorSetUpdateAfterBindSamplers            = max_descriptor_set_size;
   p->maxDescriptorSetUpdateAfterBindUniformBuffers      = max_descriptor_set_size;
   p->maxDescriptorSetUpdateAfterBindUniformBuffersDynamic = MAX_DYNAMIC_UNIFORM_BUFFERS;
   p->maxDescriptorSetUpdateAfterBindStorageBuffers      = max_descriptor_set_size;
   p->maxDescriptorSetUpdateAfterBindStorageBuffersDynamic = MAX_DYNAMIC_STORAGE_BUFFERS;
   p->maxDescriptorSetUpdateAfterBindSampledImages       = max_descriptor_set_size;
   p->maxDescriptorSetUpdateAfterBindStorageImages       = max_descriptor_set_size;
   p->maxDescriptorSetUpdateAfterBindInputAttachments    = MAX_RTS;

   p->supportedDepthResolveModes    = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT;
   p->supportedStencilResolveModes  = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT;
   p->independentResolveNone  = false;
   p->independentResolve      = false;

   p->filterMinmaxSingleComponentFormats  = true;
   p->filterMinmaxImageComponentMapping   = true;

   p->maxTimelineSemaphoreValueDifference = UINT64_MAX;

   p->framebufferIntegerColorSampleCounts = sample_counts;
}

static void
tu_get_physical_device_properties_1_3(struct tu_physical_device *pdevice,
                                      struct vk_properties *p)
{
   p->minSubgroupSize = pdevice->info->threadsize_base;
   p->maxSubgroupSize = pdevice->info->a6xx.supports_double_threadsize ?
      pdevice->info->threadsize_base * 2 : pdevice->info->threadsize_base;
   p->maxComputeWorkgroupSubgroups = pdevice->info->max_waves;
   p->requiredSubgroupSizeStages = VK_SHADER_STAGE_ALL;

   p->maxInlineUniformBlockSize = MAX_INLINE_UBO_RANGE;
   p->maxPerStageDescriptorInlineUniformBlocks = MAX_INLINE_UBOS;
   p->maxPerStageDescriptorUpdateAfterBindInlineUniformBlocks = MAX_INLINE_UBOS;
   p->maxDescriptorSetInlineUniformBlocks = MAX_INLINE_UBOS;
   p->maxDescriptorSetUpdateAfterBindInlineUniformBlocks = MAX_INLINE_UBOS;
   p->maxInlineUniformTotalSize = MAX_INLINE_UBOS * MAX_INLINE_UBO_RANGE;

   p->integerDotProduct8BitUnsignedAccelerated = false;
   p->integerDotProduct8BitSignedAccelerated = false;
   p->integerDotProduct8BitMixedSignednessAccelerated = false;
   p->integerDotProduct4x8BitPackedUnsignedAccelerated =
      pdevice->info->a6xx.has_dp2acc;
   /* TODO: we should be able to emulate 4x8BitPackedSigned fast enough */
   p->integerDotProduct4x8BitPackedSignedAccelerated = false;
   p->integerDotProduct4x8BitPackedMixedSignednessAccelerated =
      pdevice->info->a6xx.has_dp2acc;
   p->integerDotProduct16BitUnsignedAccelerated = false;
   p->integerDotProduct16BitSignedAccelerated = false;
   p->integerDotProduct16BitMixedSignednessAccelerated = false;
   p->integerDotProduct32BitUnsignedAccelerated = false;
   p->integerDotProduct32BitSignedAccelerated = false;
   p->integerDotProduct32BitMixedSignednessAccelerated = false;
   p->integerDotProduct64BitUnsignedAccelerated = false;
   p->integerDotProduct64BitSignedAccelerated = false;
   p->integerDotProduct64BitMixedSignednessAccelerated = false;
   p->integerDotProductAccumulatingSaturating8BitUnsignedAccelerated = false;
   p->integerDotProductAccumulatingSaturating8BitSignedAccelerated = false;
   p->integerDotProductAccumulatingSaturating8BitMixedSignednessAccelerated = false;
   p->integerDotProductAccumulatingSaturating4x8BitPackedUnsignedAccelerated =
      pdevice->info->a6xx.has_dp2acc;
   /* TODO: we should be able to emulate Saturating4x8BitPackedSigned fast enough */
   p->integerDotProductAccumulatingSaturating4x8BitPackedSignedAccelerated = false;
   p->integerDotProductAccumulatingSaturating4x8BitPackedMixedSignednessAccelerated =
      pdevice->info->a6xx.has_dp2acc;
   p->integerDotProductAccumulatingSaturating16BitUnsignedAccelerated = false;
   p->integerDotProductAccumulatingSaturating16BitSignedAccelerated = false;
   p->integerDotProductAccumulatingSaturating16BitMixedSignednessAccelerated = false;
   p->integerDotProductAccumulatingSaturating32BitUnsignedAccelerated = false;
   p->integerDotProductAccumulatingSaturating32BitSignedAccelerated = false;
   p->integerDotProductAccumulatingSaturating32BitMixedSignednessAccelerated = false;
   p->integerDotProductAccumulatingSaturating64BitUnsignedAccelerated = false;
   p->integerDotProductAccumulatingSaturating64BitSignedAccelerated = false;
   p->integerDotProductAccumulatingSaturating64BitMixedSignednessAccelerated = false;

   p->storageTexelBufferOffsetAlignmentBytes = 64;
   p->storageTexelBufferOffsetSingleTexelAlignment = true;
   p->uniformTexelBufferOffsetAlignmentBytes = 64;
   p->uniformTexelBufferOffsetSingleTexelAlignment = true;

   /* The address space is 4GB for current kernels, so there's no point
    * allowing a larger buffer. Our buffer sizes are 64-bit though, so
    * GetBufferDeviceRequirements won't fall over if someone actually creates
    * a 4GB buffer.
    */
   p->maxBufferSize = 1ull << 32;
}

static void
tu_get_properties(struct tu_physical_device *pdevice,
                  struct vk_properties *props)
{
   /* Limits */
   props->maxImageDimension1D = (1 << 14);
   props->maxImageDimension2D = (1 << 14);
   props->maxImageDimension3D = (1 << 11);
   props->maxImageDimensionCube = (1 << 14);
   props->maxImageArrayLayers = (1 << 11);
   props->maxTexelBufferElements = 128 * 1024 * 1024;
   props->maxUniformBufferRange = MAX_UNIFORM_BUFFER_RANGE;
   props->maxStorageBufferRange = MAX_STORAGE_BUFFER_RANGE;
   props->maxPushConstantsSize = MAX_PUSH_CONSTANTS_SIZE;
   props->maxMemoryAllocationCount = UINT32_MAX;
   props->maxSamplerAllocationCount = 64 * 1024;
   props->bufferImageGranularity = 64;          /* A cache line */
   props->sparseAddressSpaceSize = 0;
   props->maxBoundDescriptorSets = pdevice->usable_sets;
   props->maxPerStageDescriptorSamplers = max_descriptor_set_size;
   props->maxPerStageDescriptorUniformBuffers = max_descriptor_set_size;
   props->maxPerStageDescriptorStorageBuffers = max_descriptor_set_size;
   props->maxPerStageDescriptorSampledImages = max_descriptor_set_size;
   props->maxPerStageDescriptorStorageImages = max_descriptor_set_size;
   props->maxPerStageDescriptorInputAttachments = MAX_RTS;
   props->maxPerStageResources = max_descriptor_set_size;
   props->maxDescriptorSetSamplers = max_descriptor_set_size;
   props->maxDescriptorSetUniformBuffers = max_descriptor_set_size;
   props->maxDescriptorSetUniformBuffersDynamic = MAX_DYNAMIC_UNIFORM_BUFFERS;
   props->maxDescriptorSetStorageBuffers = max_descriptor_set_size;
   props->maxDescriptorSetStorageBuffersDynamic = MAX_DYNAMIC_STORAGE_BUFFERS;
   props->maxDescriptorSetSampledImages = max_descriptor_set_size;
   props->maxDescriptorSetStorageImages = max_descriptor_set_size;
   props->maxDescriptorSetInputAttachments = MAX_RTS;
   props->maxVertexInputAttributes = pdevice->info->a6xx.vs_max_inputs_count;
   props->maxVertexInputBindings = pdevice->info->a6xx.vs_max_inputs_count;
   props->maxVertexInputAttributeOffset = 4095;
   props->maxVertexInputBindingStride = 2048;
   props->maxVertexOutputComponents = 128;
   props->maxTessellationGenerationLevel = 64;
   props->maxTessellationPatchSize = 32;
   props->maxTessellationControlPerVertexInputComponents = 128;
   props->maxTessellationControlPerVertexOutputComponents = 128;
   props->maxTessellationControlPerPatchOutputComponents = 120;
   props->maxTessellationControlTotalOutputComponents = 4096;
   props->maxTessellationEvaluationInputComponents = 128;
   props->maxTessellationEvaluationOutputComponents = 128;
   props->maxGeometryShaderInvocations = 32;
   props->maxGeometryInputComponents = 64;
   props->maxGeometryOutputComponents = 128;
   props->maxGeometryOutputVertices = 256;
   props->maxGeometryTotalOutputComponents = 1024;
   props->maxFragmentInputComponents = 124;
   props->maxFragmentOutputAttachments = 8;
   props->maxFragmentDualSrcAttachments = 1;
   props->maxFragmentCombinedOutputResources = MAX_RTS + max_descriptor_set_size * 2;
   props->maxComputeSharedMemorySize = pdevice->info->cs_shared_mem_size;
   props->maxComputeWorkGroupCount[0] =
      props->maxComputeWorkGroupCount[1] =
      props->maxComputeWorkGroupCount[2] = 65535;
   props->maxComputeWorkGroupInvocations = pdevice->info->a6xx.supports_double_threadsize ?
      pdevice->info->threadsize_base * 2 * pdevice->info->max_waves :
      pdevice->info->threadsize_base * pdevice->info->max_waves;
   props->maxComputeWorkGroupSize[0] =
      props->maxComputeWorkGroupSize[1] =
      props->maxComputeWorkGroupSize[2] = 1024;
   props->subPixelPrecisionBits = 8;
   props->subTexelPrecisionBits = 8;
   props->mipmapPrecisionBits = 8;
   props->maxDrawIndexedIndexValue = UINT32_MAX;
   props->maxDrawIndirectCount = UINT32_MAX;
   props->maxSamplerLodBias = 4095.0 / 256.0; /* [-16, 15.99609375] */
   props->maxSamplerAnisotropy = 16;
   props->maxViewports =
         (pdevice->info->a6xx.has_hw_multiview || TU_DEBUG(NOCONFORM)) ? MAX_VIEWPORTS : 1;
   props->maxViewportDimensions[0] =
      props->maxViewportDimensions[1] = MAX_VIEWPORT_SIZE;
   props->viewportBoundsRange[0] = INT16_MIN;
   props->viewportBoundsRange[1] = INT16_MAX;
   props->viewportSubPixelBits = 8;
   props->minMemoryMapAlignment = 4096; /* A page */
   props->minTexelBufferOffsetAlignment = 64;
   props->minUniformBufferOffsetAlignment = 64;
   props->minStorageBufferOffsetAlignment = 4;
   props->minTexelOffset = -16;
   props->maxTexelOffset = 15;
   props->minTexelGatherOffset = -32;
   props->maxTexelGatherOffset = 31;
   props->minInterpolationOffset = -0.5;
   props->maxInterpolationOffset = 0.4375;
   props->subPixelInterpolationOffsetBits = 4;
   props->maxFramebufferWidth = (1 << 14);
   props->maxFramebufferHeight = (1 << 14);
   props->maxFramebufferLayers = (1 << 10);
   props->framebufferColorSampleCounts = sample_counts;
   props->framebufferDepthSampleCounts = sample_counts;
   props->framebufferStencilSampleCounts = sample_counts;
   props->framebufferNoAttachmentsSampleCounts = sample_counts;
   props->maxColorAttachments = MAX_RTS;
   props->sampledImageColorSampleCounts = sample_counts;
   props->sampledImageIntegerSampleCounts = sample_counts;
   props->sampledImageDepthSampleCounts = sample_counts;
   props->sampledImageStencilSampleCounts = sample_counts;
   props->storageImageSampleCounts = VK_SAMPLE_COUNT_1_BIT;
   props->maxSampleMaskWords = 1;
   props->timestampComputeAndGraphics = true;
   props->timestampPeriod = 1000000000.0 / 19200000.0; /* CP_ALWAYS_ON_COUNTER is fixed 19.2MHz */
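   /* = 1e9 / 19.2e6 ≈ 52.08, i.e. one counter tick every ~52 ns */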
   props->maxClipDistances = 8;
   props->maxCullDistances = 8;
   props->maxCombinedClipAndCullDistances = 8;
   props->discreteQueuePriorities = 2;
   props->pointSizeRange[0] = 1;
   props->pointSizeRange[1] = 4092;
   props->lineWidthRange[0] = pdevice->info->a6xx.line_width_min;
   props->lineWidthRange[1] = pdevice->info->a6xx.line_width_max;
   props->pointSizeGranularity = 0.0625;
   props->lineWidthGranularity =
      pdevice->info->a6xx.line_width_max == 1.0 ? 0.0 : 0.5;
   props->strictLines = true;
   props->standardSampleLocations = true;
   props->optimalBufferCopyOffsetAlignment = 128;
   props->optimalBufferCopyRowPitchAlignment = 128;
   props->nonCoherentAtomSize = 64;

   props->apiVersion =
      (pdevice->info->a6xx.has_hw_multiview || TU_DEBUG(NOCONFORM)) ?
         TU_API_VERSION : VK_MAKE_VERSION(1, 0, VK_HEADER_VERSION);
   props->driverVersion = vk_get_driver_version();
   props->vendorID = 0x5143;
   props->deviceID = pdevice->dev_id.chip_id;
   props->deviceType = VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU;

   /* sparse properties */
   props->sparseResidencyStandard2DBlockShape = { 0 };
   props->sparseResidencyStandard2DMultisampleBlockShape = { 0 };
   props->sparseResidencyStandard3DBlockShape = { 0 };
   props->sparseResidencyAlignedMipSize = { 0 };
   props->sparseResidencyNonResidentStrict = { 0 };

   strcpy(props->deviceName, pdevice->name);
   memcpy(props->pipelineCacheUUID, pdevice->cache_uuid, VK_UUID_SIZE);

   tu_get_physical_device_properties_1_1(pdevice, props);
   tu_get_physical_device_properties_1_2(pdevice, props);
   tu_get_physical_device_properties_1_3(pdevice, props);

   /* VK_KHR_push_descriptor */
   props->maxPushDescriptors = MAX_PUSH_DESCRIPTORS;

   /* VK_EXT_transform_feedback */
   props->maxTransformFeedbackStreams = IR3_MAX_SO_STREAMS;
   props->maxTransformFeedbackBuffers = IR3_MAX_SO_BUFFERS;
   props->maxTransformFeedbackBufferSize = UINT32_MAX;
   props->maxTransformFeedbackStreamDataSize = 512;
   props->maxTransformFeedbackBufferDataSize = 512;
   props->maxTransformFeedbackBufferDataStride = 512;
   props->transformFeedbackQueries = true;
   props->transformFeedbackStreamsLinesTriangles = true;
   props->transformFeedbackRasterizationStreamSelect = true;
   props->transformFeedbackDraw = true;

   /* VK_EXT_sample_locations */
   props->sampleLocationSampleCounts =
      pdevice->vk.supported_extensions.EXT_sample_locations ? sample_counts : 0;
   props->maxSampleLocationGridSize = (VkExtent2D) { 1, 1 };
   props->sampleLocationCoordinateRange[0] = SAMPLE_LOCATION_MIN;
   props->sampleLocationCoordinateRange[1] = SAMPLE_LOCATION_MAX;
   props->sampleLocationSubPixelBits = 4;
   props->variableSampleLocations = true;

   /* VK_KHR_vertex_attribute_divisor */
   props->maxVertexAttribDivisor = UINT32_MAX;
   props->supportsNonZeroFirstInstance = true;

   /* VK_EXT_custom_border_color */
   props->maxCustomBorderColorSamplers = TU_BORDER_COLOR_COUNT;

   /* VK_KHR_performance_query */
   props->allowCommandBufferQueryCopies = false;

   /* VK_EXT_robustness2 */
   /* see write_buffer_descriptor() */
   props->robustStorageBufferAccessSizeAlignment = 4;
   /* see write_ubo_descriptor() */
   props->robustUniformBufferAccessSizeAlignment = 16;

   /* VK_EXT_provoking_vertex */
   props->provokingVertexModePerPipeline = true;
   props->transformFeedbackPreservesTriangleFanProvokingVertex = false;

   /* VK_KHR_line_rasterization */
   props->lineSubPixelPrecisionBits = 8;

   /* VK_EXT_physical_device_drm */
   props->drmHasPrimary = pdevice->has_master;
   props->drmPrimaryMajor = pdevice->master_major;
   props->drmPrimaryMinor = pdevice->master_minor;

   props->drmHasRender = pdevice->has_local;
   props->drmRenderMajor = pdevice->local_major;
   props->drmRenderMinor = pdevice->local_minor;

   /* VK_EXT_shader_module_identifier */
   STATIC_ASSERT(sizeof(vk_shaderModuleIdentifierAlgorithmUUID) ==
                 sizeof(props->shaderModuleIdentifierAlgorithmUUID));
   memcpy(props->shaderModuleIdentifierAlgorithmUUID,
          vk_shaderModuleIdentifierAlgorithmUUID,
          sizeof(props->shaderModuleIdentifierAlgorithmUUID));

   /* VK_EXT_map_memory_placed */
   os_get_page_size(&os_page_size);
   props->minPlacedMemoryMapAlignment = os_page_size;

   /* VK_EXT_multi_draw */
   props->maxMultiDrawCount = 2048;

   /* VK_EXT_nested_command_buffer */
   props->maxCommandBufferNestingLevel = UINT32_MAX;

   /* VK_EXT_graphics_pipeline_library */
   props->graphicsPipelineLibraryFastLinking = true;
   props->graphicsPipelineLibraryIndependentInterpolationDecoration = true;

   /* VK_EXT_extended_dynamic_state3 */
   props->dynamicPrimitiveTopologyUnrestricted = true;

   /* VK_EXT_descriptor_buffer */
   props->combinedImageSamplerDescriptorSingleArray = true;
   props->bufferlessPushDescriptors = true;
   props->allowSamplerImageViewPostSubmitCreation = true;
   props->descriptorBufferOffsetAlignment = A6XX_TEX_CONST_DWORDS * 4;
   props->maxDescriptorBufferBindings = pdevice->usable_sets;
   props->maxResourceDescriptorBufferBindings = pdevice->usable_sets;
   props->maxSamplerDescriptorBufferBindings = pdevice->usable_sets;
   props->maxEmbeddedImmutableSamplerBindings = pdevice->usable_sets;
   props->maxEmbeddedImmutableSamplers = max_descriptor_set_size;
   props->bufferCaptureReplayDescriptorDataSize = 0;
   props->imageCaptureReplayDescriptorDataSize = 0;
   props->imageViewCaptureReplayDescriptorDataSize = 0;
   props->samplerCaptureReplayDescriptorDataSize = 0;
   props->accelerationStructureCaptureReplayDescriptorDataSize = 0;
   /* Note: these sizes must match descriptor_size() */
   props->samplerDescriptorSize = A6XX_TEX_CONST_DWORDS * 4;
   props->combinedImageSamplerDescriptorSize = 2 * A6XX_TEX_CONST_DWORDS * 4;
   props->sampledImageDescriptorSize = A6XX_TEX_CONST_DWORDS * 4;
   props->storageImageDescriptorSize = A6XX_TEX_CONST_DWORDS * 4;
   props->uniformTexelBufferDescriptorSize = A6XX_TEX_CONST_DWORDS * 4;
   props->robustUniformTexelBufferDescriptorSize = A6XX_TEX_CONST_DWORDS * 4;
   props->storageTexelBufferDescriptorSize = A6XX_TEX_CONST_DWORDS * 4;
   props->robustStorageTexelBufferDescriptorSize = A6XX_TEX_CONST_DWORDS * 4;
   props->uniformBufferDescriptorSize = A6XX_TEX_CONST_DWORDS * 4;
   props->robustUniformBufferDescriptorSize = A6XX_TEX_CONST_DWORDS * 4;
   props->storageBufferDescriptorSize = A6XX_TEX_CONST_DWORDS * 4 * (1 +
      COND(pdevice->info->a6xx.storage_16bit && !pdevice->info->a6xx.has_isam_v, 1) +
      COND(pdevice->info->a7xx.storage_8bit, 1));
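   /* Each COND() above contributes one extra A6XX_TEX_CONST_DWORDS * 4 =
    * 64-byte descriptor plane (one when 16-bit storage is supported without
    * isam.v, one when 8-bit storage is supported), so the size works out to
    * 64, 128 or 192 bytes. */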
   props->robustStorageBufferDescriptorSize =
      props->storageBufferDescriptorSize;
   props->inputAttachmentDescriptorSize = TU_DEBUG(DYNAMIC) ?
      A6XX_TEX_CONST_DWORDS * 4 : 0;
   props->maxSamplerDescriptorBufferRange = ~0ull;
   props->maxResourceDescriptorBufferRange = ~0ull;
   props->samplerDescriptorBufferAddressSpaceSize = ~0ull;
   props->resourceDescriptorBufferAddressSpaceSize = ~0ull;
   props->descriptorBufferAddressSpaceSize = ~0ull;
   props->combinedImageSamplerDensityMapDescriptorSize = 2 * A6XX_TEX_CONST_DWORDS * 4;

   /* VK_EXT_legacy_vertex_attributes */
   props->nativeUnalignedPerformance = true;

   /* VK_EXT_fragment_density_map */
   props->minFragmentDensityTexelSize = (VkExtent2D) { MIN_FDM_TEXEL_SIZE, MIN_FDM_TEXEL_SIZE };
   props->maxFragmentDensityTexelSize = (VkExtent2D) { MAX_FDM_TEXEL_SIZE, MAX_FDM_TEXEL_SIZE };
   props->fragmentDensityInvocations = false;

   /* VK_KHR_maintenance5 */
   props->earlyFragmentMultisampleCoverageAfterSampleCounting = true;
   props->earlyFragmentSampleMaskTestBeforeSampleCounting = true;
   props->depthStencilSwizzleOneSupport = true;
   props->polygonModePointSize = true;
   props->nonStrictWideLinesUseParallelogram = false;
   props->nonStrictSinglePixelWideLinesUseParallelogram = false;

   /* VK_KHR_maintenance6 */
   props->blockTexelViewCompatibleMultipleLayers = true;
   props->maxCombinedImageSamplerDescriptorCount = 1;
   props->fragmentShadingRateClampCombinerInputs = false; /* TODO */
}

static const struct vk_pipeline_cache_object_ops *const cache_import_ops[] = {
   &tu_shader_ops,
   &tu_nir_shaders_ops,
   NULL,
};

VkResult
tu_physical_device_init(struct tu_physical_device *device,
                        struct tu_instance *instance)
{
   VkResult result = VK_SUCCESS;

   const char *fd_name = fd_dev_name(&device->dev_id);
   if (!fd_name) {
      return vk_startup_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER,
                               "device (chip_id = %" PRIX64
                               ", gpu_id = %u) is unsupported",
                               device->dev_id.chip_id, device->dev_id.gpu_id);
   }

   if (strncmp(fd_name, "FD", 2) == 0) {
      device->name = vk_asprintf(&instance->vk.alloc,
                                 VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE,
                                 "Turnip Adreno (TM) %s", &fd_name[2]);
   } else {
      device->name = vk_strdup(&instance->vk.alloc, fd_name,
                               VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
   }
   if (!device->name) {
      return vk_startup_errorf(instance, VK_ERROR_OUT_OF_HOST_MEMORY,
                               "device name alloc fail");
   }

   const struct fd_dev_info info = fd_dev_info(&device->dev_id);
   if (!info.chip) {
      result = vk_startup_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
                                 "device %s is unsupported", device->name);
      goto fail_free_name;
   }
   switch (fd_dev_gen(&device->dev_id)) {
   case 6:
   case 7: {
      device->dev_info = info;
      device->info = &device->dev_info;
      uint32_t depth_cache_size =
         device->info->num_ccu * device->info->a6xx.sysmem_per_ccu_depth_cache_size;
      uint32_t color_cache_size =
         (device->info->num_ccu *
          device->info->a6xx.sysmem_per_ccu_color_cache_size);
      uint32_t color_cache_size_gmem =
         color_cache_size /
         (1 << device->info->a6xx.gmem_ccu_color_cache_fraction);
1211 
1212       device->ccu_depth_offset_bypass = 0;
1213       device->ccu_offset_bypass =
1214          device->ccu_depth_offset_bypass + depth_cache_size;
1215 
1216       if (device->info->a7xx.has_gmem_vpc_attr_buf) {
1217          device->vpc_attr_buf_size_bypass =
1218             device->info->a7xx.sysmem_vpc_attr_buf_size;
1219          device->vpc_attr_buf_offset_bypass =
1220             device->ccu_offset_bypass + color_cache_size;
1221 
1222          device->vpc_attr_buf_size_gmem =
1223             device->info->a7xx.gmem_vpc_attr_buf_size;
1224          device->vpc_attr_buf_offset_gmem =
1225             device->gmem_size -
1226             (device->vpc_attr_buf_size_gmem * device->info->num_ccu);
1227 
1228          device->ccu_offset_gmem =
1229             device->vpc_attr_buf_offset_gmem - color_cache_size_gmem;
1230 
1231          device->usable_gmem_size_gmem = device->vpc_attr_buf_offset_gmem;
1232       } else {
1233          device->ccu_offset_gmem = device->gmem_size - color_cache_size_gmem;
1234          device->usable_gmem_size_gmem = device->gmem_size;
1235       }
1236 
1237       if (instance->reserve_descriptor_set) {
1238          device->usable_sets = device->reserved_set_idx = device->info->a6xx.max_sets - 1;
1239       } else {
1240          device->usable_sets = device->info->a6xx.max_sets;
1241          device->reserved_set_idx = -1;
1242       }
1243       break;
1244    }
1245    default:
1246       result = vk_startup_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
1247                                  "device %s is unsupported", device->name);
1248       goto fail_free_name;
1249    }
1250    if (tu_device_get_cache_uuid(device, device->cache_uuid)) {
1251       result = vk_startup_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
1252                                  "cannot generate UUID");
1253       goto fail_free_name;
1254    }
1255 
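   /* Cached non-coherent memory needs manual CPU cache maintenance, which
    * depends on knowing the L1 dcache line size; it is presumably left
    * disabled on 32-bit ARM because the required userspace cache ops are
    * not available there.
    */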
1256    device->level1_dcache_size = tu_get_l1_dcache_size();
1257    device->has_cached_non_coherent_memory =
1258       device->level1_dcache_size > 0 && !DETECT_ARCH_ARM;
1259 
1260    device->memory.type_count = 1;
1261    device->memory.types[0] =
1262       VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
1263       VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
1264       VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
1265 
1266    if (device->has_cached_coherent_memory) {
1267       device->memory.types[device->memory.type_count] =
1268          VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
1269          VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
1270          VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
1271          VK_MEMORY_PROPERTY_HOST_CACHED_BIT;
1272       device->memory.type_count++;
1273    }
1274 
1275    if (device->has_cached_non_coherent_memory) {
1276       device->memory.types[device->memory.type_count] =
1277          VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
1278          VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
1279          VK_MEMORY_PROPERTY_HOST_CACHED_BIT;
1280       device->memory.type_count++;
1281    }
1282 
1283    fd_get_driver_uuid(device->driver_uuid);
1284    fd_get_device_uuid(device->device_uuid, &device->dev_id);
1285 
1286    struct vk_physical_device_dispatch_table dispatch_table;
1287    vk_physical_device_dispatch_table_from_entrypoints(
1288       &dispatch_table, &tu_physical_device_entrypoints, true);
1289    vk_physical_device_dispatch_table_from_entrypoints(
1290       &dispatch_table, &wsi_physical_device_entrypoints, false);
1291 
1292    result = vk_physical_device_init(&device->vk, &instance->vk,
1293                                     NULL, NULL, NULL, /* We set up extensions later */
1294                                     &dispatch_table);
1295    if (result != VK_SUCCESS)
1296       goto fail_free_name;
1297 
1298    get_device_extensions(device, &device->vk.supported_extensions);
1299    tu_get_features(device, &device->vk.supported_features);
1300    tu_get_properties(device, &device->vk.properties);
1301 
1302    device->vk.supported_sync_types = device->sync_types;
1303 
1304 #ifdef TU_USE_WSI_PLATFORM
1305    result = tu_wsi_init(device);
1306    if (result != VK_SUCCESS) {
1307       vk_startup_errorf(instance, result, "WSI init failure");
1308       vk_physical_device_finish(&device->vk);
1309       goto fail_free_name;
1310    }
1311 #endif
1312 
1313    /* The gpu id is already embedded in the uuid so we just pass "tu"
1314     * when creating the cache.
1315     */
1316    char buf[VK_UUID_SIZE * 2 + 1];
1317    mesa_bytes_to_hex(buf, device->cache_uuid, VK_UUID_SIZE);
1318    device->vk.disk_cache = disk_cache_create(device->name, buf, 0);
1319 
1320    device->vk.pipeline_cache_import_ops = cache_import_ops;
1321 
1322    return VK_SUCCESS;
1323 
1324 fail_free_name:
1325    vk_free(&instance->vk.alloc, (void *)device->name);
1326    return result;
1327 }
1328 
1329 static void
tu_physical_device_finish(struct tu_physical_device * device)1330 tu_physical_device_finish(struct tu_physical_device *device)
1331 {
1332 #ifdef TU_USE_WSI_PLATFORM
1333    tu_wsi_finish(device);
1334 #endif
1335 
1336    close(device->local_fd);
1337    if (device->master_fd != -1)
1338       close(device->master_fd);
1339 
1340    if (device->kgsl_dma_fd != -1)
1341       close(device->kgsl_dma_fd);
1342 
1343    disk_cache_destroy(device->vk.disk_cache);
1344    vk_free(&device->instance->vk.alloc, (void *)device->name);
1345 
1346    vk_physical_device_finish(&device->vk);
1347 }
1348 
1349 static void
tu_destroy_physical_device(struct vk_physical_device * device)1350 tu_destroy_physical_device(struct vk_physical_device *device)
1351 {
1352    tu_physical_device_finish((struct tu_physical_device *) device);
1353    vk_free(&device->instance->alloc, device);
1354 }
1355 
1356 static const driOptionDescription tu_dri_options[] = {
1357    DRI_CONF_SECTION_PERFORMANCE
1358       DRI_CONF_VK_X11_OVERRIDE_MIN_IMAGE_COUNT(0)
1359       DRI_CONF_VK_KHR_PRESENT_WAIT(false)
1360       DRI_CONF_VK_X11_STRICT_IMAGE_COUNT(false)
1361       DRI_CONF_VK_X11_ENSURE_MIN_IMAGE_COUNT(false)
1362       DRI_CONF_VK_XWAYLAND_WAIT_READY(false)
1363    DRI_CONF_SECTION_END
1364 
1365    DRI_CONF_SECTION_DEBUG
1366       DRI_CONF_VK_WSI_FORCE_BGRA8_UNORM_FIRST(false)
1367       DRI_CONF_VK_WSI_FORCE_SWAPCHAIN_TO_CURRENT_EXTENT(false)
1368       DRI_CONF_VK_X11_IGNORE_SUBOPTIMAL(false)
1369       DRI_CONF_VK_DONT_CARE_AS_LOAD(false)
1370    DRI_CONF_SECTION_END
1371 
1372    DRI_CONF_SECTION_MISCELLANEOUS
1373       DRI_CONF_DISABLE_CONSERVATIVE_LRZ(false)
1374       DRI_CONF_TU_DONT_RESERVE_DESCRIPTOR_SET(false)
1375       DRI_CONF_TU_ALLOW_OOB_INDIRECT_UBO_LOADS(false)
1376       DRI_CONF_TU_DISABLE_D24S8_BORDER_COLOR_WORKAROUND(false)
1377    DRI_CONF_SECTION_END
1378 };
1379 
1380 static void
tu_init_dri_options(struct tu_instance * instance)1381 tu_init_dri_options(struct tu_instance *instance)
1382 {
1383    driParseOptionInfo(&instance->available_dri_options, tu_dri_options,
1384                       ARRAY_SIZE(tu_dri_options));
1385    driParseConfigFiles(&instance->dri_options, &instance->available_dri_options, 0, "turnip", NULL, NULL,
1386                        instance->vk.app_info.app_name, instance->vk.app_info.app_version,
1387                        instance->vk.app_info.engine_name, instance->vk.app_info.engine_version);
1388 
1389    instance->dont_care_as_load =
1390          driQueryOptionb(&instance->dri_options, "vk_dont_care_as_load");
1391    instance->conservative_lrz =
1392          !driQueryOptionb(&instance->dri_options, "disable_conservative_lrz");
1393    instance->reserve_descriptor_set =
1394          !driQueryOptionb(&instance->dri_options, "tu_dont_reserve_descriptor_set");
1395    instance->allow_oob_indirect_ubo_loads =
1396          driQueryOptionb(&instance->dri_options, "tu_allow_oob_indirect_ubo_loads");
1397    instance->disable_d24s8_border_color_workaround =
1398          driQueryOptionb(&instance->dri_options, "tu_disable_d24s8_border_color_workaround");
1399 }
1400 
1401 static uint32_t instance_count = 0;
1402 
1403 VKAPI_ATTR VkResult VKAPI_CALL
tu_CreateInstance(const VkInstanceCreateInfo * pCreateInfo,const VkAllocationCallbacks * pAllocator,VkInstance * pInstance)1404 tu_CreateInstance(const VkInstanceCreateInfo *pCreateInfo,
1405                   const VkAllocationCallbacks *pAllocator,
1406                   VkInstance *pInstance)
1407 {
1408    struct tu_instance *instance;
1409    VkResult result;
1410 
1411    tu_env_init();
1412 
1413    assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO);
1414 
1415    if (pAllocator == NULL)
1416       pAllocator = vk_default_allocator();
1417 
1418    instance = (struct tu_instance *) vk_zalloc(
1419       pAllocator, sizeof(*instance), 8, VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
1420 
1421    if (!instance)
1422       return vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);
1423 
1424    struct vk_instance_dispatch_table dispatch_table;
1425    vk_instance_dispatch_table_from_entrypoints(
1426       &dispatch_table, &tu_instance_entrypoints, true);
1427    vk_instance_dispatch_table_from_entrypoints(
1428       &dispatch_table, &wsi_instance_entrypoints, false);
1429 
1430    result = vk_instance_init(&instance->vk,
1431                              &tu_instance_extensions_supported,
1432                              &dispatch_table,
1433                              pCreateInfo, pAllocator);
1434    if (result != VK_SUCCESS) {
1435       vk_free(pAllocator, instance);
1436       return vk_error(NULL, result);
1437    }
1438 
1439    instance->vk.physical_devices.try_create_for_drm =
1440       tu_physical_device_try_create;
1441    instance->vk.physical_devices.enumerate = tu_enumerate_devices;
1442    instance->vk.physical_devices.destroy = tu_destroy_physical_device;
1443 
1444    instance->instance_idx = p_atomic_fetch_add(&instance_count, 1);
1445    if (TU_DEBUG(STARTUP))
1446       mesa_logi("Created an instance");
1447 
1448    VG(VALGRIND_CREATE_MEMPOOL(instance, 0, false));
1449 
1450    tu_init_dri_options(instance);
1451 
1452    *pInstance = tu_instance_to_handle(instance);
1453 
1454 #ifdef HAVE_PERFETTO
1455    tu_perfetto_init();
1456 #endif
1457 
1458    util_gpuvis_init();
1459 
1460    return VK_SUCCESS;
1461 }
1462 
1463 VKAPI_ATTR void VKAPI_CALL
tu_DestroyInstance(VkInstance _instance,const VkAllocationCallbacks * pAllocator)1464 tu_DestroyInstance(VkInstance _instance,
1465                    const VkAllocationCallbacks *pAllocator)
1466 {
1467    VK_FROM_HANDLE(tu_instance, instance, _instance);
1468 
1469    if (!instance)
1470       return;
1471 
1472    VG(VALGRIND_DESTROY_MEMPOOL(instance));
1473 
1474    driDestroyOptionCache(&instance->dri_options);
1475    driDestroyOptionInfo(&instance->available_dri_options);
1476 
1477    vk_instance_finish(&instance->vk);
1478    vk_free(&instance->vk.alloc, instance);
1479 }
1480 
1481 static const VkQueueFamilyProperties tu_queue_family_properties = {
1482    .queueFlags =
1483       VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT | VK_QUEUE_TRANSFER_BIT,
1484    .queueCount = 1,
1485    .timestampValidBits = 48,
1486    .minImageTransferGranularity = { 1, 1, 1 },
1487 };
1488 
1489 static void
tu_physical_device_get_global_priority_properties(const struct tu_physical_device * pdevice,VkQueueFamilyGlobalPriorityPropertiesKHR * props)1490 tu_physical_device_get_global_priority_properties(const struct tu_physical_device *pdevice,
1491                                                   VkQueueFamilyGlobalPriorityPropertiesKHR *props)
1492 {
1493    props->priorityCount = MIN2(pdevice->submitqueue_priority_count, 3);
1494    switch (props->priorityCount) {
1495    case 1:
1496       props->priorities[0] = VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR;
1497       break;
1498    case 2:
1499       props->priorities[0] = VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR;
1500       props->priorities[1] = VK_QUEUE_GLOBAL_PRIORITY_HIGH_KHR;
1501       break;
1502    case 3:
1503       props->priorities[0] = VK_QUEUE_GLOBAL_PRIORITY_LOW_KHR;
1504       props->priorities[1] = VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR;
1505       props->priorities[2] = VK_QUEUE_GLOBAL_PRIORITY_HIGH_KHR;
1506       break;
1507    default:
1508       unreachable("unexpected priority count");
1509       break;
1510    }
1511 }
1512 
1513 static int
tu_physical_device_get_submitqueue_priority(const struct tu_physical_device * pdevice,VkQueueGlobalPriorityKHR global_priority,bool global_priority_query)1514 tu_physical_device_get_submitqueue_priority(const struct tu_physical_device *pdevice,
1515                                             VkQueueGlobalPriorityKHR global_priority,
1516                                             bool global_priority_query)
1517 {
1518    if (global_priority_query) {
1519       VkQueueFamilyGlobalPriorityPropertiesKHR props;
1520       tu_physical_device_get_global_priority_properties(pdevice, &props);
1521 
1522       bool valid = false;
1523       for (uint32_t i = 0; i < props.priorityCount; i++) {
1524          if (props.priorities[i] == global_priority) {
1525             valid = true;
1526             break;
1527          }
1528       }
1529 
1530       if (!valid)
1531          return -1;
1532    }
1533 
1534    /* Valid values are from 0 to (pdevice->submitqueue_priority_count - 1),
1535     * with 0 being the highest priority.  This matches what freedreno does.
1536     */
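   /* For example, with a (hypothetical) submitqueue_priority_count of 3,
    * MEDIUM maps to 3 / 2 = 1, anything below MEDIUM to 3 - 1 = 2 (the
    * lowest), and anything above MEDIUM to 0 (the highest).
    */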
1537    int priority;
1538    if (global_priority == VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR)
1539       priority = pdevice->submitqueue_priority_count / 2;
1540    else if (global_priority < VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR)
1541       priority = pdevice->submitqueue_priority_count - 1;
1542    else
1543       priority = 0;
1544 
1545    return priority;
1546 }
1547 
1548 VKAPI_ATTR void VKAPI_CALL
tu_GetPhysicalDeviceQueueFamilyProperties2(VkPhysicalDevice physicalDevice,uint32_t * pQueueFamilyPropertyCount,VkQueueFamilyProperties2 * pQueueFamilyProperties)1549 tu_GetPhysicalDeviceQueueFamilyProperties2(
1550    VkPhysicalDevice physicalDevice,
1551    uint32_t *pQueueFamilyPropertyCount,
1552    VkQueueFamilyProperties2 *pQueueFamilyProperties)
1553 {
1554    VK_FROM_HANDLE(tu_physical_device, pdevice, physicalDevice);
1555 
1556    VK_OUTARRAY_MAKE_TYPED(VkQueueFamilyProperties2, out,
1557                           pQueueFamilyProperties, pQueueFamilyPropertyCount);
1558 
1559    vk_outarray_append_typed(VkQueueFamilyProperties2, &out, p)
1560    {
1561       p->queueFamilyProperties = tu_queue_family_properties;
1562 
1563       vk_foreach_struct(ext, p->pNext) {
1564          switch (ext->sType) {
1565          case VK_STRUCTURE_TYPE_QUEUE_FAMILY_GLOBAL_PRIORITY_PROPERTIES_KHR: {
1566             VkQueueFamilyGlobalPriorityPropertiesKHR *props =
1567                (VkQueueFamilyGlobalPriorityPropertiesKHR *) ext;
1568             tu_physical_device_get_global_priority_properties(pdevice, props);
1569             break;
1570          }
1571          default:
1572             break;
1573          }
1574       }
1575    }
1576 }
1577 
1578 uint64_t
tu_get_system_heap_size(struct tu_physical_device * physical_device)1579 tu_get_system_heap_size(struct tu_physical_device *physical_device)
1580 {
1581    uint64_t total_ram = 0;
1582    ASSERTED bool has_physical_memory =
1583       os_get_total_physical_memory(&total_ram);
1584    assert(has_physical_memory);
1585 
1586    /* We don't want to burn too much RAM with the GPU.  If the user has 4GiB
1587     * or less, we use at most half.  If they have more than 4GiB, we use 3/4.
1588     */
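   /* E.g. a (hypothetical) 16 GiB machine yields 12 GiB and a 4 GiB machine
    * yields 2 GiB, further clamped to the GPU VA size below when known.
    */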
1589    uint64_t available_ram;
1590    if (total_ram <= 4ull * 1024ull * 1024ull * 1024ull)
1591       available_ram = total_ram / 2;
1592    else
1593       available_ram = total_ram * 3 / 4;
1594 
1595    if (physical_device->va_size)
1596       available_ram = MIN2(available_ram, physical_device->va_size);
1597 
1598    return available_ram;
1599 }
1600 
1601 static VkDeviceSize
tu_get_budget_memory(struct tu_physical_device * physical_device)1602 tu_get_budget_memory(struct tu_physical_device *physical_device)
1603 {
1604    uint64_t heap_size = physical_device->heap.size;
1605    uint64_t heap_used = physical_device->heap.used;
1606    uint64_t sys_available;
1607    ASSERTED bool has_available_memory =
1608       os_get_available_system_memory(&sys_available);
1609    assert(has_available_memory);
1610 
1611    if (physical_device->va_size)
1612       sys_available = MIN2(sys_available, physical_device->va_size);
1613 
1614    /*
1615     * Let's not incite the app to starve the system: report at most 90% of
1616     * available system memory.
1617     */
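   /* E.g. (hypothetical numbers) with 10 GiB of free system memory the
    * budget becomes heap.used + 9 GiB, capped at the total heap size.
    */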
1618    uint64_t heap_available = sys_available * 9 / 10;
1619    return MIN2(heap_size, heap_used + heap_available);
1620 }
1621 
1622 VKAPI_ATTR void VKAPI_CALL
tu_GetPhysicalDeviceMemoryProperties2(VkPhysicalDevice pdev,VkPhysicalDeviceMemoryProperties2 * props2)1623 tu_GetPhysicalDeviceMemoryProperties2(VkPhysicalDevice pdev,
1624                                       VkPhysicalDeviceMemoryProperties2 *props2)
1625 {
1626    VK_FROM_HANDLE(tu_physical_device, physical_device, pdev);
1627 
1628    VkPhysicalDeviceMemoryProperties *props = &props2->memoryProperties;
1629    props->memoryHeapCount = 1;
1630    props->memoryHeaps[0].size = physical_device->heap.size;
1631    props->memoryHeaps[0].flags = physical_device->heap.flags;
1632 
1633    props->memoryTypeCount = physical_device->memory.type_count;
1634    for (uint32_t i = 0; i < physical_device->memory.type_count; i++) {
1635       props->memoryTypes[i] = (VkMemoryType) {
1636          .propertyFlags = physical_device->memory.types[i],
1637          .heapIndex     = 0,
1638       };
1639    }
1640 
1641    vk_foreach_struct(ext, props2->pNext)
1642    {
1643       switch (ext->sType) {
1644       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT: {
1645          VkPhysicalDeviceMemoryBudgetPropertiesEXT *memory_budget_props =
1646             (VkPhysicalDeviceMemoryBudgetPropertiesEXT *) ext;
1647          memory_budget_props->heapUsage[0] = physical_device->heap.used;
1648          memory_budget_props->heapBudget[0] = tu_get_budget_memory(physical_device);
1649 
1650          /* The heapBudget and heapUsage values must be zero for array elements
1651           * greater than or equal to VkPhysicalDeviceMemoryProperties::memoryHeapCount
1652           */
1653          for (unsigned i = 1; i < VK_MAX_MEMORY_HEAPS; i++) {
1654             memory_budget_props->heapBudget[i] = 0u;
1655             memory_budget_props->heapUsage[i] = 0u;
1656          }
1657          break;
1658       }
1659       default:
1660          break;
1661       }
1662    }
1663 }
1664 
1665 static VkResult
tu_queue_init(struct tu_device * device,struct tu_queue * queue,int idx,const VkDeviceQueueCreateInfo * create_info,bool global_priority_query)1666 tu_queue_init(struct tu_device *device,
1667               struct tu_queue *queue,
1668               int idx,
1669               const VkDeviceQueueCreateInfo *create_info,
1670               bool global_priority_query)
1671 {
1672    const VkDeviceQueueGlobalPriorityCreateInfoKHR *priority_info =
1673       vk_find_struct_const(create_info->pNext,
1674             DEVICE_QUEUE_GLOBAL_PRIORITY_CREATE_INFO_KHR);
1675    const enum VkQueueGlobalPriorityKHR global_priority = priority_info ?
1676       priority_info->globalPriority : VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR;
1677 
1678    const int priority = tu_physical_device_get_submitqueue_priority(
1679          device->physical_device, global_priority, global_priority_query);
1680    if (priority < 0) {
1681       return vk_startup_errorf(device->instance, VK_ERROR_INITIALIZATION_FAILED,
1682                                "invalid global priority");
1683    }
1684 
1685    VkResult result = vk_queue_init(&queue->vk, &device->vk, create_info, idx);
1686    if (result != VK_SUCCESS)
1687       return result;
1688 
1689    queue->device = device;
1690    queue->priority = priority;
1691    queue->vk.driver_submit = tu_queue_submit;
1692 
1693    int ret = tu_drm_submitqueue_new(device, priority, &queue->msm_queue_id);
1694    if (ret)
1695       return vk_startup_errorf(device->instance, VK_ERROR_INITIALIZATION_FAILED,
1696                                "submitqueue create failed");
1697 
1698    queue->fence = -1;
1699 
1700    return VK_SUCCESS;
1701 }
1702 
1703 static void
tu_queue_finish(struct tu_queue * queue)1704 tu_queue_finish(struct tu_queue *queue)
1705 {
1706    vk_queue_finish(&queue->vk);
1707    tu_drm_submitqueue_close(queue->device, queue->msm_queue_id);
1708 }
1709 
1710 uint64_t
tu_device_ticks_to_ns(struct tu_device * dev,uint64_t ts)1711 tu_device_ticks_to_ns(struct tu_device *dev, uint64_t ts)
1712 {
1713    /* This is based on the 19.2MHz always-on rbbm timer.
1714     *
1715     * TODO: we should probably query this value from the kernel.
1716     */
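   /* 10^9 / (19.2 * 10^6) = 52.083..., so the integer factor of 52 used
    * below undercounts by roughly 0.16% per tick.
    */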
1717    return ts * (1000000000 / 19200000);
1718 }
1719 
1720 struct u_trace_context *
tu_device_get_u_trace(struct tu_device * device)1721 tu_device_get_u_trace(struct tu_device *device)
1722 {
1723    return &device->trace_context;
1724 }
1725 
1726 static void *
tu_trace_create_buffer(struct u_trace_context * utctx,uint64_t size_B)1727 tu_trace_create_buffer(struct u_trace_context *utctx, uint64_t size_B)
1728 {
1729    struct tu_device *device =
1730       container_of(utctx, struct tu_device, trace_context);
1731 
1732    struct tu_bo *bo;
1733    tu_bo_init_new(device, NULL, &bo, size_B, TU_BO_ALLOC_INTERNAL_RESOURCE, "trace");
1734    tu_bo_map(device, bo, NULL);
1735 
1736    return bo;
1737 }
1738 
1739 static void
tu_trace_destroy_buffer(struct u_trace_context * utctx,void * timestamps)1740 tu_trace_destroy_buffer(struct u_trace_context *utctx, void *timestamps)
1741 {
1742    struct tu_device *device =
1743       container_of(utctx, struct tu_device, trace_context);
1744    struct tu_bo *bo = (struct tu_bo *) timestamps;
1745 
1746    tu_bo_finish(device, bo);
1747 }
1748 
1749 template <chip CHIP>
1750 static void
tu_trace_record_ts(struct u_trace * ut,void * cs,void * timestamps,uint64_t offset_B,uint32_t)1751 tu_trace_record_ts(struct u_trace *ut, void *cs, void *timestamps,
1752                    uint64_t offset_B, uint32_t)
1753 {
1754    struct tu_bo *bo = (struct tu_bo *) timestamps;
1755    struct tu_cs *ts_cs = (struct tu_cs *) cs;
1756 
1757    if (CHIP == A6XX) {
1758       tu_cs_emit_pkt7(ts_cs, CP_EVENT_WRITE, 4);
1759       tu_cs_emit(ts_cs, CP_EVENT_WRITE_0_EVENT(RB_DONE_TS) |
1760                            CP_EVENT_WRITE_0_TIMESTAMP);
1761       tu_cs_emit_qw(ts_cs, bo->iova + offset_B);
1762       tu_cs_emit(ts_cs, 0x00000000);
1763    } else {
1764       tu_cs_emit_pkt7(ts_cs, CP_EVENT_WRITE7, 3);
1765       tu_cs_emit(ts_cs, CP_EVENT_WRITE7_0(.event = RB_DONE_TS,
1766                                           .write_src = EV_WRITE_ALWAYSON,
1767                                           .write_dst = EV_DST_RAM,
1768                                           .write_enabled = true)
1769                            .value);
1770       tu_cs_emit_qw(ts_cs, bo->iova + offset_B);
1771    }
1772 }
1773 
1774 static uint64_t
tu_trace_read_ts(struct u_trace_context * utctx,void * timestamps,uint64_t offset_B,void * flush_data)1775 tu_trace_read_ts(struct u_trace_context *utctx,
1776                  void *timestamps, uint64_t offset_B, void *flush_data)
1777 {
1778    struct tu_device *device =
1779       container_of(utctx, struct tu_device, trace_context);
1780    struct tu_bo *bo = (struct tu_bo *) timestamps;
1781    struct tu_u_trace_submission_data *submission_data =
1782       (struct tu_u_trace_submission_data *) flush_data;
1783 
1784    /* Only need to stall on results for the first entry: */
1785    if (offset_B == 0) {
1786       tu_device_wait_u_trace(device, submission_data->syncobj);
1787    }
1788 
1789    if (tu_bo_map(device, bo, NULL) != VK_SUCCESS) {
1790       return U_TRACE_NO_TIMESTAMP;
1791    }
1792 
1793    uint64_t *ts = (uint64_t *) ((char *)bo->map + offset_B);
1794 
1795    /* Don't translate the no-timestamp marker: */
1796    if (*ts == U_TRACE_NO_TIMESTAMP)
1797       return U_TRACE_NO_TIMESTAMP;
1798 
1799    return tu_device_ticks_to_ns(device, *ts);
1800 }
1801 
1802 static void
tu_trace_delete_flush_data(struct u_trace_context * utctx,void * flush_data)1803 tu_trace_delete_flush_data(struct u_trace_context *utctx, void *flush_data)
1804 {
1805    struct tu_device *device =
1806       container_of(utctx, struct tu_device, trace_context);
1807    struct tu_u_trace_submission_data *submission_data =
1808       (struct tu_u_trace_submission_data *) flush_data;
1809 
1810    tu_u_trace_submission_data_finish(device, submission_data);
1811 }
1812 
1813 void
tu_copy_buffer(struct u_trace_context * utctx,void * cmdstream,void * ts_from,uint64_t from_offset_B,void * ts_to,uint64_t to_offset_B,uint64_t size_B)1814 tu_copy_buffer(struct u_trace_context *utctx, void *cmdstream,
1815                void *ts_from, uint64_t from_offset_B,
1816                void *ts_to, uint64_t to_offset_B,
1817                uint64_t size_B)
1818 {
1819    struct tu_cs *cs = (struct tu_cs *) cmdstream;
1820    struct tu_bo *bo_from = (struct tu_bo *) ts_from;
1821    struct tu_bo *bo_to = (struct tu_bo *) ts_to;
1822 
1823    tu_cs_emit_pkt7(cs, CP_MEMCPY, 5);
1824    tu_cs_emit(cs, size_B / sizeof(uint32_t));
1825    tu_cs_emit_qw(cs, bo_from->iova + from_offset_B);
1826    tu_cs_emit_qw(cs, bo_to->iova + to_offset_B);
1827 }
1828 
1829 static void
tu_trace_capture_data(struct u_trace * ut,void * cs,void * dst_buffer,uint64_t dst_offset_B,void * src_buffer,uint64_t src_offset_B,uint32_t size_B)1830 tu_trace_capture_data(struct u_trace *ut,
1831                         void *cs,
1832                         void *dst_buffer,
1833                         uint64_t dst_offset_B,
1834                         void *src_buffer,
1835                         uint64_t src_offset_B,
1836                         uint32_t size_B)
1837 {
1838    if (src_buffer)
1839       tu_copy_buffer(ut->utctx, cs, src_buffer, src_offset_B, dst_buffer,
1840                      dst_offset_B, size_B);
1841 }
1842 
1843 static const void *
tu_trace_get_data(struct u_trace_context * utctx,void * buffer,uint64_t offset_B,uint32_t size_B)1844 tu_trace_get_data(struct u_trace_context *utctx,
1845                   void *buffer,
1846                   uint64_t offset_B,
1847                   uint32_t size_B)
1848 {
1849    struct tu_bo *bo = (struct tu_bo *) buffer;
1850    return (char *) bo->map + offset_B;
1851 }
1852 
1853 /* Special helpers instead of u_trace_begin_iterator()/u_trace_end_iterator()
1854  * that ignore tracepoints at the beginning/end that are part of a
1855  * suspend/resume chain.
1856  */
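/* (Presumably the skipped tracepoints are instead accounted for by the
 * suspend/resume chain stitching done by tu_dynamic_rendering.)
 */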
1857 static struct u_trace_iterator
tu_cmd_begin_iterator(struct tu_cmd_buffer * cmdbuf)1858 tu_cmd_begin_iterator(struct tu_cmd_buffer *cmdbuf)
1859 {
1860    switch (cmdbuf->state.suspend_resume) {
1861    case SR_IN_PRE_CHAIN:
1862       return cmdbuf->trace_renderpass_end;
1863    case SR_AFTER_PRE_CHAIN:
1864    case SR_IN_CHAIN_AFTER_PRE_CHAIN:
1865       return cmdbuf->pre_chain.trace_renderpass_end;
1866    default:
1867       return u_trace_begin_iterator(&cmdbuf->trace);
1868    }
1869 }
1870 
1871 static struct u_trace_iterator
tu_cmd_end_iterator(struct tu_cmd_buffer * cmdbuf)1872 tu_cmd_end_iterator(struct tu_cmd_buffer *cmdbuf)
1873 {
1874    switch (cmdbuf->state.suspend_resume) {
1875    case SR_IN_PRE_CHAIN:
1876       return cmdbuf->trace_renderpass_end;
1877    case SR_IN_CHAIN:
1878    case SR_IN_CHAIN_AFTER_PRE_CHAIN:
1879       return cmdbuf->trace_renderpass_start;
1880    default:
1881       return u_trace_end_iterator(&cmdbuf->trace);
1882    }
1883 }

1884 VkResult
tu_create_copy_timestamp_cs(struct tu_cmd_buffer * cmdbuf,struct tu_cs ** cs,struct u_trace ** trace_copy)1885 tu_create_copy_timestamp_cs(struct tu_cmd_buffer *cmdbuf, struct tu_cs **cs,
1886                             struct u_trace **trace_copy)
1887 {
1888    *cs = (struct tu_cs *) vk_zalloc(&cmdbuf->device->vk.alloc,
1889                                     sizeof(struct tu_cs), 8,
1890                                     VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
1891 
1892    if (*cs == NULL) {
1893       return VK_ERROR_OUT_OF_HOST_MEMORY;
1894    }
1895 
1896    tu_cs_init(*cs, cmdbuf->device, TU_CS_MODE_GROW,
1897               list_length(&cmdbuf->trace.trace_chunks) * 6 * 2 + 3, "trace copy timestamp cs");
1898 
1899    tu_cs_begin(*cs);
1900 
1901    tu_cs_emit_wfi(*cs);
1902    tu_cs_emit_pkt7(*cs, CP_WAIT_FOR_ME, 0);
1903 
1904    *trace_copy = (struct u_trace *) vk_zalloc(
1905       &cmdbuf->device->vk.alloc, sizeof(struct u_trace), 8,
1906       VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
1907 
1908    if (*trace_copy == NULL) {
1909       return VK_ERROR_OUT_OF_HOST_MEMORY;
1910    }
1911 
1912    u_trace_init(*trace_copy, cmdbuf->trace.utctx);
1913    u_trace_clone_append(tu_cmd_begin_iterator(cmdbuf),
1914                         tu_cmd_end_iterator(cmdbuf),
1915                         *trace_copy, *cs,
1916                         tu_copy_buffer);
1917 
1918    tu_cs_emit_wfi(*cs);
1919 
1920    tu_cs_end(*cs);
1921 
1922    return VK_SUCCESS;
1923 }
1924 
1925 VkResult
tu_u_trace_submission_data_create(struct tu_device * device,struct tu_cmd_buffer ** cmd_buffers,uint32_t cmd_buffer_count,struct tu_u_trace_submission_data ** submission_data)1926 tu_u_trace_submission_data_create(
1927    struct tu_device *device,
1928    struct tu_cmd_buffer **cmd_buffers,
1929    uint32_t cmd_buffer_count,
1930    struct tu_u_trace_submission_data **submission_data)
1931 {
1932    *submission_data = (struct tu_u_trace_submission_data *)
1933       vk_zalloc(&device->vk.alloc,
1934                 sizeof(struct tu_u_trace_submission_data), 8,
1935                 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
1936 
1937    if (!(*submission_data)) {
1938       return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
1939    }
1940 
1941    struct tu_u_trace_submission_data *data = *submission_data;
1942 
1943    data->cmd_trace_data = (struct tu_u_trace_cmd_data *) vk_zalloc(
1944       &device->vk.alloc,
1945       cmd_buffer_count * sizeof(struct tu_u_trace_cmd_data), 8,
1946       VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
1947 
1948    if (!data->cmd_trace_data) {
1949       goto fail;
1950    }
1951 
1952    data->cmd_buffer_count = cmd_buffer_count;
1953    data->last_buffer_with_tracepoints = -1;
1954 
1955    for (uint32_t i = 0; i < cmd_buffer_count; ++i) {
1956       struct tu_cmd_buffer *cmdbuf = cmd_buffers[i];
1957 
1958       if (!u_trace_has_points(&cmdbuf->trace))
1959          continue;
1960 
1961       data->last_buffer_with_tracepoints = i;
1962 
1963       if (!(cmdbuf->usage_flags & VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT)) {
1964          /* A single command buffer could be submitted several times, but the
1965           * timestamp iova addresses are already baked in and trace points are
1966           * single-use. Therefore we have to copy the trace points and create
1967           * a new timestamp buffer on every submit of a reusable command buffer.
1968           */
1969          if (tu_create_copy_timestamp_cs(cmdbuf,
1970                &data->cmd_trace_data[i].timestamp_copy_cs,
1971                &data->cmd_trace_data[i].trace) != VK_SUCCESS) {
1972             goto fail;
1973          }
1974 
1975          assert(data->cmd_trace_data[i].timestamp_copy_cs->entry_count == 1);
1976       } else {
1977          data->cmd_trace_data[i].trace = &cmdbuf->trace;
1978       }
1979    }
1980 
1981    assert(data->last_buffer_with_tracepoints != -1);
1982 
1983    return VK_SUCCESS;
1984 
1985 fail:
1986    tu_u_trace_submission_data_finish(device, data);
1987    *submission_data = NULL;
1988 
1989    return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
1990 }
1991 
1992 void
tu_u_trace_submission_data_finish(struct tu_device * device,struct tu_u_trace_submission_data * submission_data)1993 tu_u_trace_submission_data_finish(
1994    struct tu_device *device,
1995    struct tu_u_trace_submission_data *submission_data)
1996 {
1997    for (uint32_t i = 0; i < submission_data->cmd_buffer_count; ++i) {
1998       /* Only free the trace if we had to create a copy of it */
1999       struct tu_u_trace_cmd_data *cmd_data = &submission_data->cmd_trace_data[i];
2000       if (cmd_data->timestamp_copy_cs) {
2001          tu_cs_finish(cmd_data->timestamp_copy_cs);
2002          vk_free(&device->vk.alloc, cmd_data->timestamp_copy_cs);
2003 
2004          u_trace_fini(cmd_data->trace);
2005          vk_free(&device->vk.alloc, cmd_data->trace);
2006       }
2007    }
2008 
2009    if (submission_data->kgsl_timestamp_bo.bo) {
2010       mtx_lock(&device->kgsl_profiling_mutex);
2011       tu_suballoc_bo_free(&device->kgsl_profiling_suballoc,
2012                         &submission_data->kgsl_timestamp_bo);
2013       mtx_unlock(&device->kgsl_profiling_mutex);
2014    }
2015 
2016    vk_free(&device->vk.alloc, submission_data->cmd_trace_data);
2017    vk_free(&device->vk.alloc, submission_data->syncobj);
2018    vk_free(&device->vk.alloc, submission_data);
2019 }
2020 
2021 enum tu_reg_stomper_flags
2022 {
2023    TU_DEBUG_REG_STOMP_INVERSE = 1 << 0,
2024    TU_DEBUG_REG_STOMP_CMDBUF = 1 << 1,
2025    TU_DEBUG_REG_STOMP_RENDERPASS = 1 << 2,
2026 };
2027 
2028 /* See freedreno.rst for usage tips */
2029 static const struct debug_named_value tu_reg_stomper_options[] = {
2030    { "inverse", TU_DEBUG_REG_STOMP_INVERSE,
2031      "By default the range specifies the regs to stomp, with 'inverse' it "
2032      "specifies the regs NOT to stomp" },
2033    { "cmdbuf", TU_DEBUG_REG_STOMP_CMDBUF,
2034      "Stomp regs at the start of a cmdbuf" },
2035    { "renderpass", TU_DEBUG_REG_STOMP_RENDERPASS,
2036      "Stomp regs before a renderpass" },
2037    { NULL, 0 }
2038 };
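
/* Example usage (hypothetical register range), stomping every trackable reg
 * outside 0x8600..0x86ff before each renderpass:
 *
 *   TU_DEBUG_STALE_REGS_RANGE=0x8600,0x86ff \
 *   TU_DEBUG_STALE_REGS_FLAGS=inverse,renderpass
 */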
2039 
2040 template <chip CHIP>
2041 static inline void
tu_cs_dbg_stomp_regs(struct tu_cs * cs,bool is_rp_blit,uint32_t first_reg,uint32_t last_reg,bool inverse)2042 tu_cs_dbg_stomp_regs(struct tu_cs *cs,
2043                      bool is_rp_blit,
2044                      uint32_t first_reg,
2045                      uint32_t last_reg,
2046                      bool inverse)
2047 {
2048    const uint16_t *regs = NULL;
2049    size_t count = 0;
2050 
2051    if (is_rp_blit) {
2052       regs = &RP_BLIT_REGS<CHIP>[0];
2053       count = ARRAY_SIZE(RP_BLIT_REGS<CHIP>);
2054    } else {
2055       regs = &CMD_REGS<CHIP>[0];
2056       count = ARRAY_SIZE(CMD_REGS<CHIP>);
2057    }
2058 
2059    for (size_t i = 0; i < count; i++) {
2060       if (inverse) {
2061          if (regs[i] >= first_reg && regs[i] <= last_reg)
2062             continue;
2063       } else {
2064          if (regs[i] < first_reg || regs[i] > last_reg)
2065             continue;
2066       }
2067 
2068       if (fd_reg_stomp_allowed(CHIP, regs[i]))
2069          tu_cs_emit_write_reg(cs, regs[i], 0xffffffff);
2070    }
2071 }
2072 
2073 static void
tu_init_dbg_reg_stomper(struct tu_device * device)2074 tu_init_dbg_reg_stomper(struct tu_device *device)
2075 {
2076    const char *stale_reg_range_str =
2077       os_get_option("TU_DEBUG_STALE_REGS_RANGE");
2078    if (!stale_reg_range_str)
2079       return;
2080 
2081    uint32_t first_reg, last_reg;
2082 
2083    if (sscanf(stale_reg_range_str, "%x,%x", &first_reg, &last_reg) != 2) {
2084       mesa_loge("Incorrect TU_DEBUG_STALE_REGS_RANGE");
2085       return;
2086    }
2087 
2088    uint64_t debug_flags = debug_get_flags_option("TU_DEBUG_STALE_REGS_FLAGS",
2089                                                  tu_reg_stomper_options,
2090                                                  TU_DEBUG_REG_STOMP_CMDBUF);
2091 
2092    struct tu_cs *cmdbuf_cs = (struct tu_cs *) calloc(1, sizeof(struct tu_cs));
2093    tu_cs_init(cmdbuf_cs, device, TU_CS_MODE_GROW, 4096,
2094               "cmdbuf reg stomp cs");
2095    tu_cs_begin(cmdbuf_cs);
2096 
2097    struct tu_cs *rp_cs = (struct tu_cs *) calloc(1, sizeof(struct tu_cs));
2098    tu_cs_init(rp_cs, device, TU_CS_MODE_GROW, 4096, "rp reg stomp cs");
2099    tu_cs_begin(rp_cs);
2100 
2101    bool inverse = debug_flags & TU_DEBUG_REG_STOMP_INVERSE;
2102    TU_CALLX(device, tu_cs_dbg_stomp_regs)(cmdbuf_cs, false, first_reg, last_reg, inverse);
2103    TU_CALLX(device, tu_cs_dbg_stomp_regs)(rp_cs, true, first_reg, last_reg, inverse);
2104 
2105    tu_cs_end(cmdbuf_cs);
2106    tu_cs_end(rp_cs);
2107 
2108    device->dbg_cmdbuf_stomp_cs = cmdbuf_cs;
2109    device->dbg_renderpass_stomp_cs = rp_cs;
2110 }
2111 
2112 /* It is unknown what this workaround is for and what it fixes. */
2113 static VkResult
tu_init_cmdbuf_start_a725_quirk(struct tu_device * device)2114 tu_init_cmdbuf_start_a725_quirk(struct tu_device *device)
2115 {
2116    struct tu_cs *cs;
2117 
2118    if (!(device->cmdbuf_start_a725_quirk_cs =
2119             (struct tu_cs *) calloc(1, sizeof(struct tu_cs)))) {
2120       return vk_startup_errorf(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY,
2121                                "OOM");
2122    }
2123 
2124    if (!(device->cmdbuf_start_a725_quirk_entry =
2125             (struct tu_cs_entry *) calloc(1, sizeof(struct tu_cs_entry)))) {
2126       free(device->cmdbuf_start_a725_quirk_cs);
2127       return vk_startup_errorf(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY,
2128                                "OOM");
2129    }
2130 
2131    cs = device->cmdbuf_start_a725_quirk_cs;
2132    tu_cs_init(cs, device, TU_CS_MODE_SUB_STREAM, 57, "a725 workaround cs");
2133 
2134    struct tu_cs shader_cs;
2135    tu_cs_begin_sub_stream(cs, 10, &shader_cs);
2136 
2137    uint32_t raw_shader[] = {
2138       0x00040000, 0x40600000, // mul.f hr0.x, hr0.x, hr1.x
2139       0x00050001, 0x40600001, // mul.f hr0.y, hr0.y, hr1.y
2140       0x00060002, 0x40600002, // mul.f hr0.z, hr0.z, hr1.z
2141       0x00070003, 0x40600003, // mul.f hr0.w, hr0.w, hr1.w
2142       0x00000000, 0x03000000, // end
2143    };
2144 
2145    tu_cs_emit_array(&shader_cs, raw_shader, ARRAY_SIZE(raw_shader));
2146    struct tu_cs_entry shader_entry = tu_cs_end_sub_stream(cs, &shader_cs);
2147    uint64_t shader_iova = shader_entry.bo->iova + shader_entry.offset;
2148 
2149    struct tu_cs sub_cs;
2150    tu_cs_begin_sub_stream(cs, 47, &sub_cs);
2151 
2152    tu_cs_emit_regs(&sub_cs, HLSQ_INVALIDATE_CMD(A7XX,
2153             .vs_state = true, .hs_state = true, .ds_state = true,
2154             .gs_state = true, .fs_state = true, .gfx_ibo = true,
2155             .cs_bindless = 0xff, .gfx_bindless = 0xff));
2156    tu_cs_emit_regs(&sub_cs, HLSQ_CS_CNTL(A7XX,
2157             .constlen = 4,
2158             .enabled = true));
2159    tu_cs_emit_regs(&sub_cs, A6XX_SP_CS_CONFIG(.enabled = true));
2160    tu_cs_emit_regs(&sub_cs, A6XX_SP_CS_CTRL_REG0(
2161             .threadmode = MULTI,
2162             .threadsize = THREAD128,
2163             .mergedregs = true));
2164    tu_cs_emit_regs(&sub_cs, A6XX_SP_CS_UNKNOWN_A9B1(.shared_size = 1));
2165    tu_cs_emit_regs(&sub_cs, HLSQ_CS_KERNEL_GROUP_X(A7XX, 1),
2166                      HLSQ_CS_KERNEL_GROUP_Y(A7XX, 1),
2167                      HLSQ_CS_KERNEL_GROUP_Z(A7XX, 1));
2168    tu_cs_emit_regs(&sub_cs, A6XX_SP_CS_INSTRLEN(.sp_cs_instrlen = 1));
2169    tu_cs_emit_regs(&sub_cs, A6XX_SP_CS_TEX_COUNT(0));
2170    tu_cs_emit_regs(&sub_cs, A6XX_SP_CS_IBO_COUNT(0));
2171    tu_cs_emit_regs(&sub_cs, HLSQ_CS_CNTL_1(A7XX,
2172             .linearlocalidregid = regid(63, 0),
2173             .threadsize = THREAD128,
2174             .workgrouprastorderzfirsten = true,
2175             .wgtilewidth = 4,
2176             .wgtileheight = 17));
2177    tu_cs_emit_regs(&sub_cs, A6XX_SP_CS_CNTL_0(
2178             .wgidconstid = regid(51, 3),
2179             .wgsizeconstid = regid(48, 0),
2180             .wgoffsetconstid = regid(63, 0),
2181             .localidregid = regid(63, 0)));
2182    tu_cs_emit_regs(&sub_cs, SP_CS_CNTL_1(A7XX,
2183             .linearlocalidregid = regid(63, 0),
2184             .threadsize = THREAD128,
2185             .workitemrastorder = WORKITEMRASTORDER_TILED));
2186    tu_cs_emit_regs(&sub_cs, A7XX_SP_CS_UNKNOWN_A9BE(0));
2187 
2188    tu_cs_emit_regs(&sub_cs,
2189                   HLSQ_CS_NDRANGE_0(A7XX, .kerneldim = 3,
2190                                           .localsizex = 255,
2191                                           .localsizey = 1,
2192                                           .localsizez = 1),
2193                   HLSQ_CS_NDRANGE_1(A7XX, .globalsize_x = 3072),
2194                   HLSQ_CS_NDRANGE_2(A7XX, .globaloff_x = 0),
2195                   HLSQ_CS_NDRANGE_3(A7XX, .globalsize_y = 1),
2196                   HLSQ_CS_NDRANGE_4(A7XX, .globaloff_y = 0),
2197                   HLSQ_CS_NDRANGE_5(A7XX, .globalsize_z = 1),
2198                   HLSQ_CS_NDRANGE_6(A7XX, .globaloff_z = 0));
2199    tu_cs_emit_regs(&sub_cs, A7XX_HLSQ_CS_LOCAL_SIZE(
2200             .localsizex = 255,
2201             .localsizey = 0,
2202             .localsizez = 0));
2203    tu_cs_emit_pkt4(&sub_cs, REG_A6XX_SP_CS_OBJ_FIRST_EXEC_OFFSET, 3);
2204    tu_cs_emit(&sub_cs, 0);
2205    tu_cs_emit_qw(&sub_cs, shader_iova);
2206 
2207    tu_cs_emit_pkt7(&sub_cs, CP_EXEC_CS, 4);
2208    tu_cs_emit(&sub_cs, 0x00000000);
2209    tu_cs_emit(&sub_cs, CP_EXEC_CS_1_NGROUPS_X(12));
2210    tu_cs_emit(&sub_cs, CP_EXEC_CS_2_NGROUPS_Y(1));
2211    tu_cs_emit(&sub_cs, CP_EXEC_CS_3_NGROUPS_Z(1));
2212 
2213    *device->cmdbuf_start_a725_quirk_entry = tu_cs_end_sub_stream(cs, &sub_cs);
2214 
2215    return VK_SUCCESS;
2216 }
2217 
2218 VKAPI_ATTR VkResult VKAPI_CALL
tu_CreateDevice(VkPhysicalDevice physicalDevice,const VkDeviceCreateInfo * pCreateInfo,const VkAllocationCallbacks * pAllocator,VkDevice * pDevice)2219 tu_CreateDevice(VkPhysicalDevice physicalDevice,
2220                 const VkDeviceCreateInfo *pCreateInfo,
2221                 const VkAllocationCallbacks *pAllocator,
2222                 VkDevice *pDevice)
2223 {
2224    VK_FROM_HANDLE(tu_physical_device, physical_device, physicalDevice);
2225    VkResult result;
2226    struct tu_device *device;
2227    bool border_color_without_format = false;
2228 
2229    vk_foreach_struct_const (ext, pCreateInfo->pNext) {
2230       switch (ext->sType) {
2231       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CUSTOM_BORDER_COLOR_FEATURES_EXT:
2232          border_color_without_format =
2233             ((const VkPhysicalDeviceCustomBorderColorFeaturesEXT *) ext)
2234                ->customBorderColorWithoutFormat;
2235          break;
2236       default:
2237          break;
2238       }
2239    }
2240 
2241    device = (struct tu_device *) vk_zalloc2(
2242       &physical_device->instance->vk.alloc, pAllocator, sizeof(*device), 8,
2243       VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
2244    if (!device)
2245       return vk_startup_errorf(physical_device->instance, VK_ERROR_OUT_OF_HOST_MEMORY, "OOM");
2246 
2247    struct vk_device_dispatch_table dispatch_table;
2248    bool override_initial_entrypoints = true;
2249 
2250    if (physical_device->instance->vk.trace_mode & VK_TRACE_MODE_RMV) {
2251       vk_device_dispatch_table_from_entrypoints(
2252          &dispatch_table, &tu_rmv_device_entrypoints, true);
2253       override_initial_entrypoints = false;
2254    }
2255 
2256    vk_device_dispatch_table_from_entrypoints(
2257       &dispatch_table, &tu_device_entrypoints, override_initial_entrypoints);
2258 
2259    switch (fd_dev_gen(&physical_device->dev_id)) {
2260    case 6:
2261       vk_device_dispatch_table_from_entrypoints(
2262          &dispatch_table, &tu_device_entrypoints_a6xx, false);
2263       break;
2264    case 7:
2265       vk_device_dispatch_table_from_entrypoints(
2266          &dispatch_table, &tu_device_entrypoints_a7xx, false);
2267    }
2268 
2269    vk_device_dispatch_table_from_entrypoints(
2270       &dispatch_table, &wsi_device_entrypoints, false);
2271 
2272    const struct vk_device_entrypoint_table *knl_device_entrypoints =
2273          physical_device->instance->knl->device_entrypoints;
2274    if (knl_device_entrypoints) {
2275       vk_device_dispatch_table_from_entrypoints(
2276          &dispatch_table, knl_device_entrypoints, false);
2277    }
2278 
2279    result = vk_device_init(&device->vk, &physical_device->vk,
2280                            &dispatch_table, pCreateInfo, pAllocator);
2281    if (result != VK_SUCCESS) {
2282       vk_free(&device->vk.alloc, device);
2283       return vk_startup_errorf(physical_device->instance, result,
2284                                "vk_device_init failed");
2285    }
2286 
2287    device->instance = physical_device->instance;
2288    device->physical_device = physical_device;
2289    device->device_idx = device->physical_device->device_count++;
2290 
2291    result = tu_drm_device_init(device);
2292    if (result != VK_SUCCESS) {
2293       vk_free(&device->vk.alloc, device);
2294       return result;
2295    }
2296 
2297    device->vk.command_buffer_ops = &tu_cmd_buffer_ops;
2298    device->vk.check_status = tu_device_check_status;
2299 
2300    mtx_init(&device->bo_mutex, mtx_plain);
2301    mtx_init(&device->pipeline_mutex, mtx_plain);
2302    mtx_init(&device->autotune_mutex, mtx_plain);
2303    mtx_init(&device->kgsl_profiling_mutex, mtx_plain);
2304    u_rwlock_init(&device->dma_bo_lock);
2305    pthread_mutex_init(&device->submit_mutex, NULL);
2306 
2307    if (physical_device->has_set_iova) {
2308       mtx_init(&device->vma_mutex, mtx_plain);
2309       util_vma_heap_init(&device->vma, physical_device->va_start,
2310                          ROUND_DOWN_TO(physical_device->va_size, os_page_size));
2311    }
2312 
2313    if (TU_DEBUG(BOS))
2314       device->bo_sizes = _mesa_hash_table_create(NULL, _mesa_hash_string, _mesa_key_string_equal);
2315 
2316    if (physical_device->instance->vk.trace_mode & VK_TRACE_MODE_RMV)
2317       tu_memory_trace_init(device);
2318 
2319    /* kgsl is not a drm device: */
2320    if (!is_kgsl(physical_device->instance))
2321       vk_device_set_drm_fd(&device->vk, device->fd);
2322 
2323    struct tu6_global *global = NULL;
2324    uint32_t global_size = sizeof(struct tu6_global);
2325    struct vk_pipeline_cache_create_info pcc_info = { };
2326 
2327    for (unsigned i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {
2328       const VkDeviceQueueCreateInfo *queue_create =
2329          &pCreateInfo->pQueueCreateInfos[i];
2330       uint32_t qfi = queue_create->queueFamilyIndex;
2331       device->queues[qfi] = (struct tu_queue *) vk_alloc(
2332          &device->vk.alloc,
2333          queue_create->queueCount * sizeof(struct tu_queue), 8,
2334          VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
2335       if (!device->queues[qfi]) {
2336          result = vk_startup_errorf(physical_device->instance,
2337                                     VK_ERROR_OUT_OF_HOST_MEMORY,
2338                                     "OOM");
2339          goto fail_queues;
2340       }
2341 
2342       memset(device->queues[qfi], 0,
2343              queue_create->queueCount * sizeof(struct tu_queue));
2344 
2345       device->queue_count[qfi] = queue_create->queueCount;
2346 
2347       for (unsigned q = 0; q < queue_create->queueCount; q++) {
2348          result = tu_queue_init(device, &device->queues[qfi][q], q, queue_create,
2349                                 device->vk.enabled_features.globalPriorityQuery);
2350          if (result != VK_SUCCESS) {
2351             device->queue_count[qfi] = q;
2352             goto fail_queues;
2353          }
2354       }
2355    }
2356 
2357    {
2358       struct ir3_compiler_options ir3_options = {
2359          .robust_buffer_access2 = device->vk.enabled_features.robustBufferAccess2,
2360          .push_ubo_with_preamble = true,
2361          .disable_cache = true,
2362          .bindless_fb_read_descriptor = -1,
2363          .bindless_fb_read_slot = -1,
2364          .storage_16bit = physical_device->info->a6xx.storage_16bit,
2365          .storage_8bit = physical_device->info->a7xx.storage_8bit,
2366          .shared_push_consts = !TU_DEBUG(PUSH_CONSTS_PER_STAGE),
2367       };
2368       device->compiler = ir3_compiler_create(
2369          NULL, &physical_device->dev_id, physical_device->info, &ir3_options);
2370    }
2371    if (!device->compiler) {
2372       result = vk_startup_errorf(physical_device->instance,
2373                                  VK_ERROR_INITIALIZATION_FAILED,
2374                                  "failed to initialize ir3 compiler");
2375       goto fail_queues;
2376    }
2377 
2378    /* Initialize sparse array for refcounting imported BOs */
2379    util_sparse_array_init(&device->bo_map, sizeof(struct tu_bo), 512);
2380 
2381    if (physical_device->has_set_iova) {
2382       STATIC_ASSERT(TU_MAX_QUEUE_FAMILIES == 1);
2383       if (!u_vector_init(&device->zombie_vmas, 64,
2384                          sizeof(struct tu_zombie_vma))) {
2385          result = vk_startup_errorf(physical_device->instance,
2386                                     VK_ERROR_INITIALIZATION_FAILED,
2387                                     "zombie_vmas create failed");
2388          goto fail_free_zombie_vma;
2389       }
2390    }
2391 
2392    /* Initial sizes; these will increase if there is overflow. */
2393    device->vsc_draw_strm_pitch = 0x1000 + VSC_PAD;
2394    device->vsc_prim_strm_pitch = 0x4000 + VSC_PAD;
2395 
2396    if (device->vk.enabled_features.customBorderColors)
2397       global_size += TU_BORDER_COLOR_COUNT * sizeof(struct bcolor_entry);
2398 
2399    tu_bo_suballocator_init(
2400       &device->pipeline_suballoc, device, 128 * 1024,
2401       (enum tu_bo_alloc_flags) (TU_BO_ALLOC_GPU_READ_ONLY |
2402                                 TU_BO_ALLOC_ALLOW_DUMP |
2403                                 TU_BO_ALLOC_INTERNAL_RESOURCE),
2404       "pipeline_suballoc");
2405    tu_bo_suballocator_init(&device->autotune_suballoc, device,
2406                            128 * 1024, TU_BO_ALLOC_INTERNAL_RESOURCE,
2407                            "autotune_suballoc");
2408    if (is_kgsl(physical_device->instance)) {
2409       tu_bo_suballocator_init(&device->kgsl_profiling_suballoc, device,
2410                               128 * 1024, TU_BO_ALLOC_INTERNAL_RESOURCE,
2411                               "kgsl_profiling_suballoc");
2412    }
2413 
2414    result = tu_bo_init_new(
2415       device, NULL, &device->global_bo, global_size,
2416       (enum tu_bo_alloc_flags) (TU_BO_ALLOC_ALLOW_DUMP |
2417                                 TU_BO_ALLOC_INTERNAL_RESOURCE),
2418       "global");
2419    if (result != VK_SUCCESS) {
2420       vk_startup_errorf(device->instance, result, "BO init");
2421       goto fail_global_bo;
2422    }
2423 
2424    result = tu_bo_map(device, device->global_bo, NULL);
2425    if (result != VK_SUCCESS) {
2426       vk_startup_errorf(device->instance, result, "BO map");
2427       goto fail_global_bo_map;
2428    }
2429 
2430    global = (struct tu6_global *)device->global_bo->map;
2431    device->global_bo_map = global;
2432    tu_init_clear_blit_shaders(device);
2433 
2434    result = tu_init_empty_shaders(device);
2435    if (result != VK_SUCCESS) {
2436       vk_startup_errorf(device->instance, result, "empty shaders");
2437       goto fail_empty_shaders;
2438    }
2439 
2440    global->predicate = 0;
2441    global->vtx_stats_query_not_running = 1;
2442    global->dbg_one = (uint32_t)-1;
2443    global->dbg_gmem_total_loads = 0;
2444    global->dbg_gmem_taken_loads = 0;
2445    global->dbg_gmem_total_stores = 0;
2446    global->dbg_gmem_taken_stores = 0;
2447    for (int i = 0; i < TU_BORDER_COLOR_BUILTIN; i++) {
2448       VkClearColorValue border_color = vk_border_color_value((VkBorderColor) i);
2449       tu6_pack_border_color(&global->bcolor_builtin[i], &border_color,
2450                             vk_border_color_is_int((VkBorderColor) i));
2451    }
2452 
2453    /* initialize to ones so ffs can be used to find unused slots */
2454    BITSET_ONES(device->custom_border_color);
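   /* i.e. a set bit marks a free slot: allocation can ffs() the lowest set
    * bit and clear it, and freeing a slot presumably sets the bit again.
    */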
2455 
2456    result = tu_init_dynamic_rendering(device);
2457    if (result != VK_SUCCESS) {
2458       vk_startup_errorf(device->instance, result, "dynamic rendering");
2459       goto fail_dynamic_rendering;
2460    }
2461 
2462    device->mem_cache = vk_pipeline_cache_create(&device->vk, &pcc_info,
2463                                                 NULL);
2464    if (!device->mem_cache) {
2465       result = VK_ERROR_OUT_OF_HOST_MEMORY;
2466       vk_startup_errorf(device->instance, result, "create pipeline cache failed");
2467       goto fail_pipeline_cache;
2468    }
2469 
2470    if (device->vk.enabled_features.performanceCounterQueryPools) {
2471       /* Prepare command streams that set the pass index in the PERF_CNTRS_REG
2472        * scratch register, one for each pass from 0 to 31. One of these will be
2473        * picked up at command submit time when the perf query is executed.
2474        */
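      /* E.g. the stream for pass 5 writes the one-hot value 1u << 5 = 0x20
       * to the scratch register.
       */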
2475       struct tu_cs *cs;
2476 
2477       if (!(device->perfcntrs_pass_cs =
2478                (struct tu_cs *) calloc(1, sizeof(struct tu_cs)))) {
2479          result = vk_startup_errorf(device->instance,
2480                VK_ERROR_OUT_OF_HOST_MEMORY, "OOM");
2481          goto fail_perfcntrs_pass_alloc;
2482       }
2483 
2484       device->perfcntrs_pass_cs_entries =
2485          (struct tu_cs_entry *) calloc(32, sizeof(struct tu_cs_entry));
2486       if (!device->perfcntrs_pass_cs_entries) {
2487          result = vk_startup_errorf(device->instance,
2488                VK_ERROR_OUT_OF_HOST_MEMORY, "OOM");
2489          goto fail_perfcntrs_pass_entries_alloc;
2490       }
2491 
2492       cs = device->perfcntrs_pass_cs;
2493       tu_cs_init(cs, device, TU_CS_MODE_SUB_STREAM, 96, "perfcntrs cs");
2494 
2495       for (unsigned i = 0; i < 32; i++) {
2496          struct tu_cs sub_cs;
2497 
2498          result = tu_cs_begin_sub_stream(cs, 3, &sub_cs);
2499          if (result != VK_SUCCESS) {
2500             vk_startup_errorf(device->instance, result,
2501                   "failed to allocate commands streams");
2502             goto fail_prepare_perfcntrs_pass_cs;
2503          }
2504 
2505          tu_cs_emit_regs(&sub_cs, A6XX_CP_SCRATCH_REG(PERF_CNTRS_REG, 1 << i));
2506          tu_cs_emit_pkt7(&sub_cs, CP_WAIT_FOR_ME, 0);
2507 
2508          device->perfcntrs_pass_cs_entries[i] = tu_cs_end_sub_stream(cs, &sub_cs);
2509       }
2510    }
2511 
2512    if (physical_device->info->a7xx.cmdbuf_start_a725_quirk) {
2513       result = tu_init_cmdbuf_start_a725_quirk(device);
2514       if (result != VK_SUCCESS)
2515          goto fail_a725_workaround;
2516    }
2517 
2518    tu_init_dbg_reg_stomper(device);
2519 
2520    /* Initialize a condition variable for timeline semaphore */
2521    pthread_condattr_t condattr;
2522    if (pthread_condattr_init(&condattr) != 0) {
2523       result = vk_startup_errorf(physical_device->instance,
2524                                  VK_ERROR_INITIALIZATION_FAILED,
2525                                  "pthread condattr init");
2526       goto fail_timeline_cond;
2527    }
2528    if (pthread_condattr_setclock(&condattr, CLOCK_MONOTONIC) != 0) {
2529       pthread_condattr_destroy(&condattr);
2530       result = vk_startup_errorf(physical_device->instance,
2531                                  VK_ERROR_INITIALIZATION_FAILED,
2532                                  "pthread condattr clock setup");
2533       goto fail_timeline_cond;
2534    }
2535    if (pthread_cond_init(&device->timeline_cond, &condattr) != 0) {
2536       pthread_condattr_destroy(&condattr);
2537       result = vk_startup_errorf(physical_device->instance,
2538                                  VK_ERROR_INITIALIZATION_FAILED,
2539                                  "pthread cond init");
2540       goto fail_timeline_cond;
2541    }
2542    pthread_condattr_destroy(&condattr);
2543 
2544    result = tu_autotune_init(&device->autotune, device);
2545    if (result != VK_SUCCESS) {
2546       goto fail_timeline_cond;
2547    }
2548 
2549    for (unsigned i = 0; i < ARRAY_SIZE(device->scratch_bos); i++)
2550       mtx_init(&device->scratch_bos[i].construct_mtx, mtx_plain);
2551 
2552    mtx_init(&device->fiber_pvtmem_bo.mtx, mtx_plain);
2553    mtx_init(&device->wave_pvtmem_bo.mtx, mtx_plain);
2554 
2555    mtx_init(&device->mutex, mtx_plain);
2556 
2557    device->use_z24uint_s8uint =
2558       physical_device->info->a6xx.has_z24uint_s8uint &&
2559       (!border_color_without_format ||
2560        physical_device->instance->disable_d24s8_border_color_workaround);
2561    device->use_lrz = !TU_DEBUG(NOLRZ);
2562 
2563    tu_gpu_tracepoint_config_variable();
2564 
2565    device->submit_count = 0;
2566    u_trace_context_init(&device->trace_context, device,
2567                      sizeof(uint64_t),
2568                      12,
2569                      tu_trace_create_buffer,
2570                      tu_trace_destroy_buffer,
2571                      TU_CALLX(device, tu_trace_record_ts),
2572                      tu_trace_read_ts,
2573                      tu_trace_capture_data,
2574                      tu_trace_get_data,
2575                      tu_trace_delete_flush_data);
2576 
2577    tu_breadcrumbs_init(device);
2578 
2579    if (FD_RD_DUMP(ENABLE)) {
2580       struct vk_app_info *app_info = &device->instance->vk.app_info;
2581       const char *app_name_str = app_info->app_name ?
2582          app_info->app_name : util_get_process_name();
2583       const char *engine_name_str = app_info->engine_name ?
2584          app_info->engine_name : "unknown-engine";
2585 
2586       char app_name[64];
2587       snprintf(app_name, sizeof(app_name), "%s", app_name_str);
2588 
2589       char engine_name[32];
2590       snprintf(engine_name, sizeof(engine_name), "%s", engine_name_str);
2591 
2592       char output_name[128];
2593       snprintf(output_name, sizeof(output_name), "tu_%s.%s_instance%u_device%u",
2594                app_name, engine_name, device->instance->instance_idx,
2595                device->device_idx);
2596 
2597       fd_rd_output_init(&device->rd_output, output_name);
2598    }
2599 
2600    *pDevice = tu_device_to_handle(device);
2601    return VK_SUCCESS;
2602 
2603 fail_timeline_cond:
2604    if (device->cmdbuf_start_a725_quirk_entry) {
2605       free(device->cmdbuf_start_a725_quirk_entry);
2606       tu_cs_finish(device->cmdbuf_start_a725_quirk_cs);
2607       free(device->cmdbuf_start_a725_quirk_cs);
2608    }
2609 fail_a725_workaround:
2610 fail_prepare_perfcntrs_pass_cs:
2611    free(device->perfcntrs_pass_cs_entries);
2612    tu_cs_finish(device->perfcntrs_pass_cs);
2613 fail_perfcntrs_pass_entries_alloc:
2614    free(device->perfcntrs_pass_cs);
2615 fail_perfcntrs_pass_alloc:
2616    vk_pipeline_cache_destroy(device->mem_cache, &device->vk.alloc);
2617 fail_pipeline_cache:
2618    tu_destroy_dynamic_rendering(device);
2619 fail_dynamic_rendering:
2620    tu_destroy_empty_shaders(device);
2621 fail_empty_shaders:
2622    tu_destroy_clear_blit_shaders(device);
2623 fail_global_bo_map:
2624    TU_RMV(resource_destroy, device, device->global_bo);
2625    tu_bo_finish(device, device->global_bo);
2626    vk_free(&device->vk.alloc, device->bo_list);
2627 fail_global_bo:
2628    ir3_compiler_destroy(device->compiler);
2629    util_sparse_array_finish(&device->bo_map);
2630    if (physical_device->has_set_iova)
2631       util_vma_heap_finish(&device->vma);
2632 fail_free_zombie_vma:
2633    u_vector_finish(&device->zombie_vmas);
2634 fail_queues:
2635    for (unsigned i = 0; i < TU_MAX_QUEUE_FAMILIES; i++) {
2636       for (unsigned q = 0; q < device->queue_count[i]; q++)
2637          tu_queue_finish(&device->queues[i][q]);
2638       if (device->queues[i])
2639          vk_free(&device->vk.alloc, device->queues[i]);
2640    }
2641 
2642    u_rwlock_destroy(&device->dma_bo_lock);
2643    tu_drm_device_finish(device);
2644    vk_device_finish(&device->vk);
2645    vk_free(&device->vk.alloc, device);
2646    return result;
2647 }
2648 
2649 VKAPI_ATTR void VKAPI_CALL
2650 tu_DestroyDevice(VkDevice _device, const VkAllocationCallbacks *pAllocator)
2651 {
2652    VK_FROM_HANDLE(tu_device, device, _device);
2653 
2654    if (!device)
2655       return;
2656 
2657    tu_memory_trace_finish(device);
2658 
2659    if (FD_RD_DUMP(ENABLE))
2660       fd_rd_output_fini(&device->rd_output);
2661 
2662    tu_breadcrumbs_finish(device);
2663 
2664    u_trace_context_fini(&device->trace_context);
2665 
2666    for (unsigned i = 0; i < ARRAY_SIZE(device->scratch_bos); i++) {
2667       if (device->scratch_bos[i].initialized)
2668          tu_bo_finish(device, device->scratch_bos[i].bo);
2669    }
2670 
2671    if (device->fiber_pvtmem_bo.bo)
2672       tu_bo_finish(device, device->fiber_pvtmem_bo.bo);
2673 
2674    if (device->wave_pvtmem_bo.bo)
2675       tu_bo_finish(device, device->wave_pvtmem_bo.bo);
2676 
2677    tu_destroy_clear_blit_shaders(device);
2678 
2679    tu_destroy_empty_shaders(device);
2680 
2681    tu_destroy_dynamic_rendering(device);
2682 
2683    ir3_compiler_destroy(device->compiler);
2684 
2685    vk_pipeline_cache_destroy(device->mem_cache, &device->vk.alloc);
2686 
2687    if (device->perfcntrs_pass_cs) {
2688       free(device->perfcntrs_pass_cs_entries);
2689       tu_cs_finish(device->perfcntrs_pass_cs);
2690       free(device->perfcntrs_pass_cs);
2691    }
2692 
2693    if (device->dbg_cmdbuf_stomp_cs) {
2694       tu_cs_finish(device->dbg_cmdbuf_stomp_cs);
2695       free(device->dbg_cmdbuf_stomp_cs);
2696    }
2697 
2698    if (device->dbg_renderpass_stomp_cs) {
2699       tu_cs_finish(device->dbg_renderpass_stomp_cs);
2700       free(device->dbg_renderpass_stomp_cs);
2701    }
2702 
2703    if (device->cmdbuf_start_a725_quirk_entry) {
2704       free(device->cmdbuf_start_a725_quirk_entry);
2705       tu_cs_finish(device->cmdbuf_start_a725_quirk_cs);
2706       free(device->cmdbuf_start_a725_quirk_cs);
2707    }
2708 
2709    tu_autotune_fini(&device->autotune, device);
2710 
2711    tu_bo_suballocator_finish(&device->pipeline_suballoc);
2712    tu_bo_suballocator_finish(&device->autotune_suballoc);
2713    tu_bo_suballocator_finish(&device->kgsl_profiling_suballoc);
2714 
2715    tu_bo_finish(device, device->global_bo);
2716 
2717    for (unsigned i = 0; i < TU_MAX_QUEUE_FAMILIES; i++) {
2718       for (unsigned q = 0; q < device->queue_count[i]; q++)
2719          tu_queue_finish(&device->queues[i][q]);
2720       if (device->queue_count[i])
2721          vk_free(&device->vk.alloc, device->queues[i]);
2722    }
2723 
2724    tu_drm_device_finish(device);
2725 
2726    if (device->physical_device->has_set_iova)
2727       util_vma_heap_finish(&device->vma);
2728 
2729    util_sparse_array_finish(&device->bo_map);
2730    u_rwlock_destroy(&device->dma_bo_lock);
2731 
2732    u_vector_finish(&device->zombie_vmas);
2733 
2734    pthread_cond_destroy(&device->timeline_cond);
2735    _mesa_hash_table_destroy(device->bo_sizes, NULL);
2736    vk_free(&device->vk.alloc, device->bo_list);
2737    vk_device_finish(&device->vk);
2738    vk_free(&device->vk.alloc, device);
2739 }
2740 
2741 VkResult
2742 tu_get_scratch_bo(struct tu_device *dev, uint64_t size, struct tu_bo **bo)
2743 {
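   /* Round the request up to a power of two, clamped to the minimum scratch
    * BO size; each scratch_bos slot caches one power-of-two size.
    */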
2744    unsigned size_log2 = MAX2(util_logbase2_ceil64(size), MIN_SCRATCH_BO_SIZE_LOG2);
2745    unsigned index = size_log2 - MIN_SCRATCH_BO_SIZE_LOG2;
2746    assert(index < ARRAY_SIZE(dev->scratch_bos));
2747 
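   /* Any BO of at least the requested size will do, so scan upward for one
    * that is already initialized.
    */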
2748    for (unsigned i = index; i < ARRAY_SIZE(dev->scratch_bos); i++) {
2749       if (p_atomic_read(&dev->scratch_bos[i].initialized)) {
2750          /* Fast path: just return the already-allocated BO. */
2751          *bo = dev->scratch_bos[i].bo;
2752          return VK_SUCCESS;
2753       }
2754    }
2755 
2756    /* Slow path: actually allocate the BO. Allocation is slow, so we take
2757     * a per-size lock; threads requesting other sizes are not blocked
2758     * while this one completes.
2759     */
2760    mtx_lock(&dev->scratch_bos[index].construct_mtx);
2761 
2762    /* Another thread may have allocated it already while we were waiting on
2763     * the lock. We need to check this in order to avoid double-allocating.
2764     */
2765    if (dev->scratch_bos[index].initialized) {
2766       mtx_unlock(&dev->scratch_bos[index].construct_mtx);
2767       *bo = dev->scratch_bos[index].bo;
2768       return VK_SUCCESS;
2769    }
2770 
2771    unsigned bo_size = 1ull << size_log2;
2772    VkResult result = tu_bo_init_new(dev, NULL, &dev->scratch_bos[index].bo, bo_size,
2773                                     TU_BO_ALLOC_INTERNAL_RESOURCE, "scratch");
2774    if (result != VK_SUCCESS) {
2775       mtx_unlock(&dev->scratch_bos[index].construct_mtx);
2776       return result;
2777    }
2778 
2779    p_atomic_set(&dev->scratch_bos[index].initialized, true);
2780 
2781    mtx_unlock(&dev->scratch_bos[index].construct_mtx);
2782 
2783    *bo = dev->scratch_bos[index].bo;
2784    return VK_SUCCESS;
2785 }
2786 
2787 VKAPI_ATTR VkResult VKAPI_CALL
2788 tu_EnumerateInstanceLayerProperties(uint32_t *pPropertyCount,
2789                                     VkLayerProperties *pProperties)
2790 {
2791    *pPropertyCount = 0;
2792    return VK_SUCCESS;
2793 }
2794 
2795 VKAPI_ATTR VkResult VKAPI_CALL
2796 tu_EnumerateInstanceExtensionProperties(const char *pLayerName,
2797                                         uint32_t *pPropertyCount,
2798                                         VkExtensionProperties *pProperties)
2799 {
2800    if (pLayerName)
2801       return vk_error(NULL, VK_ERROR_LAYER_NOT_PRESENT);
2802 
2803    return vk_enumerate_instance_extension_properties(
2804       &tu_instance_extensions_supported, pPropertyCount, pProperties);
2805 }
2806 
2807 VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL
2808 tu_GetInstanceProcAddr(VkInstance _instance, const char *pName)
2809 {
2810    VK_FROM_HANDLE(tu_instance, instance, _instance);
2811    return vk_instance_get_proc_addr(instance != NULL ? &instance->vk : NULL,
2812                                     &tu_instance_entrypoints,
2813                                     pName);
2814 }
2815 
2816 /* The loader wants us to expose a second GetInstanceProcAddr function
2817  * to work around certain LD_PRELOAD issues seen in apps.
2818  */
2819 PUBLIC
2820 VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL
2821 vk_icdGetInstanceProcAddr(VkInstance instance, const char *pName)
2822 {
2823    return tu_GetInstanceProcAddr(instance, pName);
2824 }
2825 
2826 VKAPI_ATTR VkResult VKAPI_CALL
2827 tu_AllocateMemory(VkDevice _device,
2828                   const VkMemoryAllocateInfo *pAllocateInfo,
2829                   const VkAllocationCallbacks *pAllocator,
2830                   VkDeviceMemory *pMem)
2831 {
2832    VK_FROM_HANDLE(tu_device, device, _device);
2833    struct tu_device_memory *mem;
2834    VkResult result;
2835 
2836    assert(pAllocateInfo->sType == VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO);
2837 
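   /* Cheap early-out if the heap is already over budget; the authoritative
    * accounting (with rollback) happens after the BO is allocated below.
    */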
2838    struct tu_memory_heap *mem_heap = &device->physical_device->heap;
2839    uint64_t mem_heap_used = p_atomic_read(&mem_heap->used);
2840    if (mem_heap_used > mem_heap->size)
2841       return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
2842 
2843    mem = (struct tu_device_memory *) vk_device_memory_create(
2844       &device->vk, pAllocateInfo, pAllocator, sizeof(*mem));
2845    if (mem == NULL)
2846       return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
2847 
2848    if (pAllocateInfo->allocationSize == 0 && !mem->vk.ahardware_buffer) {
2849       vk_device_memory_destroy(&device->vk, pAllocator, &mem->vk);
2850       /* Apparently, this is allowed */
2851       *pMem = VK_NULL_HANDLE;
2852       return VK_SUCCESS;
2853    }
2854 
2855    const VkImportMemoryFdInfoKHR *fd_info =
2856       vk_find_struct_const(pAllocateInfo->pNext, IMPORT_MEMORY_FD_INFO_KHR);
2857 
2858    if (fd_info && fd_info->handleType) {
2859       assert(fd_info->handleType ==
2860                 VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT ||
2861              fd_info->handleType ==
2862                 VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
2863 
2864       /*
2865        * TODO Importing the same fd twice gives us the same handle without
2866        * reference counting.  We need to maintain a per-instance handle-to-bo
2867        * table and add a reference count to tu_bo.
2868        */
2869       result = tu_bo_init_dmabuf(device, &mem->bo,
2870                                  pAllocateInfo->allocationSize, fd_info->fd);
2871       if (result == VK_SUCCESS) {
2872          /* take ownership and close the fd */
2873          close(fd_info->fd);
2874       }
2875    } else if (mem->vk.ahardware_buffer) {
2876 #if DETECT_OS_ANDROID
2877       const native_handle_t *handle = AHardwareBuffer_getNativeHandle(mem->vk.ahardware_buffer);
2878       assert(handle->numFds > 0);
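      /* A dma-buf fd doesn't carry an explicit size, so derive it by seeking
       * to the end of the buffer.
       */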
2879       size_t size = lseek(handle->data[0], 0, SEEK_END);
2880       result = tu_bo_init_dmabuf(device, &mem->bo, size, handle->data[0]);
2881 #else
2882       result = VK_ERROR_FEATURE_NOT_PRESENT;
2883 #endif
2884    } else {
2885       uint64_t client_address = 0;
2886       BITMASK_ENUM(tu_bo_alloc_flags) alloc_flags = TU_BO_ALLOC_NO_FLAGS;
2887 
2888       const VkMemoryOpaqueCaptureAddressAllocateInfo *replay_info =
2889          vk_find_struct_const(pAllocateInfo->pNext,
2890                               MEMORY_OPAQUE_CAPTURE_ADDRESS_ALLOCATE_INFO);
2891       if (replay_info && replay_info->opaqueCaptureAddress) {
2892          client_address = replay_info->opaqueCaptureAddress;
2893          alloc_flags |= TU_BO_ALLOC_REPLAYABLE;
2894       }
2895 
2896       const VkMemoryAllocateFlagsInfo *flags_info = vk_find_struct_const(
2897          pAllocateInfo->pNext, MEMORY_ALLOCATE_FLAGS_INFO);
2898       if (flags_info &&
2899           (flags_info->flags &
2900            VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_CAPTURE_REPLAY_BIT)) {
2901          alloc_flags |= TU_BO_ALLOC_REPLAYABLE;
2902       }
2903 
2904       const VkExportMemoryAllocateInfo *export_info =
2905          vk_find_struct_const(pAllocateInfo->pNext, EXPORT_MEMORY_ALLOCATE_INFO);
2906       if (export_info && (export_info->handleTypes &
2907                           (VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT |
2908                            VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT)))
2909          alloc_flags |= TU_BO_ALLOC_SHAREABLE;
2910 
2911 
2912       char name[64] = "vkAllocateMemory()";
2913       if (device->bo_sizes)
2914          snprintf(name, ARRAY_SIZE(name), "vkAllocateMemory(%ldkb)",
2915                   (long)DIV_ROUND_UP(pAllocateInfo->allocationSize, 1024));
2916       VkMemoryPropertyFlags mem_property =
2917          device->physical_device->memory.types[pAllocateInfo->memoryTypeIndex];
2918       result = tu_bo_init_new_explicit_iova(
2919          device, &mem->vk.base, &mem->bo, pAllocateInfo->allocationSize,
2920          client_address, mem_property, alloc_flags, name);
2921    }
2922 
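   /* Charge the allocation against the heap, rolling back if this pushed us
    * over the heap size.
    */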
2923    if (result == VK_SUCCESS) {
2924       mem_heap_used = p_atomic_add_return(&mem_heap->used, mem->bo->size);
2925       if (mem_heap_used > mem_heap->size) {
2926          p_atomic_add(&mem_heap->used, -mem->bo->size);
2927          tu_bo_finish(device, mem->bo);
2928          result = vk_errorf(device, VK_ERROR_OUT_OF_DEVICE_MEMORY,
2929                             "Out of heap memory");
2930       }
2931    }
2932 
2933    if (result != VK_SUCCESS) {
2934       vk_device_memory_destroy(&device->vk, pAllocator, &mem->vk);
2935       return result;
2936    }
2937 
2938    /* Track in the device whether our BO list contains any implicit-sync BOs, so
2939     * we can suppress implicit sync on non-WSI usage.
2940     */
2941    const struct wsi_memory_allocate_info *wsi_info =
2942       vk_find_struct_const(pAllocateInfo->pNext, WSI_MEMORY_ALLOCATE_INFO_MESA);
2943    if (wsi_info && wsi_info->implicit_sync) {
2944       mtx_lock(&device->bo_mutex);
2945       if (!mem->bo->implicit_sync) {
2946          mem->bo->implicit_sync = true;
2947          device->implicit_sync_bo_count++;
2948       }
2949       mtx_unlock(&device->bo_mutex);
2950    }
2951 
2952    const VkMemoryDedicatedAllocateInfo *dedicate_info =
2953       vk_find_struct_const(pAllocateInfo->pNext, MEMORY_DEDICATED_ALLOCATE_INFO);
2954    if (dedicate_info) {
2955       mem->image = tu_image_from_handle(dedicate_info->image);
2956    } else {
2957       mem->image = NULL;
2958    }
2959 
2960    TU_RMV(heap_create, device, pAllocateInfo, mem);
2961 
2962    *pMem = tu_device_memory_to_handle(mem);
2963 
2964    return VK_SUCCESS;
2965 }
2966 
2967 VKAPI_ATTR void VKAPI_CALL
2968 tu_FreeMemory(VkDevice _device,
2969               VkDeviceMemory _mem,
2970               const VkAllocationCallbacks *pAllocator)
2971 {
2972    VK_FROM_HANDLE(tu_device, device, _device);
2973    VK_FROM_HANDLE(tu_device_memory, mem, _mem);
2974 
2975    if (mem == NULL)
2976       return;
2977 
2978    TU_RMV(resource_destroy, device, mem);
2979 
2980    p_atomic_add(&device->physical_device->heap.used, -mem->bo->size);
2981    tu_bo_finish(device, mem->bo);
2982    vk_device_memory_destroy(&device->vk, pAllocator, &mem->vk);
2983 }
2984 
2985 VKAPI_ATTR VkResult VKAPI_CALL
2986 tu_MapMemory2KHR(VkDevice _device, const VkMemoryMapInfoKHR *pMemoryMapInfo, void **ppData)
2987 {
2988    VK_FROM_HANDLE(tu_device, device, _device);
2989    VK_FROM_HANDLE(tu_device_memory, mem, pMemoryMapInfo->memory);
2990    VkResult result;
2991 
2992    if (mem == NULL) {
2993       *ppData = NULL;
2994       return VK_SUCCESS;
2995    }
2996 
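   /* VK_EXT_map_memory_placed: honor the application-provided virtual
    * address when the placed-map flag is set.
    */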
2997    void *placed_addr = NULL;
2998    if (pMemoryMapInfo->flags & VK_MEMORY_MAP_PLACED_BIT_EXT) {
2999       const VkMemoryMapPlacedInfoEXT *placed_info =
3000          vk_find_struct_const(pMemoryMapInfo->pNext, MEMORY_MAP_PLACED_INFO_EXT);
3001       assert(placed_info != NULL);
3002       placed_addr = placed_info->pPlacedAddress;
3003    }
3004 
3005    result = tu_bo_map(device, mem->bo, placed_addr);
3006    if (result != VK_SUCCESS)
3007       return result;
3008 
3009    *ppData = (char *) mem->bo->map + pMemoryMapInfo->offset;
3010    return VK_SUCCESS;
3011 }
3012 
3013 VKAPI_ATTR VkResult VKAPI_CALL
3014 tu_UnmapMemory2KHR(VkDevice _device, const VkMemoryUnmapInfoKHR *pMemoryUnmapInfo)
3015 {
3016    VK_FROM_HANDLE(tu_device, device, _device);
3017    VK_FROM_HANDLE(tu_device_memory, mem, pMemoryUnmapInfo->memory);
3018 
3019    if (mem == NULL)
3020       return VK_SUCCESS;
3021 
3022    return tu_bo_unmap(device, mem->bo, pMemoryUnmapInfo->flags & VK_MEMORY_UNMAP_RESERVE_BIT_EXT);
3023 }

3024 static VkResult
3025 sync_cache(VkDevice _device,
3026            enum tu_mem_sync_op op,
3027            uint32_t count,
3028            const VkMappedMemoryRange *ranges)
3029 {
3030    VK_FROM_HANDLE(tu_device, device, _device);
3031 
3032    if (!device->physical_device->has_cached_non_coherent_memory) {
3033       tu_finishme(
3034          "data cache clean and invalidation are unsupported on this arch!");
3035       return VK_SUCCESS;
3036    }
3037 
3038    for (uint32_t i = 0; i < count; i++) {
3039       VK_FROM_HANDLE(tu_device_memory, mem, ranges[i].memory);
3040       tu_bo_sync_cache(device, mem->bo, ranges[i].offset, ranges[i].size, op);
3041    }
3042 
3043    return VK_SUCCESS;
3044 }
3045 
3046 VkResult
3047 tu_FlushMappedMemoryRanges(VkDevice _device,
3048                            uint32_t memoryRangeCount,
3049                            const VkMappedMemoryRange *pMemoryRanges)
3050 {
3051    return sync_cache(_device, TU_MEM_SYNC_CACHE_TO_GPU, memoryRangeCount,
3052                      pMemoryRanges);
3053 }
3054 
3055 VkResult
3056 tu_InvalidateMappedMemoryRanges(VkDevice _device,
3057                                 uint32_t memoryRangeCount,
3058                                 const VkMappedMemoryRange *pMemoryRanges)
3059 {
3060    return sync_cache(_device, TU_MEM_SYNC_CACHE_FROM_GPU, memoryRangeCount,
3061                      pMemoryRanges);
3062 }
3063 
3064 VKAPI_ATTR void VKAPI_CALL
3065 tu_GetDeviceMemoryCommitment(VkDevice device,
3066                              VkDeviceMemory memory,
3067                              VkDeviceSize *pCommittedMemoryInBytes)
3068 {
3069    *pCommittedMemoryInBytes = 0;
3070 }
3071 
3072 VKAPI_ATTR VkResult VKAPI_CALL
3073 tu_CreateFramebuffer(VkDevice _device,
3074                      const VkFramebufferCreateInfo *pCreateInfo,
3075                      const VkAllocationCallbacks *pAllocator,
3076                      VkFramebuffer *pFramebuffer)
3077 {
3078    VK_FROM_HANDLE(tu_device, device, _device);
3079 
3080    if (TU_DEBUG(DYNAMIC))
3081       return vk_common_CreateFramebuffer(_device, pCreateInfo, pAllocator,
3082                                          pFramebuffer);
3083 
3084    VK_FROM_HANDLE(tu_render_pass, pass, pCreateInfo->renderPass);
3085    struct tu_framebuffer *framebuffer;
3086 
3087    assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO);
3088 
3089    bool imageless = pCreateInfo->flags & VK_FRAMEBUFFER_CREATE_IMAGELESS_BIT;
3090 
3091    size_t size = sizeof(*framebuffer);
3092    if (!imageless)
3093       size += sizeof(struct tu_attachment_info) * pCreateInfo->attachmentCount;
3094    framebuffer = (struct tu_framebuffer *) vk_object_alloc(
3095       &device->vk, pAllocator, size, VK_OBJECT_TYPE_FRAMEBUFFER);
3096    if (framebuffer == NULL)
3097       return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
3098 
3099    framebuffer->attachment_count = pCreateInfo->attachmentCount;
3100    framebuffer->width = pCreateInfo->width;
3101    framebuffer->height = pCreateInfo->height;
3102    framebuffer->layers = pCreateInfo->layers;
3103 
3104    if (!imageless) {
3105       for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) {
3106          VkImageView _iview = pCreateInfo->pAttachments[i];
3107          struct tu_image_view *iview = tu_image_view_from_handle(_iview);
3108          framebuffer->attachments[i].attachment = iview;
3109       }
3110    }
3111 
3112    tu_framebuffer_tiling_config(framebuffer, device, pass);
3113 
3114    *pFramebuffer = tu_framebuffer_to_handle(framebuffer);
3115    return VK_SUCCESS;
3116 }
3117 
3118 void
3119 tu_setup_dynamic_framebuffer(struct tu_cmd_buffer *cmd_buffer,
3120                              const VkRenderingInfo *pRenderingInfo)
3121 {
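   /* Dynamic rendering has no VkFramebuffer object, so synthesize one just
    * large enough to cover the render area (offset + extent).
    */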
3122    struct tu_render_pass *pass = &cmd_buffer->dynamic_pass;
3123    struct tu_framebuffer *framebuffer = &cmd_buffer->dynamic_framebuffer;
3124 
3125    framebuffer->attachment_count = pass->attachment_count;
3126    framebuffer->width = pRenderingInfo->renderArea.offset.x +
3127       pRenderingInfo->renderArea.extent.width;
3128    framebuffer->height = pRenderingInfo->renderArea.offset.y +
3129       pRenderingInfo->renderArea.extent.height;
3130    framebuffer->layers = pRenderingInfo->layerCount;
3131 
3132    tu_framebuffer_tiling_config(framebuffer, cmd_buffer->device, pass);
3133 }
3134 
3135 VKAPI_ATTR void VKAPI_CALL
3136 tu_DestroyFramebuffer(VkDevice _device,
3137                       VkFramebuffer _fb,
3138                       const VkAllocationCallbacks *pAllocator)
3139 {
3140    VK_FROM_HANDLE(tu_device, device, _device);
3141 
3142    if (TU_DEBUG(DYNAMIC)) {
3143       vk_common_DestroyFramebuffer(_device, _fb, pAllocator);
3144       return;
3145    }
3146 
3147    VK_FROM_HANDLE(tu_framebuffer, fb, _fb);
3148 
3149    if (!fb)
3150       return;
3151 
3152    vk_object_free(&device->vk, pAllocator, fb);
3153 }
3154 
3155 VKAPI_ATTR VkResult VKAPI_CALL
3156 tu_GetMemoryFdKHR(VkDevice _device,
3157                   const VkMemoryGetFdInfoKHR *pGetFdInfo,
3158                   int *pFd)
3159 {
3160    VK_FROM_HANDLE(tu_device, device, _device);
3161    VK_FROM_HANDLE(tu_device_memory, memory, pGetFdInfo->memory);
3162 
3163    assert(pGetFdInfo->sType == VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR);
3164 
3165    /* At the moment, we support only the handle types below. */
3166    assert(pGetFdInfo->handleType ==
3167              VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT ||
3168           pGetFdInfo->handleType ==
3169              VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
3170 
3171    int prime_fd = tu_bo_export_dmabuf(device, memory->bo);
3172    if (prime_fd < 0)
3173       return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
3174 
3175    *pFd = prime_fd;
3176 
3177    if (memory->image) {
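      /* Record the image's DRM format modifier in the BO metadata so that
       * consumers of the exported dma-buf can recover the tiling layout.
       */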
3178       struct fdl_layout *l = &memory->image->layout[0];
3179       uint64_t modifier;
3180       if (l->ubwc) {
3181          modifier = DRM_FORMAT_MOD_QCOM_COMPRESSED;
3182       } else if (l->tile_mode == 2) {
3183          modifier = DRM_FORMAT_MOD_QCOM_TILED2;
3184       } else if (l->tile_mode == 3) {
3185          modifier = DRM_FORMAT_MOD_QCOM_TILED3;
3186       } else {
3187          assert(!l->tile_mode);
3188          modifier = DRM_FORMAT_MOD_LINEAR;
3189       }
3190       struct fdl_metadata metadata = {
3191          .modifier = modifier,
3192       };
3193       tu_bo_set_metadata(device, memory->bo, &metadata, sizeof(metadata));
3194    }
3195 
3196    return VK_SUCCESS;
3197 }
3198 
3199 VKAPI_ATTR VkResult VKAPI_CALL
3200 tu_GetMemoryFdPropertiesKHR(VkDevice _device,
3201                             VkExternalMemoryHandleTypeFlagBits handleType,
3202                             int fd,
3203                             VkMemoryFdPropertiesKHR *pMemoryFdProperties)
3204 {
3205    VK_FROM_HANDLE(tu_device, device, _device);
3206    assert(handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
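   /* An imported dma-buf can be placed in any of our memory types. */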
3207    pMemoryFdProperties->memoryTypeBits =
3208       (1 << device->physical_device->memory.type_count) - 1;
3209    return VK_SUCCESS;
3210 }
3211 
3212 VKAPI_ATTR void VKAPI_CALL
3213 tu_GetPhysicalDeviceMultisamplePropertiesEXT(
3214    VkPhysicalDevice                            physicalDevice,
3215    VkSampleCountFlagBits                       samples,
3216    VkMultisamplePropertiesEXT*                 pMultisampleProperties)
3217 {
3218    VK_FROM_HANDLE(tu_physical_device, pdevice, physicalDevice);
3219 
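   /* Programmable sample locations appear to be limited to 4x MSAA at pixel
    * granularity here, hence the 1x1 grid.
    */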
3220    if (samples <= VK_SAMPLE_COUNT_4_BIT && pdevice->vk.supported_extensions.EXT_sample_locations)
3221       pMultisampleProperties->maxSampleLocationGridSize = (VkExtent2D){ 1, 1 };
3222    else
3223       pMultisampleProperties->maxSampleLocationGridSize = (VkExtent2D){ 0, 0 };
3224 }
3225 
3226 uint64_t tu_GetDeviceMemoryOpaqueCaptureAddress(
3227     VkDevice                                    device,
3228     const VkDeviceMemoryOpaqueCaptureAddressInfo* pInfo)
3229 {
3230    VK_FROM_HANDLE(tu_device_memory, mem, pInfo->memory);
3231    return mem->bo->iova;
3232 }
3233 
3234 struct tu_debug_bos_entry {
3235    uint32_t count;
3236    uint64_t size;
3237    const char *name;
3238 };
3239 
3240 const char *
3241 tu_debug_bos_add(struct tu_device *dev, uint64_t size, const char *name)
3242 {
3243    assert(name);
3244 
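   /* bo_sizes only exists when BO accounting is enabled via a debug option,
    * so this is a no-op in normal runs.
    */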
3245    if (likely(!dev->bo_sizes))
3246       return NULL;
3247 
3248    mtx_lock(&dev->bo_mutex);
3249    struct hash_entry *entry = _mesa_hash_table_search(dev->bo_sizes, name);
3250    struct tu_debug_bos_entry *debug_bos;
3251 
3252    if (!entry) {
3253       debug_bos = (struct tu_debug_bos_entry *) calloc(
3254          1, sizeof(struct tu_debug_bos_entry));
3255       debug_bos->name = strdup(name);
3256       _mesa_hash_table_insert(dev->bo_sizes, debug_bos->name, debug_bos);
3257    } else {
3258       debug_bos = (struct tu_debug_bos_entry *) entry->data;
3259    }
3260 
3261    debug_bos->count++;
3262    debug_bos->size += align(size, 4096);
3263    mtx_unlock(&dev->bo_mutex);
3264 
3265    return debug_bos->name;
3266 }
3267 
3268 void
3269 tu_debug_bos_del(struct tu_device *dev, struct tu_bo *bo)
3270 {
3271    if (likely(!dev->bo_sizes) || !bo->name)
3272       return;
3273 
3274    mtx_lock(&dev->bo_mutex);
3275    struct hash_entry *entry =
3276       _mesa_hash_table_search(dev->bo_sizes, bo->name);
3277    /* If we're finishing the BO, it should have been added already */
3278    assert(entry);
3279 
3280    struct tu_debug_bos_entry *debug_bos =
3281       (struct tu_debug_bos_entry *) entry->data;
3282    debug_bos->count--;
3283    debug_bos->size -= align(bo->size, 4096);
3284    if (!debug_bos->count) {
3285       _mesa_hash_table_remove(dev->bo_sizes, entry);
3286       free((void *) debug_bos->name);
3287       free(debug_bos);
3288    }
3289    mtx_unlock(&dev->bo_mutex);
3290 }
3291 
3292 static int debug_bos_count_compare(const void *in_a, const void *in_b)
3293 {
3294    struct tu_debug_bos_entry *a = *(struct tu_debug_bos_entry **)in_a;
3295    struct tu_debug_bos_entry *b = *(struct tu_debug_bos_entry **)in_b;
3296    return a->count - b->count;
3297 }
3298 
3299 void
3300 tu_debug_bos_print_stats(struct tu_device *dev)
3301 {
3302    if (likely(!dev->bo_sizes))
3303       return;
3304 
3305    mtx_lock(&dev->bo_mutex);
3306 
3307    /* Copy the hash table entries into an array so we can sort by allocation count. */
3308    struct util_dynarray dyn;
3309    util_dynarray_init(&dyn, NULL);
3310 
3311    uint32_t size = 0;
3312    uint32_t count = 0;
3313    hash_table_foreach(dev->bo_sizes, entry)
3314    {
3315       struct tu_debug_bos_entry *debug_bos =
3316          (struct tu_debug_bos_entry *) entry->data;
3317       util_dynarray_append(&dyn, struct tu_debug_bos_entry *, debug_bos);
3318       size += debug_bos->size / 1024;
3319       count += debug_bos->count;
3320    }
3321 
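   /* Sort ascending by allocation count so the heaviest users print last. */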
3322    qsort(dyn.data,
3323          util_dynarray_num_elements(&dyn, struct tu_debug_bos_entry *),
3324          sizeof(struct tu_debug_bos_entry *), debug_bos_count_compare);
3325 
3326    util_dynarray_foreach(&dyn, struct tu_debug_bos_entry *, entryp)
3327    {
3328       struct tu_debug_bos_entry *debug_bos = *entryp;
3329       mesa_logi("%30s: %4d bos, %lld kb\n", debug_bos->name, debug_bos->count,
3330                 (long long) (debug_bos->size / 1024));
3331    }
3332 
3333    mesa_logi("submitted %d bos (%d MB)\n", count, DIV_ROUND_UP(size, 1024));
3334 
3335    util_dynarray_fini(&dyn);
3336 
3337    mtx_unlock(&dev->bo_mutex);
3338 }
3339 
3340 void
3341 tu_CmdBeginDebugUtilsLabelEXT(VkCommandBuffer _commandBuffer,
3342                               const VkDebugUtilsLabelEXT *pLabelInfo)
3343 {
3344    VK_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, _commandBuffer);
3345 
3346    vk_common_CmdBeginDebugUtilsLabelEXT(_commandBuffer, pLabelInfo);
3347 
3348    /* Note that the spec says:
3349     *
3350     * "An application may open a debug label region in one command buffer and
3351     *  close it in another, or otherwise split debug label regions across
3352     *  multiple command buffers or multiple queue submissions. When viewed
3353     *  from the linear series of submissions to a single queue, the calls to
3354     *  vkCmdBeginDebugUtilsLabelEXT and vkCmdEndDebugUtilsLabelEXT must be
3355     *  matched and balanced."
3356     *
3357     * But if you're beginning labeling during a renderpass and ending outside
3358     * it, or vice versa, these trace ranges in perfetto will be unbalanced.  I
3359     * expect that u_trace and perfetto will do something like take just one of
3360     * the begins/ends, or drop the event entirely, but not crash.  Similarly,
3361     * I think we'll have problems if the tracepoints are split across cmd
3362     * buffers. Still, getting the simple case of cmd buffer annotation into
3363     * perfetto should prove useful.
3364     */
3365    const char *label = pLabelInfo->pLabelName;
3366    if (cmd_buffer->state.pass) {
3367       trace_start_cmd_buffer_annotation_rp(
3368          &cmd_buffer->trace, &cmd_buffer->draw_cs, strlen(label), label);
3369    } else {
3370       trace_start_cmd_buffer_annotation(&cmd_buffer->trace, &cmd_buffer->cs,
3371                                         strlen(label), label);
3372    }
3373 }
3374 
3375 void
3376 tu_CmdEndDebugUtilsLabelEXT(VkCommandBuffer _commandBuffer)
3377 {
3378    VK_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, _commandBuffer);
3379 
3380    if (cmd_buffer->vk.labels.size > 0) {
3381       if (cmd_buffer->state.pass) {
3382          trace_end_cmd_buffer_annotation_rp(&cmd_buffer->trace,
3383                                             &cmd_buffer->draw_cs);
3384       } else {
3385          trace_end_cmd_buffer_annotation(&cmd_buffer->trace, &cmd_buffer->cs);
3386       }
3387    }
3388 
3389    vk_common_CmdEndDebugUtilsLabelEXT(_commandBuffer);
3390 }
3391