/*
 * Copyright © 2022 Imagination Technologies Ltd.
 *
 * based in part on anv driver which is:
 * Copyright © 2015 Intel Corporation
 *
 * based in part on v3dv driver which is:
 * Copyright © 2019 Raspberry Pi
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <assert.h>
#include <fcntl.h>
#include <inttypes.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <vulkan/vulkan.h>
#include <xf86drm.h>

#include "git_sha1.h"
#include "hwdef/rogue_hw_utils.h"
#include "pvr_bo.h"
#include "pvr_border.h"
#include "pvr_clear.h"
#include "pvr_csb.h"
#include "pvr_csb_enum_helpers.h"
#include "pvr_debug.h"
#include "pvr_device_info.h"
#include "pvr_dump_info.h"
#include "pvr_hardcode.h"
#include "pvr_job_render.h"
#include "pvr_limits.h"
#include "pvr_pds.h"
#include "pvr_private.h"
#include "pvr_robustness.h"
#include "pvr_tex_state.h"
#include "pvr_types.h"
#include "pvr_uscgen.h"
#include "pvr_util.h"
#include "pvr_winsys.h"
#include "rogue/rogue.h"
#include "util/build_id.h"
#include "util/log.h"
#include "util/macros.h"
#include "util/mesa-sha1.h"
#include "util/os_misc.h"
#include "util/u_dynarray.h"
#include "util/u_math.h"
#include "vk_alloc.h"
#include "vk_extensions.h"
#include "vk_log.h"
#include "vk_object.h"
#include "vk_physical_device_features.h"
#include "vk_physical_device_properties.h"
#include "vk_sampler.h"
#include "vk_util.h"

#define PVR_GLOBAL_FREE_LIST_INITIAL_SIZE (2U * 1024U * 1024U)
#define PVR_GLOBAL_FREE_LIST_MAX_SIZE (256U * 1024U * 1024U)
#define PVR_GLOBAL_FREE_LIST_GROW_SIZE (1U * 1024U * 1024U)

/* After PVR_SECONDARY_DEVICE_THRESHOLD devices have been created per
 * instance, subsequent devices get a smaller initial global free list size,
 * as this use-case usually implies smaller amounts of work spread across
 * devices. The free list can still grow as required.
 */
#define PVR_SECONDARY_DEVICE_THRESHOLD (4U)
#define PVR_SECONDARY_DEVICE_FREE_LIST_INITAL_SIZE (512U * 1024U)

/* The grow threshold is a percentage. This is intended to be 12.5%, but has
 * been rounded up since the percentage is treated as an integer.
 */
#define PVR_GLOBAL_FREE_LIST_GROW_THRESHOLD 13U
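
/* For example, 12.5% of the 2 MiB initial global free list is 256 KiB; the
 * integer threshold of 13% corresponds to roughly 266 KiB.
 */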

#if defined(VK_USE_PLATFORM_DISPLAY_KHR)
# define PVR_USE_WSI_PLATFORM_DISPLAY true
#else
# define PVR_USE_WSI_PLATFORM_DISPLAY false
#endif

#if PVR_USE_WSI_PLATFORM_DISPLAY
# define PVR_USE_WSI_PLATFORM true
#else
# define PVR_USE_WSI_PLATFORM false
#endif

#define PVR_API_VERSION VK_MAKE_VERSION(1, 0, VK_HEADER_VERSION)

/* Amount of padding required for VkBuffers to ensure we don't read beyond
 * a page boundary.
 */
#define PVR_BUFFER_MEMORY_PADDING_SIZE 4

/* Default size in bytes used by pvr_CreateDevice() for setting up the
 * suballoc_general, suballoc_pds and suballoc_usc suballocators.
 *
 * TODO: Investigate if a different default size can improve the overall
 * performance of internal driver allocations.
 */
#define PVR_SUBALLOCATOR_GENERAL_SIZE (128 * 1024)
#define PVR_SUBALLOCATOR_PDS_SIZE (128 * 1024)
#define PVR_SUBALLOCATOR_TRANSFER_SIZE (128 * 1024)
#define PVR_SUBALLOCATOR_USC_SIZE (128 * 1024)
#define PVR_SUBALLOCATOR_VIS_TEST_SIZE (128 * 1024)

struct pvr_drm_device_config {
   struct pvr_drm_device_info {
      const char *name;
      size_t len;
   } render, display;
};

#define DEF_CONFIG(render_, display_)                               \
   {                                                                \
      .render = { .name = render_, .len = sizeof(render_) - 1 },    \
      .display = { .name = display_, .len = sizeof(display_) - 1 }, \
   }
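
/* Note: sizeof(literal) - 1 in DEF_CONFIG is the string length without the
 * NUL terminator, computed at compile time; pvr_drm_device_compatible() uses
 * it below for strncmp() prefix matching against a device's DT compatible
 * strings.
 */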

/* This is the list of supported DRM render/display driver configs. */
static const struct pvr_drm_device_config pvr_drm_configs[] = {
   DEF_CONFIG("mediatek,mt8173-gpu", "mediatek-drm"),
   DEF_CONFIG("ti,am62-gpu", "ti,am625-dss"),
};

#undef DEF_CONFIG

static const struct vk_instance_extension_table pvr_instance_extensions = {
   .KHR_display = PVR_USE_WSI_PLATFORM_DISPLAY,
   .KHR_external_fence_capabilities = true,
   .KHR_external_memory_capabilities = true,
   .KHR_external_semaphore_capabilities = true,
   .KHR_get_display_properties2 = PVR_USE_WSI_PLATFORM_DISPLAY,
   .KHR_get_physical_device_properties2 = true,
   .KHR_get_surface_capabilities2 = PVR_USE_WSI_PLATFORM,
   .KHR_surface = PVR_USE_WSI_PLATFORM,
#ifndef VK_USE_PLATFORM_WIN32_KHR
   .EXT_headless_surface = PVR_USE_WSI_PLATFORM,
#endif
   .EXT_debug_report = true,
   .EXT_debug_utils = true,
};

static void pvr_physical_device_get_supported_extensions(
   struct vk_device_extension_table *extensions)
{
   *extensions = (struct vk_device_extension_table){
      .KHR_bind_memory2 = true,
      .KHR_copy_commands2 = true,
      /* TODO: Enable this extension when the conformance tests are updated
       * to version 1.3.6.0; the current version does not include the
       * Imagination driver ID, which makes a dEQP test fail.
       */
      .KHR_driver_properties = false,
      .KHR_external_fence = true,
      .KHR_external_fence_fd = true,
      .KHR_external_memory = true,
      .KHR_external_memory_fd = true,
      .KHR_format_feature_flags2 = true,
      .KHR_external_semaphore = PVR_USE_WSI_PLATFORM,
      .KHR_external_semaphore_fd = PVR_USE_WSI_PLATFORM,
      .KHR_get_memory_requirements2 = true,
      .KHR_image_format_list = true,
      .KHR_index_type_uint8 = true,
      .KHR_shader_expect_assume = true,
      .KHR_swapchain = PVR_USE_WSI_PLATFORM,
      .KHR_timeline_semaphore = true,
      .KHR_uniform_buffer_standard_layout = true,
      .EXT_external_memory_dma_buf = true,
      .EXT_host_query_reset = true,
      .EXT_index_type_uint8 = true,
      .EXT_memory_budget = true,
      .EXT_private_data = true,
      .EXT_scalar_block_layout = true,
      .EXT_texel_buffer_alignment = true,
      .EXT_tooling_info = true,
   };
}

static void pvr_physical_device_get_supported_features(
   const struct pvr_device_info *const dev_info,
   struct vk_features *const features)
{
   *features = (struct vk_features){
      /* Vulkan 1.0 */
      .robustBufferAccess = true,
      .fullDrawIndexUint32 = true,
      .imageCubeArray = true,
      .independentBlend = false,
      .geometryShader = false,
      .tessellationShader = false,
      .sampleRateShading = true,
      .dualSrcBlend = false,
      .logicOp = false,
      .multiDrawIndirect = true,
      .drawIndirectFirstInstance = true,
      .depthClamp = true,
      .depthBiasClamp = true,
      .fillModeNonSolid = false,
      .depthBounds = false,
      .wideLines = true,
      .largePoints = true,
      .alphaToOne = false,
      .multiViewport = false,
      .samplerAnisotropy = false,
      .textureCompressionETC2 = true,
      .textureCompressionASTC_LDR = false,
      .textureCompressionBC = false,
      .occlusionQueryPrecise = false,
      .pipelineStatisticsQuery = false,
      .vertexPipelineStoresAndAtomics = true,
      .fragmentStoresAndAtomics = true,
      .shaderTessellationAndGeometryPointSize = false,
      .shaderImageGatherExtended = false,
      .shaderStorageImageExtendedFormats = true,
      .shaderStorageImageMultisample = false,
      .shaderStorageImageReadWithoutFormat = true,
      .shaderStorageImageWriteWithoutFormat = false,
      .shaderUniformBufferArrayDynamicIndexing = true,
      .shaderSampledImageArrayDynamicIndexing = true,
      .shaderStorageBufferArrayDynamicIndexing = true,
      .shaderStorageImageArrayDynamicIndexing = true,
      .shaderClipDistance = false,
      .shaderCullDistance = false,
      .shaderFloat64 = false,
      .shaderInt64 = true,
      .shaderInt16 = true,
      .shaderResourceResidency = false,
      .shaderResourceMinLod = false,
      .sparseBinding = false,
      .sparseResidencyBuffer = false,
      .sparseResidencyImage2D = false,
      .sparseResidencyImage3D = false,
      .sparseResidency2Samples = false,
      .sparseResidency4Samples = false,
      .sparseResidency8Samples = false,
      .sparseResidency16Samples = false,
      .sparseResidencyAliased = false,
      .variableMultisampleRate = false,
      .inheritedQueries = false,

      /* VK_KHR_index_type_uint8 */
      .indexTypeUint8 = true,

      /* Vulkan 1.2 / VK_KHR_timeline_semaphore */
      .timelineSemaphore = true,

      /* Vulkan 1.2 / VK_KHR_uniform_buffer_standard_layout */
      .uniformBufferStandardLayout = true,

      /* Vulkan 1.2 / VK_EXT_host_query_reset */
      .hostQueryReset = true,

      /* Vulkan 1.3 / VK_EXT_private_data */
      .privateData = true,

      /* Vulkan 1.2 / VK_EXT_scalar_block_layout */
      .scalarBlockLayout = true,

      /* Vulkan 1.3 / VK_EXT_texel_buffer_alignment */
      .texelBufferAlignment = true,

      /* VK_KHR_shader_expect_assume */
      .shaderExpectAssume = true,
   };
}

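/* The pipeline cache UUID mixes the driver build-id with the packed BVNC
 * (the core's branch/version/number/config identifier), so caches are
 * invalidated both by driver rebuilds and by running on a different core.
 * Only the first VK_UUID_SIZE (16) bytes of the SHA-1 digest are kept.
 */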
static bool pvr_physical_device_init_pipeline_cache_uuid(
   const struct pvr_device_info *const dev_info,
   uint8_t pipeline_cache_uuid_out[const static VK_UUID_SIZE])
{
   struct mesa_sha1 sha1_ctx;
   unsigned build_id_len;
   uint8_t sha1[20];
   uint64_t bvnc;

   const struct build_id_note *note =
      build_id_find_nhdr_for_addr(pvr_physical_device_init_pipeline_cache_uuid);
   if (!note) {
      mesa_loge("Failed to find build-id");
      return false;
   }

   build_id_len = build_id_length(note);
   if (build_id_len < 20) {
      mesa_loge("Build-id too short. It needs to be a SHA");
      return false;
   }

   bvnc = pvr_get_packed_bvnc(dev_info);

   _mesa_sha1_init(&sha1_ctx);
   _mesa_sha1_update(&sha1_ctx, build_id_data(note), build_id_len);
   _mesa_sha1_update(&sha1_ctx, &bvnc, sizeof(bvnc));
   _mesa_sha1_final(&sha1_ctx, sha1);
   memcpy(pipeline_cache_uuid_out, sha1, VK_UUID_SIZE);

   return true;
}

struct pvr_descriptor_limits {
   uint32_t max_per_stage_resources;
   uint32_t max_per_stage_samplers;
   uint32_t max_per_stage_uniform_buffers;
   uint32_t max_per_stage_storage_buffers;
   uint32_t max_per_stage_sampled_images;
   uint32_t max_per_stage_storage_images;
   uint32_t max_per_stage_input_attachments;
};

static const struct pvr_descriptor_limits *
pvr_get_physical_device_descriptor_limits(
   const struct pvr_device_info *dev_info,
   const struct pvr_device_runtime_info *dev_runtime_info)
{
   enum pvr_descriptor_cs_level {
      /* clang-format off */
      CS4096, /* 6XT and some XE cores with large CS. */
      CS2560, /* Mid-range Rogue XE cores. */
      CS2048, /* Low-end Rogue XE cores. */
      CS1536, /* Ultra-low-end 9XEP. */
      CS680,  /* Lower limits for older devices. */
      CS408,  /* 7XE. */
      /* clang-format on */
   };

   static const struct pvr_descriptor_limits descriptor_limits[] = {
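      /* Columns follow struct pvr_descriptor_limits: resources, samplers,
       * uniform buffers, storage buffers, sampled images, storage images,
       * input attachments.
       */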
      [CS4096] = { 1160U, 256U, 192U, 144U, 256U, 256U, 8U, },
      [CS2560] = {  648U, 128U, 128U, 128U, 128U, 128U, 8U, },
      [CS2048] = {  584U, 128U,  96U,  64U, 128U, 128U, 8U, },
      [CS1536] = {  456U,  64U,  96U,  64U, 128U,  64U, 8U, },
      [CS680]  = {  224U,  32U,  64U,  36U,  48U,   8U, 8U, },
      [CS408]  = {  128U,  16U,  40U,  28U,  16U,   8U, 8U, },
   };

   const uint32_t common_size =
      pvr_calc_fscommon_size_and_tiles_in_flight(dev_info,
                                                 dev_runtime_info,
                                                 UINT32_MAX,
                                                 1);
   enum pvr_descriptor_cs_level cs_level;

   if (common_size >= 2048) {
      cs_level = CS2048;
   } else if (common_size >= 1536) {
      cs_level = CS1536;
   } else if (common_size >= 680) {
      cs_level = CS680;
   } else if (common_size >= 408) {
      cs_level = CS408;
   } else {
      mesa_loge("This core appears to have a very limited amount of shared "
                "register space and may not meet the Vulkan spec limits.");
      abort();
   }

   return &descriptor_limits[cs_level];
}

static bool pvr_physical_device_get_properties(
   const struct pvr_physical_device *const pdevice,
   struct vk_properties *const properties)
{
   const struct pvr_device_info *const dev_info = &pdevice->dev_info;
   const struct pvr_device_runtime_info *const dev_runtime_info =
      &pdevice->dev_runtime_info;
   const struct pvr_descriptor_limits *descriptor_limits =
      pvr_get_physical_device_descriptor_limits(dev_info, dev_runtime_info);

   /* Default value based on the minimum value found in all existing cores. */
   const uint32_t max_multisample =
      PVR_GET_FEATURE_VALUE(dev_info, max_multisample, 4);

   /* Default value based on the minimum value found in all existing cores. */
   const uint32_t uvs_banks = PVR_GET_FEATURE_VALUE(dev_info, uvs_banks, 2);

   /* Default value based on the minimum value found in all existing cores. */
   const uint32_t uvs_pba_entries =
      PVR_GET_FEATURE_VALUE(dev_info, uvs_pba_entries, 160);

   /* Default value based on the minimum value found in all existing cores. */
   const uint32_t num_user_clip_planes =
      PVR_GET_FEATURE_VALUE(dev_info, num_user_clip_planes, 8);

   const uint32_t sub_pixel_precision =
      PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format) ? 4U : 8U;

   const uint32_t max_render_size = rogue_get_render_size_max(dev_info);

   const uint32_t max_sample_bits = ((max_multisample << 1) - 1);
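
   /* VkSampleCountFlagBits values are powers of two, so with e.g.
    * max_multisample = 4 the expression above gives (4 << 1) - 1 = 0x7,
    * i.e. support for 1, 2 and 4 samples.
    */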

   const uint32_t max_user_vertex_components =
      ((uvs_banks <= 8U) && (uvs_pba_entries == 160U)) ? 64U : 128U;

   /* The workgroup invocations are limited by the case where we have a compute
    * barrier - each slot has a fixed number of invocations, and the whole
    * workgroup may need to span multiple slots. As each slot will WAIT at the
    * barrier until the last invocation completes, all have to be schedulable
    * at the same time.
    *
    * Typically all Rogue cores have 16 slots. Some of the smallest cores are
    * reduced to 14.
    *
    * The compute barrier slot exhaustion scenario can be tested with:
    * dEQP-VK.memory_model.message_passing*u32.coherent.fence_fence
    * .atomicwrite*guard*comp
    */

   /* Default value based on the minimum value found in all existing cores. */
   const uint32_t usc_slots = PVR_GET_FEATURE_VALUE(dev_info, usc_slots, 14);

   /* Default value based on the minimum value found in all existing cores. */
   const uint32_t max_instances_per_pds_task =
      PVR_GET_FEATURE_VALUE(dev_info, max_instances_per_pds_task, 32U);

   const uint32_t max_compute_work_group_invocations =
      (usc_slots * max_instances_per_pds_task >= 512U) ? 512U : 384U;
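
   /* E.g. with the minimum configuration above, 14 slots * 32 instances =
    * 448 schedulable invocations, which cannot cover a 512-wide workgroup,
    * so 384 is advertised instead.
    */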

   bool ret;

   *properties = (struct vk_properties){
      /* Vulkan 1.0 */
      .apiVersion = PVR_API_VERSION,
      .driverVersion = vk_get_driver_version(),
      .vendorID = VK_VENDOR_ID_IMAGINATION,
      .deviceID = dev_info->ident.device_id,
      .deviceType = VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU,
      /* deviceName and pipelineCacheUUID are filled below. */

      .maxImageDimension1D = max_render_size,
      .maxImageDimension2D = max_render_size,
      .maxImageDimension3D = PVR_MAX_TEXTURE_EXTENT_Z,
      .maxImageDimensionCube = max_render_size,
      .maxImageArrayLayers = PVR_MAX_ARRAY_LAYERS,
      .maxTexelBufferElements = 64U * 1024U,
      .maxUniformBufferRange = 128U * 1024U * 1024U,
      .maxStorageBufferRange = 128U * 1024U * 1024U,
      .maxPushConstantsSize = PVR_MAX_PUSH_CONSTANTS_SIZE,
      .maxMemoryAllocationCount = UINT32_MAX,
      .maxSamplerAllocationCount = UINT32_MAX,
      .bufferImageGranularity = 1U,
      .sparseAddressSpaceSize = 256ULL * 1024ULL * 1024ULL * 1024ULL,
      /* Maximum number of descriptor sets that can be bound simultaneously. */
      .maxBoundDescriptorSets = PVR_MAX_DESCRIPTOR_SETS,
      .maxPerStageResources = descriptor_limits->max_per_stage_resources,
      .maxPerStageDescriptorSamplers =
         descriptor_limits->max_per_stage_samplers,
      .maxPerStageDescriptorUniformBuffers =
         descriptor_limits->max_per_stage_uniform_buffers,
      .maxPerStageDescriptorStorageBuffers =
         descriptor_limits->max_per_stage_storage_buffers,
      .maxPerStageDescriptorSampledImages =
         descriptor_limits->max_per_stage_sampled_images,
      .maxPerStageDescriptorStorageImages =
         descriptor_limits->max_per_stage_storage_images,
      .maxPerStageDescriptorInputAttachments =
         descriptor_limits->max_per_stage_input_attachments,
      .maxDescriptorSetSamplers = 256U,
      .maxDescriptorSetUniformBuffers = 256U,
      .maxDescriptorSetUniformBuffersDynamic =
         PVR_MAX_DESCRIPTOR_SET_UNIFORM_DYNAMIC_BUFFERS,
      .maxDescriptorSetStorageBuffers = 256U,
      .maxDescriptorSetStorageBuffersDynamic =
         PVR_MAX_DESCRIPTOR_SET_STORAGE_DYNAMIC_BUFFERS,
      .maxDescriptorSetSampledImages = 256U,
      .maxDescriptorSetStorageImages = 256U,
      .maxDescriptorSetInputAttachments = 256U,

      /* Vertex Shader Limits */
      .maxVertexInputAttributes = PVR_MAX_VERTEX_INPUT_BINDINGS,
      .maxVertexInputBindings = PVR_MAX_VERTEX_INPUT_BINDINGS,
      .maxVertexInputAttributeOffset = 0xFFFF,
      .maxVertexInputBindingStride = 1024U * 1024U * 1024U * 2U,
      .maxVertexOutputComponents = max_user_vertex_components,

      /* Tessellation Limits */
      .maxTessellationGenerationLevel = 0,
      .maxTessellationPatchSize = 0,
      .maxTessellationControlPerVertexInputComponents = 0,
      .maxTessellationControlPerVertexOutputComponents = 0,
      .maxTessellationControlPerPatchOutputComponents = 0,
      .maxTessellationControlTotalOutputComponents = 0,
      .maxTessellationEvaluationInputComponents = 0,
      .maxTessellationEvaluationOutputComponents = 0,

      /* Geometry Shader Limits */
      .maxGeometryShaderInvocations = 0,
      .maxGeometryInputComponents = 0,
      .maxGeometryOutputComponents = 0,
      .maxGeometryOutputVertices = 0,
      .maxGeometryTotalOutputComponents = 0,

      /* Fragment Shader Limits */
      .maxFragmentInputComponents = max_user_vertex_components,
      .maxFragmentOutputAttachments = PVR_MAX_COLOR_ATTACHMENTS,
      .maxFragmentDualSrcAttachments = 0,
      .maxFragmentCombinedOutputResources =
         descriptor_limits->max_per_stage_storage_buffers +
         descriptor_limits->max_per_stage_storage_images +
         PVR_MAX_COLOR_ATTACHMENTS,

      /* Compute Shader Limits */
      .maxComputeSharedMemorySize = 16U * 1024U,
      .maxComputeWorkGroupCount = { 64U * 1024U, 64U * 1024U, 64U * 1024U },
      .maxComputeWorkGroupInvocations = max_compute_work_group_invocations,
      .maxComputeWorkGroupSize = { max_compute_work_group_invocations,
                                   max_compute_work_group_invocations,
                                   64U },

      /* Rasterization Limits */
      .subPixelPrecisionBits = sub_pixel_precision,
      .subTexelPrecisionBits = 8U,
      .mipmapPrecisionBits = 8U,

      .maxDrawIndexedIndexValue = UINT32_MAX,
      .maxDrawIndirectCount = 2U * 1024U * 1024U * 1024U,
      .maxSamplerLodBias = 16.0f,
      .maxSamplerAnisotropy = 1.0f,
      .maxViewports = PVR_MAX_VIEWPORTS,

      .maxViewportDimensions[0] = max_render_size,
      .maxViewportDimensions[1] = max_render_size,
      .viewportBoundsRange[0] = -(int32_t)(2U * max_render_size),
      .viewportBoundsRange[1] = 2U * max_render_size,

      .viewportSubPixelBits = 0,
      .minMemoryMapAlignment = pdevice->ws->page_size,
      .minTexelBufferOffsetAlignment = 16U,
      .minUniformBufferOffsetAlignment = 4U,
      .minStorageBufferOffsetAlignment = 4U,

      .minTexelOffset = -8,
      .maxTexelOffset = 7U,
      .minTexelGatherOffset = -8,
      .maxTexelGatherOffset = 7,
      .minInterpolationOffset = -0.5,
      .maxInterpolationOffset = 0.5,
      .subPixelInterpolationOffsetBits = 4U,

      .maxFramebufferWidth = max_render_size,
      .maxFramebufferHeight = max_render_size,
      .maxFramebufferLayers = PVR_MAX_FRAMEBUFFER_LAYERS,

      .framebufferColorSampleCounts = max_sample_bits,
      .framebufferDepthSampleCounts = max_sample_bits,
      .framebufferStencilSampleCounts = max_sample_bits,
      .framebufferNoAttachmentsSampleCounts = max_sample_bits,
      .maxColorAttachments = PVR_MAX_COLOR_ATTACHMENTS,
      .sampledImageColorSampleCounts = max_sample_bits,
      .sampledImageIntegerSampleCounts = max_sample_bits,
      .sampledImageDepthSampleCounts = max_sample_bits,
      .sampledImageStencilSampleCounts = max_sample_bits,
      .storageImageSampleCounts = max_sample_bits,
      .maxSampleMaskWords = 1U,
      .timestampComputeAndGraphics = false,
      .timestampPeriod = 0.0f,
      .maxClipDistances = num_user_clip_planes,
      .maxCullDistances = num_user_clip_planes,
      .maxCombinedClipAndCullDistances = num_user_clip_planes,
      .discreteQueuePriorities = 2U,
      .pointSizeRange[0] = 1.0f,
      .pointSizeRange[1] = 511.0f,
      .pointSizeGranularity = 0.0625f,
      .lineWidthRange[0] = 1.0f / 16.0f,
      .lineWidthRange[1] = 16.0f,
      .lineWidthGranularity = 1.0f / 16.0f,
      .strictLines = false,
      .standardSampleLocations = true,
      .optimalBufferCopyOffsetAlignment = 4U,
      .optimalBufferCopyRowPitchAlignment = 4U,
      .nonCoherentAtomSize = 1U,

      /* Vulkan 1.2 / VK_KHR_driver_properties */
      .driverID = VK_DRIVER_ID_IMAGINATION_OPEN_SOURCE_MESA,
      .driverName = "Imagination open-source Mesa driver",
      .driverInfo = "Mesa " PACKAGE_VERSION MESA_GIT_SHA1,
      .conformanceVersion = {
         .major = 1,
         .minor = 3,
         .subminor = 4,
         .patch = 1,
      },

      /* Vulkan 1.2 / VK_KHR_timeline_semaphore */
      .maxTimelineSemaphoreValueDifference = UINT64_MAX,

      /* Vulkan 1.3 / VK_EXT_texel_buffer_alignment */
      .storageTexelBufferOffsetAlignmentBytes = 16,
      .storageTexelBufferOffsetSingleTexelAlignment = true,
      .uniformTexelBufferOffsetAlignmentBytes = 16,
      .uniformTexelBufferOffsetSingleTexelAlignment = false,
   };

   snprintf(properties->deviceName,
            sizeof(properties->deviceName),
            "Imagination PowerVR %s %s",
            dev_info->ident.series_name,
            dev_info->ident.public_name);

   ret = pvr_physical_device_init_pipeline_cache_uuid(
      dev_info,
      properties->pipelineCacheUUID);
   if (!ret)
      return false;

   return true;
}

VkResult pvr_EnumerateInstanceVersion(uint32_t *pApiVersion)
{
   *pApiVersion = PVR_API_VERSION;
   return VK_SUCCESS;
}

VkResult
pvr_EnumerateInstanceExtensionProperties(const char *pLayerName,
                                         uint32_t *pPropertyCount,
                                         VkExtensionProperties *pProperties)
{
   if (pLayerName)
      return vk_error(NULL, VK_ERROR_LAYER_NOT_PRESENT);

   return vk_enumerate_instance_extension_properties(&pvr_instance_extensions,
                                                     pPropertyCount,
                                                     pProperties);
}

static void pvr_physical_device_destroy(struct vk_physical_device *vk_pdevice)
{
   struct pvr_physical_device *pdevice =
      container_of(vk_pdevice, struct pvr_physical_device, vk);

   /* Be careful here. The device might not have been initialized. This can
    * happen since initialization is done in vkEnumeratePhysicalDevices() but
    * finish is done in vkDestroyInstance(). Make sure that you check for NULL
    * before freeing or that the freeing functions accept NULL pointers.
    */

   if (pdevice->compiler)
      ralloc_free(pdevice->compiler);

   pvr_wsi_finish(pdevice);

   if (pdevice->ws)
      pvr_winsys_destroy(pdevice->ws);

   vk_free(&pdevice->vk.instance->alloc, pdevice->render_path);
   vk_free(&pdevice->vk.instance->alloc, pdevice->display_path);

   vk_physical_device_finish(&pdevice->vk);

   vk_free(&pdevice->vk.instance->alloc, pdevice);
}

void pvr_DestroyInstance(VkInstance _instance,
                         const VkAllocationCallbacks *pAllocator)
{
   PVR_FROM_HANDLE(pvr_instance, instance, _instance);

   if (!instance)
      return;

   VG(VALGRIND_DESTROY_MEMPOOL(instance));

   vk_instance_finish(&instance->vk);
   vk_free(&instance->vk.alloc, instance);
}

static uint64_t pvr_compute_heap_size(void)
{
   /* Query the total RAM from the system. */
   uint64_t total_ram;
   if (!os_get_total_physical_memory(&total_ram))
      return 0;

   /* We don't want to burn too much RAM with the GPU. If the user has 4 GiB
    * or less, we use at most half. If they have more than 4 GiB, we use 3/4.
    */
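   /* E.g. 4 GiB of total RAM yields a 2 GiB heap, while 8 GiB of total RAM
    * yields a 6 GiB heap.
    */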
   uint64_t available_ram;
   if (total_ram <= 4ULL * 1024ULL * 1024ULL * 1024ULL)
      available_ram = total_ram / 2U;
   else
      available_ram = total_ram * 3U / 4U;

   return available_ram;
}

static VkResult pvr_physical_device_init(struct pvr_physical_device *pdevice,
                                         struct pvr_instance *instance,
                                         drmDevicePtr drm_render_device,
                                         drmDevicePtr drm_display_device)
{
   struct vk_physical_device_dispatch_table dispatch_table;
   struct vk_device_extension_table supported_extensions;
   struct vk_properties supported_properties;
   struct vk_features supported_features;
   struct pvr_winsys *ws;
   char *display_path;
   char *render_path;
   VkResult result;

   if (!getenv("PVR_I_WANT_A_BROKEN_VULKAN_DRIVER")) {
      return vk_errorf(instance,
                       VK_ERROR_INCOMPATIBLE_DRIVER,
                       "WARNING: powervr is not a conformant Vulkan "
                       "implementation. Pass "
                       "PVR_I_WANT_A_BROKEN_VULKAN_DRIVER=1 if you know "
                       "what you're doing.");
   }

   render_path = vk_strdup(&instance->vk.alloc,
                           drm_render_device->nodes[DRM_NODE_RENDER],
                           VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
   if (!render_path) {
      result = VK_ERROR_OUT_OF_HOST_MEMORY;
      goto err_out;
   }

   if (instance->vk.enabled_extensions.KHR_display) {
      display_path = vk_strdup(&instance->vk.alloc,
                               drm_display_device->nodes[DRM_NODE_PRIMARY],
                               VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
      if (!display_path) {
         result = VK_ERROR_OUT_OF_HOST_MEMORY;
         goto err_vk_free_render_path;
      }
   } else {
      display_path = NULL;
   }

   result =
      pvr_winsys_create(render_path, display_path, &instance->vk.alloc, &ws);
   if (result != VK_SUCCESS)
      goto err_vk_free_display_path;

   pdevice->instance = instance;
   pdevice->render_path = render_path;
   pdevice->display_path = display_path;
   pdevice->ws = ws;

   result = ws->ops->device_info_init(ws,
                                      &pdevice->dev_info,
                                      &pdevice->dev_runtime_info);
   if (result != VK_SUCCESS)
      goto err_pvr_winsys_destroy;

   pvr_physical_device_get_supported_extensions(&supported_extensions);
   pvr_physical_device_get_supported_features(&pdevice->dev_info,
                                              &supported_features);
   if (!pvr_physical_device_get_properties(pdevice, &supported_properties)) {
      result = vk_errorf(instance,
                         VK_ERROR_INITIALIZATION_FAILED,
                         "Failed to collect physical device properties");
      goto err_pvr_winsys_destroy;
   }

   vk_physical_device_dispatch_table_from_entrypoints(
      &dispatch_table,
      &pvr_physical_device_entrypoints,
      true);

   vk_physical_device_dispatch_table_from_entrypoints(
      &dispatch_table,
      &wsi_physical_device_entrypoints,
      false);

   result = vk_physical_device_init(&pdevice->vk,
                                    &instance->vk,
                                    &supported_extensions,
                                    &supported_features,
                                    &supported_properties,
                                    &dispatch_table);
   if (result != VK_SUCCESS)
      goto err_pvr_winsys_destroy;

   pdevice->vk.supported_sync_types = ws->sync_types;

   /* Setup available memory heaps and types. */
   pdevice->memory.memoryHeapCount = 1;
   pdevice->memory.memoryHeaps[0].size = pvr_compute_heap_size();
   pdevice->memory.memoryHeaps[0].flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT;

   pdevice->memory.memoryTypeCount = 1;
   pdevice->memory.memoryTypes[0].propertyFlags =
      VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
      VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
      VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
   pdevice->memory.memoryTypes[0].heapIndex = 0;

   result = pvr_wsi_init(pdevice);
   if (result != VK_SUCCESS) {
      vk_error(instance, result);
      goto err_vk_physical_device_finish;
   }

   pdevice->compiler = rogue_compiler_create(&pdevice->dev_info);
   if (!pdevice->compiler) {
      result = vk_errorf(instance,
                         VK_ERROR_INITIALIZATION_FAILED,
                         "Failed to initialize Rogue compiler");
      goto err_wsi_finish;
   }

   return VK_SUCCESS;

err_wsi_finish:
   pvr_wsi_finish(pdevice);

err_vk_physical_device_finish:
   vk_physical_device_finish(&pdevice->vk);

err_pvr_winsys_destroy:
   pvr_winsys_destroy(ws);

err_vk_free_display_path:
   vk_free(&instance->vk.alloc, display_path);

err_vk_free_render_path:
   vk_free(&instance->vk.alloc, render_path);

err_out:
   return result;
}

static VkResult pvr_get_drm_devices(void *const obj,
                                    drmDevicePtr *const devices,
                                    const int max_devices,
                                    int *const num_devices_out)
{
   int ret = drmGetDevices2(0, devices, max_devices);
   if (ret < 0) {
      return vk_errorf(obj,
                       VK_ERROR_INITIALIZATION_FAILED,
                       "Failed to enumerate drm devices (errno %d: %s)",
                       -ret,
                       strerror(-ret));
   }

   if (num_devices_out)
      *num_devices_out = ret;

   return VK_SUCCESS;
}

static bool
pvr_drm_device_compatible(const struct pvr_drm_device_info *const info,
                          drmDevice *const drm_dev)
{
   char **const compatible = drm_dev->deviceinfo.platform->compatible;

   for (char **compat = compatible; *compat; compat++) {
      if (strncmp(*compat, info->name, info->len) == 0)
         return true;
   }

   return false;
}

static const struct pvr_drm_device_config *
pvr_drm_device_get_config(drmDevice *const drm_dev)
{
   for (size_t i = 0U; i < ARRAY_SIZE(pvr_drm_configs); i++) {
      if (pvr_drm_device_compatible(&pvr_drm_configs[i].render, drm_dev))
         return &pvr_drm_configs[i];
   }

   return NULL;
}

static void
pvr_physical_device_dump_info(const struct pvr_physical_device *pdevice,
                              char *const *comp_display,
                              char *const *comp_render)
{
   drmVersionPtr version_display, version_render;
   struct pvr_device_dump_info info;

   version_display = drmGetVersion(pdevice->ws->display_fd);
   if (!version_display)
      return;

   version_render = drmGetVersion(pdevice->ws->render_fd);
   if (!version_render) {
      drmFreeVersion(version_display);
      return;
   }

   info.device_info = &pdevice->dev_info;
   info.device_runtime_info = &pdevice->dev_runtime_info;
   info.drm_display.patchlevel = version_display->version_patchlevel;
   info.drm_display.major = version_display->version_major;
   info.drm_display.minor = version_display->version_minor;
   info.drm_display.name = version_display->name;
   info.drm_display.date = version_display->date;
   info.drm_display.comp = comp_display;
   info.drm_render.patchlevel = version_render->version_patchlevel;
   info.drm_render.major = version_render->version_major;
   info.drm_render.minor = version_render->version_minor;
   info.drm_render.name = version_render->name;
   info.drm_render.date = version_render->date;
   info.drm_render.comp = comp_render;

   pvr_dump_physical_device_info(&info);

   drmFreeVersion(version_display);
   drmFreeVersion(version_render);
}

static VkResult
pvr_physical_device_enumerate(struct vk_instance *const vk_instance)
{
   struct pvr_instance *const instance =
      container_of(vk_instance, struct pvr_instance, vk);

   const struct pvr_drm_device_config *config = NULL;

   drmDevicePtr drm_display_device = NULL;
   drmDevicePtr drm_render_device = NULL;
   struct pvr_physical_device *pdevice;
   drmDevicePtr *drm_devices;
   int num_drm_devices = 0;
   VkResult result;

   result = pvr_get_drm_devices(instance, NULL, 0, &num_drm_devices);
   if (result != VK_SUCCESS)
      goto out;

   if (num_drm_devices == 0) {
      result = VK_SUCCESS;
      goto out;
   }

   drm_devices = vk_alloc(&vk_instance->alloc,
                          sizeof(*drm_devices) * num_drm_devices,
                          8,
                          VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
   if (!drm_devices) {
      result = vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY);
      goto out;
   }

   result = pvr_get_drm_devices(instance, drm_devices, num_drm_devices, NULL);
   if (result != VK_SUCCESS)
      goto out_free_drm_device_ptrs;

   /* First search for our render node... */
   for (int i = 0; i < num_drm_devices; i++) {
      drmDevice *const drm_dev = drm_devices[i];

      if (drm_dev->bustype != DRM_BUS_PLATFORM)
         continue;

      if (!(drm_dev->available_nodes & BITFIELD_BIT(DRM_NODE_RENDER)))
         continue;

      config = pvr_drm_device_get_config(drm_dev);
      if (config) {
         drm_render_device = drm_dev;
         break;
      }
   }

   if (!config) {
      result = VK_SUCCESS;
      goto out_free_drm_devices;
   }

   mesa_logd("Found compatible render device '%s'.",
             drm_render_device->nodes[DRM_NODE_RENDER]);

   /* ...then find the compatible display node. */
   for (int i = 0; i < num_drm_devices; i++) {
      drmDevice *const drm_dev = drm_devices[i];

      if (!(drm_dev->available_nodes & BITFIELD_BIT(DRM_NODE_PRIMARY)))
         continue;

      if (pvr_drm_device_compatible(&config->display, drm_dev)) {
         drm_display_device = drm_dev;
         break;
      }
   }

   if (!drm_display_device) {
      mesa_loge("Render device '%s' has no compatible display device.",
                drm_render_device->nodes[DRM_NODE_RENDER]);
      result = VK_SUCCESS;
      goto out_free_drm_devices;
   }

   mesa_logd("Found compatible display device '%s'.",
             drm_display_device->nodes[DRM_NODE_PRIMARY]);

   pdevice = vk_zalloc(&vk_instance->alloc,
                       sizeof(*pdevice),
                       8,
                       VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
   if (!pdevice) {
      result = vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY);
      goto out_free_drm_devices;
   }

   result = pvr_physical_device_init(pdevice,
                                     instance,
                                     drm_render_device,
                                     drm_display_device);
   if (result != VK_SUCCESS) {
      if (result == VK_ERROR_INCOMPATIBLE_DRIVER)
         result = VK_SUCCESS;

      goto err_free_pdevice;
   }

   if (PVR_IS_DEBUG_SET(INFO)) {
      pvr_physical_device_dump_info(
         pdevice,
         drm_display_device->deviceinfo.platform->compatible,
         drm_render_device->deviceinfo.platform->compatible);
   }

   list_add(&pdevice->vk.link, &vk_instance->physical_devices.list);

   result = VK_SUCCESS;
   goto out_free_drm_devices;

err_free_pdevice:
   vk_free(&vk_instance->alloc, pdevice);

out_free_drm_devices:
   drmFreeDevices(drm_devices, num_drm_devices);

out_free_drm_device_ptrs:
   vk_free(&vk_instance->alloc, drm_devices);

out:
   return result;
}

VkResult pvr_CreateInstance(const VkInstanceCreateInfo *pCreateInfo,
                            const VkAllocationCallbacks *pAllocator,
                            VkInstance *pInstance)
{
   struct vk_instance_dispatch_table dispatch_table;
   struct pvr_instance *instance;
   VkResult result;

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO);

   if (!pAllocator)
      pAllocator = vk_default_allocator();

   instance = vk_alloc(pAllocator,
                       sizeof(*instance),
                       8,
                       VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
   if (!instance)
      return vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);

   vk_instance_dispatch_table_from_entrypoints(&dispatch_table,
                                               &pvr_instance_entrypoints,
                                               true);

   vk_instance_dispatch_table_from_entrypoints(&dispatch_table,
                                               &wsi_instance_entrypoints,
                                               false);

   result = vk_instance_init(&instance->vk,
                             &pvr_instance_extensions,
                             &dispatch_table,
                             pCreateInfo,
                             pAllocator);
   if (result != VK_SUCCESS) {
      vk_free(pAllocator, instance);
      return result;
   }

   pvr_process_debug_variable();

   instance->active_device_count = 0;

   instance->vk.physical_devices.enumerate = pvr_physical_device_enumerate;
   instance->vk.physical_devices.destroy = pvr_physical_device_destroy;

   VG(VALGRIND_CREATE_MEMPOOL(instance, 0, false));

   *pInstance = pvr_instance_to_handle(instance);

   return VK_SUCCESS;
}

static uint32_t pvr_get_simultaneous_num_allocs(
   const struct pvr_device_info *dev_info,
   ASSERTED const struct pvr_device_runtime_info *dev_runtime_info)
{
   uint32_t min_cluster_per_phantom;

   if (PVR_HAS_FEATURE(dev_info, s8xe))
      return PVR_GET_FEATURE_VALUE(dev_info, num_raster_pipes, 0U);

   assert(dev_runtime_info->num_phantoms == 1);
   min_cluster_per_phantom = PVR_GET_FEATURE_VALUE(dev_info, num_clusters, 1U);

   if (min_cluster_per_phantom >= 4)
      return 1;
   else if (min_cluster_per_phantom == 2)
      return 2;
   else
      return 4;
}

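/* Dual-purpose helper: with fs_common_size == UINT32_MAX it returns the
 * largest per-instance common store allocation that still allows
 * min_tiles_in_flight tiles; with fs_common_size == 0 it returns the
 * hardware maximum tiles in flight; otherwise it returns how many tiles can
 * be in flight with the given allocation, capped at the hardware maximum.
 *
 * Illustrative example (made-up numbers): with 2048 available shareds,
 * fs_common_size = 128 and one allocation per tile, 2048 / (128 * 2) = 8
 * tiles, minus the one extra shared allocation reserved without ERN 38748,
 * gives 7 tiles before the hardware cap is applied.
 */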
uint32_t pvr_calc_fscommon_size_and_tiles_in_flight(
   const struct pvr_device_info *dev_info,
   const struct pvr_device_runtime_info *dev_runtime_info,
   uint32_t fs_common_size,
   uint32_t min_tiles_in_flight)
{
   const uint32_t available_shareds =
      dev_runtime_info->reserved_shared_size - dev_runtime_info->max_coeffs;
   const uint32_t max_tiles_in_flight =
      PVR_GET_FEATURE_VALUE(dev_info, isp_max_tiles_in_flight, 1U);
   uint32_t num_tile_in_flight;
   uint32_t num_allocs;

   if (fs_common_size == 0)
      return max_tiles_in_flight;

   num_allocs = pvr_get_simultaneous_num_allocs(dev_info, dev_runtime_info);

   if (fs_common_size == UINT32_MAX) {
      uint32_t max_common_size = available_shareds;

      num_allocs *= MIN2(min_tiles_in_flight, max_tiles_in_flight);

      if (!PVR_HAS_ERN(dev_info, 38748)) {
         /* Hardware needs space for one extra shared allocation. */
         num_allocs += 1;
      }

      /* Double resource requirements to deal with fragmentation. */
      max_common_size /= num_allocs * 2;
      max_common_size = MIN2(max_common_size, ROGUE_MAX_PIXEL_SHARED_REGISTERS);
      max_common_size =
         ROUND_DOWN_TO(max_common_size,
                       PVRX(TA_STATE_PDS_SIZEINFO2_USC_SHAREDSIZE_UNIT_SIZE));

      return max_common_size;
   }

   num_tile_in_flight = available_shareds / (fs_common_size * 2);

   if (!PVR_HAS_ERN(dev_info, 38748))
      num_tile_in_flight -= 1;

   num_tile_in_flight /= num_allocs;

#if MESA_DEBUG
   /* Validate the above result. */

   assert(num_tile_in_flight >= MIN2(min_tiles_in_flight, max_tiles_in_flight));
   num_allocs *= num_tile_in_flight;

   if (!PVR_HAS_ERN(dev_info, 38748)) {
      /* Hardware needs space for one extra shared allocation. */
      num_allocs += 1;
   }

   assert(fs_common_size <= available_shareds / (num_allocs * 2));
#endif

   return MIN2(num_tile_in_flight, max_tiles_in_flight);
}

static const VkQueueFamilyProperties pvr_queue_family_properties = {
   .queueFlags = VK_QUEUE_COMPUTE_BIT | VK_QUEUE_GRAPHICS_BIT |
                 VK_QUEUE_TRANSFER_BIT,
   .queueCount = PVR_MAX_QUEUES,
   .timestampValidBits = 0,
   .minImageTransferGranularity = { 1, 1, 1 },
};

static uint64_t pvr_compute_heap_budget(struct pvr_physical_device *pdevice)
{
   const uint64_t heap_size = pdevice->memory.memoryHeaps[0].size;
   const uint64_t heap_used = pdevice->heap_used;
   uint64_t sys_available = 0, heap_available;
   ASSERTED bool has_available_memory =
      os_get_available_system_memory(&sys_available);
   assert(has_available_memory);

   /* Let's not incite the app to starve the system: report at most 90% of
    * available system memory.
    */
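   /* E.g. with a 16 GiB heap, 2 GiB already used and 8 GiB of free system
    * memory, the reported budget is MIN2(16 GiB, 2 GiB + 7.2 GiB) = 9.2 GiB.
    */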
   heap_available = sys_available * 9 / 10;
   return MIN2(heap_size, heap_used + heap_available);
}

void pvr_GetPhysicalDeviceQueueFamilyProperties2(
   VkPhysicalDevice physicalDevice,
   uint32_t *pQueueFamilyPropertyCount,
   VkQueueFamilyProperties2 *pQueueFamilyProperties)
{
   VK_OUTARRAY_MAKE_TYPED(VkQueueFamilyProperties2,
                          out,
                          pQueueFamilyProperties,
                          pQueueFamilyPropertyCount);

   vk_outarray_append_typed (VkQueueFamilyProperties2, &out, p) {
      p->queueFamilyProperties = pvr_queue_family_properties;

      vk_foreach_struct (ext, p->pNext) {
         vk_debug_ignored_stype(ext->sType);
      }
   }
}

void pvr_GetPhysicalDeviceMemoryProperties2(
   VkPhysicalDevice physicalDevice,
   VkPhysicalDeviceMemoryProperties2 *pMemoryProperties)
{
   PVR_FROM_HANDLE(pvr_physical_device, pdevice, physicalDevice);

   pMemoryProperties->memoryProperties = pdevice->memory;

   vk_foreach_struct (ext, pMemoryProperties->pNext) {
      switch (ext->sType) {
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT: {
         VkPhysicalDeviceMemoryBudgetPropertiesEXT *pMemoryBudget =
            (VkPhysicalDeviceMemoryBudgetPropertiesEXT *)ext;

         pMemoryBudget->heapBudget[0] = pvr_compute_heap_budget(pdevice);
         pMemoryBudget->heapUsage[0] = pdevice->heap_used;

         for (uint32_t i = 1; i < VK_MAX_MEMORY_HEAPS; i++) {
            pMemoryBudget->heapBudget[i] = 0u;
            pMemoryBudget->heapUsage[i] = 0u;
         }
         break;
      }
      default:
         vk_debug_ignored_stype(ext->sType);
         break;
      }
   }
}

PFN_vkVoidFunction pvr_GetInstanceProcAddr(VkInstance _instance,
                                           const char *pName)
{
   PVR_FROM_HANDLE(pvr_instance, instance, _instance);
   return vk_instance_get_proc_addr(&instance->vk,
                                    &pvr_instance_entrypoints,
                                    pName);
}

/* With version 1+ of the loader interface the ICD should expose
 * vk_icdGetInstanceProcAddr to work around certain LD_PRELOAD issues seen in
 * apps.
 */
PUBLIC
VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL
vk_icdGetInstanceProcAddr(VkInstance instance, const char *pName)
{
   return pvr_GetInstanceProcAddr(instance, pName);
}

VkResult pvr_pds_compute_shader_create_and_upload(
   struct pvr_device *device,
   struct pvr_pds_compute_shader_program *program,
   struct pvr_pds_upload *const pds_upload_out)
{
   const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
   const uint32_t cache_line_size = rogue_get_slc_cache_line_size(dev_info);
   size_t staging_buffer_size;
   uint32_t *staging_buffer;
   uint32_t *data_buffer;
   uint32_t *code_buffer;
   VkResult result;

   /* Calculate how much space we'll need for the compute shader PDS program.
    */
   pvr_pds_compute_shader(program, NULL, PDS_GENERATE_SIZES, dev_info);

   /* FIXME: Fix the below inconsistency of code size being in bytes whereas
    * data size being in dwords.
    */
   /* Code size is in bytes, data size in dwords. */
   staging_buffer_size =
      PVR_DW_TO_BYTES(program->data_size) + program->code_size;

   staging_buffer = vk_alloc(&device->vk.alloc,
                             staging_buffer_size,
                             8U,
                             VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
   if (!staging_buffer)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   data_buffer = staging_buffer;
   code_buffer = pvr_pds_compute_shader(program,
                                        data_buffer,
                                        PDS_GENERATE_DATA_SEGMENT,
                                        dev_info);

   pvr_pds_compute_shader(program,
                          code_buffer,
                          PDS_GENERATE_CODE_SEGMENT,
                          dev_info);

   result = pvr_gpu_upload_pds(device,
                               data_buffer,
                               program->data_size,
                               PVRX(CDMCTRL_KERNEL1_DATA_ADDR_ALIGNMENT),
                               code_buffer,
                               program->code_size / sizeof(uint32_t),
                               PVRX(CDMCTRL_KERNEL2_CODE_ADDR_ALIGNMENT),
                               cache_line_size,
                               pds_upload_out);

   vk_free(&device->vk.alloc, staging_buffer);

   return result;
}

static VkResult pvr_device_init_compute_fence_program(struct pvr_device *device)
{
   struct pvr_pds_compute_shader_program program;

   pvr_pds_compute_shader_program_init(&program);
   /* Fence kernel. */
   program.fence = true;
   program.clear_pds_barrier = true;

   return pvr_pds_compute_shader_create_and_upload(
      device,
      &program,
      &device->pds_compute_fence_program);
}

static VkResult pvr_device_init_compute_empty_program(struct pvr_device *device)
{
   struct pvr_pds_compute_shader_program program;

   pvr_pds_compute_shader_program_init(&program);
   program.clear_pds_barrier = true;

   return pvr_pds_compute_shader_create_and_upload(
      device,
      &program,
      &device->pds_compute_empty_program);
}

static VkResult pvr_pds_idfwdf_programs_create_and_upload(
   struct pvr_device *device,
   pvr_dev_addr_t usc_addr,
   uint32_t shareds,
   uint32_t temps,
   pvr_dev_addr_t shareds_buffer_addr,
   struct pvr_pds_upload *const upload_out,
   struct pvr_pds_upload *const sw_compute_barrier_upload_out)
{
   const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
   struct pvr_pds_vertex_shader_sa_program program = {
      .kick_usc = true,
      .clear_pds_barrier = PVR_NEED_SW_COMPUTE_PDS_BARRIER(dev_info),
   };
   size_t staging_buffer_size;
   uint32_t *staging_buffer;
   VkResult result;

   /* We'll need to DMA the shareds into the USC's Common Store. */
   program.num_dma_kicks = pvr_pds_encode_dma_burst(program.dma_control,
                                                    program.dma_address,
                                                    0,
                                                    shareds,
                                                    shareds_buffer_addr.addr,
                                                    false,
                                                    dev_info);

   /* DMA temp regs. */
   pvr_pds_setup_doutu(&program.usc_task_control,
                       usc_addr.addr,
                       temps,
                       PVRX(PDSINST_DOUTU_SAMPLE_RATE_INSTANCE),
                       false);

   pvr_pds_vertex_shader_sa(&program, NULL, PDS_GENERATE_SIZES, dev_info);

   staging_buffer_size = PVR_DW_TO_BYTES(program.code_size + program.data_size);

   staging_buffer = vk_alloc(&device->vk.alloc,
                             staging_buffer_size,
                             8,
                             VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
   if (!staging_buffer)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   /* FIXME: Add support for PDS_GENERATE_CODEDATA_SEGMENTS? */
   pvr_pds_vertex_shader_sa(&program,
                            staging_buffer,
                            PDS_GENERATE_DATA_SEGMENT,
                            dev_info);
   pvr_pds_vertex_shader_sa(&program,
                            &staging_buffer[program.data_size],
                            PDS_GENERATE_CODE_SEGMENT,
                            dev_info);

   /* At the time of writing, the SW_COMPUTE_PDS_BARRIER variant of the program
    * is bigger so we handle it first (if needed) and realloc() for a smaller
    * size.
    */
   if (PVR_NEED_SW_COMPUTE_PDS_BARRIER(dev_info)) {
      /* FIXME: Figure out the define for alignment of 16. */
      result = pvr_gpu_upload_pds(device,
                                  &staging_buffer[0],
                                  program.data_size,
                                  16,
                                  &staging_buffer[program.data_size],
                                  program.code_size,
                                  16,
                                  16,
                                  sw_compute_barrier_upload_out);
      if (result != VK_SUCCESS) {
         vk_free(&device->vk.alloc, staging_buffer);
         return result;
      }

      program.clear_pds_barrier = false;

      pvr_pds_vertex_shader_sa(&program, NULL, PDS_GENERATE_SIZES, dev_info);

      staging_buffer_size =
         PVR_DW_TO_BYTES(program.code_size + program.data_size);

      staging_buffer = vk_realloc(&device->vk.alloc,
                                  staging_buffer,
                                  staging_buffer_size,
                                  8,
                                  VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
      if (!staging_buffer) {
         pvr_bo_suballoc_free(sw_compute_barrier_upload_out->pvr_bo);

         return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
      }

      /* FIXME: Add support for PDS_GENERATE_CODEDATA_SEGMENTS? */
      pvr_pds_vertex_shader_sa(&program,
                               staging_buffer,
                               PDS_GENERATE_DATA_SEGMENT,
                               dev_info);
      pvr_pds_vertex_shader_sa(&program,
                               &staging_buffer[program.data_size],
                               PDS_GENERATE_CODE_SEGMENT,
                               dev_info);
   } else {
      *sw_compute_barrier_upload_out = (struct pvr_pds_upload){
         .pvr_bo = NULL,
      };
   }

   /* FIXME: Figure out the define for alignment of 16. */
   result = pvr_gpu_upload_pds(device,
                               &staging_buffer[0],
                               program.data_size,
                               16,
                               &staging_buffer[program.data_size],
                               program.code_size,
                               16,
                               16,
                               upload_out);
   if (result != VK_SUCCESS) {
      vk_free(&device->vk.alloc, staging_buffer);
      pvr_bo_suballoc_free(sw_compute_barrier_upload_out->pvr_bo);

      return result;
   }

   vk_free(&device->vk.alloc, staging_buffer);

   return VK_SUCCESS;
}

static VkResult pvr_device_init_compute_idfwdf_state(struct pvr_device *device)
{
   uint64_t sampler_state[ROGUE_NUM_TEXSTATE_SAMPLER_WORDS];
   uint64_t image_state[ROGUE_NUM_TEXSTATE_IMAGE_WORDS];
   struct util_dynarray usc_program;
   struct pvr_texture_state_info tex_info;
   uint32_t *dword_ptr;
   uint32_t usc_shareds;
   uint32_t usc_temps;
   VkResult result;

   util_dynarray_init(&usc_program, NULL);
   pvr_hard_code_get_idfwdf_program(&device->pdevice->dev_info,
                                    &usc_program,
                                    &usc_shareds,
                                    &usc_temps);

   device->idfwdf_state.usc_shareds = usc_shareds;

   /* FIXME: Figure out the define for alignment of 16. */
   result = pvr_gpu_upload_usc(device,
                               usc_program.data,
                               usc_program.size,
                               16,
                               &device->idfwdf_state.usc);
   util_dynarray_fini(&usc_program);

   if (result != VK_SUCCESS)
      return result;

   /* TODO: Get the store buffer size from the compiler? */
   /* TODO: How was the size derived here? */
   result = pvr_bo_alloc(device,
                         device->heaps.general_heap,
                         4 * sizeof(float) * 4 * 2,
                         4,
                         0,
                         &device->idfwdf_state.store_bo);
   if (result != VK_SUCCESS)
      goto err_free_usc_program;

   result = pvr_bo_alloc(device,
                         device->heaps.general_heap,
                         usc_shareds * ROGUE_REG_SIZE_BYTES,
                         ROGUE_REG_SIZE_BYTES,
                         PVR_BO_ALLOC_FLAG_CPU_MAPPED,
                         &device->idfwdf_state.shareds_bo);
   if (result != VK_SUCCESS)
      goto err_free_store_buffer;

   /* Pack state words. */

   pvr_csb_pack (&sampler_state[0], TEXSTATE_SAMPLER, sampler) {
      sampler.dadjust = PVRX(TEXSTATE_DADJUST_ZERO_UINT);
      sampler.magfilter = PVRX(TEXSTATE_FILTER_POINT);
      sampler.addrmode_u = PVRX(TEXSTATE_ADDRMODE_CLAMP_TO_EDGE);
      sampler.addrmode_v = PVRX(TEXSTATE_ADDRMODE_CLAMP_TO_EDGE);
   }

   /* clang-format off */
   pvr_csb_pack (&sampler_state[1], TEXSTATE_SAMPLER_WORD1, sampler_word1) {}
   /* clang-format on */

   STATIC_ASSERT(1 + 1 == ROGUE_NUM_TEXSTATE_SAMPLER_WORDS);

   tex_info = (struct pvr_texture_state_info){
      .format = VK_FORMAT_R32G32B32A32_SFLOAT,
      .mem_layout = PVR_MEMLAYOUT_LINEAR,
      .flags = PVR_TEXFLAGS_INDEX_LOOKUP,
      .type = VK_IMAGE_VIEW_TYPE_2D,
      .extent = { .width = 4, .height = 2, .depth = 0 },
      .mip_levels = 1,
      .sample_count = 1,
      .stride = 4,
      .swizzle = { PIPE_SWIZZLE_X,
                   PIPE_SWIZZLE_Y,
                   PIPE_SWIZZLE_Z,
                   PIPE_SWIZZLE_W },
      .addr = device->idfwdf_state.store_bo->vma->dev_addr,
   };

   result = pvr_pack_tex_state(device, &tex_info, image_state);
   if (result != VK_SUCCESS)
      goto err_free_shareds_buffer;

   /* Fill the shareds buffer. */

   dword_ptr = (uint32_t *)device->idfwdf_state.shareds_bo->bo->map;

#define HIGH_32(val) ((uint32_t)((val) >> 32U))
#define LOW_32(val) ((uint32_t)(val))

   /* TODO: Should we use compiler info to setup the shareds data instead of
    * assuming there's always 12 and this is how they should be setup?
    */

   dword_ptr[0] = HIGH_32(device->idfwdf_state.store_bo->vma->dev_addr.addr);
   dword_ptr[1] = LOW_32(device->idfwdf_state.store_bo->vma->dev_addr.addr);

   /* Pad the shareds as the texture/sample state words are 128 bit aligned. */
   dword_ptr[2] = 0U;
   dword_ptr[3] = 0U;

   dword_ptr[4] = LOW_32(image_state[0]);
   dword_ptr[5] = HIGH_32(image_state[0]);
   dword_ptr[6] = LOW_32(image_state[1]);
   dword_ptr[7] = HIGH_32(image_state[1]);

   dword_ptr[8] = LOW_32(sampler_state[0]);
   dword_ptr[9] = HIGH_32(sampler_state[0]);
   dword_ptr[10] = LOW_32(sampler_state[1]);
   dword_ptr[11] = HIGH_32(sampler_state[1]);
   assert(11 + 1 == usc_shareds);

#undef HIGH_32
#undef LOW_32

   pvr_bo_cpu_unmap(device, device->idfwdf_state.shareds_bo);
   dword_ptr = NULL;

   /* Generate and upload PDS programs. */
   result = pvr_pds_idfwdf_programs_create_and_upload(
      device,
      device->idfwdf_state.usc->dev_addr,
      usc_shareds,
      usc_temps,
      device->idfwdf_state.shareds_bo->vma->dev_addr,
      &device->idfwdf_state.pds,
      &device->idfwdf_state.sw_compute_barrier_pds);
   if (result != VK_SUCCESS)
      goto err_free_shareds_buffer;

   return VK_SUCCESS;

err_free_shareds_buffer:
   pvr_bo_free(device, device->idfwdf_state.shareds_bo);

err_free_store_buffer:
   pvr_bo_free(device, device->idfwdf_state.store_bo);

err_free_usc_program:
   pvr_bo_suballoc_free(device->idfwdf_state.usc);

   return result;
}

static void pvr_device_finish_compute_idfwdf_state(struct pvr_device *device)
{
   pvr_bo_suballoc_free(device->idfwdf_state.pds.pvr_bo);
   pvr_bo_suballoc_free(device->idfwdf_state.sw_compute_barrier_pds.pvr_bo);
   pvr_bo_free(device, device->idfwdf_state.shareds_bo);
   pvr_bo_free(device, device->idfwdf_state.store_bo);
   pvr_bo_suballoc_free(device->idfwdf_state.usc);
}
1657
1658 /* FIXME: We should be calculating the size when we upload the code in
1659 * pvr_srv_setup_static_pixel_event_program().
1660 */
pvr_device_get_pixel_event_pds_program_data_size(const struct pvr_device_info * dev_info,uint32_t * const data_size_in_dwords_out)1661 static void pvr_device_get_pixel_event_pds_program_data_size(
1662 const struct pvr_device_info *dev_info,
1663 uint32_t *const data_size_in_dwords_out)
1664 {
1665 struct pvr_pds_event_program program = {
1666 /* No data to DMA, just a DOUTU needed. */
1667 .num_emit_word_pairs = 0,
1668 };
1669
1670 pvr_pds_set_sizes_pixel_event(&program, dev_info);
1671
1672 *data_size_in_dwords_out = program.data_size;
1673 }
1674
pvr_device_init_nop_program(struct pvr_device * device)1675 static VkResult pvr_device_init_nop_program(struct pvr_device *device)
1676 {
1677 const uint32_t cache_line_size =
1678 rogue_get_slc_cache_line_size(&device->pdevice->dev_info);
1679 struct pvr_pds_kickusc_program program = { 0 };
1680 struct util_dynarray nop_usc_bin;
1681 uint32_t staging_buffer_size;
1682 uint32_t *staging_buffer;
1683 VkResult result;
1684
1685 pvr_uscgen_nop(&nop_usc_bin);
1686
1687 result = pvr_gpu_upload_usc(device,
1688 util_dynarray_begin(&nop_usc_bin),
1689 nop_usc_bin.size,
1690 cache_line_size,
1691 &device->nop_program.usc);
1692 util_dynarray_fini(&nop_usc_bin);
1693 if (result != VK_SUCCESS)
1694 return result;
1695
1696 /* Setup a PDS program that kicks the static USC program. */
1697 pvr_pds_setup_doutu(&program.usc_task_control,
1698 device->nop_program.usc->dev_addr.addr,
1699 0U,
1700 PVRX(PDSINST_DOUTU_SAMPLE_RATE_INSTANCE),
1701 false);
1702
1703 pvr_pds_set_sizes_pixel_shader(&program);
1704
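   /* The staging buffer holds the PDS data segment immediately followed by
    * the code segment, hence the code being uploaded from
    * &staging_buffer[program.data_size] below.
    */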
   staging_buffer_size = PVR_DW_TO_BYTES(program.code_size + program.data_size);

   staging_buffer = vk_alloc(&device->vk.alloc,
                             staging_buffer_size,
                             8U,
                             VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
   if (!staging_buffer) {
      result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
      goto err_free_nop_usc_bo;
   }

   pvr_pds_generate_pixel_shader_program(&program, staging_buffer);

   /* FIXME: Figure out the define for alignment of 16. */
   result = pvr_gpu_upload_pds(device,
                               staging_buffer,
                               program.data_size,
                               16U,
                               &staging_buffer[program.data_size],
                               program.code_size,
                               16U,
                               16U,
                               &device->nop_program.pds);
   if (result != VK_SUCCESS)
      goto err_free_staging_buffer;

   vk_free(&device->vk.alloc, staging_buffer);

   return VK_SUCCESS;

err_free_staging_buffer:
   vk_free(&device->vk.alloc, staging_buffer);

err_free_nop_usc_bo:
   pvr_bo_suballoc_free(device->nop_program.usc);

   return result;
}

static void pvr_device_init_tile_buffer_state(struct pvr_device *device)
{
   simple_mtx_init(&device->tile_buffer_state.mtx, mtx_plain);

   for (uint32_t i = 0; i < ARRAY_SIZE(device->tile_buffer_state.buffers); i++)
      device->tile_buffer_state.buffers[i] = NULL;

   device->tile_buffer_state.buffer_count = 0;
}

static void pvr_device_finish_tile_buffer_state(struct pvr_device *device)
{
   /* Destroy the mutex first so that, if it is still locked, asserts fire.
    * This keeps us from putting things in an inconsistent state by freeing
    * buffers that might still be in use, or by freeing buffers while new
    * ones are being allocated.
    */
   simple_mtx_destroy(&device->tile_buffer_state.mtx);

   for (uint32_t i = 0; i < device->tile_buffer_state.buffer_count; i++)
      pvr_bo_free(device, device->tile_buffer_state.buffers[i]);
}

/**
 * \brief Ensures that a certain number of tile buffers are allocated.
 *
 * Makes sure that \p capacity tile buffers are allocated. If fewer are
 * present, new tile buffers of \p size_in_bytes each are appended to reach
 * the quota.
 */
VkResult pvr_device_tile_buffer_ensure_cap(struct pvr_device *device,
                                           uint32_t capacity,
                                           uint32_t size_in_bytes)
{
   struct pvr_device_tile_buffer_state *tile_buffer_state =
      &device->tile_buffer_state;
   const uint32_t cache_line_size =
      rogue_get_slc_cache_line_size(&device->pdevice->dev_info);
   VkResult result;

   simple_mtx_lock(&tile_buffer_state->mtx);

   /* Clamping in release and asserting in debug. */
   assert(capacity <= ARRAY_SIZE(tile_buffer_state->buffers));
   capacity = CLAMP(capacity,
                    tile_buffer_state->buffer_count,
                    ARRAY_SIZE(tile_buffer_state->buffers));

   /* TODO: Implement bo multialloc to reduce the number of syscalls and
    * allocations?
    */
   for (uint32_t i = tile_buffer_state->buffer_count; i < capacity; i++) {
      result = pvr_bo_alloc(device,
                            device->heaps.general_heap,
                            size_in_bytes,
                            cache_line_size,
                            0,
                            &tile_buffer_state->buffers[i]);
      if (result != VK_SUCCESS) {
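         /* Free only the buffers allocated by this call; those up to
          * buffer_count existed beforehand and must stay allocated.
          */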
         for (uint32_t j = tile_buffer_state->buffer_count; j < i; j++)
            pvr_bo_free(device, tile_buffer_state->buffers[j]);

         goto err_release_lock;
      }
   }

   tile_buffer_state->buffer_count = capacity;

   simple_mtx_unlock(&tile_buffer_state->mtx);

   return VK_SUCCESS;

err_release_lock:
   simple_mtx_unlock(&tile_buffer_state->mtx);

   return result;
}

static void pvr_device_init_default_sampler_state(struct pvr_device *device)
{
   pvr_csb_pack (&device->input_attachment_sampler, TEXSTATE_SAMPLER, sampler) {
      sampler.addrmode_u = PVRX(TEXSTATE_ADDRMODE_CLAMP_TO_EDGE);
      sampler.addrmode_v = PVRX(TEXSTATE_ADDRMODE_CLAMP_TO_EDGE);
      sampler.addrmode_w = PVRX(TEXSTATE_ADDRMODE_CLAMP_TO_EDGE);
      sampler.dadjust = PVRX(TEXSTATE_DADJUST_ZERO_UINT);
      sampler.magfilter = PVRX(TEXSTATE_FILTER_POINT);
      sampler.minfilter = PVRX(TEXSTATE_FILTER_POINT);
      sampler.anisoctl = PVRX(TEXSTATE_ANISOCTL_DISABLED);
      sampler.non_normalized_coords = true;
   }
}

VkResult pvr_CreateDevice(VkPhysicalDevice physicalDevice,
                          const VkDeviceCreateInfo *pCreateInfo,
                          const VkAllocationCallbacks *pAllocator,
                          VkDevice *pDevice)
{
   PVR_FROM_HANDLE(pvr_physical_device, pdevice, physicalDevice);
   uint32_t initial_free_list_size = PVR_GLOBAL_FREE_LIST_INITIAL_SIZE;
   struct pvr_instance *instance = pdevice->instance;
   struct vk_device_dispatch_table dispatch_table;
   struct pvr_device *device;
   struct pvr_winsys *ws;
   VkResult result;

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO);

   result = pvr_winsys_create(pdevice->render_path,
                              pdevice->display_path,
                              pAllocator ? pAllocator : &instance->vk.alloc,
                              &ws);
   if (result != VK_SUCCESS)
      goto err_out;

   device = vk_alloc2(&instance->vk.alloc,
                      pAllocator,
                      sizeof(*device),
                      8,
                      VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
   if (!device) {
      result = vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY);
      goto err_pvr_winsys_destroy;
   }

   vk_device_dispatch_table_from_entrypoints(&dispatch_table,
                                             &pvr_device_entrypoints,
                                             true);

   vk_device_dispatch_table_from_entrypoints(&dispatch_table,
                                             &wsi_device_entrypoints,
                                             false);

   result = vk_device_init(&device->vk,
                           &pdevice->vk,
                           &dispatch_table,
                           pCreateInfo,
                           pAllocator);
   if (result != VK_SUCCESS)
      goto err_free_device;

   device->instance = instance;
   device->pdevice = pdevice;
   device->ws = ws;

   vk_device_set_drm_fd(&device->vk, ws->render_fd);

   if (ws->features.supports_threaded_submit) {
      /* Queue submission can be blocked if the kernel CCBs become full,
       * so enable threaded submit to not block the submitter.
       */
      vk_device_enable_threaded_submit(&device->vk);
   }

   ws->ops->get_heaps_info(ws, &device->heaps);

   result = pvr_bo_store_create(device);
   if (result != VK_SUCCESS)
      goto err_vk_device_finish;

   pvr_bo_suballocator_init(&device->suballoc_general,
                            device->heaps.general_heap,
                            device,
                            PVR_SUBALLOCATOR_GENERAL_SIZE);
   pvr_bo_suballocator_init(&device->suballoc_pds,
                            device->heaps.pds_heap,
                            device,
                            PVR_SUBALLOCATOR_PDS_SIZE);
   pvr_bo_suballocator_init(&device->suballoc_transfer,
                            device->heaps.transfer_frag_heap,
                            device,
                            PVR_SUBALLOCATOR_TRANSFER_SIZE);
   pvr_bo_suballocator_init(&device->suballoc_usc,
                            device->heaps.usc_heap,
                            device,
                            PVR_SUBALLOCATOR_USC_SIZE);
   pvr_bo_suballocator_init(&device->suballoc_vis_test,
                            device->heaps.vis_test_heap,
                            device,
                            PVR_SUBALLOCATOR_VIS_TEST_SIZE);

   if (p_atomic_inc_return(&instance->active_device_count) >
       PVR_SECONDARY_DEVICE_THRESHOLD) {
      initial_free_list_size = PVR_SECONDARY_DEVICE_FREE_LIST_INITAL_SIZE;
   }

   result = pvr_free_list_create(device,
                                 initial_free_list_size,
                                 PVR_GLOBAL_FREE_LIST_MAX_SIZE,
                                 PVR_GLOBAL_FREE_LIST_GROW_SIZE,
                                 PVR_GLOBAL_FREE_LIST_GROW_THRESHOLD,
                                 NULL /* parent_free_list */,
                                 &device->global_free_list);
   if (result != VK_SUCCESS)
      goto err_dec_device_count;

   result = pvr_device_init_nop_program(device);
   if (result != VK_SUCCESS)
      goto err_pvr_free_list_destroy;

   result = pvr_device_init_compute_fence_program(device);
   if (result != VK_SUCCESS)
      goto err_pvr_free_nop_program;

   result = pvr_device_init_compute_empty_program(device);
   if (result != VK_SUCCESS)
      goto err_pvr_free_compute_fence;

   result = pvr_device_create_compute_query_programs(device);
   if (result != VK_SUCCESS)
      goto err_pvr_free_compute_empty;

   result = pvr_device_init_compute_idfwdf_state(device);
   if (result != VK_SUCCESS)
      goto err_pvr_destroy_compute_query_programs;

   result = pvr_device_init_graphics_static_clear_state(device);
   if (result != VK_SUCCESS)
      goto err_pvr_finish_compute_idfwdf;

   result = pvr_device_init_spm_load_state(device);
   if (result != VK_SUCCESS)
      goto err_pvr_finish_graphics_static_clear_state;

   pvr_device_init_tile_buffer_state(device);

   result = pvr_queues_create(device, pCreateInfo);
   if (result != VK_SUCCESS)
      goto err_pvr_finish_tile_buffer_state;

   pvr_device_init_default_sampler_state(device);

   pvr_spm_init_scratch_buffer_store(device);

   result = pvr_init_robustness_buffer(device);
   if (result != VK_SUCCESS)
      goto err_pvr_spm_finish_scratch_buffer_store;

   result = pvr_border_color_table_init(&device->border_color_table, device);
   if (result != VK_SUCCESS)
      goto err_pvr_robustness_buffer_finish;

   /* FIXME: Move this to a later stage and possibly somewhere other than
    * pvr_device. The purpose of this is so that we don't have to get the size
    * on each kick.
    */
   pvr_device_get_pixel_event_pds_program_data_size(
      &pdevice->dev_info,
      &device->pixel_event_data_size_in_dwords);

   device->global_cmd_buffer_submit_count = 0;
   device->global_queue_present_count = 0;

   *pDevice = pvr_device_to_handle(device);

   return VK_SUCCESS;

err_pvr_robustness_buffer_finish:
   pvr_robustness_buffer_finish(device);

err_pvr_spm_finish_scratch_buffer_store:
   pvr_spm_finish_scratch_buffer_store(device);

   pvr_queues_destroy(device);

err_pvr_finish_tile_buffer_state:
   pvr_device_finish_tile_buffer_state(device);
   pvr_device_finish_spm_load_state(device);

err_pvr_finish_graphics_static_clear_state:
   pvr_device_finish_graphics_static_clear_state(device);

err_pvr_finish_compute_idfwdf:
   pvr_device_finish_compute_idfwdf_state(device);

err_pvr_destroy_compute_query_programs:
   pvr_device_destroy_compute_query_programs(device);

err_pvr_free_compute_empty:
   pvr_bo_suballoc_free(device->pds_compute_empty_program.pvr_bo);

err_pvr_free_compute_fence:
   pvr_bo_suballoc_free(device->pds_compute_fence_program.pvr_bo);

err_pvr_free_nop_program:
   pvr_bo_suballoc_free(device->nop_program.pds.pvr_bo);
   pvr_bo_suballoc_free(device->nop_program.usc);

err_pvr_free_list_destroy:
   pvr_free_list_destroy(device->global_free_list);

err_dec_device_count:
   p_atomic_dec(&device->instance->active_device_count);

   pvr_bo_suballocator_fini(&device->suballoc_vis_test);
   pvr_bo_suballocator_fini(&device->suballoc_usc);
   pvr_bo_suballocator_fini(&device->suballoc_transfer);
   pvr_bo_suballocator_fini(&device->suballoc_pds);
   pvr_bo_suballocator_fini(&device->suballoc_general);

   pvr_bo_store_destroy(device);

err_vk_device_finish:
   vk_device_finish(&device->vk);

err_free_device:
   vk_free(&device->vk.alloc, device);

err_pvr_winsys_destroy:
   pvr_winsys_destroy(ws);

err_out:
   return result;
}

void pvr_DestroyDevice(VkDevice _device,
                       const VkAllocationCallbacks *pAllocator)
{
   PVR_FROM_HANDLE(pvr_device, device, _device);

   if (!device)
      return;

   pvr_border_color_table_finish(&device->border_color_table, device);
   pvr_robustness_buffer_finish(device);
   pvr_spm_finish_scratch_buffer_store(device);
   pvr_queues_destroy(device);
   pvr_device_finish_tile_buffer_state(device);
   pvr_device_finish_spm_load_state(device);
   pvr_device_finish_graphics_static_clear_state(device);
   pvr_device_finish_compute_idfwdf_state(device);
   pvr_device_destroy_compute_query_programs(device);
   pvr_bo_suballoc_free(device->pds_compute_empty_program.pvr_bo);
   pvr_bo_suballoc_free(device->pds_compute_fence_program.pvr_bo);
   pvr_bo_suballoc_free(device->nop_program.pds.pvr_bo);
   pvr_bo_suballoc_free(device->nop_program.usc);
   pvr_free_list_destroy(device->global_free_list);
   pvr_bo_suballocator_fini(&device->suballoc_vis_test);
   pvr_bo_suballocator_fini(&device->suballoc_usc);
   pvr_bo_suballocator_fini(&device->suballoc_transfer);
   pvr_bo_suballocator_fini(&device->suballoc_pds);
   pvr_bo_suballocator_fini(&device->suballoc_general);
   pvr_bo_store_destroy(device);
   pvr_winsys_destroy(device->ws);
   p_atomic_dec(&device->instance->active_device_count);
   vk_device_finish(&device->vk);
   vk_free(&device->vk.alloc, device);
}

VkResult pvr_EnumerateInstanceLayerProperties(uint32_t *pPropertyCount,
                                              VkLayerProperties *pProperties)
{
   if (!pProperties) {
      *pPropertyCount = 0;
      return VK_SUCCESS;
   }

   return vk_error(NULL, VK_ERROR_LAYER_NOT_PRESENT);
}

static void free_memory(struct pvr_device *device,
                        struct pvr_device_memory *mem,
                        const VkAllocationCallbacks *pAllocator)
{
   if (!mem)
      return;

   /* From the Vulkan spec (§11.2.13. Freeing Device Memory):
    *   If a memory object is mapped at the time it is freed, it is implicitly
    *   unmapped.
    */
   if (mem->bo->map)
      device->ws->ops->buffer_unmap(mem->bo);

   p_atomic_add(&device->pdevice->heap_used, -mem->bo->size);

   device->ws->ops->buffer_destroy(mem->bo);

   vk_object_free(&device->vk, pAllocator, mem);
}

VkResult pvr_AllocateMemory(VkDevice _device,
                            const VkMemoryAllocateInfo *pAllocateInfo,
                            const VkAllocationCallbacks *pAllocator,
                            VkDeviceMemory *pMem)
{
   const VkImportMemoryFdInfoKHR *fd_info = NULL;
   PVR_FROM_HANDLE(pvr_device, device, _device);
   enum pvr_winsys_bo_type type = PVR_WINSYS_BO_TYPE_GPU;
   struct pvr_device_memory *mem;
   uint64_t heap_used;
   VkResult result;

   assert(pAllocateInfo->sType == VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO);
   assert(pAllocateInfo->allocationSize > 0);

   mem = vk_object_alloc(&device->vk,
                         pAllocator,
                         sizeof(*mem),
                         VK_OBJECT_TYPE_DEVICE_MEMORY);
   if (!mem)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   vk_foreach_struct_const (ext, pAllocateInfo->pNext) {
      switch ((unsigned)ext->sType) {
      case VK_STRUCTURE_TYPE_WSI_MEMORY_ALLOCATE_INFO_MESA:
         if (device->ws->display_fd >= 0)
            type = PVR_WINSYS_BO_TYPE_DISPLAY;
         break;
      case VK_STRUCTURE_TYPE_IMPORT_MEMORY_FD_INFO_KHR:
         fd_info = (void *)ext;
         break;
      case VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO:
         break;
      default:
         vk_debug_ignored_stype(ext->sType);
         break;
      }
   }

   if (fd_info && fd_info->handleType) {
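      /* Mappings are page-granular, so the imported bo presumably needs to
       * be able to back the allocation size rounded up to a whole page.
       */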
      VkDeviceSize aligned_alloc_size =
         ALIGN_POT(pAllocateInfo->allocationSize, device->ws->page_size);

      assert(
         fd_info->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT ||
         fd_info->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);

      result = device->ws->ops->buffer_create_from_fd(device->ws,
                                                      fd_info->fd,
                                                      &mem->bo);
      if (result != VK_SUCCESS)
         goto err_vk_object_free_mem;

      /* For security purposes, we reject importing the bo if it's smaller
       * than the requested allocation size. This prevents a malicious client
       * from passing a buffer to a trusted client, lying about the size, and
       * telling the trusted client to try and texture from an image that goes
       * out-of-bounds. This sort of thing could lead to GPU hangs or worse
       * in the trusted client. The trusted client can protect itself against
       * this sort of attack but only if it can trust the buffer size.
       */
      if (aligned_alloc_size > mem->bo->size) {
         result = vk_errorf(device,
                            VK_ERROR_INVALID_EXTERNAL_HANDLE,
                            "Aligned requested size too large for the given fd "
                            "%" PRIu64 "B > %" PRIu64 "B",
                            pAllocateInfo->allocationSize,
                            mem->bo->size);
         device->ws->ops->buffer_destroy(mem->bo);
         goto err_vk_object_free_mem;
      }

      /* From the Vulkan spec:
       *
       *    "Importing memory from a file descriptor transfers ownership of
       *    the file descriptor from the application to the Vulkan
       *    implementation. The application must not perform any operations on
       *    the file descriptor after a successful import."
       *
       * If the import fails, we leave the file descriptor open.
       */
      close(fd_info->fd);
   } else {
      /* Align physical allocations to the page size of the heap that will be
       * used when binding device memory (see pvr_bind_memory()) to ensure the
       * entire allocation can be mapped.
       */
      const uint64_t alignment = device->heaps.general_heap->page_size;

      /* FIXME: Need to determine the flags based on
       * device->pdevice->memory.memoryTypes[pAllocateInfo->memoryTypeIndex].propertyFlags.
       *
       * The alternative would be to store the flags alongside the memory
       * types as an array that's indexed by pAllocateInfo->memoryTypeIndex so
       * that they can be looked up.
       */
      result = device->ws->ops->buffer_create(device->ws,
                                              pAllocateInfo->allocationSize,
                                              alignment,
                                              type,
                                              PVR_WINSYS_BO_FLAG_CPU_ACCESS,
                                              &mem->bo);
      if (result != VK_SUCCESS)
         goto err_vk_object_free_mem;
   }

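   /* Account the allocation against heap 0 (the only heap checked here) and
    * fail once the running total would exceed its size; free_memory()
    * performs the matching decrement.
    */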
   heap_used = p_atomic_add_return(&device->pdevice->heap_used, mem->bo->size);
   if (heap_used > device->pdevice->memory.memoryHeaps[0].size) {
      free_memory(device, mem, pAllocator);
      return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
   }

   *pMem = pvr_device_memory_to_handle(mem);

   return VK_SUCCESS;

err_vk_object_free_mem:
   vk_object_free(&device->vk, pAllocator, mem);

   return result;
}

VkResult pvr_GetMemoryFdKHR(VkDevice _device,
                            const VkMemoryGetFdInfoKHR *pGetFdInfo,
                            int *pFd)
{
   PVR_FROM_HANDLE(pvr_device, device, _device);
   PVR_FROM_HANDLE(pvr_device_memory, mem, pGetFdInfo->memory);

   assert(pGetFdInfo->sType == VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR);

   assert(
      pGetFdInfo->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT ||
      pGetFdInfo->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);

   return device->ws->ops->buffer_get_fd(mem->bo, pFd);
}

VkResult
pvr_GetMemoryFdPropertiesKHR(VkDevice _device,
                             VkExternalMemoryHandleTypeFlagBits handleType,
                             int fd,
                             VkMemoryFdPropertiesKHR *pMemoryFdProperties)
{
   PVR_FROM_HANDLE(pvr_device, device, _device);

   switch (handleType) {
   case VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT:
      /* FIXME: This should only allow memory types that have the
       * VK_MEMORY_PROPERTY_HOST_CACHED_BIT flag set, as dma-bufs should be
       * imported using cacheable memory types, given that the exporter's
       * mmap will always map them as cacheable.
       * Ref:
       * https://www.kernel.org/doc/html/latest/driver-api/dma-buf.html#c.dma_buf_ops
       */
      pMemoryFdProperties->memoryTypeBits =
         (1 << device->pdevice->memory.memoryTypeCount) - 1;
      return VK_SUCCESS;
   default:
      return vk_error(device, VK_ERROR_INVALID_EXTERNAL_HANDLE);
   }
}

void pvr_FreeMemory(VkDevice _device,
                    VkDeviceMemory _mem,
                    const VkAllocationCallbacks *pAllocator)
{
   PVR_FROM_HANDLE(pvr_device, device, _device);
   PVR_FROM_HANDLE(pvr_device_memory, mem, _mem);

   free_memory(device, mem, pAllocator);
}

VkResult pvr_MapMemory(VkDevice _device,
                       VkDeviceMemory _memory,
                       VkDeviceSize offset,
                       VkDeviceSize size,
                       VkMemoryMapFlags flags,
                       void **ppData)
{
   PVR_FROM_HANDLE(pvr_device, device, _device);
   PVR_FROM_HANDLE(pvr_device_memory, mem, _memory);
   VkResult result;

   if (!mem) {
      *ppData = NULL;
      return VK_SUCCESS;
   }

   if (size == VK_WHOLE_SIZE)
      size = mem->bo->size - offset;

   /* From the Vulkan spec version 1.0.32 docs for MapMemory:
    *
    *  * If size is not equal to VK_WHOLE_SIZE, size must be greater than 0
    *  * If size is not equal to VK_WHOLE_SIZE, size must be less than or
    *    equal to the size of the memory minus offset
    */

   assert(size > 0);
   assert(offset + size <= mem->bo->size);

   /* Check if already mapped */
   if (mem->bo->map) {
      *ppData = (uint8_t *)mem->bo->map + offset;
      return VK_SUCCESS;
   }

   /* Map it all at once */
   result = device->ws->ops->buffer_map(mem->bo);
   if (result != VK_SUCCESS)
      return result;

   *ppData = (uint8_t *)mem->bo->map + offset;

   return VK_SUCCESS;
}

void pvr_UnmapMemory(VkDevice _device, VkDeviceMemory _memory)
{
   PVR_FROM_HANDLE(pvr_device, device, _device);
   PVR_FROM_HANDLE(pvr_device_memory, mem, _memory);

   if (!mem || !mem->bo->map)
      return;

   device->ws->ops->buffer_unmap(mem->bo);
}

VkResult pvr_FlushMappedMemoryRanges(VkDevice _device,
                                     uint32_t memoryRangeCount,
                                     const VkMappedMemoryRange *pMemoryRanges)
{
   return VK_SUCCESS;
}

VkResult
pvr_InvalidateMappedMemoryRanges(VkDevice _device,
                                 uint32_t memoryRangeCount,
                                 const VkMappedMemoryRange *pMemoryRanges)
{
   return VK_SUCCESS;
}

void pvr_GetImageSparseMemoryRequirements2(
   VkDevice device,
   const VkImageSparseMemoryRequirementsInfo2 *pInfo,
   uint32_t *pSparseMemoryRequirementCount,
   VkSparseImageMemoryRequirements2 *pSparseMemoryRequirements)
{
   *pSparseMemoryRequirementCount = 0;
}

void pvr_GetDeviceMemoryCommitment(VkDevice device,
                                   VkDeviceMemory memory,
                                   VkDeviceSize *pCommittedMemoryInBytes)
{
   *pCommittedMemoryInBytes = 0;
}

VkResult pvr_bind_memory(struct pvr_device *device,
                         struct pvr_device_memory *mem,
                         VkDeviceSize offset,
                         VkDeviceSize size,
                         VkDeviceSize alignment,
                         struct pvr_winsys_vma **const vma_out,
                         pvr_dev_addr_t *const dev_addr_out)
{
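   /* The virtual reservation must also cover any sub-page offset, since the
    * mapping effectively starts at the page containing 'offset'.
    */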
   VkDeviceSize virt_size =
      size + (offset & (device->heaps.general_heap->page_size - 1));
   struct pvr_winsys_vma *vma;
   pvr_dev_addr_t dev_addr;
   VkResult result;

   /* Valid usage:
    *
    *   "memoryOffset must be an integer multiple of the alignment member of
    *   the VkMemoryRequirements structure returned from a call to
    *   vkGetBufferMemoryRequirements with buffer"
    *
    *   "memoryOffset must be an integer multiple of the alignment member of
    *   the VkMemoryRequirements structure returned from a call to
    *   vkGetImageMemoryRequirements with image"
    */
   assert(offset % alignment == 0);
   assert(offset < mem->bo->size);

   result = device->ws->ops->heap_alloc(device->heaps.general_heap,
                                        virt_size,
                                        alignment,
                                        &vma);
   if (result != VK_SUCCESS)
      goto err_out;

   result = device->ws->ops->vma_map(vma, mem->bo, offset, size, &dev_addr);
   if (result != VK_SUCCESS)
      goto err_free_vma;

   *dev_addr_out = dev_addr;
   *vma_out = vma;

   return VK_SUCCESS;

err_free_vma:
   device->ws->ops->heap_free(vma);

err_out:
   return result;
}

void pvr_unbind_memory(struct pvr_device *device, struct pvr_winsys_vma *vma)
{
   device->ws->ops->vma_unmap(vma);
   device->ws->ops->heap_free(vma);
}

VkResult pvr_BindBufferMemory2(VkDevice _device,
                               uint32_t bindInfoCount,
                               const VkBindBufferMemoryInfo *pBindInfos)
{
   PVR_FROM_HANDLE(pvr_device, device, _device);
   uint32_t i;

   for (i = 0; i < bindInfoCount; i++) {
      PVR_FROM_HANDLE(pvr_device_memory, mem, pBindInfos[i].memory);
      PVR_FROM_HANDLE(pvr_buffer, buffer, pBindInfos[i].buffer);

      VkResult result = pvr_bind_memory(device,
                                        mem,
                                        pBindInfos[i].memoryOffset,
                                        buffer->vk.size,
                                        buffer->alignment,
                                        &buffer->vma,
                                        &buffer->dev_addr);
      if (result != VK_SUCCESS) {
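         /* Unwind the bindings made earlier in this call so that either
          * every buffer in pBindInfos ends up bound or none of them do.
          */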
         while (i--) {
            PVR_FROM_HANDLE(pvr_buffer, buffer, pBindInfos[i].buffer);
            pvr_unbind_memory(device, buffer->vma);
         }

         return result;
      }
   }

   return VK_SUCCESS;
}

VkResult pvr_QueueBindSparse(VkQueue _queue,
                             uint32_t bindInfoCount,
                             const VkBindSparseInfo *pBindInfo,
                             VkFence fence)
{
   return VK_SUCCESS;
}

/* Event functions. */

VkResult pvr_CreateEvent(VkDevice _device,
                         const VkEventCreateInfo *pCreateInfo,
                         const VkAllocationCallbacks *pAllocator,
                         VkEvent *pEvent)
{
   PVR_FROM_HANDLE(pvr_device, device, _device);

   struct pvr_event *event = vk_object_alloc(&device->vk,
                                             pAllocator,
                                             sizeof(*event),
                                             VK_OBJECT_TYPE_EVENT);
   if (!event)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   event->sync = NULL;
   event->state = PVR_EVENT_STATE_RESET_BY_HOST;

   *pEvent = pvr_event_to_handle(event);

   return VK_SUCCESS;
}

void pvr_DestroyEvent(VkDevice _device,
                      VkEvent _event,
                      const VkAllocationCallbacks *pAllocator)
{
   PVR_FROM_HANDLE(pvr_device, device, _device);
   PVR_FROM_HANDLE(pvr_event, event, _event);

   if (!event)
      return;

   if (event->sync)
      vk_sync_destroy(&device->vk, event->sync);

   vk_object_free(&device->vk, pAllocator, event);
}

VkResult pvr_GetEventStatus(VkDevice _device, VkEvent _event)
{
   PVR_FROM_HANDLE(pvr_device, device, _device);
   PVR_FROM_HANDLE(pvr_event, event, _event);
   VkResult result;

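   /* For device-side transitions the state is carried by a sync object: a
    * non-blocking vk_sync_wait() (timeout of 0) that succeeds indicates the
    * device-side set/reset has actually executed, so the recorded state has
    * taken effect; otherwise the opposite state is reported.
    */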
   switch (event->state) {
   case PVR_EVENT_STATE_SET_BY_DEVICE:
      if (!event->sync)
         return VK_EVENT_RESET;

      result =
         vk_sync_wait(&device->vk, event->sync, 0U, VK_SYNC_WAIT_COMPLETE, 0);
      result = (result == VK_SUCCESS) ? VK_EVENT_SET : VK_EVENT_RESET;
      break;

   case PVR_EVENT_STATE_RESET_BY_DEVICE:
      if (!event->sync)
         return VK_EVENT_RESET;

      result =
         vk_sync_wait(&device->vk, event->sync, 0U, VK_SYNC_WAIT_COMPLETE, 0);
      result = (result == VK_SUCCESS) ? VK_EVENT_RESET : VK_EVENT_SET;
      break;

   case PVR_EVENT_STATE_SET_BY_HOST:
      result = VK_EVENT_SET;
      break;

   case PVR_EVENT_STATE_RESET_BY_HOST:
      result = VK_EVENT_RESET;
      break;

   default:
      unreachable("Event object in unknown state");
   }

   return result;
}

VkResult pvr_SetEvent(VkDevice _device, VkEvent _event)
{
   PVR_FROM_HANDLE(pvr_event, event, _event);

   if (event->sync) {
      PVR_FROM_HANDLE(pvr_device, device, _device);

      const VkResult result = vk_sync_signal(&device->vk, event->sync, 0);
      if (result != VK_SUCCESS)
         return result;
   }

   event->state = PVR_EVENT_STATE_SET_BY_HOST;

   return VK_SUCCESS;
}

VkResult pvr_ResetEvent(VkDevice _device, VkEvent _event)
{
   PVR_FROM_HANDLE(pvr_event, event, _event);

   if (event->sync) {
      PVR_FROM_HANDLE(pvr_device, device, _device);

      const VkResult result = vk_sync_reset(&device->vk, event->sync);
      if (result != VK_SUCCESS)
         return result;
   }

   event->state = PVR_EVENT_STATE_RESET_BY_HOST;

   return VK_SUCCESS;
}

/* Buffer functions. */

VkResult pvr_CreateBuffer(VkDevice _device,
                          const VkBufferCreateInfo *pCreateInfo,
                          const VkAllocationCallbacks *pAllocator,
                          VkBuffer *pBuffer)
{
   PVR_FROM_HANDLE(pvr_device, device, _device);
   const uint32_t alignment = 4096;
   struct pvr_buffer *buffer;

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO);
   assert(pCreateInfo->usage != 0);

   /* We check against (ULONG_MAX - alignment) to prevent overflow issues */
   if (pCreateInfo->size >= ULONG_MAX - alignment)
      return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);

   buffer =
      vk_buffer_create(&device->vk, pCreateInfo, pAllocator, sizeof(*buffer));
   if (!buffer)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   buffer->alignment = alignment;

   *pBuffer = pvr_buffer_to_handle(buffer);

   return VK_SUCCESS;
}

void pvr_DestroyBuffer(VkDevice _device,
                       VkBuffer _buffer,
                       const VkAllocationCallbacks *pAllocator)
{
   PVR_FROM_HANDLE(pvr_device, device, _device);
   PVR_FROM_HANDLE(pvr_buffer, buffer, _buffer);

   if (!buffer)
      return;

   if (buffer->vma)
      pvr_unbind_memory(device, buffer->vma);

   vk_buffer_destroy(&device->vk, pAllocator, &buffer->vk);
}

VkResult pvr_gpu_upload(struct pvr_device *device,
                        struct pvr_winsys_heap *heap,
                        const void *data,
                        size_t size,
                        uint64_t alignment,
                        struct pvr_suballoc_bo **const pvr_bo_out)
{
   struct pvr_suballoc_bo *suballoc_bo = NULL;
   struct pvr_suballocator *allocator;
   VkResult result;
   void *map;

   assert(size > 0);

   if (heap == device->heaps.general_heap)
      allocator = &device->suballoc_general;
   else if (heap == device->heaps.pds_heap)
      allocator = &device->suballoc_pds;
   else if (heap == device->heaps.transfer_frag_heap)
      allocator = &device->suballoc_transfer;
   else if (heap == device->heaps.usc_heap)
      allocator = &device->suballoc_usc;
   else
      unreachable("Unknown heap type");

   result = pvr_bo_suballoc(allocator, size, alignment, false, &suballoc_bo);
   if (result != VK_SUCCESS)
      return result;

   map = pvr_bo_suballoc_get_map_addr(suballoc_bo);
   memcpy(map, data, size);

   *pvr_bo_out = suballoc_bo;

   return VK_SUCCESS;
}

VkResult pvr_gpu_upload_usc(struct pvr_device *device,
                            const void *code,
                            size_t code_size,
                            uint64_t code_alignment,
                            struct pvr_suballoc_bo **const pvr_bo_out)
{
   struct pvr_suballoc_bo *suballoc_bo = NULL;
   VkResult result;
   void *map;

   assert(code_size > 0);

   /* The USC will prefetch the next instruction, so over-allocate by one
    * instruction to prevent reading off the end of a page into a potentially
    * unallocated page.
    */
   result = pvr_bo_suballoc(&device->suballoc_usc,
                            code_size + ROGUE_MAX_INSTR_BYTES,
                            code_alignment,
                            false,
                            &suballoc_bo);
   if (result != VK_SUCCESS)
      return result;

   map = pvr_bo_suballoc_get_map_addr(suballoc_bo);
   memcpy(map, code, code_size);

   *pvr_bo_out = suballoc_bo;

   return VK_SUCCESS;
}

/**
 * \brief Upload PDS program data and code segments from host memory to device
 * memory.
 *
 * \param[in] device Logical device pointer.
 * \param[in] data Pointer to PDS data segment to upload.
 * \param[in] data_size_dwords Size of PDS data segment in dwords.
 * \param[in] data_alignment Required alignment of the PDS data segment in
 *                           bytes. Must be a power of two.
 * \param[in] code Pointer to PDS code segment to upload.
 * \param[in] code_size_dwords Size of PDS code segment in dwords.
 * \param[in] code_alignment Required alignment of the PDS code segment in
 *                           bytes. Must be a power of two.
 * \param[in] min_alignment Minimum alignment of the bo holding the PDS
 *                          program in bytes.
 * \param[out] pds_upload_out On success will be initialized based on the
 *                            uploaded PDS program.
 * \return VK_SUCCESS on success, or error code otherwise.
 */
VkResult pvr_gpu_upload_pds(struct pvr_device *device,
                            const uint32_t *data,
                            uint32_t data_size_dwords,
                            uint32_t data_alignment,
                            const uint32_t *code,
                            uint32_t code_size_dwords,
                            uint32_t code_alignment,
                            uint64_t min_alignment,
                            struct pvr_pds_upload *const pds_upload_out)
{
   /* All alignment and sizes below are in bytes. */
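   /* The single bo is laid out with the data segment first and the code
    * segment starting at the next code_alignment boundary:
    * [data | pad to code_alignment | code].
    */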
   const size_t data_size = PVR_DW_TO_BYTES(data_size_dwords);
   const size_t code_size = PVR_DW_TO_BYTES(code_size_dwords);
   const uint64_t data_aligned_size = ALIGN_POT(data_size, data_alignment);
   const uint64_t code_aligned_size = ALIGN_POT(code_size, code_alignment);
   const uint32_t code_offset = ALIGN_POT(data_aligned_size, code_alignment);
   const uint64_t bo_alignment = MAX2(min_alignment, data_alignment);
   const uint64_t bo_size = (!!code) ? (code_offset + code_aligned_size)
                                     : data_aligned_size;
   VkResult result;
   void *map;

   assert(code || data);
   assert(!code || (code_size_dwords != 0 && code_alignment != 0));
   assert(!data || (data_size_dwords != 0 && data_alignment != 0));

   result = pvr_bo_suballoc(&device->suballoc_pds,
                            bo_size,
                            bo_alignment,
                            true,
                            &pds_upload_out->pvr_bo);
   if (result != VK_SUCCESS)
      return result;

   map = pvr_bo_suballoc_get_map_addr(pds_upload_out->pvr_bo);

   if (data) {
      memcpy(map, data, data_size);

      pds_upload_out->data_offset = pds_upload_out->pvr_bo->dev_addr.addr -
                                    device->heaps.pds_heap->base_addr.addr;

      /* Store data size in dwords. */
      assert(data_aligned_size % 4 == 0);
      pds_upload_out->data_size = data_aligned_size / 4;
   } else {
      pds_upload_out->data_offset = 0;
      pds_upload_out->data_size = 0;
   }

   if (code) {
      memcpy((uint8_t *)map + code_offset, code, code_size);

      pds_upload_out->code_offset =
         (pds_upload_out->pvr_bo->dev_addr.addr + code_offset) -
         device->heaps.pds_heap->base_addr.addr;

      /* Store code size in dwords. */
      assert(code_aligned_size % 4 == 0);
      pds_upload_out->code_size = code_aligned_size / 4;
   } else {
      pds_upload_out->code_offset = 0;
      pds_upload_out->code_size = 0;
   }

   return VK_SUCCESS;
}

static VkResult
pvr_framebuffer_create_ppp_state(struct pvr_device *device,
                                 struct pvr_framebuffer *framebuffer)
{
   const uint32_t cache_line_size =
      rogue_get_slc_cache_line_size(&device->pdevice->dev_info);
   uint32_t ppp_state[3];
   VkResult result;

   pvr_csb_pack (&ppp_state[0], TA_STATE_HEADER, header) {
      header.pres_terminate = true;
   }

   pvr_csb_pack (&ppp_state[1], TA_STATE_TERMINATE0, term0) {
      term0.clip_right =
         DIV_ROUND_UP(
            framebuffer->width,
            PVRX(TA_STATE_TERMINATE0_CLIP_RIGHT_BLOCK_SIZE_IN_PIXELS)) -
         1;
      term0.clip_bottom =
         DIV_ROUND_UP(
            framebuffer->height,
            PVRX(TA_STATE_TERMINATE0_CLIP_BOTTOM_BLOCK_SIZE_IN_PIXELS)) -
         1;
   }

   pvr_csb_pack (&ppp_state[2], TA_STATE_TERMINATE1, term1) {
      term1.render_target = 0;
      term1.clip_left = 0;
   }

   result = pvr_gpu_upload(device,
                           device->heaps.general_heap,
                           ppp_state,
                           sizeof(ppp_state),
                           cache_line_size,
                           &framebuffer->ppp_state_bo);
   if (result != VK_SUCCESS)
      return result;

   /* Calculate the size of PPP state in dwords. */
   framebuffer->ppp_state_size = sizeof(ppp_state) / sizeof(uint32_t);

   return VK_SUCCESS;
}

static bool pvr_render_targets_init(struct pvr_render_target *render_targets,
                                    uint32_t render_targets_count)
{
   uint32_t i;

   for (i = 0; i < render_targets_count; i++) {
      if (pthread_mutex_init(&render_targets[i].mutex, NULL))
         goto err_mutex_destroy;
   }

   return true;

err_mutex_destroy:
   while (i--)
      pthread_mutex_destroy(&render_targets[i].mutex);

   return false;
}

static void pvr_render_targets_fini(struct pvr_render_target *render_targets,
                                    uint32_t render_targets_count)
{
   for (uint32_t i = 0; i < render_targets_count; i++) {
      if (render_targets[i].valid) {
         pvr_render_target_dataset_destroy(render_targets[i].rt_dataset);
         render_targets[i].valid = false;
      }

      pthread_mutex_destroy(&render_targets[i].mutex);
   }
}

VkResult pvr_CreateFramebuffer(VkDevice _device,
                               const VkFramebufferCreateInfo *pCreateInfo,
                               const VkAllocationCallbacks *pAllocator,
                               VkFramebuffer *pFramebuffer)
{
   PVR_FROM_HANDLE(pvr_render_pass, pass, pCreateInfo->renderPass);
   PVR_FROM_HANDLE(pvr_device, device, _device);
   struct pvr_spm_bgobj_state *spm_bgobj_state_per_render;
   struct pvr_spm_eot_state *spm_eot_state_per_render;
   struct pvr_render_target *render_targets;
   struct pvr_framebuffer *framebuffer;
   struct pvr_image_view **attachments;
   uint32_t render_targets_count;
   uint64_t scratch_buffer_size;
   VkResult result;

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO);

   render_targets_count =
      PVR_RENDER_TARGETS_PER_FRAMEBUFFER(&device->pdevice->dev_info);

   VK_MULTIALLOC(ma);
   vk_multialloc_add(&ma, &framebuffer, __typeof__(*framebuffer), 1);
   vk_multialloc_add(&ma,
                     &attachments,
                     __typeof__(*attachments),
                     pCreateInfo->attachmentCount);
   vk_multialloc_add(&ma,
                     &render_targets,
                     __typeof__(*render_targets),
                     render_targets_count);
   vk_multialloc_add(&ma,
                     &spm_eot_state_per_render,
                     __typeof__(*spm_eot_state_per_render),
                     pass->hw_setup->render_count);
   vk_multialloc_add(&ma,
                     &spm_bgobj_state_per_render,
                     __typeof__(*spm_bgobj_state_per_render),
                     pass->hw_setup->render_count);

   if (!vk_multialloc_zalloc2(&ma,
                              &device->vk.alloc,
                              pAllocator,
                              VK_SYSTEM_ALLOCATION_SCOPE_OBJECT))
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   vk_object_base_init(&device->vk,
                       &framebuffer->base,
                       VK_OBJECT_TYPE_FRAMEBUFFER);

   framebuffer->width = pCreateInfo->width;
   framebuffer->height = pCreateInfo->height;
   framebuffer->layers = pCreateInfo->layers;

   framebuffer->attachments = attachments;
   framebuffer->attachment_count = pCreateInfo->attachmentCount;
   for (uint32_t i = 0; i < framebuffer->attachment_count; i++) {
      framebuffer->attachments[i] =
         pvr_image_view_from_handle(pCreateInfo->pAttachments[i]);
   }

   result = pvr_framebuffer_create_ppp_state(device, framebuffer);
   if (result != VK_SUCCESS)
      goto err_free_framebuffer;

   framebuffer->render_targets = render_targets;
   framebuffer->render_targets_count = render_targets_count;
   if (!pvr_render_targets_init(framebuffer->render_targets,
                                render_targets_count)) {
      result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
      goto err_free_ppp_state_bo;
   }

   scratch_buffer_size =
      pvr_spm_scratch_buffer_calc_required_size(pass,
                                                framebuffer->width,
                                                framebuffer->height);

   result = pvr_spm_scratch_buffer_get_buffer(device,
                                              scratch_buffer_size,
                                              &framebuffer->scratch_buffer);
   if (result != VK_SUCCESS)
      goto err_finish_render_targets;

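   /* Note: the error labels below live inside the loop so that a failure at
    * render i first tears down the state initialized for renders 0..i-1;
    * the trailing 'continue' makes successful iterations skip them.
    */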
   for (uint32_t i = 0; i < pass->hw_setup->render_count; i++) {
      uint32_t emit_count;

      result = pvr_spm_init_eot_state(device,
                                      &spm_eot_state_per_render[i],
                                      framebuffer,
                                      &pass->hw_setup->renders[i],
                                      &emit_count);
      if (result != VK_SUCCESS)
         goto err_finish_eot_state;

      result = pvr_spm_init_bgobj_state(device,
                                        &spm_bgobj_state_per_render[i],
                                        framebuffer,
                                        &pass->hw_setup->renders[i],
                                        emit_count);
      if (result != VK_SUCCESS)
         goto err_finish_bgobj_state;

      continue;

err_finish_bgobj_state:
      pvr_spm_finish_eot_state(device, &spm_eot_state_per_render[i]);

      for (uint32_t j = 0; j < i; j++)
         pvr_spm_finish_bgobj_state(device, &spm_bgobj_state_per_render[j]);

err_finish_eot_state:
      for (uint32_t j = 0; j < i; j++)
         pvr_spm_finish_eot_state(device, &spm_eot_state_per_render[j]);

      goto err_finish_render_targets;
   }

   framebuffer->render_count = pass->hw_setup->render_count;
   framebuffer->spm_eot_state_per_render = spm_eot_state_per_render;
   framebuffer->spm_bgobj_state_per_render = spm_bgobj_state_per_render;

   *pFramebuffer = pvr_framebuffer_to_handle(framebuffer);

   return VK_SUCCESS;

err_finish_render_targets:
   pvr_render_targets_fini(framebuffer->render_targets, render_targets_count);

err_free_ppp_state_bo:
   pvr_bo_suballoc_free(framebuffer->ppp_state_bo);

err_free_framebuffer:
   vk_object_base_finish(&framebuffer->base);
   vk_free2(&device->vk.alloc, pAllocator, framebuffer);

   return result;
}

void pvr_DestroyFramebuffer(VkDevice _device,
                            VkFramebuffer _fb,
                            const VkAllocationCallbacks *pAllocator)
{
   PVR_FROM_HANDLE(pvr_framebuffer, framebuffer, _fb);
   PVR_FROM_HANDLE(pvr_device, device, _device);

   if (!framebuffer)
      return;

   for (uint32_t i = 0; i < framebuffer->render_count; i++) {
      pvr_spm_finish_bgobj_state(device,
                                 &framebuffer->spm_bgobj_state_per_render[i]);

      pvr_spm_finish_eot_state(device,
                               &framebuffer->spm_eot_state_per_render[i]);
   }

   pvr_spm_scratch_buffer_release(device, framebuffer->scratch_buffer);
   pvr_render_targets_fini(framebuffer->render_targets,
                           framebuffer->render_targets_count);
   pvr_bo_suballoc_free(framebuffer->ppp_state_bo);
   vk_object_base_finish(&framebuffer->base);
   vk_free2(&device->vk.alloc, pAllocator, framebuffer);
}

static uint32_t
pvr_sampler_get_hw_filter_from_vk(const struct pvr_device_info *dev_info,
                                  VkFilter filter)
{
   switch (filter) {
   case VK_FILTER_NEAREST:
      return PVRX(TEXSTATE_FILTER_POINT);
   case VK_FILTER_LINEAR:
      return PVRX(TEXSTATE_FILTER_LINEAR);
   default:
      unreachable("Unknown filter type.");
   }
}

static uint32_t
pvr_sampler_get_hw_addr_mode_from_vk(VkSamplerAddressMode addr_mode)
{
   switch (addr_mode) {
   case VK_SAMPLER_ADDRESS_MODE_REPEAT:
      return PVRX(TEXSTATE_ADDRMODE_REPEAT);
   case VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT:
      return PVRX(TEXSTATE_ADDRMODE_FLIP);
   case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE:
      return PVRX(TEXSTATE_ADDRMODE_CLAMP_TO_EDGE);
   case VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE:
      return PVRX(TEXSTATE_ADDRMODE_FLIP_ONCE_THEN_CLAMP);
   case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER:
      return PVRX(TEXSTATE_ADDRMODE_CLAMP_TO_BORDER);
   default:
      unreachable("Invalid sampler address mode.");
   }
}

VkResult pvr_CreateSampler(VkDevice _device,
                           const VkSamplerCreateInfo *pCreateInfo,
                           const VkAllocationCallbacks *pAllocator,
                           VkSampler *pSampler)
{
   PVR_FROM_HANDLE(pvr_device, device, _device);
   uint32_t border_color_table_index;
   struct pvr_sampler *sampler;
   float lod_rounding_bias;
   VkFilter min_filter;
   VkFilter mag_filter;
   VkResult result;
   float min_lod;
   float max_lod;

   STATIC_ASSERT(sizeof(((union pvr_sampler_descriptor *)NULL)->data) ==
                 sizeof(((union pvr_sampler_descriptor *)NULL)->words));

   sampler =
      vk_sampler_create(&device->vk, pCreateInfo, pAllocator, sizeof(*sampler));
   if (!sampler) {
      result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
      goto err_out;
   }

   mag_filter = pCreateInfo->magFilter;
   min_filter = pCreateInfo->minFilter;

   result =
      pvr_border_color_table_get_or_create_entry(&device->border_color_table,
                                                 sampler,
                                                 &border_color_table_index);
   if (result != VK_SUCCESS)
      goto err_free_sampler;

   if (PVR_HAS_QUIRK(&device->pdevice->dev_info, 51025)) {
      /* The min/mag filters may need adjustment here. The GPU should decide
       * which of the two filters to use based on the clamped LOD value: LOD
       * <= 0 implies magnification, while LOD > 0 implies minification.
       *
       * As a workaround, we override magFilter with minFilter if we know that
       * the magnification filter will never be used due to clamping anyway
       * (i.e. minLod > 0). Conversely, we override minFilter with magFilter
       * if maxLod <= 0.
       */
      if (pCreateInfo->minLod > 0.0f) {
         /* The clamped LOD will always be positive => always minify. */
         mag_filter = pCreateInfo->minFilter;
      }

      if (pCreateInfo->maxLod <= 0.0f) {
         /* The clamped LOD will always be negative or zero => always
          * magnify.
          */
         min_filter = pCreateInfo->magFilter;
      }
   }

   if (pCreateInfo->compareEnable) {
      sampler->descriptor.data.compare_op =
         (uint32_t)pvr_texstate_cmpmode(pCreateInfo->compareOp);
   } else {
      sampler->descriptor.data.compare_op =
         (uint32_t)pvr_texstate_cmpmode(VK_COMPARE_OP_NEVER);
   }

   sampler->descriptor.data.word3 = 0;
   pvr_csb_pack (&sampler->descriptor.data.sampler_word,
                 TEXSTATE_SAMPLER,
                 word) {
      const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
      const float lod_clamp_max = (float)PVRX(TEXSTATE_CLAMP_MAX) /
                                  (1 << PVRX(TEXSTATE_CLAMP_FRACTIONAL_BITS));
      const float max_dadjust = ((float)(PVRX(TEXSTATE_DADJUST_MAX_UINT) -
                                         PVRX(TEXSTATE_DADJUST_ZERO_UINT))) /
                                (1 << PVRX(TEXSTATE_DADJUST_FRACTIONAL_BITS));
      const float min_dadjust = ((float)(PVRX(TEXSTATE_DADJUST_MIN_UINT) -
                                         PVRX(TEXSTATE_DADJUST_ZERO_UINT))) /
                                (1 << PVRX(TEXSTATE_DADJUST_FRACTIONAL_BITS));

      word.magfilter = pvr_sampler_get_hw_filter_from_vk(dev_info, mag_filter);
      word.minfilter = pvr_sampler_get_hw_filter_from_vk(dev_info, min_filter);

      if (pCreateInfo->mipmapMode == VK_SAMPLER_MIPMAP_MODE_LINEAR)
         word.mipfilter = true;

      word.addrmode_u =
         pvr_sampler_get_hw_addr_mode_from_vk(pCreateInfo->addressModeU);
      word.addrmode_v =
         pvr_sampler_get_hw_addr_mode_from_vk(pCreateInfo->addressModeV);
      word.addrmode_w =
         pvr_sampler_get_hw_addr_mode_from_vk(pCreateInfo->addressModeW);

      /* TODO: Figure out defines for these. */
      if (word.addrmode_u == PVRX(TEXSTATE_ADDRMODE_FLIP))
         sampler->descriptor.data.word3 |= 0x40000000;

      if (word.addrmode_v == PVRX(TEXSTATE_ADDRMODE_FLIP))
         sampler->descriptor.data.word3 |= 0x20000000;

      /* The Vulkan 1.0.205 spec says:
       *
       *    The absolute value of mipLodBias must be less than or equal to
       *    VkPhysicalDeviceLimits::maxSamplerLodBias.
       */
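      /* Convert the LOD bias to the hardware's signed fixed-point encoding:
       * clamp it to the representable range, convert to fixed point and
       * offset it by the encoding's zero point.
       */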
      word.dadjust =
         PVRX(TEXSTATE_DADJUST_ZERO_UINT) +
         util_signed_fixed(
            CLAMP(pCreateInfo->mipLodBias, min_dadjust, max_dadjust),
            PVRX(TEXSTATE_DADJUST_FRACTIONAL_BITS));

      /* Anisotropy is not supported for now. */
      word.anisoctl = PVRX(TEXSTATE_ANISOCTL_DISABLED);

      if (PVR_HAS_QUIRK(&device->pdevice->dev_info, 51025) &&
          pCreateInfo->mipmapMode == VK_SAMPLER_MIPMAP_MODE_NEAREST) {
         /* When MIPMAP_MODE_NEAREST is enabled, the LOD level should be
          * selected by adding 0.5 and then truncating the input LOD value.
          * This hardware adds the 0.5 bias before clamping against
          * lodmin/lodmax, while Vulkan specifies the bias to be added after
          * clamping. We compensate for this difference by adding the 0.5
          * bias to the LOD bounds, too.
          */
         lod_rounding_bias = 0.5f;
      } else {
         lod_rounding_bias = 0.0f;
      }

      min_lod = pCreateInfo->minLod + lod_rounding_bias;
      word.minlod = util_unsigned_fixed(CLAMP(min_lod, 0.0f, lod_clamp_max),
                                        PVRX(TEXSTATE_CLAMP_FRACTIONAL_BITS));

      max_lod = pCreateInfo->maxLod + lod_rounding_bias;
      word.maxlod = util_unsigned_fixed(CLAMP(max_lod, 0.0f, lod_clamp_max),
                                        PVRX(TEXSTATE_CLAMP_FRACTIONAL_BITS));

      word.bordercolor_index = border_color_table_index;

      if (pCreateInfo->unnormalizedCoordinates)
         word.non_normalized_coords = true;
   }

   *pSampler = pvr_sampler_to_handle(sampler);

   return VK_SUCCESS;

err_free_sampler:
   vk_object_free(&device->vk, pAllocator, sampler);

err_out:
   return result;
}

void pvr_DestroySampler(VkDevice _device,
                        VkSampler _sampler,
                        const VkAllocationCallbacks *pAllocator)
{
   PVR_FROM_HANDLE(pvr_device, device, _device);
   PVR_FROM_HANDLE(pvr_sampler, sampler, _sampler);

   if (!sampler)
      return;

   vk_sampler_destroy(&device->vk, pAllocator, &sampler->vk);
}

void pvr_GetBufferMemoryRequirements2(
   VkDevice _device,
   const VkBufferMemoryRequirementsInfo2 *pInfo,
   VkMemoryRequirements2 *pMemoryRequirements)
{
   PVR_FROM_HANDLE(pvr_buffer, buffer, pInfo->buffer);
   PVR_FROM_HANDLE(pvr_device, device, _device);
   uint64_t size;

   /* The Vulkan 1.0.166 spec says:
    *
    *    memoryTypeBits is a bitmask and contains one bit set for every
    *    supported memory type for the resource. Bit 'i' is set if and only
    *    if the memory type 'i' in the VkPhysicalDeviceMemoryProperties
    *    structure for the physical device is supported for the resource.
    *
    * All types are currently supported for buffers.
    */
   pMemoryRequirements->memoryRequirements.memoryTypeBits =
      (1ul << device->pdevice->memory.memoryTypeCount) - 1;

   pMemoryRequirements->memoryRequirements.alignment = buffer->alignment;

   size = buffer->vk.size;

   if (size % device->ws->page_size == 0 ||
       size % device->ws->page_size >
          device->ws->page_size - PVR_BUFFER_MEMORY_PADDING_SIZE) {
      /* TODO: We can save memory by having one extra virtual page mapped
       * in and having the first and last virtual page mapped to the first
       * physical address.
       */
      size += PVR_BUFFER_MEMORY_PADDING_SIZE;
   }

   pMemoryRequirements->memoryRequirements.size =
      ALIGN_POT(size, buffer->alignment);
}

void pvr_GetImageMemoryRequirements2(VkDevice _device,
                                     const VkImageMemoryRequirementsInfo2 *pInfo,
                                     VkMemoryRequirements2 *pMemoryRequirements)
{
   PVR_FROM_HANDLE(pvr_device, device, _device);
   PVR_FROM_HANDLE(pvr_image, image, pInfo->image);

   /* The Vulkan 1.0.166 spec says:
    *
    *    memoryTypeBits is a bitmask and contains one bit set for every
    *    supported memory type for the resource. Bit 'i' is set if and only
    *    if the memory type 'i' in the VkPhysicalDeviceMemoryProperties
    *    structure for the physical device is supported for the resource.
    *
    * All types are currently supported for images.
    */
   const uint32_t memory_types =
      (1ul << device->pdevice->memory.memoryTypeCount) - 1;

   /* TODO: The returned size is aligned here in case of arrays/CEM (as is done
    * in GetImageMemoryRequirements()), but this should be known at image
    * creation time (pCreateInfo->arrayLayers > 1). This is confirmed in
    * ImageCreate()/ImageGetMipMapOffsetInBytes() where it aligns the size to
    * 4096 if pCreateInfo->arrayLayers > 1. So is the alignment here actually
    * necessary? If not, what should it be when pCreateInfo->arrayLayers == 1?
    *
    * Note: Presumably the 4096 alignment requirement comes from the Vulkan
    * driver setting RGX_CR_TPU_TAG_CEM_4K_FACE_PACKING_EN when setting up
    * render and compute jobs.
    */
   pMemoryRequirements->memoryRequirements.alignment = image->alignment;
   pMemoryRequirements->memoryRequirements.size =
      align64(image->size, image->alignment);
   pMemoryRequirements->memoryRequirements.memoryTypeBits = memory_types;
}