/*
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 *
 * based in part on anv driver which is:
 * Copyright © 2015 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 */

#include "radv_image.h"
#include "util/u_atomic.h"
#include "util/u_debug.h"
#include "ac_drm_fourcc.h"
#include "ac_formats.h"
#include "radv_android.h"
#include "radv_buffer.h"
#include "radv_buffer_view.h"
#include "radv_debug.h"
#include "radv_device_memory.h"
#include "radv_entrypoints.h"
#include "radv_formats.h"
#include "radv_image_view.h"
#include "radv_radeon_winsys.h"
#include "radv_rmv.h"
#include "radv_video.h"
#include "radv_wsi.h"
#include "sid.h"
#include "vk_debug_utils.h"
#include "vk_format.h"
#include "vk_log.h"
#include "vk_render_pass.h"
#include "vk_util.h"

#include "gfx10_format_table.h"

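/* Pick the surface tiling mode for a new image: linear is forced for
 * linear-tiling requests and video usages, MSAA must be 2D tiled, and on
 * GFX8 and older a heuristic keeps small uncompressed color textures linear.
 */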
static unsigned
radv_choose_tiling(struct radv_device *device, const VkImageCreateInfo *pCreateInfo, VkFormat format)
{
   const struct radv_physical_device *pdev = radv_device_physical(device);

   if (pCreateInfo->tiling == VK_IMAGE_TILING_LINEAR) {
      assert(pCreateInfo->samples <= 1);
      return RADEON_SURF_MODE_LINEAR_ALIGNED;
   }

   if (pCreateInfo->usage & (VK_IMAGE_USAGE_VIDEO_DECODE_DST_BIT_KHR | VK_IMAGE_USAGE_VIDEO_DECODE_DPB_BIT_KHR))
      return RADEON_SURF_MODE_LINEAR_ALIGNED;

   if (pCreateInfo->usage & (VK_IMAGE_USAGE_VIDEO_ENCODE_DPB_BIT_KHR | VK_IMAGE_USAGE_VIDEO_ENCODE_SRC_BIT_KHR))
      return RADEON_SURF_MODE_LINEAR_ALIGNED;

   /* MSAA resources must be 2D tiled. */
   if (pCreateInfo->samples > 1)
      return RADEON_SURF_MODE_2D;

   if (!vk_format_is_compressed(format) && !vk_format_is_depth_or_stencil(format) && pdev->info.gfx_level <= GFX8) {
      /* This path causes hangs in some VK CTS tests on GFX9, hence the GFX8 limit. */
      /* Textures with a very small height are recommended to be linear. */
      if (pCreateInfo->imageType == VK_IMAGE_TYPE_1D ||
          /* Only very thin and long 2D textures should benefit from
           * linear_aligned. */
          (pCreateInfo->extent.width > 8 && pCreateInfo->extent.height <= 2))
         return RADEON_SURF_MODE_LINEAR_ALIGNED;
   }

   return RADEON_SURF_MODE_2D;
}

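/* Decide whether a depth/stencil image can use TC-compatible HTILE, i.e.
 * HTILE the texture units can read directly without a decompression pass.
 * The checks below encode known per-generation hardware restrictions.
 */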
static bool
radv_use_tc_compat_htile_for_image(struct radv_device *device, const VkImageCreateInfo *pCreateInfo, VkFormat format)
{
   const struct radv_physical_device *pdev = radv_device_physical(device);

   if (!pdev->info.has_tc_compatible_htile)
      return false;

   /* TC-compat HTILE looks broken on Tonga (and Iceland is the same design) and the documented bug
    * workarounds don't help.
    */
   if (pdev->info.family == CHIP_TONGA || pdev->info.family == CHIP_ICELAND)
      return false;

   if (pCreateInfo->tiling == VK_IMAGE_TILING_LINEAR)
      return false;

   /* Do not enable TC-compatible HTILE if the image isn't readable by a
    * shader because no texture fetches will happen.
    */
   if (!(pCreateInfo->usage &
         (VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT)))
      return false;

   if (pdev->info.gfx_level < GFX9) {
      /* TC-compat HTILE for MSAA depth/stencil images is broken
       * on GFX8 because the tiling doesn't match.
       */
      if (pCreateInfo->samples >= 2 && format == VK_FORMAT_D32_SFLOAT_S8_UINT)
         return false;

      /* GFX9+ supports compression for both 32-bit and 16-bit depth
       * surfaces, while GFX8 only supports 32-bit natively. However,
       * the driver allows TC-compat HTILE for 16-bit depth surfaces
       * with no Z-plane compression.
       */
      if (format != VK_FORMAT_D32_SFLOAT_S8_UINT && format != VK_FORMAT_D32_SFLOAT && format != VK_FORMAT_D16_UNORM)
         return false;

      /* TC-compat HTILE for layered images can have interleaved slices (see sliceInterleaved flag
       * in addrlib). radv_clear_htile does not work.
       */
      if (pCreateInfo->arrayLayers > 1)
         return false;
   }

   /* GFX9 has issues when the sample count is 4 and the format is D16. */
   if (pdev->info.gfx_level == GFX9 && pCreateInfo->samples == 4 && format == VK_FORMAT_D16_UNORM)
      return false;

   return true;
}

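/* Whether the surface will be displayed on screen: imported images carry
 * this in their BO metadata, otherwise the create info decides.
 */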
static bool
radv_surface_has_scanout(struct radv_device *device, const struct radv_image_create_info *info)
{
   const struct radv_physical_device *pdev = radv_device_physical(device);

   if (info->bo_metadata) {
      if (pdev->info.gfx_level >= GFX9)
         return info->bo_metadata->u.gfx9.scanout;
      else
         return info->bo_metadata->u.legacy.scanout;
   }

   return info->scanout;
}

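/* Early heuristic for CMASK/DCC fast clears, usable before the surface has
 * been laid out: only color attachments benefit, and small surfaces are
 * skipped because the eliminate pass can cost more than the fast clear saves.
 */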
static bool
radv_image_use_fast_clear_for_image_early(const struct radv_device *device, const struct radv_image *image)
{
   const struct radv_physical_device *pdev = radv_device_physical(device);
   const struct radv_instance *instance = radv_physical_device_instance(pdev);

   if (instance->debug_flags & RADV_DEBUG_FORCE_COMPRESS)
      return true;

   if (image->vk.samples <= 1 && image->vk.extent.width * image->vk.extent.height <= 512 * 512) {
      /* Do not enable CMASK or DCC for small surfaces where the cost
       * of the eliminate pass can be higher than the benefit of fast
       * clear. RadeonSI does this, but the image threshold is
       * different.
       */
      return false;
   }

   return !!(image->vk.usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT);
}

static bool
radv_image_use_fast_clear_for_image(const struct radv_device *device, const struct radv_image *image)
{
   const struct radv_physical_device *pdev = radv_device_physical(device);
   const struct radv_instance *instance = radv_physical_device_instance(pdev);

   if (instance->debug_flags & RADV_DEBUG_FORCE_COMPRESS)
      return true;

   return radv_image_use_fast_clear_for_image_early(device, image) && (image->exclusive ||
                                                                       /* Enable DCC for concurrent images if stores are
                                                                        * supported because that means we can keep DCC
                                                                        * compressed on all layouts/queues.
                                                                        */
                                                                       radv_image_use_dcc_image_stores(device, image));
}

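/* Check whether all view formats the application may use with this image are
 * DCC-compatible with the base format. With MUTABLE_FORMAT and no format
 * list (or an empty one) any compatible format must be assumed, so give up.
 */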
bool
radv_are_formats_dcc_compatible(const struct radv_physical_device *pdev, const void *pNext, VkFormat format,
                                VkImageCreateFlags flags, bool *sign_reinterpret)
{
   if (!radv_is_colorbuffer_format_supported(pdev, format))
      return false;

   if (sign_reinterpret != NULL)
      *sign_reinterpret = false;

   /* All formats are compatible on GFX11. */
   if ((flags & VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT) && pdev->info.gfx_level < GFX11) {
      const struct VkImageFormatListCreateInfo *format_list =
         (const struct VkImageFormatListCreateInfo *)vk_find_struct_const(pNext, IMAGE_FORMAT_LIST_CREATE_INFO);

      /* We have to ignore the existence of the list if viewFormatCount = 0 */
      if (format_list && format_list->viewFormatCount) {
         /* compatibility is transitive, so we only need to check
          * one format with everything else. */
         for (unsigned i = 0; i < format_list->viewFormatCount; ++i) {
            if (format_list->pViewFormats[i] == VK_FORMAT_UNDEFINED)
               continue;

            if (!radv_dcc_formats_compatible(pdev->info.gfx_level, format, format_list->pViewFormats[i],
                                             sign_reinterpret))
               return false;
         }
      } else {
         return false;
      }
   }

   return true;
}

static bool
radv_format_is_atomic_allowed(struct radv_device *device, VkFormat format)
{
   if (format == VK_FORMAT_R32_SFLOAT && !radv_uses_image_float32_atomics(device))
      return false;

   return radv_is_atomic_format_supported(format);
}

static bool
radv_formats_is_atomic_allowed(struct radv_device *device, const void *pNext, VkFormat format, VkImageCreateFlags flags)
{
   if (radv_format_is_atomic_allowed(device, format))
      return true;

   if (flags & VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT) {
      const struct VkImageFormatListCreateInfo *format_list =
         (const struct VkImageFormatListCreateInfo *)vk_find_struct_const(pNext, IMAGE_FORMAT_LIST_CREATE_INFO);

      /* We have to ignore the existence of the list if viewFormatCount = 0 */
      if (format_list && format_list->viewFormatCount) {
         for (unsigned i = 0; i < format_list->viewFormatCount; ++i) {
            if (radv_format_is_atomic_allowed(device, format_list->pViewFormats[i]))
               return true;
         }
      }
   }

   return false;
}

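/* First-pass DCC decision, taken before the surface is laid out: rules out
 * DCC on generations, tilings, usages and format combinations where it is
 * unsupported or known to hurt. radv_use_dcc_for_image_late() re-checks once
 * the surface has been computed.
 */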
static bool
radv_use_dcc_for_image_early(struct radv_device *device, struct radv_image *image, const VkImageCreateInfo *pCreateInfo,
                             VkFormat format, bool *sign_reinterpret)
{
   const struct radv_physical_device *pdev = radv_device_physical(device);
   const struct radv_instance *instance = radv_physical_device_instance(pdev);

   /* DCC (Delta Color Compression) is only available for GFX8+. */
   if (pdev->info.gfx_level < GFX8)
      return false;

   const VkImageCompressionControlEXT *compression =
      vk_find_struct_const(pCreateInfo->pNext, IMAGE_COMPRESSION_CONTROL_EXT);

   if (instance->debug_flags & RADV_DEBUG_NO_DCC ||
       (compression && compression->flags == VK_IMAGE_COMPRESSION_DISABLED_EXT)) {
      return false;
   }

   if (image->shareable && image->vk.tiling != VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT)
      return false;

   /*
    * TODO: Enable DCC for storage images on GFX9 and earlier.
    *
    * Also disable DCC with atomics because even when DCC stores are
    * supported atomics will always decompress. So if we are
    * decompressing a lot anyway we might as well not have DCC.
    */
   if ((pCreateInfo->usage & VK_IMAGE_USAGE_STORAGE_BIT) &&
       (pdev->info.gfx_level < GFX10 ||
        radv_formats_is_atomic_allowed(device, pCreateInfo->pNext, format, pCreateInfo->flags)))
      return false;

   if (pCreateInfo->tiling == VK_IMAGE_TILING_LINEAR)
      return false;

   if (vk_format_is_subsampled(format) || vk_format_get_plane_count(format) > 1)
      return false;

   if (!radv_image_use_fast_clear_for_image_early(device, image) &&
       image->vk.tiling != VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT)
      return false;

   /* Do not enable DCC for mipmapped arrays because performance is worse. */
   if (pCreateInfo->arrayLayers > 1 && pCreateInfo->mipLevels > 1)
      return false;

   if (pdev->info.gfx_level < GFX10) {
      /* TODO: Add support for DCC MSAA on GFX8-9. */
      if (pCreateInfo->samples > 1 && !pdev->dcc_msaa_allowed)
         return false;

      /* TODO: Add support for DCC layers/mipmaps on GFX9. */
      if ((pCreateInfo->arrayLayers > 1 || pCreateInfo->mipLevels > 1) && pdev->info.gfx_level == GFX9)
         return false;
   }

   /* DCC MSAA can't work on GFX10.3 and earlier without FMASK. */
   if (pCreateInfo->samples > 1 && pdev->info.gfx_level < GFX11 && (instance->debug_flags & RADV_DEBUG_NO_FMASK))
      return false;

   return radv_are_formats_dcc_compatible(pdev, pCreateInfo->pNext, format, pCreateInfo->flags, sign_reinterpret);
}

static bool
radv_use_dcc_for_image_late(struct radv_device *device, struct radv_image *image)
{
   if (!radv_image_has_dcc(image))
      return false;

   if (image->vk.tiling == VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT)
      return true;

   if (!radv_image_use_fast_clear_for_image(device, image))
      return false;

   /* TODO: Fix storage images with DCC without DCC image stores.
    * Disabling it for now. */
   if ((image->vk.usage & VK_IMAGE_USAGE_STORAGE_BIT) && !radv_image_use_dcc_image_stores(device, image))
      return false;

   return true;
}

/*
 * Whether to enable image stores with DCC compression for this image. If
 * this function returns false the image subresource should be decompressed
 * before using it with image stores.
 *
 * Note that this can have mixed performance implications, see
 * https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6796#note_643299
 *
 * This function assumes the image uses DCC compression.
 */
bool
radv_image_use_dcc_image_stores(const struct radv_device *device, const struct radv_image *image)
{
   const struct radv_physical_device *pdev = radv_device_physical(device);

   return ac_surface_supports_dcc_image_stores(pdev->info.gfx_level, &image->planes[0].surface);
}

/*
 * Whether to use a predicate to determine whether DCC is in a compressed
 * state. This can be used to avoid decompressing an image multiple times.
 */
bool
radv_image_use_dcc_predication(const struct radv_device *device, const struct radv_image *image)
{
   return radv_image_has_dcc(image) && !radv_image_use_dcc_image_stores(device, image);
}

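/* FMASK is the per-sample compression metadata for MSAA color surfaces; it
 * is only useful for multisampled color attachments, and layered GFX9 images
 * are skipped because FMASK can be interleaved with layers there.
 */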
static inline bool
radv_use_fmask_for_image(const struct radv_device *device, const struct radv_image *image)
{
   const struct radv_physical_device *pdev = radv_device_physical(device);
   const struct radv_instance *instance = radv_physical_device_instance(pdev);

   if (pdev->info.gfx_level == GFX9 && image->vk.array_layers > 1) {
      /* On GFX9, FMASK can be interleaved with layers and this isn't properly supported. */
      return false;
   }

   return pdev->use_fmask && image->vk.samples > 1 &&
          ((image->vk.usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) ||
           (instance->debug_flags & RADV_DEBUG_FORCE_COMPRESS));
}

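/* Decide whether the depth/stencil image gets HTILE (hierarchical Z)
 * metadata. Storage usage, explicit compression-control opt-out, shared
 * images and most mipmapped or tiny images are excluded below.
 */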
static inline bool
radv_use_htile_for_image(const struct radv_device *device, const struct radv_image *image,
                         const VkImageCreateInfo *pCreateInfo)
{
   const struct radv_physical_device *pdev = radv_device_physical(device);
   const struct radv_instance *instance = radv_physical_device_instance(pdev);
   const enum amd_gfx_level gfx_level = pdev->info.gfx_level;

   const VkImageCompressionControlEXT *compression =
      vk_find_struct_const(pCreateInfo->pNext, IMAGE_COMPRESSION_CONTROL_EXT);

   if (instance->debug_flags & RADV_DEBUG_NO_HIZ ||
       (compression && compression->flags == VK_IMAGE_COMPRESSION_DISABLED_EXT))
      return false;

   if (image->vk.usage & VK_IMAGE_USAGE_STORAGE_BIT)
      return false;

   /* TODO:
    * - Investigate about mips+layers.
    * - Enable on other gens.
    */
   bool use_htile_for_mips = image->vk.array_layers == 1 && pdev->info.gfx_level >= GFX10;

   /* Stencil texturing with HTILE doesn't work with mipmapping on Navi10-14. */
   if (pdev->info.gfx_level == GFX10 && image->vk.format == VK_FORMAT_D32_SFLOAT_S8_UINT && image->vk.mip_levels > 1)
      return false;

   /* Do not enable HTILE for very small images because it seems less performant, but make sure it
    * stays enabled with VRS attachments because HTILE is required on GFX10.3.
    */
   if (image->vk.extent.width * image->vk.extent.height < 8 * 8 &&
       !(instance->debug_flags & RADV_DEBUG_FORCE_COMPRESS) &&
       !(gfx_level == GFX10_3 && device->vk.enabled_features.attachmentFragmentShadingRate))
      return false;

   return (image->vk.mip_levels == 1 || use_htile_for_mips) && !image->shareable;
}

static bool
radv_use_tc_compat_cmask_for_image(struct radv_device *device, struct radv_image *image)
{
   const struct radv_physical_device *pdev = radv_device_physical(device);
   const struct radv_instance *instance = radv_physical_device_instance(pdev);

   /* TC-compat CMASK is only available for GFX8+. */
   if (pdev->info.gfx_level < GFX8)
      return false;

   /* GFX9 has issues when the sample count is greater than 2. */
   if (pdev->info.gfx_level == GFX9 && image->vk.samples > 2)
      return false;

   if (instance->debug_flags & RADV_DEBUG_NO_TC_COMPAT_CMASK)
      return false;

   /* TC-compat CMASK with storage images is supported on GFX10+. */
   if ((image->vk.usage & VK_IMAGE_USAGE_STORAGE_BIT) && pdev->info.gfx_level < GFX10)
      return false;

   /* Do not enable TC-compatible if the image isn't readable by a shader
    * because no texture fetches will happen.
    */
   if (!(image->vk.usage &
         (VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT)))
      return false;

   /* If the image doesn't have FMASK, it can't be fetchable. */
   if (!radv_image_has_fmask(image))
      return false;

   return true;
}

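/* Word 1 of the opaque BO metadata identifies the producing device:
 * the AMD/ATI vendor id in the high 16 bits and the PCI device id below.
 */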
static uint32_t
radv_get_bo_metadata_word1(const struct radv_device *device)
{
   const struct radv_physical_device *pdev = radv_device_physical(device);

   return (ATI_VENDOR_ID << 16) | pdev->info.pci_id;
}

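/* Sanity-check imported opaque metadata: word 0 must be 1, word 1 must match
 * this device, and the blob must be large enough to hold the descriptor
 * words that follow.
 */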
static bool
radv_is_valid_opaque_metadata(const struct radv_device *device, const struct radeon_bo_metadata *md)
{
   if (md->metadata[0] != 1 || md->metadata[1] != radv_get_bo_metadata_word1(device))
      return false;

   if (md->size_metadata < 40)
      return false;

   return true;
}

static void
radv_patch_surface_from_metadata(struct radv_device *device, struct radeon_surf *surface,
                                 const struct radeon_bo_metadata *md)
{
   const struct radv_physical_device *pdev = radv_device_physical(device);

   surface->flags = RADEON_SURF_CLR(surface->flags, MODE);

   if (pdev->info.gfx_level >= GFX9) {
      if (md->u.gfx9.swizzle_mode > 0)
         surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_2D, MODE);
      else
         surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_LINEAR_ALIGNED, MODE);

      surface->u.gfx9.swizzle_mode = md->u.gfx9.swizzle_mode;
   } else {
      surface->u.legacy.pipe_config = md->u.legacy.pipe_config;
      surface->u.legacy.bankw = md->u.legacy.bankw;
      surface->u.legacy.bankh = md->u.legacy.bankh;
      surface->u.legacy.tile_split = md->u.legacy.tile_split;
      surface->u.legacy.mtilea = md->u.legacy.mtilea;
      surface->u.legacy.num_banks = md->u.legacy.num_banks;

      if (md->u.legacy.macrotile == RADEON_LAYOUT_TILED)
         surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_2D, MODE);
      else if (md->u.legacy.microtile == RADEON_LAYOUT_TILED)
         surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_1D, MODE);
      else
         surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_LINEAR_ALIGNED, MODE);
   }
}

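/* Reconcile the dimensions of an imported image with the dimensions encoded
 * in its metadata: larger external dimensions are tolerated where possible
 * (pre-GFX10), smaller ones are always rejected.
 */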
static VkResult
radv_patch_image_dimensions(struct radv_device *device, struct radv_image *image,
                            const struct radv_image_create_info *create_info, struct ac_surf_info *image_info)
{
   const struct radv_physical_device *pdev = radv_device_physical(device);
   unsigned width = image->vk.extent.width;
   unsigned height = image->vk.extent.height;

   /*
    * minigbm sometimes allocates bigger images, which is going to result in
    * weird strides and other properties. Let's be lenient where possible and
    * fail it on GFX10 (as we cannot cope there).
    *
    * Example hack: https://chromium-review.googlesource.com/c/chromiumos/platform/minigbm/+/1457777/
    */
   if (create_info->bo_metadata && radv_is_valid_opaque_metadata(device, create_info->bo_metadata)) {
      const struct radeon_bo_metadata *md = create_info->bo_metadata;

      if (pdev->info.gfx_level >= GFX10) {
         width = G_00A004_WIDTH_LO(md->metadata[3]) + (G_00A008_WIDTH_HI(md->metadata[4]) << 2) + 1;
         height = G_00A008_HEIGHT(md->metadata[4]) + 1;
      } else {
         width = G_008F18_WIDTH(md->metadata[4]) + 1;
         height = G_008F18_HEIGHT(md->metadata[4]) + 1;
      }
   }

   if (image->vk.extent.width == width && image->vk.extent.height == height)
      return VK_SUCCESS;

   if (width < image->vk.extent.width || height < image->vk.extent.height) {
      fprintf(stderr,
              "The imported image has smaller dimensions than the internal\n"
              "dimensions. Using it is going to fail badly, so we reject\n"
              "this import.\n"
              "(internal dimensions: %d x %d, external dimensions: %d x %d)\n",
              image->vk.extent.width, image->vk.extent.height, width, height);
      return VK_ERROR_INVALID_EXTERNAL_HANDLE;
   } else if (pdev->info.gfx_level >= GFX10) {
      fprintf(stderr,
              "Tried to import an image with inconsistent width on GFX10.\n"
              "As GFX10 has no separate stride fields we cannot cope with\n"
              "an inconsistency in width and will fail this import.\n"
              "(internal dimensions: %d x %d, external dimensions: %d x %d)\n",
              image->vk.extent.width, image->vk.extent.height, width, height);
      return VK_ERROR_INVALID_EXTERNAL_HANDLE;
   } else {
      fprintf(stderr,
              "Tried to import an image with inconsistent width on pre-GFX10.\n"
              "As GFX10 has no separate stride fields we cannot cope with\n"
              "an inconsistency and would fail on GFX10.\n"
              "(internal dimensions: %d x %d, external dimensions: %d x %d)\n",
              image->vk.extent.width, image->vk.extent.height, width, height);
   }
   image_info->width = width;
   image_info->height = height;

   return VK_SUCCESS;
}

static VkResult
radv_patch_image_from_extra_info(struct radv_device *device, struct radv_image *image,
                                 const struct radv_image_create_info *create_info, struct ac_surf_info *image_info)
{
   const struct radv_physical_device *pdev = radv_device_physical(device);
   const struct radv_instance *instance = radv_physical_device_instance(pdev);

   VkResult result = radv_patch_image_dimensions(device, image, create_info, image_info);
   if (result != VK_SUCCESS)
      return result;

   for (unsigned plane = 0; plane < image->plane_count; ++plane) {
      if (create_info->bo_metadata) {
         radv_patch_surface_from_metadata(device, &image->planes[plane].surface, create_info->bo_metadata);
      }

      if (radv_surface_has_scanout(device, create_info)) {
         image->planes[plane].surface.flags |= RADEON_SURF_SCANOUT;
         if (instance->debug_flags & RADV_DEBUG_NO_DISPLAY_DCC)
            image->planes[plane].surface.flags |= RADEON_SURF_DISABLE_DCC;

         image_info->surf_index = NULL;
      }

      if (create_info->prime_blit_src && !pdev->info.sdma_supports_compression) {
         /* Older SDMA hw can't handle DCC. */
         image->planes[plane].surface.flags |= RADEON_SURF_DISABLE_DCC;
      }
   }
   return VK_SUCCESS;
}

static VkFormat
radv_image_get_plane_format(const struct radv_physical_device *pdev, const struct radv_image *image, unsigned plane)
{
   if (radv_is_format_emulated(pdev, image->vk.format)) {
      if (plane == 0)
         return image->vk.format;
      if (vk_format_description(image->vk.format)->layout == UTIL_FORMAT_LAYOUT_ASTC)
         return vk_texcompress_astc_emulation_format(image->vk.format);
      else
         return vk_texcompress_etc2_emulation_format(image->vk.format);
   }

   return vk_format_get_plane_format(image->vk.format, plane);
}

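/* Compute the radeon_surf flags for one plane from the create info:
 * tiling/array mode, surface type, depth/stencil bits and metadata enables
 * (HTILE, DCC, FMASK), plus alignment hints from
 * VK_MESA_image_alignment_control.
 */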
static uint64_t
radv_get_surface_flags(struct radv_device *device, struct radv_image *image, unsigned plane_id,
                       const VkImageCreateInfo *pCreateInfo, VkFormat image_format)
{
   const struct radv_physical_device *pdev = radv_device_physical(device);
   const struct radv_instance *instance = radv_physical_device_instance(pdev);
   uint64_t flags;
   unsigned array_mode = radv_choose_tiling(device, pCreateInfo, image_format);
   VkFormat format = radv_image_get_plane_format(pdev, image, plane_id);
   const struct util_format_description *desc = vk_format_description(format);
   const VkImageAlignmentControlCreateInfoMESA *alignment =
      vk_find_struct_const(pCreateInfo->pNext, IMAGE_ALIGNMENT_CONTROL_CREATE_INFO_MESA);
   bool is_depth, is_stencil;

   is_depth = util_format_has_depth(desc);
   is_stencil = util_format_has_stencil(desc);

   flags = RADEON_SURF_SET(array_mode, MODE);

   switch (pCreateInfo->imageType) {
   case VK_IMAGE_TYPE_1D:
      if (pCreateInfo->arrayLayers > 1)
         flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_1D_ARRAY, TYPE);
      else
         flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_1D, TYPE);
      break;
   case VK_IMAGE_TYPE_2D:
      if (pCreateInfo->arrayLayers > 1)
         flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_2D_ARRAY, TYPE);
      else
         flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_2D, TYPE);
      break;
   case VK_IMAGE_TYPE_3D:
      flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_3D, TYPE);
      break;
   default:
      unreachable("unhandled image type");
   }

   /* Required for clearing/initializing a specific layer on GFX8. */
   flags |= RADEON_SURF_CONTIGUOUS_DCC_LAYERS;

   if (is_depth) {
      flags |= RADEON_SURF_ZBUFFER;

      if (is_depth && is_stencil && pdev->info.gfx_level <= GFX8) {
         if (!(pCreateInfo->usage & VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT))
            flags |= RADEON_SURF_NO_RENDER_TARGET;

         /* RADV doesn't support stencil pitch adjustment. As a result there are some spec gaps that
          * are not covered by CTS.
          *
          * For D+S images with pitch constraints due to rendertarget usage it can happen that
          * sampling from mipmaps beyond the base level of the descriptor is broken as the pitch
          * adjustment can't be applied to anything beyond the first level.
          */
         flags |= RADEON_SURF_NO_STENCIL_ADJUST;
      }

      if (radv_use_htile_for_image(device, image, pCreateInfo) && !(flags & RADEON_SURF_NO_RENDER_TARGET)) {
         if (radv_use_tc_compat_htile_for_image(device, pCreateInfo, image_format))
            flags |= RADEON_SURF_TC_COMPATIBLE_HTILE;
      } else {
         flags |= RADEON_SURF_NO_HTILE;
      }
   }

   if (is_stencil)
      flags |= RADEON_SURF_SBUFFER;

   if (pdev->info.gfx_level >= GFX9 && pCreateInfo->imageType == VK_IMAGE_TYPE_3D &&
       vk_format_get_blocksizebits(image_format) == 128 && vk_format_is_compressed(image_format))
      flags |= RADEON_SURF_NO_RENDER_TARGET;

   if (!radv_use_dcc_for_image_early(device, image, pCreateInfo, image_format, &image->dcc_sign_reinterpret))
      flags |= RADEON_SURF_DISABLE_DCC;

   if (!radv_use_fmask_for_image(device, image))
      flags |= RADEON_SURF_NO_FMASK;

   if (pCreateInfo->flags & VK_IMAGE_CREATE_SPARSE_RESIDENCY_BIT) {
      flags |= RADEON_SURF_PRT | RADEON_SURF_NO_FMASK | RADEON_SURF_NO_HTILE | RADEON_SURF_DISABLE_DCC;
   }

   if (image->queue_family_mask & BITFIELD_BIT(RADV_QUEUE_TRANSFER)) {
      if (!pdev->info.sdma_supports_compression)
         flags |= RADEON_SURF_DISABLE_DCC | RADEON_SURF_NO_HTILE;
   }

   /* Disable DCC for VRS rate images because the hw can't handle compression. */
   if (pCreateInfo->usage & VK_IMAGE_USAGE_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT_KHR)
      flags |= RADEON_SURF_VRS_RATE | RADEON_SURF_DISABLE_DCC;
   if (!(pCreateInfo->usage & (VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_STORAGE_BIT)))
      flags |= RADEON_SURF_NO_TEXTURE;

   if (alignment && alignment->maximumRequestedAlignment && !(instance->debug_flags & RADV_DEBUG_FORCE_COMPRESS)) {
      bool is_4k_capable;

      if (!vk_format_is_depth_or_stencil(image_format)) {
         is_4k_capable =
            !(pCreateInfo->usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) && (flags & RADEON_SURF_DISABLE_DCC) &&
            (flags & RADEON_SURF_NO_FMASK);
      } else {
         /* Depth-stencil format without DEPTH_STENCIL usage does not work either. */
         is_4k_capable = false;
      }

      if (is_4k_capable && alignment->maximumRequestedAlignment <= 4096)
         flags |= RADEON_SURF_PREFER_4K_ALIGNMENT;
      if (alignment->maximumRequestedAlignment <= 64 * 1024)
         flags |= RADEON_SURF_PREFER_64K_ALIGNMENT;
   }

   return flags;
}

void
radv_compose_swizzle(const struct util_format_description *desc, const VkComponentMapping *mapping,
                     enum pipe_swizzle swizzle[4])
{
   if (desc->format == PIPE_FORMAT_R64_UINT || desc->format == PIPE_FORMAT_R64_SINT) {
      /* 64-bit formats only support storage images and storage images
       * require identity component mappings. We use 32-bit
       * instructions to access 64-bit images, so we need a special
       * case here.
       *
       * The zw components are 1,0 so that they can easily be used
       * by loads to create the w component, which has to be 0 for
       * NULL descriptors.
       */
      swizzle[0] = PIPE_SWIZZLE_X;
      swizzle[1] = PIPE_SWIZZLE_Y;
      swizzle[2] = PIPE_SWIZZLE_1;
      swizzle[3] = PIPE_SWIZZLE_0;
   } else if (!mapping) {
      for (unsigned i = 0; i < 4; i++)
         swizzle[i] = desc->swizzle[i];
   } else if (desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) {
      const unsigned char swizzle_xxxx[4] = {PIPE_SWIZZLE_X, PIPE_SWIZZLE_0, PIPE_SWIZZLE_0, PIPE_SWIZZLE_1};
      vk_format_compose_swizzles(mapping, swizzle_xxxx, swizzle);
   } else {
      vk_format_compose_swizzles(mapping, desc->swizzle, swizzle);
   }
}

static void
radv_query_opaque_metadata(struct radv_device *device, struct radv_image *image, unsigned plane_id,
                           struct radeon_bo_metadata *md)
{
   const struct radv_physical_device *pdev = radv_device_physical(device);
   const struct radv_instance *instance = radv_physical_device_instance(pdev);
   static const VkComponentMapping fixedmapping;
   const VkFormat plane_format = radv_image_get_plane_format(pdev, image, plane_id);
   const unsigned plane_width = vk_format_get_plane_width(image->vk.format, plane_id, image->vk.extent.width);
   const unsigned plane_height = vk_format_get_plane_height(image->vk.format, plane_id, image->vk.extent.height);
   struct radeon_surf *surface = &image->planes[plane_id].surface;
   const struct legacy_surf_level *base_level_info = pdev->info.gfx_level <= GFX8 ? &surface->u.legacy.level[0] : NULL;
   uint32_t desc[8];

   radv_make_texture_descriptor(device, image, false, (VkImageViewType)image->vk.image_type, plane_format,
                                &fixedmapping, 0, image->vk.mip_levels - 1, 0, image->vk.array_layers - 1, plane_width,
                                plane_height, image->vk.extent.depth, 0.0f, desc, NULL, 0, NULL, NULL);

   radv_set_mutable_tex_desc_fields(device, image, base_level_info, plane_id, 0, 0, surface->blk_w, false, false, false,
                                    false, desc, NULL);

   ac_surface_compute_umd_metadata(&pdev->info, surface, image->vk.mip_levels, desc, &md->size_metadata, md->metadata,
                                   instance->debug_flags & RADV_DEBUG_EXTRA_MD);
}

void
radv_init_metadata(struct radv_device *device, struct radv_image *image, struct radeon_bo_metadata *metadata)
{
   const struct radv_physical_device *pdev = radv_device_physical(device);

   /* use plane 0, even when there are multiple planes, to follow radeonsi */
   const unsigned plane_id = 0;
   struct radeon_surf *surface = &image->planes[plane_id].surface;

   memset(metadata, 0, sizeof(*metadata));

   if (pdev->info.gfx_level >= GFX9) {
      uint64_t dcc_offset =
         image->bindings[0].offset + (surface->display_dcc_offset ? surface->display_dcc_offset : surface->meta_offset);
      metadata->u.gfx9.swizzle_mode = surface->u.gfx9.swizzle_mode;
      metadata->u.gfx9.dcc_offset_256b = dcc_offset >> 8;
      metadata->u.gfx9.dcc_pitch_max = surface->u.gfx9.color.display_dcc_pitch_max;
      metadata->u.gfx9.dcc_independent_64b_blocks = surface->u.gfx9.color.dcc.independent_64B_blocks;
      metadata->u.gfx9.dcc_independent_128b_blocks = surface->u.gfx9.color.dcc.independent_128B_blocks;
      metadata->u.gfx9.dcc_max_compressed_block_size = surface->u.gfx9.color.dcc.max_compressed_block_size;
      metadata->u.gfx9.scanout = (surface->flags & RADEON_SURF_SCANOUT) != 0;
   } else {
      metadata->u.legacy.microtile =
         surface->u.legacy.level[0].mode >= RADEON_SURF_MODE_1D ? RADEON_LAYOUT_TILED : RADEON_LAYOUT_LINEAR;
      metadata->u.legacy.macrotile =
         surface->u.legacy.level[0].mode >= RADEON_SURF_MODE_2D ? RADEON_LAYOUT_TILED : RADEON_LAYOUT_LINEAR;
      metadata->u.legacy.pipe_config = surface->u.legacy.pipe_config;
      metadata->u.legacy.bankw = surface->u.legacy.bankw;
      metadata->u.legacy.bankh = surface->u.legacy.bankh;
      metadata->u.legacy.tile_split = surface->u.legacy.tile_split;
      metadata->u.legacy.mtilea = surface->u.legacy.mtilea;
      metadata->u.legacy.num_banks = surface->u.legacy.num_banks;
      metadata->u.legacy.stride = surface->u.legacy.level[0].nblk_x * surface->bpe;
      metadata->u.legacy.scanout = (surface->flags & RADEON_SURF_SCANOUT) != 0;
   }
   radv_query_opaque_metadata(device, image, plane_id, metadata);
}

void
radv_image_override_offset_stride(struct radv_device *device, struct radv_image *image, uint64_t offset,
                                  uint32_t stride)
{
   const struct radv_physical_device *pdev = radv_device_physical(device);
   ac_surface_override_offset_stride(&pdev->info, &image->planes[0].surface, image->vk.array_layers,
                                     image->vk.mip_levels, offset, stride);
}

static void
radv_image_alloc_single_sample_cmask(const struct radv_device *device, const struct radv_image *image,
                                     struct radeon_surf *surf)
{
   if (!surf->cmask_size || surf->cmask_offset || surf->bpe > 8 || image->vk.mip_levels > 1 ||
       image->vk.extent.depth > 1 || radv_image_has_dcc(image) || !radv_image_use_fast_clear_for_image(device, image) ||
       (image->vk.create_flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT))
      return;

   assert(image->vk.samples == 1);

   surf->cmask_offset = align64(surf->total_size, 1ull << surf->cmask_alignment_log2);
   surf->total_size = surf->cmask_offset + surf->cmask_size;
   surf->alignment_log2 = MAX2(surf->alignment_log2, surf->cmask_alignment_log2);
}

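/* Reserve space at the end of the image for the per-mip metadata values the
 * driver maintains itself: fast-clear-eliminate and DCC predicates, clear
 * values, and the TC-compat zrange workaround words.
 */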
static void
radv_image_alloc_values(const struct radv_device *device, struct radv_image *image)
{
   const struct radv_physical_device *pdev = radv_device_physical(device);

   /* Images with modifiers can be potentially imported. */
   if (image->vk.tiling == VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT)
      return;

   if (radv_image_has_cmask(image) || (radv_image_has_dcc(image) && !image->support_comp_to_single)) {
      image->fce_pred_offset = image->size;
      image->size += 8 * image->vk.mip_levels;
   }

   if (radv_image_use_dcc_predication(device, image)) {
      image->dcc_pred_offset = image->size;
      image->size += 8 * image->vk.mip_levels;
   }

   if ((radv_image_has_dcc(image) && !image->support_comp_to_single) || radv_image_has_cmask(image) ||
       radv_image_has_htile(image)) {
      image->clear_value_offset = image->size;
      image->size += 8 * image->vk.mip_levels;
   }

   if (radv_image_is_tc_compat_htile(image) && pdev->info.has_tc_compat_zrange_bug) {
      /* Metadata for the TC-compatible HTILE hardware bug which
       * has to be fixed by updating ZRANGE_PRECISION when doing
       * fast depth clears to 0.0f.
       */
      image->tc_compat_zrange_offset = image->size;
      image->size += image->vk.mip_levels * 4;
   }
}

871
872 /* Determine if the image is affected by the pipe misaligned metadata issue
873 * which requires to invalidate L2.
874 */
875 static bool
radv_image_is_pipe_misaligned(const struct radv_device * device,const struct radv_image * image)876 radv_image_is_pipe_misaligned(const struct radv_device *device, const struct radv_image *image)
877 {
878 const struct radv_physical_device *pdev = radv_device_physical(device);
879 const struct radeon_info *gpu_info = &pdev->info;
880 int log2_samples = util_logbase2(image->vk.samples);
881
882 assert(gpu_info->gfx_level >= GFX10);
883
884 for (unsigned i = 0; i < image->plane_count; ++i) {
885 VkFormat fmt = radv_image_get_plane_format(pdev, image, i);
886 int log2_bpp = util_logbase2(vk_format_get_blocksize(fmt));
887 int log2_bpp_and_samples;
888
889 if (gpu_info->gfx_level >= GFX10_3) {
890 log2_bpp_and_samples = log2_bpp + log2_samples;
891 } else {
892 if (vk_format_has_depth(image->vk.format) && image->vk.array_layers >= 8) {
893 log2_bpp = 2;
894 }
895
896 log2_bpp_and_samples = MIN2(6, log2_bpp + log2_samples);
897 }
898
899 int num_pipes = G_0098F8_NUM_PIPES(gpu_info->gb_addr_config);
900 int overlap = MAX2(0, log2_bpp_and_samples + num_pipes - 8);
901
902 if (vk_format_has_depth(image->vk.format)) {
903 if (radv_image_is_tc_compat_htile(image) && overlap) {
904 return true;
905 }
906 } else {
907 int max_compressed_frags = G_0098F8_MAX_COMPRESSED_FRAGS(gpu_info->gb_addr_config);
908 int log2_samples_frag_diff = MAX2(0, log2_samples - max_compressed_frags);
909 int samples_overlap = MIN2(log2_samples, overlap);
910
911 /* TODO: It shouldn't be necessary if the image has DCC but
912 * not readable by shader.
913 */
914 if ((radv_image_has_dcc(image) || radv_image_is_tc_compat_cmask(image)) &&
915 (samples_overlap > log2_samples_frag_diff)) {
916 return true;
917 }
918 }
919 }
920
921 return false;
922 }
923
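/* An image is considered "L2 coherent" when no L2 flush/invalidate appears
 * to be needed between rendering and shader access; the rules below are per
 * gfx generation.
 */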
static bool
radv_image_is_l2_coherent(const struct radv_device *device, const struct radv_image *image)
{
   const struct radv_physical_device *pdev = radv_device_physical(device);

   if (pdev->info.gfx_level >= GFX12) {
      return true; /* Everything is coherent with TC L2. */
   } else if (pdev->info.gfx_level >= GFX10) {
      return !pdev->info.tcc_rb_non_coherent && !radv_image_is_pipe_misaligned(device, image);
   } else if (pdev->info.gfx_level == GFX9) {
      if (image->vk.samples == 1 &&
          (image->vk.usage & (VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT)) &&
          !vk_format_has_stencil(image->vk.format)) {
         /* Single-sample color and single-sample depth
          * (not stencil) are coherent with shaders on
          * GFX9.
          */
         return true;
      }
   }

   return false;
}

/**
 * Determine if the given image can be fast cleared.
 */
bool
radv_image_can_fast_clear(const struct radv_device *device, const struct radv_image *image)
{
   const struct radv_physical_device *pdev = radv_device_physical(device);
   const struct radv_instance *instance = radv_physical_device_instance(pdev);

   if (instance->debug_flags & RADV_DEBUG_NO_FAST_CLEARS)
      return false;

   if (vk_format_is_color(image->vk.format)) {
      if (!radv_image_has_cmask(image) && !radv_image_has_dcc(image))
         return false;

      /* RB+ doesn't work with CMASK fast clear on Stoney. */
      if (!radv_image_has_dcc(image) && pdev->info.family == CHIP_STONEY)
         return false;

      /* Fast-clears with CMASK aren't supported for 128-bit formats. */
      if (radv_image_has_cmask(image) && vk_format_get_blocksizebits(image->vk.format) > 64)
         return false;
   } else {
      if (!radv_image_has_htile(image))
         return false;
   }

   /* Do not fast clear 3D images. */
   if (image->vk.image_type == VK_IMAGE_TYPE_3D)
      return false;

   return true;
}

/**
 * Determine if the given image can be fast cleared using comp-to-single.
 */
static bool
radv_image_use_comp_to_single(const struct radv_device *device, const struct radv_image *image)
{
   const struct radv_physical_device *pdev = radv_device_physical(device);

   /* comp-to-single is only available for GFX10+. */
   if (pdev->info.gfx_level < GFX10)
      return false;

   /* If the image can't be fast cleared, comp-to-single can't be used. */
   if (!radv_image_can_fast_clear(device, image))
      return false;

   /* If the image doesn't have DCC, it can't be fast cleared using comp-to-single. */
   if (!radv_image_has_dcc(image))
      return false;

   /* It seems 8bpp and 16bpp require RB+ to work. */
   unsigned bytes_per_pixel = vk_format_get_blocksize(image->vk.format);
   if (bytes_per_pixel <= 2 && !pdev->info.rbplus_allowed)
      return false;

   return true;
}

static unsigned
radv_get_internal_plane_count(const struct radv_physical_device *pdev, VkFormat fmt)
{
   if (radv_is_format_emulated(pdev, fmt))
      return 2;
   return vk_format_get_plane_count(fmt);
}

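/* Reset the per-plane layout state before (re)computing the surface, while
 * preserving the surface flags and modifier that were already chosen.
 */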
static void
radv_image_reset_layout(const struct radv_physical_device *pdev, struct radv_image *image)
{
   image->size = 0;
   image->alignment = 1;

   image->tc_compatible_cmask = 0;
   image->fce_pred_offset = image->dcc_pred_offset = 0;
   image->clear_value_offset = image->tc_compat_zrange_offset = 0;

   unsigned plane_count = radv_get_internal_plane_count(pdev, image->vk.format);
   for (unsigned i = 0; i < plane_count; ++i) {
      VkFormat format = radv_image_get_plane_format(pdev, image, i);
      if (vk_format_has_depth(format))
         format = vk_format_depth_only(format);

      uint64_t flags = image->planes[i].surface.flags;
      uint64_t modifier = image->planes[i].surface.modifier;
      memset(image->planes + i, 0, sizeof(image->planes[i]));

      image->planes[i].surface.flags = flags;
      image->planes[i].surface.modifier = modifier;
      image->planes[i].surface.blk_w = vk_format_get_blockwidth(format);
      image->planes[i].surface.blk_h = vk_format_get_blockheight(format);
      image->planes[i].surface.bpe = vk_format_get_blocksize(format);

      /* Align bytes-per-element to a dword. */
      if (image->planes[i].surface.bpe == 3) {
         image->planes[i].surface.bpe = 4;
      }
   }
}

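/* Gather the ac_surf_info that addrlib needs from the Vulkan-level image
 * properties. The MRT surface index counter is only handed out for images
 * whose placement the driver fully controls.
 */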
struct ac_surf_info
radv_get_ac_surf_info(struct radv_device *device, const struct radv_image *image)
{
   struct ac_surf_info info;

   memset(&info, 0, sizeof(info));

   info.width = image->vk.extent.width;
   info.height = image->vk.extent.height;
   info.depth = image->vk.extent.depth;
   info.samples = image->vk.samples;
   info.storage_samples = image->vk.samples;
   info.array_size = image->vk.array_layers;
   info.levels = image->vk.mip_levels;
   info.num_channels = vk_format_get_nr_components(image->vk.format);

   if (!vk_format_is_depth_or_stencil(image->vk.format) && !image->shareable &&
       !(image->vk.create_flags & (VK_IMAGE_CREATE_SPARSE_ALIASED_BIT | VK_IMAGE_CREATE_ALIAS_BIT)) &&
       image->vk.tiling != VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT) {
      info.surf_index = &device->image_mrt_offset_counter;
   }

   return info;
}

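/* Compute the full memory layout of the image: per-plane surfaces, plane
 * offsets/strides (or the explicit modifier layout), optional single-sample
 * CMASK, and the driver-internal metadata values.
 */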
VkResult
radv_image_create_layout(struct radv_device *device, struct radv_image_create_info create_info,
                         const struct VkImageDrmFormatModifierExplicitCreateInfoEXT *mod_info,
                         const struct VkVideoProfileListInfoKHR *profile_list, struct radv_image *image)
{
   struct radv_physical_device *pdev = radv_device_physical(device);

   /* Clear the pCreateInfo pointer so we catch issues in the delayed case when we test in the
    * common internal case. */
   create_info.vk_info = NULL;

   struct ac_surf_info image_info = radv_get_ac_surf_info(device, image);
   VkResult result = radv_patch_image_from_extra_info(device, image, &create_info, &image_info);
   if (result != VK_SUCCESS)
      return result;

   assert(!mod_info || mod_info->drmFormatModifierPlaneCount >= image->plane_count);

   radv_image_reset_layout(pdev, image);

   /*
    * Due to how the decoder works, the user can't supply an oversized image, because if it attempts
    * to sample it later with a linear filter, it will get garbage after the height it wants,
    * so we let the user specify the width/height unaligned, and align them here before allocation.
    */
   if (image->vk.usage & (VK_IMAGE_USAGE_VIDEO_DECODE_DST_BIT_KHR |
                          VK_IMAGE_USAGE_VIDEO_DECODE_DPB_BIT_KHR |
                          VK_IMAGE_USAGE_VIDEO_ENCODE_DPB_BIT_KHR)) {
      if (!device->vk.enabled_features.videoMaintenance1)
         assert(profile_list);
      uint32_t width_align, height_align;
      radv_video_get_profile_alignments(pdev, profile_list, &width_align, &height_align);
      image_info.width = align(image_info.width, width_align);
      image_info.height = align(image_info.height, height_align);

      if (radv_has_uvd(pdev) && image->vk.usage & VK_IMAGE_USAGE_VIDEO_DECODE_DPB_BIT_KHR) {
         /* UVD and kernel demand a full DPB allocation. */
         image_info.array_size = MIN2(16, image_info.array_size);
      }
   }

   unsigned plane_count = radv_get_internal_plane_count(pdev, image->vk.format);
   for (unsigned plane = 0; plane < plane_count; ++plane) {
      struct ac_surf_info info = image_info;
      uint64_t offset;
      unsigned stride;

      info.width = vk_format_get_plane_width(image->vk.format, plane, info.width);
      info.height = vk_format_get_plane_height(image->vk.format, plane, info.height);

      if (create_info.no_metadata_planes || plane_count > 1) {
         image->planes[plane].surface.flags |= RADEON_SURF_DISABLE_DCC | RADEON_SURF_NO_FMASK | RADEON_SURF_NO_HTILE;
      }

      device->ws->surface_init(device->ws, &info, &image->planes[plane].surface);

      if (plane == 0) {
         if (!radv_use_dcc_for_image_late(device, image))
            ac_surface_zero_dcc_fields(&image->planes[0].surface);
      }

      if (create_info.bo_metadata && !mod_info &&
          !ac_surface_apply_umd_metadata(&pdev->info, &image->planes[plane].surface, image->vk.samples,
                                         image->vk.mip_levels, create_info.bo_metadata->size_metadata,
                                         create_info.bo_metadata->metadata))
         return VK_ERROR_INVALID_EXTERNAL_HANDLE;

      if (!create_info.no_metadata_planes && !create_info.bo_metadata && plane_count == 1 && !mod_info)
         radv_image_alloc_single_sample_cmask(device, image, &image->planes[plane].surface);

      if (mod_info) {
         if (mod_info->pPlaneLayouts[plane].rowPitch % image->planes[plane].surface.bpe ||
             !mod_info->pPlaneLayouts[plane].rowPitch)
            return VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT;

         offset = mod_info->pPlaneLayouts[plane].offset;
         stride = mod_info->pPlaneLayouts[plane].rowPitch / image->planes[plane].surface.bpe;
      } else {
         offset = image->disjoint ? 0 : align64(image->size, 1ull << image->planes[plane].surface.alignment_log2);
         stride = 0; /* 0 means no override */
      }

      if (!ac_surface_override_offset_stride(&pdev->info, &image->planes[plane].surface, image->vk.array_layers,
                                             image->vk.mip_levels, offset, stride))
         return VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT;

      /* Validate DCC offsets in modifier layout. */
      if (plane_count == 1 && mod_info) {
         unsigned mem_planes = ac_surface_get_nplanes(&image->planes[plane].surface);
         if (mod_info->drmFormatModifierPlaneCount != mem_planes)
            return VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT;

         for (unsigned i = 1; i < mem_planes; ++i) {
            if (ac_surface_get_plane_offset(pdev->info.gfx_level, &image->planes[plane].surface, i, 0) !=
                mod_info->pPlaneLayouts[i].offset)
               return VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT;
         }
      }

      image->size = MAX2(image->size, offset + image->planes[plane].surface.total_size);
      image->alignment = MAX2(image->alignment, 1 << image->planes[plane].surface.alignment_log2);

      image->planes[plane].format = radv_image_get_plane_format(pdev, image, plane);
   }

   image->tc_compatible_cmask = radv_image_has_cmask(image) && radv_use_tc_compat_cmask_for_image(device, image);

   image->l2_coherent = radv_image_is_l2_coherent(device, image);

   image->support_comp_to_single = radv_image_use_comp_to_single(device, image);

   radv_image_alloc_values(device, image);

   assert(image->planes[0].surface.surf_size);
   assert(image->planes[0].surface.modifier == DRM_FORMAT_MOD_INVALID ||
          ac_modifier_has_dcc(image->planes[0].surface.modifier) == radv_image_has_dcc(image));
   return VK_SUCCESS;
}

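/* Tear down everything radv_image_create may have allocated: the sparse
 * backing BO, driver-owned memory, address-binding reports and the vk_image
 * itself.
 */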
static void
radv_destroy_image(struct radv_device *device, const VkAllocationCallbacks *pAllocator, struct radv_image *image)
{
   struct radv_physical_device *pdev = radv_device_physical(device);
   struct radv_instance *instance = radv_physical_device_instance(pdev);

   if ((image->vk.create_flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT) && image->bindings[0].bo)
      radv_bo_destroy(device, &image->vk.base, image->bindings[0].bo);

   if (image->owned_memory != VK_NULL_HANDLE) {
      VK_FROM_HANDLE(radv_device_memory, mem, image->owned_memory);
      radv_free_memory(device, pAllocator, mem);
   }

   for (uint32_t i = 0; i < ARRAY_SIZE(image->bindings); i++) {
      if (!image->bindings[i].bo_va)
         continue;

      vk_address_binding_report(&instance->vk, &image->vk.base, image->bindings[i].bo_va + image->bindings[i].offset,
                                image->bindings[i].range, VK_DEVICE_ADDRESS_BINDING_TYPE_UNBIND_EXT);
   }

   radv_rmv_log_resource_destroy(device, (uint64_t)radv_image_to_handle(image));
   vk_image_finish(&image->vk);
   vk_free2(&device->vk.alloc, pAllocator, image);
}

static void
radv_image_print_info(struct radv_device *device, struct radv_image *image)
{
   const struct radv_physical_device *pdev = radv_device_physical(device);

   fprintf(stderr, "Image:\n");
   fprintf(stderr,
           " Info: size=%" PRIu64 ", alignment=%" PRIu32 ", "
           "width=%" PRIu32 ", height=%" PRIu32 ", depth=%" PRIu32 ", "
           "array_size=%" PRIu32 ", levels=%" PRIu32 "\n",
           image->size, image->alignment, image->vk.extent.width, image->vk.extent.height, image->vk.extent.depth,
           image->vk.array_layers, image->vk.mip_levels);
   for (unsigned i = 0; i < image->plane_count; ++i) {
      const struct radv_image_plane *plane = &image->planes[i];
      const struct radeon_surf *surf = &plane->surface;
      const struct util_format_description *desc = vk_format_description(plane->format);
      uint64_t offset = ac_surface_get_plane_offset(pdev->info.gfx_level, &plane->surface, 0, 0);

      fprintf(stderr, " Plane[%u]: vkformat=%s, offset=%" PRIu64 "\n", i, desc->name, offset);

      ac_surface_print_info(stderr, &pdev->info, surf);
   }
}

static uint64_t
radv_select_modifier(const struct radv_device *dev, VkFormat format,
                     const struct VkImageDrmFormatModifierListCreateInfoEXT *mod_list)
{
   const struct radv_physical_device *pdev = radv_device_physical(dev);
   unsigned mod_count;

   assert(mod_list->drmFormatModifierCount);

   /* We can allow everything here as it does not affect order and the application
    * is only allowed to specify modifiers that we support. */
   const struct ac_modifier_options modifier_options = {
      .dcc = true,
      .dcc_retile = true,
   };

   ac_get_supported_modifiers(&pdev->info, &modifier_options, vk_format_to_pipe_format(format), &mod_count, NULL);

   uint64_t *mods = calloc(mod_count, sizeof(*mods));

   /* If allocations fail, fall back to a dumber solution. */
   if (!mods)
      return mod_list->pDrmFormatModifiers[0];

   ac_get_supported_modifiers(&pdev->info, &modifier_options, vk_format_to_pipe_format(format), &mod_count, mods);

   for (unsigned i = 0; i < mod_count; ++i) {
      for (uint32_t j = 0; j < mod_list->drmFormatModifierCount; ++j) {
         if (mods[i] == mod_list->pDrmFormatModifiers[j]) {
            free(mods);
            return mod_list->pDrmFormatModifiers[j];
         }
      }
   }
   unreachable("App specified an invalid modifier");
}

1284 VkResult
radv_image_create(VkDevice _device,const struct radv_image_create_info * create_info,const VkAllocationCallbacks * alloc,VkImage * pImage,bool is_internal)1285 radv_image_create(VkDevice _device, const struct radv_image_create_info *create_info,
1286 const VkAllocationCallbacks *alloc, VkImage *pImage, bool is_internal)
1287 {
1288 VK_FROM_HANDLE(radv_device, device, _device);
1289 const struct radv_physical_device *pdev = radv_device_physical(device);
1290 const struct radv_instance *instance = radv_physical_device_instance(pdev);
1291 const VkImageCreateInfo *pCreateInfo = create_info->vk_info;
1292 uint64_t modifier = DRM_FORMAT_MOD_INVALID;
1293 struct radv_image *image = NULL;
1294 VkFormat format = radv_select_android_external_format(pCreateInfo->pNext, pCreateInfo->format);
1295 const struct VkImageDrmFormatModifierListCreateInfoEXT *mod_list =
1296 vk_find_struct_const(pCreateInfo->pNext, IMAGE_DRM_FORMAT_MODIFIER_LIST_CREATE_INFO_EXT);
1297 const struct VkImageDrmFormatModifierExplicitCreateInfoEXT *explicit_mod =
1298 vk_find_struct_const(pCreateInfo->pNext, IMAGE_DRM_FORMAT_MODIFIER_EXPLICIT_CREATE_INFO_EXT);
1299 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO);
1300 const struct VkVideoProfileListInfoKHR *profile_list =
1301 vk_find_struct_const(pCreateInfo->pNext, VIDEO_PROFILE_LIST_INFO_KHR);
1302
1303 unsigned plane_count = radv_get_internal_plane_count(pdev, format);
1304
   const size_t image_struct_size = sizeof(*image) + sizeof(struct radv_image_plane) * plane_count;

   image = vk_zalloc2(&device->vk.alloc, alloc, image_struct_size, 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (!image)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   vk_image_init(&device->vk, &image->vk, pCreateInfo);

   image->plane_count = vk_format_get_plane_count(format);
   image->disjoint = image->plane_count > 1 && (pCreateInfo->flags & VK_IMAGE_CREATE_DISJOINT_BIT);

   image->exclusive = pCreateInfo->sharingMode == VK_SHARING_MODE_EXCLUSIVE;
   if (pCreateInfo->sharingMode == VK_SHARING_MODE_CONCURRENT) {
      for (uint32_t i = 0; i < pCreateInfo->queueFamilyIndexCount; ++i)
         if (pCreateInfo->pQueueFamilyIndices[i] == VK_QUEUE_FAMILY_EXTERNAL ||
             pCreateInfo->pQueueFamilyIndices[i] == VK_QUEUE_FAMILY_FOREIGN_EXT)
            image->queue_family_mask |= (1u << RADV_MAX_QUEUE_FAMILIES) - 1u;
         else
            image->queue_family_mask |= 1u << vk_queue_to_radv(pdev, pCreateInfo->pQueueFamilyIndices[i]);

      /* The sparse queue never actually accesses the image. */
      image->queue_family_mask &= ~(1u << RADV_QUEUE_SPARSE);
   }

   const VkExternalMemoryImageCreateInfo *external_info =
      vk_find_struct_const(pCreateInfo->pNext, EXTERNAL_MEMORY_IMAGE_CREATE_INFO);

   image->shareable = external_info != NULL;

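   /* Pick the modifier before computing the surface layouts so that every
    * plane is laid out for the same modifier. */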
   if (mod_list)
      modifier = radv_select_modifier(device, format, mod_list);
   else if (explicit_mod)
      modifier = explicit_mod->drmFormatModifier;

   for (unsigned plane = 0; plane < plane_count; ++plane) {
      image->planes[plane].surface.flags = radv_get_surface_flags(device, image, plane, pCreateInfo, format);
      image->planes[plane].surface.modifier = modifier;
   }

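   /* Android hardware buffer images return early: their layout depends on the
    * allocated buffer, so it is presumably derived later, once the AHB is
    * actually imported and bound, rather than here. */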
   if (image->vk.external_handle_types & VK_EXTERNAL_MEMORY_HANDLE_TYPE_ANDROID_HARDWARE_BUFFER_BIT_ANDROID) {
#if DETECT_OS_ANDROID
      image->vk.ahb_format = radv_ahb_format_for_vk_format(image->vk.format);
#endif

      *pImage = radv_image_to_handle(image);
      assert(!(image->vk.create_flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT));
      return VK_SUCCESS;
   }

   VkResult result = radv_image_create_layout(device, *create_info, explicit_mod, profile_list, image);
   if (result != VK_SUCCESS) {
      radv_destroy_image(device, alloc, image);
      return result;
   }

   if (image->vk.create_flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT) {
      image->alignment = MAX2(image->alignment, 4096);
      image->size = align64(image->size, image->alignment);
      image->bindings[0].offset = 0;

      result = radv_bo_create(device, &image->vk.base, image->size, image->alignment, 0, RADEON_FLAG_VIRTUAL,
                              RADV_BO_PRIORITY_VIRTUAL, 0, true, &image->bindings[0].bo);
      if (result != VK_SUCCESS) {
         radv_destroy_image(device, alloc, image);
         return vk_error(device, result);
      }
   }

   if (instance->debug_flags & RADV_DEBUG_IMG) {
      radv_image_print_info(device, image);
   }

   *pImage = radv_image_to_handle(image);

   radv_rmv_log_image_create(device, pCreateInfo, is_internal, *pImage);
   if (image->bindings[0].bo)
      radv_rmv_log_image_bind(device, 0, *pImage);
   return VK_SUCCESS;
}

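/* Map a plane/memory-plane aspect to its plane index. Every other aspect,
 * including PLANE_0 and MEMORY_PLANE_0, falls through to plane 0. */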
unsigned
radv_plane_from_aspect(VkImageAspectFlags mask)
{
   switch (mask) {
   case VK_IMAGE_ASPECT_PLANE_1_BIT:
   case VK_IMAGE_ASPECT_MEMORY_PLANE_1_BIT_EXT:
      return 1;
   case VK_IMAGE_ASPECT_PLANE_2_BIT:
   case VK_IMAGE_ASPECT_MEMORY_PLANE_2_BIT_EXT:
      return 2;
   case VK_IMAGE_ASPECT_MEMORY_PLANE_3_BIT_EXT:
      return 3;
   default:
      return 0;
   }
}

VkFormat
radv_get_aspect_format(struct radv_image *image, VkImageAspectFlags mask)
{
   switch (mask) {
   case VK_IMAGE_ASPECT_PLANE_0_BIT:
      return image->planes[0].format;
   case VK_IMAGE_ASPECT_PLANE_1_BIT:
      return image->planes[1].format;
   case VK_IMAGE_ASPECT_PLANE_2_BIT:
      return image->planes[2].format;
   case VK_IMAGE_ASPECT_STENCIL_BIT:
      return vk_format_stencil_only(image->vk.format);
   case VK_IMAGE_ASPECT_DEPTH_BIT:
      return vk_format_depth_only(image->vk.format);
   case VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT:
      return vk_format_depth_only(image->vk.format);
   default:
      return image->vk.format;
   }
}

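/* Return whether HTILE remains compressed for the given layout when the image
 * is accessed by the queue families in queue_mask. */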
bool
radv_layout_is_htile_compressed(const struct radv_device *device, const struct radv_image *image, VkImageLayout layout,
                                unsigned queue_mask)
{
   const struct radv_physical_device *pdev = radv_device_physical(device);
   const struct radv_instance *instance = radv_physical_device_instance(pdev);

   /* Don't compress exclusive images used on transfer queues when SDMA doesn't support HTILE.
    * Note that HTILE is already disabled on concurrent images when not supported.
    */
   if (queue_mask == BITFIELD_BIT(RADV_QUEUE_TRANSFER) && !pdev->info.sdma_supports_compression)
      return false;

   switch (layout) {
   case VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL:
   case VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_OPTIMAL:
   case VK_IMAGE_LAYOUT_STENCIL_ATTACHMENT_OPTIMAL:
   case VK_IMAGE_LAYOUT_ATTACHMENT_OPTIMAL:
      return radv_image_has_htile(image);
   case VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL:
      return radv_image_is_tc_compat_htile(image) ||
             (radv_image_has_htile(image) && queue_mask == (1u << RADV_QUEUE_GENERAL));
   case VK_IMAGE_LAYOUT_SHARED_PRESENT_KHR:
   case VK_IMAGE_LAYOUT_GENERAL:
      /* It should be safe to enable TC-compat HTILE with
       * VK_IMAGE_LAYOUT_GENERAL if we are not in a render loop and
       * if the image doesn't have the storage bit set. This
       * improves performance for apps that use GENERAL for the main
       * depth pass because it allows compression and reduces the
       * number of decompressions from/to GENERAL.
       */
      if (radv_image_is_tc_compat_htile(image) && (queue_mask & (1u << RADV_QUEUE_GENERAL)) &&
          !instance->drirc.disable_tc_compat_htile_in_general) {
         return true;
      } else {
         return false;
      }
   case VK_IMAGE_LAYOUT_ATTACHMENT_FEEDBACK_LOOP_OPTIMAL_EXT:
      /* Do not compress HTILE with feedback loops because we can't read and write it without
       * introducing corruption.
       */
      return false;
   case VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL:
   case VK_IMAGE_LAYOUT_READ_ONLY_OPTIMAL:
      if (radv_image_is_tc_compat_htile(image) ||
          (radv_image_has_htile(image) &&
           !(image->vk.usage & (VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT)))) {
         /* Keep HTILE compressed if the image is only going to
          * be used as a depth/stencil read-only attachment.
          */
         return true;
      } else {
         return false;
      }
   default:
      return radv_image_is_tc_compat_htile(image);
   }
}

bool
radv_layout_can_fast_clear(const struct radv_device *device, const struct radv_image *image, unsigned level,
                           VkImageLayout layout, unsigned queue_mask)
{
   if (radv_dcc_enabled(image, level) && !radv_layout_dcc_compressed(device, image, level, layout, queue_mask))
      return false;

   if (!(image->vk.usage & RADV_IMAGE_USAGE_WRITE_BITS))
      return false;

   if (layout != VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL && layout != VK_IMAGE_LAYOUT_ATTACHMENT_OPTIMAL)
      return false;

   /* Exclusive images with CMASK or DCC can always be fast-cleared on the gfx queue. Concurrent
    * images can only be fast-cleared if comp-to-single is supported because we don't yet support
    * FCE on the compute queue.
    */
   return queue_mask == (1u << RADV_QUEUE_GENERAL) || radv_image_use_comp_to_single(device, image);
}

bool
radv_layout_dcc_compressed(const struct radv_device *device, const struct radv_image *image, unsigned level,
                           VkImageLayout layout, unsigned queue_mask)
{
   const struct radv_physical_device *pdev = radv_device_physical(device);

   if (!radv_dcc_enabled(image, level))
      return false;

   if (image->vk.tiling == VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT && (queue_mask & (1u << RADV_QUEUE_FOREIGN)))
      return true;

   /* If the image is read-only, we can always just keep it compressed. */
   if (!(image->vk.usage & RADV_IMAGE_USAGE_WRITE_BITS))
      return true;

   /* Don't compress compute transfer dst when image stores are not supported. */
   if ((layout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL || layout == VK_IMAGE_LAYOUT_GENERAL) &&
       (queue_mask & (1u << RADV_QUEUE_COMPUTE)) && !radv_image_use_dcc_image_stores(device, image))
      return false;

   /* Don't compress exclusive images used on transfer queues when SDMA doesn't support DCC.
    * Note that DCC is already disabled on concurrent images when not supported.
    */
   if (queue_mask == BITFIELD_BIT(RADV_QUEUE_TRANSFER) && !pdev->info.sdma_supports_compression)
      return false;

   if (layout == VK_IMAGE_LAYOUT_ATTACHMENT_FEEDBACK_LOOP_OPTIMAL_EXT) {
      /* Do not compress DCC with feedback loops because we can't read and write it without
       * introducing corruption.
       */
      return false;
   }

   return pdev->info.gfx_level >= GFX10 || layout != VK_IMAGE_LAYOUT_GENERAL;
}

enum radv_fmask_compression
radv_layout_fmask_compression(const struct radv_device *device, const struct radv_image *image, VkImageLayout layout,
                              unsigned queue_mask)
{
   if (!radv_image_has_fmask(image))
      return RADV_FMASK_COMPRESSION_NONE;

   if (layout == VK_IMAGE_LAYOUT_GENERAL)
      return RADV_FMASK_COMPRESSION_NONE;

   /* Don't compress compute transfer dst because image stores ignore FMASK and it needs to be
    * expanded beforehand.
    */
   if (layout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL && (queue_mask & (1u << RADV_QUEUE_COMPUTE)))
      return RADV_FMASK_COMPRESSION_NONE;

   /* Compress images if TC-compat CMASK is enabled. */
   if (radv_image_is_tc_compat_cmask(image))
      return RADV_FMASK_COMPRESSION_FULL;

   switch (layout) {
   case VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL:
   case VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL:
      /* Don't compress images, but there is no need to expand FMASK either. */
      return RADV_FMASK_COMPRESSION_PARTIAL;
   case VK_IMAGE_LAYOUT_ATTACHMENT_FEEDBACK_LOOP_OPTIMAL_EXT:
      /* Don't compress images that are in feedback loops. */
      return RADV_FMASK_COMPRESSION_NONE;
   default:
      /* Don't compress concurrent images. */
      return queue_mask == (1u << RADV_QUEUE_GENERAL) ? RADV_FMASK_COMPRESSION_FULL : RADV_FMASK_COMPRESSION_NONE;
   }
}

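/* Compute the mask of queue families that may access an image. For barriers,
 * `family` is the (already translated) queue family named in the barrier and
 * `queue_family` is the family the command buffer executes on; IGNORED means
 * no ownership transfer, so only the executing family is returned. */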
unsigned
radv_image_queue_family_mask(const struct radv_image *image, enum radv_queue_family family,
                             enum radv_queue_family queue_family)
{
   if (!image->exclusive)
      return image->queue_family_mask;
   if (family == RADV_QUEUE_FOREIGN)
      return ((1u << RADV_MAX_QUEUE_FAMILIES) - 1u) | (1u << RADV_QUEUE_FOREIGN);
   if (family == RADV_QUEUE_IGNORED)
      return 1u << queue_family;
   return 1u << family;
}

bool
radv_image_is_renderable(const struct radv_device *device, const struct radv_image *image)
{
   const struct radv_physical_device *pdev = radv_device_physical(device);

   if (image->vk.format == VK_FORMAT_R32G32B32_UINT || image->vk.format == VK_FORMAT_R32G32B32_SINT ||
       image->vk.format == VK_FORMAT_R32G32B32_SFLOAT)
      return false;

   if (pdev->info.gfx_level >= GFX9 && image->vk.image_type == VK_IMAGE_TYPE_3D &&
       vk_format_get_blocksizebits(image->vk.format) == 128 && vk_format_is_compressed(image->vk.format))
      return false;

   if (image->planes[0].surface.flags & RADEON_SURF_NO_RENDER_TARGET)
      return false;

   return true;
}

VKAPI_ATTR VkResult VKAPI_CALL
radv_CreateImage(VkDevice _device, const VkImageCreateInfo *pCreateInfo, const VkAllocationCallbacks *pAllocator,
                 VkImage *pImage)
{
#if DETECT_OS_ANDROID
   const VkNativeBufferANDROID *gralloc_info = vk_find_struct_const(pCreateInfo->pNext, NATIVE_BUFFER_ANDROID);

   if (gralloc_info)
      return radv_image_from_gralloc(_device, pCreateInfo, gralloc_info, pAllocator, pImage);
#endif

#ifdef RADV_USE_WSI_PLATFORM
   /* Ignore swapchain creation info on Android. Since we don't have a WSI implementation in Mesa
    * there, we would be guaranteed to access an Android object incorrectly.
    */
   VK_FROM_HANDLE(radv_device, device, _device);
   const struct radv_physical_device *pdev = radv_device_physical(device);
   const VkImageSwapchainCreateInfoKHR *swapchain_info =
      vk_find_struct_const(pCreateInfo->pNext, IMAGE_SWAPCHAIN_CREATE_INFO_KHR);
   if (swapchain_info && swapchain_info->swapchain != VK_NULL_HANDLE) {
      return wsi_common_create_swapchain_image(pdev->vk.wsi_device, pCreateInfo, swapchain_info->swapchain, pImage);
   }
#endif

   const struct wsi_image_create_info *wsi_info = vk_find_struct_const(pCreateInfo->pNext, WSI_IMAGE_CREATE_INFO_MESA);
   bool scanout = wsi_info && wsi_info->scanout;
   bool prime_blit_src = wsi_info && wsi_info->blit_src;

   return radv_image_create(_device,
                            &(struct radv_image_create_info){
                               .vk_info = pCreateInfo,
                               .scanout = scanout,
                               .prime_blit_src = prime_blit_src,
                            },
                            pAllocator, pImage, false);
}

VKAPI_ATTR void VKAPI_CALL
radv_DestroyImage(VkDevice _device, VkImage _image, const VkAllocationCallbacks *pAllocator)
{
   VK_FROM_HANDLE(radv_device, device, _device);
   VK_FROM_HANDLE(radv_image, image, _image);

   if (!image)
      return;

   radv_destroy_image(device, pAllocator, image);
}

static void
radv_bind_image_memory(struct radv_device *device, struct radv_image *image, uint32_t bind_idx,
                       struct radeon_winsys_bo *bo, uint64_t offset, uint64_t range)
{
   struct radv_physical_device *pdev = radv_device_physical(device);
   struct radv_instance *instance = radv_physical_device_instance(pdev);

   assert(bind_idx < 3);

   image->bindings[bind_idx].bo = bo;
   image->bindings[bind_idx].offset = offset;
   image->bindings[bind_idx].bo_va = radv_buffer_get_va(bo);
   image->bindings[bind_idx].range = range;

   radv_rmv_log_image_bind(device, bind_idx, radv_image_to_handle(image));

   vk_address_binding_report(&instance->vk, &image->vk.base,
                             image->bindings[bind_idx].bo_va + image->bindings[bind_idx].offset,
                             image->bindings[bind_idx].range, VK_DEVICE_ADDRESS_BINDING_TYPE_BIND_EXT);
}

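/* A minimal sketch of the application-side call this implements, binding one
 * plane of a disjoint image (structure names per the Vulkan spec; `image`,
 * `mem` and `device` are hypothetical handles):
 *
 *    VkBindImagePlaneMemoryInfo plane_info = {
 *       .sType = VK_STRUCTURE_TYPE_BIND_IMAGE_PLANE_MEMORY_INFO,
 *       .planeAspect = VK_IMAGE_ASPECT_PLANE_1_BIT,
 *    };
 *    VkBindImageMemoryInfo bind = {
 *       .sType = VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_INFO,
 *       .pNext = &plane_info,
 *       .image = image,
 *       .memory = mem,
 *       .memoryOffset = 0,
 *    };
 *    vkBindImageMemory2(device, 1, &bind);
 */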
VKAPI_ATTR VkResult VKAPI_CALL
radv_BindImageMemory2(VkDevice _device, uint32_t bindInfoCount, const VkBindImageMemoryInfo *pBindInfos)
{
   VK_FROM_HANDLE(radv_device, device, _device);

   for (uint32_t i = 0; i < bindInfoCount; ++i) {
      VK_FROM_HANDLE(radv_device_memory, mem, pBindInfos[i].memory);
      VK_FROM_HANDLE(radv_image, image, pBindInfos[i].image);
      const VkBindMemoryStatusKHR *status = vk_find_struct_const(pBindInfos[i].pNext, BIND_MEMORY_STATUS_KHR);

      if (status)
         *status->pResult = VK_SUCCESS;

      /* Ignore this struct on Android, we cannot access swapchain structures there. */
#ifdef RADV_USE_WSI_PLATFORM
      const VkBindImageMemorySwapchainInfoKHR *swapchain_info =
         vk_find_struct_const(pBindInfos[i].pNext, BIND_IMAGE_MEMORY_SWAPCHAIN_INFO_KHR);

      if (swapchain_info && swapchain_info->swapchain != VK_NULL_HANDLE) {
         struct radv_image *swapchain_img =
            radv_image_from_handle(wsi_common_get_image(swapchain_info->swapchain, swapchain_info->imageIndex));

         radv_bind_image_memory(device, image, 0, swapchain_img->bindings[0].bo, swapchain_img->bindings[0].offset,
                                swapchain_img->bindings[0].range);
         continue;
      }
#endif

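      /* Disjoint images bind each plane separately; the plane aspect selects
       * which binding slot to fill. */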
      const VkBindImagePlaneMemoryInfo *plane_info = NULL;
      uint32_t bind_idx = 0;

      if (image->disjoint) {
         plane_info = vk_find_struct_const(pBindInfos[i].pNext, BIND_IMAGE_PLANE_MEMORY_INFO);
         bind_idx = radv_plane_from_aspect(plane_info->planeAspect);
      }

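      /* Re-query the (per-plane) memory requirements so the bound range is
       * known and memoryOffset can be validated against the allocation size. */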
      VkImagePlaneMemoryRequirementsInfo plane = {
         .sType = VK_STRUCTURE_TYPE_IMAGE_PLANE_MEMORY_REQUIREMENTS_INFO,
         .planeAspect = plane_info ? plane_info->planeAspect : 0,
      };
      VkImageMemoryRequirementsInfo2 info = {
         .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_REQUIREMENTS_INFO_2,
         .pNext = image->disjoint ? &plane : NULL,
         .image = pBindInfos[i].image,
      };
      VkMemoryRequirements2 reqs = {
         .sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2,
      };

      radv_GetImageMemoryRequirements2(_device, &info, &reqs);

      if (mem->alloc_size) {
         if (pBindInfos[i].memoryOffset + reqs.memoryRequirements.size > mem->alloc_size) {
            if (status)
               *status->pResult = VK_ERROR_UNKNOWN;
            return vk_errorf(device, VK_ERROR_UNKNOWN, "Device memory object too small for the image.\n");
         }
      }

      radv_bind_image_memory(device, image, bind_idx, mem->bo, pBindInfos[i].memoryOffset,
                             reqs.memoryRequirements.size);
   }
   return VK_SUCCESS;
}

VKAPI_ATTR void VKAPI_CALL
radv_GetImageSubresourceLayout2KHR(VkDevice _device, VkImage _image, const VkImageSubresource2KHR *pSubresource,
                                   VkSubresourceLayout2KHR *pLayout)
{
   VK_FROM_HANDLE(radv_image, image, _image);
   VK_FROM_HANDLE(radv_device, device, _device);
   const struct radv_physical_device *pdev = radv_device_physical(device);
   int level = pSubresource->imageSubresource.mipLevel;
   int layer = pSubresource->imageSubresource.arrayLayer;

   const unsigned plane_count = vk_format_get_plane_count(image->vk.format);
   unsigned plane_id = 0;
   if (plane_count > 1)
      plane_id = radv_plane_from_aspect(pSubresource->imageSubresource.aspectMask);

   struct radv_image_plane *plane = &image->planes[plane_id];
   struct radeon_surf *surface = &plane->surface;

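   /* With a DRM format modifier on a single-plane format, the aspect selects
    * a memory plane (main surface or metadata plane), not a format plane. */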
   if (image->vk.tiling == VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT && plane_count == 1) {
      unsigned mem_plane_id = radv_plane_from_aspect(pSubresource->imageSubresource.aspectMask);

      assert(level == 0);
      assert(layer == 0);

      pLayout->subresourceLayout.offset = ac_surface_get_plane_offset(pdev->info.gfx_level, surface, mem_plane_id, 0);
      pLayout->subresourceLayout.rowPitch =
         ac_surface_get_plane_stride(pdev->info.gfx_level, surface, mem_plane_id, level);
      pLayout->subresourceLayout.arrayPitch = 0;
      pLayout->subresourceLayout.depthPitch = 0;
      pLayout->subresourceLayout.size = ac_surface_get_plane_size(surface, mem_plane_id);
   } else if (pdev->info.gfx_level >= GFX9) {
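      /* Only linear surfaces carry explicit per-level offsets in this path;
       * for tiled GFX9+ surfaces the level offset is part of the tiled
       * addressing, hence 0 here. */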
      uint64_t level_offset = surface->is_linear ? surface->u.gfx9.offset[level] : 0;

      pLayout->subresourceLayout.offset =
         ac_surface_get_plane_offset(pdev->info.gfx_level, &plane->surface, 0, layer) + level_offset;
      if (image->vk.format == VK_FORMAT_R32G32B32_UINT || image->vk.format == VK_FORMAT_R32G32B32_SINT ||
          image->vk.format == VK_FORMAT_R32G32B32_SFLOAT) {
         /* Adjust the number of bytes between each row because
          * the pitch is actually the number of components per
          * row.
          */
         pLayout->subresourceLayout.rowPitch = surface->u.gfx9.surf_pitch * surface->bpe / 3;
      } else {
         uint32_t pitch = surface->is_linear ? surface->u.gfx9.pitch[level] : surface->u.gfx9.surf_pitch;

         assert(util_is_power_of_two_nonzero(surface->bpe));
         pLayout->subresourceLayout.rowPitch = pitch * surface->bpe;
      }

      pLayout->subresourceLayout.arrayPitch = surface->u.gfx9.surf_slice_size;
      pLayout->subresourceLayout.depthPitch = surface->u.gfx9.surf_slice_size;
      pLayout->subresourceLayout.size = surface->u.gfx9.surf_slice_size;
      if (image->vk.image_type == VK_IMAGE_TYPE_3D)
         pLayout->subresourceLayout.size *= u_minify(image->vk.extent.depth, level);
   } else {
      pLayout->subresourceLayout.offset = (uint64_t)surface->u.legacy.level[level].offset_256B * 256 +
                                          (uint64_t)surface->u.legacy.level[level].slice_size_dw * 4 * layer;
      pLayout->subresourceLayout.rowPitch = surface->u.legacy.level[level].nblk_x * surface->bpe;
      pLayout->subresourceLayout.arrayPitch = (uint64_t)surface->u.legacy.level[level].slice_size_dw * 4;
      pLayout->subresourceLayout.depthPitch = (uint64_t)surface->u.legacy.level[level].slice_size_dw * 4;
      pLayout->subresourceLayout.size = (uint64_t)surface->u.legacy.level[level].slice_size_dw * 4;
      if (image->vk.image_type == VK_IMAGE_TYPE_3D)
         pLayout->subresourceLayout.size *= u_minify(image->vk.extent.depth, level);
   }

   VkImageCompressionPropertiesEXT *image_compression_props =
      vk_find_struct(pLayout->pNext, IMAGE_COMPRESSION_PROPERTIES_EXT);
   if (image_compression_props) {
      image_compression_props->imageCompressionFixedRateFlags = VK_IMAGE_COMPRESSION_FIXED_RATE_NONE_EXT;

      if (image->vk.aspects & VK_IMAGE_ASPECT_DEPTH_BIT) {
         image_compression_props->imageCompressionFlags =
            radv_image_has_htile(image) ? VK_IMAGE_COMPRESSION_DEFAULT_EXT : VK_IMAGE_COMPRESSION_DISABLED_EXT;
      } else {
         image_compression_props->imageCompressionFlags =
            radv_image_has_dcc(image) ? VK_IMAGE_COMPRESSION_DEFAULT_EXT : VK_IMAGE_COMPRESSION_DISABLED_EXT;
      }
   }
}

VKAPI_ATTR VkResult VKAPI_CALL
radv_GetImageDrmFormatModifierPropertiesEXT(VkDevice _device, VkImage _image,
                                            VkImageDrmFormatModifierPropertiesEXT *pProperties)
{
   VK_FROM_HANDLE(radv_image, image, _image);

   pProperties->drmFormatModifier = image->planes[0].surface.modifier;
   return VK_SUCCESS;
}