xref: /aosp_15_r20/external/mesa3d/src/amd/vulkan/radv_image.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright © 2016 Red Hat.
3  * Copyright © 2016 Bas Nieuwenhuizen
4  *
5  * based in part on anv driver which is:
6  * Copyright © 2015 Intel Corporation
7  *
8  * SPDX-License-Identifier: MIT
9  */
10 
11 #include "radv_image.h"
12 #include "util/u_atomic.h"
13 #include "util/u_debug.h"
14 #include "ac_drm_fourcc.h"
15 #include "ac_formats.h"
16 #include "radv_android.h"
17 #include "radv_buffer.h"
18 #include "radv_buffer_view.h"
19 #include "radv_debug.h"
20 #include "radv_device_memory.h"
21 #include "radv_entrypoints.h"
22 #include "radv_formats.h"
23 #include "radv_image_view.h"
24 #include "radv_radeon_winsys.h"
25 #include "radv_rmv.h"
26 #include "radv_video.h"
27 #include "radv_wsi.h"
28 #include "sid.h"
29 #include "vk_debug_utils.h"
30 #include "vk_format.h"
31 #include "vk_log.h"
32 #include "vk_render_pass.h"
33 #include "vk_util.h"
34 
35 #include "gfx10_format_table.h"
36 
37 static unsigned
radv_choose_tiling(struct radv_device * device,const VkImageCreateInfo * pCreateInfo,VkFormat format)38 radv_choose_tiling(struct radv_device *device, const VkImageCreateInfo *pCreateInfo, VkFormat format)
39 {
40    const struct radv_physical_device *pdev = radv_device_physical(device);
41 
42    if (pCreateInfo->tiling == VK_IMAGE_TILING_LINEAR) {
43       assert(pCreateInfo->samples <= 1);
44       return RADEON_SURF_MODE_LINEAR_ALIGNED;
45    }
46 
47    if (pCreateInfo->usage & (VK_IMAGE_USAGE_VIDEO_DECODE_DST_BIT_KHR | VK_IMAGE_USAGE_VIDEO_DECODE_DPB_BIT_KHR))
48       return RADEON_SURF_MODE_LINEAR_ALIGNED;
49 
50    if (pCreateInfo->usage & (VK_IMAGE_USAGE_VIDEO_ENCODE_DPB_BIT_KHR | VK_IMAGE_USAGE_VIDEO_ENCODE_SRC_BIT_KHR))
51       return RADEON_SURF_MODE_LINEAR_ALIGNED;
52 
53    /* MSAA resources must be 2D tiled. */
54    if (pCreateInfo->samples > 1)
55       return RADEON_SURF_MODE_2D;
56 
57    if (!vk_format_is_compressed(format) && !vk_format_is_depth_or_stencil(format) && pdev->info.gfx_level <= GFX8) {
58       /* this causes hangs in some VK CTS tests on GFX9. */
59       /* Textures with a very small height are recommended to be linear. */
60       if (pCreateInfo->imageType == VK_IMAGE_TYPE_1D ||
61           /* Only very thin and long 2D textures should benefit from
62            * linear_aligned. */
63           (pCreateInfo->extent.width > 8 && pCreateInfo->extent.height <= 2))
64          return RADEON_SURF_MODE_LINEAR_ALIGNED;
65    }
66 
67    return RADEON_SURF_MODE_2D;
68 }
69 
70 static bool
radv_use_tc_compat_htile_for_image(struct radv_device * device,const VkImageCreateInfo * pCreateInfo,VkFormat format)71 radv_use_tc_compat_htile_for_image(struct radv_device *device, const VkImageCreateInfo *pCreateInfo, VkFormat format)
72 {
73    const struct radv_physical_device *pdev = radv_device_physical(device);
74 
75    if (!pdev->info.has_tc_compatible_htile)
76       return false;
77 
78    /* TC-compat HTILE looks broken on Tonga (and Iceland is the same design) and the documented bug
79     * workarounds don't help.
80     */
81    if (pdev->info.family == CHIP_TONGA || pdev->info.family == CHIP_ICELAND)
82       return false;
83 
84    if (pCreateInfo->tiling == VK_IMAGE_TILING_LINEAR)
85       return false;
86 
87    /* Do not enable TC-compatible HTILE if the image isn't readable by a
88     * shader because no texture fetches will happen.
89     */
90    if (!(pCreateInfo->usage &
91          (VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT)))
92       return false;
93 
94    if (pdev->info.gfx_level < GFX9) {
95       /* TC-compat HTILE for MSAA depth/stencil images is broken
96        * on GFX8 because the tiling doesn't match.
97        */
98       if (pCreateInfo->samples >= 2 && format == VK_FORMAT_D32_SFLOAT_S8_UINT)
99          return false;
100 
101       /* GFX9+ supports compression for both 32-bit and 16-bit depth
102        * surfaces, while GFX8 only supports 32-bit natively. Though,
103        * the driver allows TC-compat HTILE for 16-bit depth surfaces
104        * with no Z planes compression.
105        */
106       if (format != VK_FORMAT_D32_SFLOAT_S8_UINT && format != VK_FORMAT_D32_SFLOAT && format != VK_FORMAT_D16_UNORM)
107          return false;
108 
109       /* TC-compat HTILE for layered images can have interleaved slices (see sliceInterleaved flag
110        * in addrlib).  radv_clear_htile does not work.
111        */
112       if (pCreateInfo->arrayLayers > 1)
113          return false;
114    }
115 
116    /* GFX9 has issues when the sample count is 4 and the format is D16 */
117    if (pdev->info.gfx_level == GFX9 && pCreateInfo->samples == 4 && format == VK_FORMAT_D16_UNORM)
118       return false;
119 
120    return true;
121 }
122 
123 static bool
radv_surface_has_scanout(struct radv_device * device,const struct radv_image_create_info * info)124 radv_surface_has_scanout(struct radv_device *device, const struct radv_image_create_info *info)
125 {
126    const struct radv_physical_device *pdev = radv_device_physical(device);
127 
128    if (info->bo_metadata) {
129       if (pdev->info.gfx_level >= GFX9)
130          return info->bo_metadata->u.gfx9.scanout;
131       else
132          return info->bo_metadata->u.legacy.scanout;
133    }
134 
135    return info->scanout;
136 }
137 
138 static bool
radv_image_use_fast_clear_for_image_early(const struct radv_device * device,const struct radv_image * image)139 radv_image_use_fast_clear_for_image_early(const struct radv_device *device, const struct radv_image *image)
140 {
141    const struct radv_physical_device *pdev = radv_device_physical(device);
142    const struct radv_instance *instance = radv_physical_device_instance(pdev);
143 
144    if (instance->debug_flags & RADV_DEBUG_FORCE_COMPRESS)
145       return true;
146 
147    if (image->vk.samples <= 1 && image->vk.extent.width * image->vk.extent.height <= 512 * 512) {
148       /* Do not enable CMASK or DCC for small surfaces where the cost
149        * of the eliminate pass can be higher than the benefit of fast
150        * clear. RadeonSI does this, but the image threshold is
151        * different.
152        */
153       return false;
154    }
155 
156    return !!(image->vk.usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT);
157 }
158 
159 static bool
radv_image_use_fast_clear_for_image(const struct radv_device * device,const struct radv_image * image)160 radv_image_use_fast_clear_for_image(const struct radv_device *device, const struct radv_image *image)
161 {
162    const struct radv_physical_device *pdev = radv_device_physical(device);
163    const struct radv_instance *instance = radv_physical_device_instance(pdev);
164 
165    if (instance->debug_flags & RADV_DEBUG_FORCE_COMPRESS)
166       return true;
167 
168    return radv_image_use_fast_clear_for_image_early(device, image) && (image->exclusive ||
169                                                                        /* Enable DCC for concurrent images if stores are
170                                                                         * supported because that means we can keep DCC
171                                                                         * compressed on all layouts/queues.
172                                                                         */
173                                                                        radv_image_use_dcc_image_stores(device, image));
174 }
175 
176 bool
radv_are_formats_dcc_compatible(const struct radv_physical_device * pdev,const void * pNext,VkFormat format,VkImageCreateFlags flags,bool * sign_reinterpret)177 radv_are_formats_dcc_compatible(const struct radv_physical_device *pdev, const void *pNext, VkFormat format,
178                                 VkImageCreateFlags flags, bool *sign_reinterpret)
179 {
180    if (!radv_is_colorbuffer_format_supported(pdev, format))
181       return false;
182 
183    if (sign_reinterpret != NULL)
184       *sign_reinterpret = false;
185 
186    /* All formats are compatible on GFX11. */
187    if ((flags & VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT) && pdev->info.gfx_level < GFX11) {
188       const struct VkImageFormatListCreateInfo *format_list =
189          (const struct VkImageFormatListCreateInfo *)vk_find_struct_const(pNext, IMAGE_FORMAT_LIST_CREATE_INFO);
190 
191       /* We have to ignore the existence of the list if viewFormatCount = 0 */
192       if (format_list && format_list->viewFormatCount) {
193          /* compatibility is transitive, so we only need to check
194           * one format with everything else. */
195          for (unsigned i = 0; i < format_list->viewFormatCount; ++i) {
196             if (format_list->pViewFormats[i] == VK_FORMAT_UNDEFINED)
197                continue;
198 
199             if (!radv_dcc_formats_compatible(pdev->info.gfx_level, format, format_list->pViewFormats[i],
200                                              sign_reinterpret))
201                return false;
202          }
203       } else {
204          return false;
205       }
206    }
207 
208    return true;
209 }
210 
211 static bool
radv_format_is_atomic_allowed(struct radv_device * device,VkFormat format)212 radv_format_is_atomic_allowed(struct radv_device *device, VkFormat format)
213 {
214    if (format == VK_FORMAT_R32_SFLOAT && !radv_uses_image_float32_atomics(device))
215       return false;
216 
217    return radv_is_atomic_format_supported(format);
218 }
219 
220 static bool
radv_formats_is_atomic_allowed(struct radv_device * device,const void * pNext,VkFormat format,VkImageCreateFlags flags)221 radv_formats_is_atomic_allowed(struct radv_device *device, const void *pNext, VkFormat format, VkImageCreateFlags flags)
222 {
223    if (radv_format_is_atomic_allowed(device, format))
224       return true;
225 
226    if (flags & VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT) {
227       const struct VkImageFormatListCreateInfo *format_list =
228          (const struct VkImageFormatListCreateInfo *)vk_find_struct_const(pNext, IMAGE_FORMAT_LIST_CREATE_INFO);
229 
230       /* We have to ignore the existence of the list if viewFormatCount = 0 */
231       if (format_list && format_list->viewFormatCount) {
232          for (unsigned i = 0; i < format_list->viewFormatCount; ++i) {
233             if (radv_format_is_atomic_allowed(device, format_list->pViewFormats[i]))
234                return true;
235          }
236       }
237    }
238 
239    return false;
240 }
241 
/* Early (pre-surface-computation) check of whether DCC can be enabled for
 * this image; radv_use_dcc_for_image_late() may still veto it afterwards.
 * On the true path, *sign_reinterpret reports whether any mutable view
 * format reinterprets the channel sign (via radv_are_formats_dcc_compatible).
 */
static bool
radv_use_dcc_for_image_early(struct radv_device *device, struct radv_image *image, const VkImageCreateInfo *pCreateInfo,
                             VkFormat format, bool *sign_reinterpret)
{
   const struct radv_physical_device *pdev = radv_device_physical(device);
   const struct radv_instance *instance = radv_physical_device_instance(pdev);

   /* DCC (Delta Color Compression) is only available for GFX8+. */
   if (pdev->info.gfx_level < GFX8)
      return false;

   const VkImageCompressionControlEXT *compression =
      vk_find_struct_const(pCreateInfo->pNext, IMAGE_COMPRESSION_CONTROL_EXT);

   /* Honor both the nodcc debug flag and VK_EXT_image_compression_control. */
   if (instance->debug_flags & RADV_DEBUG_NO_DCC ||
       (compression && compression->flags == VK_IMAGE_COMPRESSION_DISABLED_EXT)) {
      return false;
   }

   /* Shareable images only get DCC when the layout is negotiated through an
    * explicit DRM format modifier.
    */
   if (image->shareable && image->vk.tiling != VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT)
      return false;

   /*
    * TODO: Enable DCC for storage images on GFX9 and earlier.
    *
    * Also disable DCC with atomics because even when DCC stores are
    * supported atomics will always decompress. So if we are
    * decompressing a lot anyway we might as well not have DCC.
    */
   if ((pCreateInfo->usage & VK_IMAGE_USAGE_STORAGE_BIT) &&
       (pdev->info.gfx_level < GFX10 ||
        radv_formats_is_atomic_allowed(device, pCreateInfo->pNext, format, pCreateInfo->flags)))
      return false;

   if (pCreateInfo->tiling == VK_IMAGE_TILING_LINEAR)
      return false;

   /* Subsampled and multi-plane (e.g. YCbCr) formats don't get DCC. */
   if (vk_format_is_subsampled(format) || vk_format_get_plane_count(format) > 1)
      return false;

   if (!radv_image_use_fast_clear_for_image_early(device, image) &&
       image->vk.tiling != VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT)
      return false;

   /* Do not enable DCC for mipmapped arrays because performance is worse. */
   if (pCreateInfo->arrayLayers > 1 && pCreateInfo->mipLevels > 1)
      return false;

   if (pdev->info.gfx_level < GFX10) {
      /* TODO: Add support for DCC MSAA on GFX8-9. */
      if (pCreateInfo->samples > 1 && !pdev->dcc_msaa_allowed)
         return false;

      /* TODO: Add support for DCC layers/mipmaps on GFX9. */
      if ((pCreateInfo->arrayLayers > 1 || pCreateInfo->mipLevels > 1) && pdev->info.gfx_level == GFX9)
         return false;
   }

   /* DCC MSAA can't work on GFX10.3 and earlier without FMASK. */
   if (pCreateInfo->samples > 1 && pdev->info.gfx_level < GFX11 && (instance->debug_flags & RADV_DEBUG_NO_FMASK))
      return false;

   return radv_are_formats_dcc_compatible(pdev, pCreateInfo->pNext, format, pCreateInfo->flags, sign_reinterpret);
}
306 
307 static bool
radv_use_dcc_for_image_late(struct radv_device * device,struct radv_image * image)308 radv_use_dcc_for_image_late(struct radv_device *device, struct radv_image *image)
309 {
310    if (!radv_image_has_dcc(image))
311       return false;
312 
313    if (image->vk.tiling == VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT)
314       return true;
315 
316    if (!radv_image_use_fast_clear_for_image(device, image))
317       return false;
318 
319    /* TODO: Fix storage images with DCC without DCC image stores.
320     * Disabling it for now. */
321    if ((image->vk.usage & VK_IMAGE_USAGE_STORAGE_BIT) && !radv_image_use_dcc_image_stores(device, image))
322       return false;
323 
324    return true;
325 }
326 
327 /*
328  * Whether to enable image stores with DCC compression for this image. If
329  * this function returns false the image subresource should be decompressed
330  * before using it with image stores.
331  *
332  * Note that this can have mixed performance implications, see
333  * https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6796#note_643299
334  *
335  * This function assumes the image uses DCC compression.
336  */
337 bool
radv_image_use_dcc_image_stores(const struct radv_device * device,const struct radv_image * image)338 radv_image_use_dcc_image_stores(const struct radv_device *device, const struct radv_image *image)
339 {
340    const struct radv_physical_device *pdev = radv_device_physical(device);
341 
342    return ac_surface_supports_dcc_image_stores(pdev->info.gfx_level, &image->planes[0].surface);
343 }
344 
345 /*
346  * Whether to use a predicate to determine whether DCC is in a compressed
347  * state. This can be used to avoid decompressing an image multiple times.
348  */
349 bool
radv_image_use_dcc_predication(const struct radv_device * device,const struct radv_image * image)350 radv_image_use_dcc_predication(const struct radv_device *device, const struct radv_image *image)
351 {
352    return radv_image_has_dcc(image) && !radv_image_use_dcc_image_stores(device, image);
353 }
354 
355 static inline bool
radv_use_fmask_for_image(const struct radv_device * device,const struct radv_image * image)356 radv_use_fmask_for_image(const struct radv_device *device, const struct radv_image *image)
357 {
358    const struct radv_physical_device *pdev = radv_device_physical(device);
359    const struct radv_instance *instance = radv_physical_device_instance(pdev);
360 
361    if (pdev->info.gfx_level == GFX9 && image->vk.array_layers > 1) {
362       /* On GFX9, FMASK can be interleaved with layers and this isn't properly supported. */
363       return false;
364    }
365 
366    return pdev->use_fmask && image->vk.samples > 1 &&
367           ((image->vk.usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) ||
368            (instance->debug_flags & RADV_DEBUG_FORCE_COMPRESS));
369 }
370 
371 static inline bool
radv_use_htile_for_image(const struct radv_device * device,const struct radv_image * image,const VkImageCreateInfo * pCreateInfo)372 radv_use_htile_for_image(const struct radv_device *device, const struct radv_image *image,
373                          const VkImageCreateInfo *pCreateInfo)
374 {
375    const struct radv_physical_device *pdev = radv_device_physical(device);
376    const struct radv_instance *instance = radv_physical_device_instance(pdev);
377    const enum amd_gfx_level gfx_level = pdev->info.gfx_level;
378 
379    const VkImageCompressionControlEXT *compression =
380       vk_find_struct_const(pCreateInfo->pNext, IMAGE_COMPRESSION_CONTROL_EXT);
381 
382    if (instance->debug_flags & RADV_DEBUG_NO_HIZ ||
383        (compression && compression->flags == VK_IMAGE_COMPRESSION_DISABLED_EXT))
384       return false;
385 
386    if (image->vk.usage & VK_IMAGE_USAGE_STORAGE_BIT)
387       return false;
388 
389    /* TODO:
390     * - Investigate about mips+layers.
391     * - Enable on other gens.
392     */
393    bool use_htile_for_mips = image->vk.array_layers == 1 && pdev->info.gfx_level >= GFX10;
394 
395    /* Stencil texturing with HTILE doesn't work with mipmapping on Navi10-14. */
396    if (pdev->info.gfx_level == GFX10 && image->vk.format == VK_FORMAT_D32_SFLOAT_S8_UINT && image->vk.mip_levels > 1)
397       return false;
398 
399    /* Do not enable HTILE for very small images because it seems less performant but make sure it's
400     * allowed with VRS attachments because we need HTILE on GFX10.3.
401     */
402    if (image->vk.extent.width * image->vk.extent.height < 8 * 8 &&
403        !(instance->debug_flags & RADV_DEBUG_FORCE_COMPRESS) &&
404        !(gfx_level == GFX10_3 && device->vk.enabled_features.attachmentFragmentShadingRate))
405       return false;
406 
407    return (image->vk.mip_levels == 1 || use_htile_for_mips) && !image->shareable;
408 }
409 
410 static bool
radv_use_tc_compat_cmask_for_image(struct radv_device * device,struct radv_image * image)411 radv_use_tc_compat_cmask_for_image(struct radv_device *device, struct radv_image *image)
412 {
413    const struct radv_physical_device *pdev = radv_device_physical(device);
414    const struct radv_instance *instance = radv_physical_device_instance(pdev);
415 
416    /* TC-compat CMASK is only available for GFX8+. */
417    if (pdev->info.gfx_level < GFX8)
418       return false;
419 
420    /* GFX9 has issues when sample count is greater than 2 */
421    if (pdev->info.gfx_level == GFX9 && image->vk.samples > 2)
422       return false;
423 
424    if (instance->debug_flags & RADV_DEBUG_NO_TC_COMPAT_CMASK)
425       return false;
426 
427    /* TC-compat CMASK with storage images is supported on GFX10+. */
428    if ((image->vk.usage & VK_IMAGE_USAGE_STORAGE_BIT) && pdev->info.gfx_level < GFX10)
429       return false;
430 
431    /* Do not enable TC-compatible if the image isn't readable by a shader
432     * because no texture fetches will happen.
433     */
434    if (!(image->vk.usage &
435          (VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT)))
436       return false;
437 
438    /* If the image doesn't have FMASK, it can't be fetchable. */
439    if (!radv_image_has_fmask(image))
440       return false;
441 
442    return true;
443 }
444 
445 static uint32_t
radv_get_bo_metadata_word1(const struct radv_device * device)446 radv_get_bo_metadata_word1(const struct radv_device *device)
447 {
448    const struct radv_physical_device *pdev = radv_device_physical(device);
449 
450    return (ATI_VENDOR_ID << 16) | pdev->info.pci_id;
451 }
452 
453 static bool
radv_is_valid_opaque_metadata(const struct radv_device * device,const struct radeon_bo_metadata * md)454 radv_is_valid_opaque_metadata(const struct radv_device *device, const struct radeon_bo_metadata *md)
455 {
456    if (md->metadata[0] != 1 || md->metadata[1] != radv_get_bo_metadata_word1(device))
457       return false;
458 
459    if (md->size_metadata < 40)
460       return false;
461 
462    return true;
463 }
464 
/* Overwrite the tiling mode (and tiling parameters) of a surface with the
 * values carried in imported BO metadata, so the surface layout matches the
 * exporting process.
 */
static void
radv_patch_surface_from_metadata(struct radv_device *device, struct radeon_surf *surface,
                                 const struct radeon_bo_metadata *md)
{
   const struct radv_physical_device *pdev = radv_device_physical(device);

   /* Clear the current MODE bits before re-deriving them from the metadata. */
   surface->flags = RADEON_SURF_CLR(surface->flags, MODE);

   if (pdev->info.gfx_level >= GFX9) {
      /* On GFX9+, a non-zero swizzle mode means a tiled (2D) layout. */
      if (md->u.gfx9.swizzle_mode > 0)
         surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_2D, MODE);
      else
         surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_LINEAR_ALIGNED, MODE);

      surface->u.gfx9.swizzle_mode = md->u.gfx9.swizzle_mode;
   } else {
      /* Pre-GFX9 carries the legacy tiling parameters individually. */
      surface->u.legacy.pipe_config = md->u.legacy.pipe_config;
      surface->u.legacy.bankw = md->u.legacy.bankw;
      surface->u.legacy.bankh = md->u.legacy.bankh;
      surface->u.legacy.tile_split = md->u.legacy.tile_split;
      surface->u.legacy.mtilea = md->u.legacy.mtilea;
      surface->u.legacy.num_banks = md->u.legacy.num_banks;

      /* Prefer macro tiling over micro tiling over linear. */
      if (md->u.legacy.macrotile == RADEON_LAYOUT_TILED)
         surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_2D, MODE);
      else if (md->u.legacy.microtile == RADEON_LAYOUT_TILED)
         surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_1D, MODE);
      else
         surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_LINEAR_ALIGNED, MODE);
   }
}
496 
/* Reconcile the image dimensions requested by the app with the (possibly
 * larger) dimensions recorded in imported BO metadata. On a benign mismatch
 * pre-GFX10, image_info is widened to the external dimensions; inconsistent
 * or smaller external dimensions fail the import.
 */
static VkResult
radv_patch_image_dimensions(struct radv_device *device, struct radv_image *image,
                            const struct radv_image_create_info *create_info, struct ac_surf_info *image_info)
{
   const struct radv_physical_device *pdev = radv_device_physical(device);
   unsigned width = image->vk.extent.width;
   unsigned height = image->vk.extent.height;

   /*
    * minigbm sometimes allocates bigger images which is going to result in
    * weird strides and other properties. Lets be lenient where possible and
    * fail it on GFX10 (as we cannot cope there).
    *
    * Example hack: https://chromium-review.googlesource.com/c/chromiumos/platform/minigbm/+/1457777/
    */
   if (create_info->bo_metadata && radv_is_valid_opaque_metadata(device, create_info->bo_metadata)) {
      const struct radeon_bo_metadata *md = create_info->bo_metadata;

      /* Decode width/height out of the stored image-descriptor words; the
       * bit layout differs between GFX10+ and older generations.
       */
      if (pdev->info.gfx_level >= GFX10) {
         width = G_00A004_WIDTH_LO(md->metadata[3]) + (G_00A008_WIDTH_HI(md->metadata[4]) << 2) + 1;
         height = G_00A008_HEIGHT(md->metadata[4]) + 1;
      } else {
         width = G_008F18_WIDTH(md->metadata[4]) + 1;
         height = G_008F18_HEIGHT(md->metadata[4]) + 1;
      }
   }

   /* Dimensions agree: nothing to patch. */
   if (image->vk.extent.width == width && image->vk.extent.height == height)
      return VK_SUCCESS;

   if (width < image->vk.extent.width || height < image->vk.extent.height) {
      fprintf(stderr,
              "The imported image has smaller dimensions than the internal\n"
              "dimensions. Using it is going to fail badly, so we reject\n"
              "this import.\n"
              "(internal dimensions: %d x %d, external dimensions: %d x %d)\n",
              image->vk.extent.width, image->vk.extent.height, width, height);
      return VK_ERROR_INVALID_EXTERNAL_HANDLE;
   } else if (pdev->info.gfx_level >= GFX10) {
      fprintf(stderr,
              "Tried to import an image with inconsistent width on GFX10.\n"
              "As GFX10 has no separate stride fields we cannot cope with\n"
              "an inconsistency in width and will fail this import.\n"
              "(internal dimensions: %d x %d, external dimensions: %d x %d)\n",
              image->vk.extent.width, image->vk.extent.height, width, height);
      return VK_ERROR_INVALID_EXTERNAL_HANDLE;
   } else {
      /* Pre-GFX10: warn but accept, using the external dimensions below. */
      fprintf(stderr,
              "Tried to import an image with inconsistent width on pre-GFX10.\n"
              "As GFX10 has no separate stride fields we cannot cope with\n"
              "an inconsistency and would fail on GFX10.\n"
              "(internal dimensions: %d x %d, external dimensions: %d x %d)\n",
              image->vk.extent.width, image->vk.extent.height, width, height);
   }
   image_info->width = width;
   image_info->height = height;

   return VK_SUCCESS;
}
556 
557 static VkResult
radv_patch_image_from_extra_info(struct radv_device * device,struct radv_image * image,const struct radv_image_create_info * create_info,struct ac_surf_info * image_info)558 radv_patch_image_from_extra_info(struct radv_device *device, struct radv_image *image,
559                                  const struct radv_image_create_info *create_info, struct ac_surf_info *image_info)
560 {
561    const struct radv_physical_device *pdev = radv_device_physical(device);
562    const struct radv_instance *instance = radv_physical_device_instance(pdev);
563 
564    VkResult result = radv_patch_image_dimensions(device, image, create_info, image_info);
565    if (result != VK_SUCCESS)
566       return result;
567 
568    for (unsigned plane = 0; plane < image->plane_count; ++plane) {
569       if (create_info->bo_metadata) {
570          radv_patch_surface_from_metadata(device, &image->planes[plane].surface, create_info->bo_metadata);
571       }
572 
573       if (radv_surface_has_scanout(device, create_info)) {
574          image->planes[plane].surface.flags |= RADEON_SURF_SCANOUT;
575          if (instance->debug_flags & RADV_DEBUG_NO_DISPLAY_DCC)
576             image->planes[plane].surface.flags |= RADEON_SURF_DISABLE_DCC;
577 
578          image_info->surf_index = NULL;
579       }
580 
581       if (create_info->prime_blit_src && !pdev->info.sdma_supports_compression) {
582          /* Older SDMA hw can't handle DCC */
583          image->planes[plane].surface.flags |= RADEON_SURF_DISABLE_DCC;
584       }
585    }
586    return VK_SUCCESS;
587 }
588 
589 static VkFormat
radv_image_get_plane_format(const struct radv_physical_device * pdev,const struct radv_image * image,unsigned plane)590 radv_image_get_plane_format(const struct radv_physical_device *pdev, const struct radv_image *image, unsigned plane)
591 {
592    if (radv_is_format_emulated(pdev, image->vk.format)) {
593       if (plane == 0)
594          return image->vk.format;
595       if (vk_format_description(image->vk.format)->layout == UTIL_FORMAT_LAYOUT_ASTC)
596          return vk_texcompress_astc_emulation_format(image->vk.format);
597       else
598          return vk_texcompress_etc2_emulation_format(image->vk.format);
599    }
600 
601    return vk_format_get_plane_format(image->vk.format, plane);
602 }
603 
/* Build the RADEON_SURF_* flag word that ac_surface uses to compute the
 * layout of one plane. Note that the order matters: later checks (e.g. the
 * 4K-alignment heuristic) read flag bits set by earlier ones.
 */
static uint64_t
radv_get_surface_flags(struct radv_device *device, struct radv_image *image, unsigned plane_id,
                       const VkImageCreateInfo *pCreateInfo, VkFormat image_format)
{
   const struct radv_physical_device *pdev = radv_device_physical(device);
   const struct radv_instance *instance = radv_physical_device_instance(pdev);
   uint64_t flags;
   unsigned array_mode = radv_choose_tiling(device, pCreateInfo, image_format);
   VkFormat format = radv_image_get_plane_format(pdev, image, plane_id);
   const struct util_format_description *desc = vk_format_description(format);
   const VkImageAlignmentControlCreateInfoMESA *alignment =
         vk_find_struct_const(pCreateInfo->pNext, IMAGE_ALIGNMENT_CONTROL_CREATE_INFO_MESA);
   bool is_depth, is_stencil;

   is_depth = util_format_has_depth(desc);
   is_stencil = util_format_has_stencil(desc);

   flags = RADEON_SURF_SET(array_mode, MODE);

   /* Translate the Vulkan image type (+ arrayness) into the surface type. */
   switch (pCreateInfo->imageType) {
   case VK_IMAGE_TYPE_1D:
      if (pCreateInfo->arrayLayers > 1)
         flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_1D_ARRAY, TYPE);
      else
         flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_1D, TYPE);
      break;
   case VK_IMAGE_TYPE_2D:
      if (pCreateInfo->arrayLayers > 1)
         flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_2D_ARRAY, TYPE);
      else
         flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_2D, TYPE);
      break;
   case VK_IMAGE_TYPE_3D:
      flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_3D, TYPE);
      break;
   default:
      unreachable("unhandled image type");
   }

   /* Required for clearing/initializing a specific layer on GFX8. */
   flags |= RADEON_SURF_CONTIGUOUS_DCC_LAYERS;

   if (is_depth) {
      flags |= RADEON_SURF_ZBUFFER;

      if (is_depth && is_stencil && pdev->info.gfx_level <= GFX8) {
         if (!(pCreateInfo->usage & VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT))
            flags |= RADEON_SURF_NO_RENDER_TARGET;

         /* RADV doesn't support stencil pitch adjustment. As a result there are some spec gaps that
          * are not covered by CTS.
          *
          * For D+S images with pitch constraints due to rendertarget usage it can happen that
          * sampling from mipmaps beyond the base level of the descriptor is broken as the pitch
          * adjustment can't be applied to anything beyond the first level.
          */
         flags |= RADEON_SURF_NO_STENCIL_ADJUST;
      }

      /* HTILE (possibly TC-compatible) or explicitly no HTILE at all. */
      if (radv_use_htile_for_image(device, image, pCreateInfo) && !(flags & RADEON_SURF_NO_RENDER_TARGET)) {
         if (radv_use_tc_compat_htile_for_image(device, pCreateInfo, image_format))
            flags |= RADEON_SURF_TC_COMPATIBLE_HTILE;
      } else {
         flags |= RADEON_SURF_NO_HTILE;
      }
   }

   if (is_stencil)
      flags |= RADEON_SURF_SBUFFER;

   /* 128-bit compressed 3D images cannot be render targets on GFX9+. */
   if (pdev->info.gfx_level >= GFX9 && pCreateInfo->imageType == VK_IMAGE_TYPE_3D &&
       vk_format_get_blocksizebits(image_format) == 128 && vk_format_is_compressed(image_format))
      flags |= RADEON_SURF_NO_RENDER_TARGET;

   if (!radv_use_dcc_for_image_early(device, image, pCreateInfo, image_format, &image->dcc_sign_reinterpret))
      flags |= RADEON_SURF_DISABLE_DCC;

   if (!radv_use_fmask_for_image(device, image))
      flags |= RADEON_SURF_NO_FMASK;

   /* Sparse-resident images can't use any metadata surfaces. */
   if (pCreateInfo->flags & VK_IMAGE_CREATE_SPARSE_RESIDENCY_BIT) {
      flags |= RADEON_SURF_PRT | RADEON_SURF_NO_FMASK | RADEON_SURF_NO_HTILE | RADEON_SURF_DISABLE_DCC;
   }

   /* Images used on the transfer (SDMA) queue lose compression on older hw. */
   if (image->queue_family_mask & BITFIELD_BIT(RADV_QUEUE_TRANSFER)) {
      if (!pdev->info.sdma_supports_compression)
         flags |= RADEON_SURF_DISABLE_DCC | RADEON_SURF_NO_HTILE;
   }

   /* Disable DCC for VRS rate images because the hw can't handle compression. */
   if (pCreateInfo->usage & VK_IMAGE_USAGE_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT_KHR)
      flags |= RADEON_SURF_VRS_RATE | RADEON_SURF_DISABLE_DCC;
   if (!(pCreateInfo->usage & (VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_STORAGE_BIT)))
      flags |= RADEON_SURF_NO_TEXTURE;

   /* VK_MESA_image_alignment_control: honor the app's requested maximum
    * alignment when the image has no metadata that forces larger alignment.
    */
   if (alignment && alignment->maximumRequestedAlignment && !(instance->debug_flags & RADV_DEBUG_FORCE_COMPRESS)) {
      bool is_4k_capable;

      if (!vk_format_is_depth_or_stencil(image_format)) {
         is_4k_capable =
               !(pCreateInfo->usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) && (flags & RADEON_SURF_DISABLE_DCC) &&
               (flags & RADEON_SURF_NO_FMASK);
      } else {
         /* Depth-stencil format without DEPTH_STENCIL usage does not work either. */
         is_4k_capable = false;
      }

      if (is_4k_capable && alignment->maximumRequestedAlignment <= 4096)
         flags |= RADEON_SURF_PREFER_4K_ALIGNMENT;
      if (alignment->maximumRequestedAlignment <= 64 * 1024)
         flags |= RADEON_SURF_PREFER_64K_ALIGNMENT;
   }

   return flags;
}
719 
720 void
radv_compose_swizzle(const struct util_format_description * desc,const VkComponentMapping * mapping,enum pipe_swizzle swizzle[4])721 radv_compose_swizzle(const struct util_format_description *desc, const VkComponentMapping *mapping,
722                      enum pipe_swizzle swizzle[4])
723 {
724    if (desc->format == PIPE_FORMAT_R64_UINT || desc->format == PIPE_FORMAT_R64_SINT) {
725       /* 64-bit formats only support storage images and storage images
726        * require identity component mappings. We use 32-bit
727        * instructions to access 64-bit images, so we need a special
728        * case here.
729        *
730        * The zw components are 1,0 so that they can be easily be used
731        * by loads to create the w component, which has to be 0 for
732        * NULL descriptors.
733        */
734       swizzle[0] = PIPE_SWIZZLE_X;
735       swizzle[1] = PIPE_SWIZZLE_Y;
736       swizzle[2] = PIPE_SWIZZLE_1;
737       swizzle[3] = PIPE_SWIZZLE_0;
738    } else if (!mapping) {
739       for (unsigned i = 0; i < 4; i++)
740          swizzle[i] = desc->swizzle[i];
741    } else if (desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) {
742       const unsigned char swizzle_xxxx[4] = {PIPE_SWIZZLE_X, PIPE_SWIZZLE_0, PIPE_SWIZZLE_0, PIPE_SWIZZLE_1};
743       vk_format_compose_swizzles(mapping, swizzle_xxxx, swizzle);
744    } else {
745       vk_format_compose_swizzles(mapping, desc->swizzle, swizzle);
746    }
747 }
748 
/* Serialize the opaque, driver-private part of the BO metadata for one image
 * plane.  A texture descriptor covering the whole plane (all mip levels and
 * array layers) is built and handed to ac_surface, which packs it together
 * with the surface layout into md->metadata / md->size_metadata so importers
 * can reconstruct the layout.
 */
static void
radv_query_opaque_metadata(struct radv_device *device, struct radv_image *image, unsigned plane_id,
                           struct radeon_bo_metadata *md)
{
   const struct radv_physical_device *pdev = radv_device_physical(device);
   const struct radv_instance *instance = radv_physical_device_instance(pdev);
   /* Zero-initialized mapping == VK_COMPONENT_SWIZZLE_IDENTITY for all
    * components (identity swizzle is enum value 0). */
   static const VkComponentMapping fixedmapping;
   const VkFormat plane_format = radv_image_get_plane_format(pdev, image, plane_id);
   const unsigned plane_width = vk_format_get_plane_width(image->vk.format, plane_id, image->vk.extent.width);
   const unsigned plane_height = vk_format_get_plane_height(image->vk.format, plane_id, image->vk.extent.height);
   struct radeon_surf *surface = &image->planes[plane_id].surface;
   /* Pre-GFX9 descriptors need the legacy per-level info for level 0. */
   const struct legacy_surf_level *base_level_info = pdev->info.gfx_level <= GFX8 ? &surface->u.legacy.level[0] : NULL;
   uint32_t desc[8];

   /* Descriptor spanning mips [0, mip_levels-1] and layers [0, array_layers-1]. */
   radv_make_texture_descriptor(device, image, false, (VkImageViewType)image->vk.image_type, plane_format,
                                &fixedmapping, 0, image->vk.mip_levels - 1, 0, image->vk.array_layers - 1, plane_width,
                                plane_height, image->vk.extent.depth, 0.0f, desc, NULL, 0, NULL, NULL);

   /* Patch in the address/tiling fields for the base mip of this plane. */
   radv_set_mutable_tex_desc_fields(device, image, base_level_info, plane_id, 0, 0, surface->blk_w, false, false, false,
                                    false, desc, NULL);

   ac_surface_compute_umd_metadata(&pdev->info, surface, image->vk.mip_levels, desc, &md->size_metadata, md->metadata,
                                   instance->debug_flags & RADV_DEBUG_EXTRA_MD);
}
773 
774 void
radv_init_metadata(struct radv_device * device,struct radv_image * image,struct radeon_bo_metadata * metadata)775 radv_init_metadata(struct radv_device *device, struct radv_image *image, struct radeon_bo_metadata *metadata)
776 {
777    const struct radv_physical_device *pdev = radv_device_physical(device);
778 
779    /* use plane 0, even when there are multiple planes, to follow radeonsi */
780    const unsigned plane_id = 0;
781    struct radeon_surf *surface = &image->planes[plane_id].surface;
782 
783    memset(metadata, 0, sizeof(*metadata));
784 
785    if (pdev->info.gfx_level >= GFX9) {
786       uint64_t dcc_offset =
787          image->bindings[0].offset + (surface->display_dcc_offset ? surface->display_dcc_offset : surface->meta_offset);
788       metadata->u.gfx9.swizzle_mode = surface->u.gfx9.swizzle_mode;
789       metadata->u.gfx9.dcc_offset_256b = dcc_offset >> 8;
790       metadata->u.gfx9.dcc_pitch_max = surface->u.gfx9.color.display_dcc_pitch_max;
791       metadata->u.gfx9.dcc_independent_64b_blocks = surface->u.gfx9.color.dcc.independent_64B_blocks;
792       metadata->u.gfx9.dcc_independent_128b_blocks = surface->u.gfx9.color.dcc.independent_128B_blocks;
793       metadata->u.gfx9.dcc_max_compressed_block_size = surface->u.gfx9.color.dcc.max_compressed_block_size;
794       metadata->u.gfx9.scanout = (surface->flags & RADEON_SURF_SCANOUT) != 0;
795    } else {
796       metadata->u.legacy.microtile =
797          surface->u.legacy.level[0].mode >= RADEON_SURF_MODE_1D ? RADEON_LAYOUT_TILED : RADEON_LAYOUT_LINEAR;
798       metadata->u.legacy.macrotile =
799          surface->u.legacy.level[0].mode >= RADEON_SURF_MODE_2D ? RADEON_LAYOUT_TILED : RADEON_LAYOUT_LINEAR;
800       metadata->u.legacy.pipe_config = surface->u.legacy.pipe_config;
801       metadata->u.legacy.bankw = surface->u.legacy.bankw;
802       metadata->u.legacy.bankh = surface->u.legacy.bankh;
803       metadata->u.legacy.tile_split = surface->u.legacy.tile_split;
804       metadata->u.legacy.mtilea = surface->u.legacy.mtilea;
805       metadata->u.legacy.num_banks = surface->u.legacy.num_banks;
806       metadata->u.legacy.stride = surface->u.legacy.level[0].nblk_x * surface->bpe;
807       metadata->u.legacy.scanout = (surface->flags & RADEON_SURF_SCANOUT) != 0;
808    }
809    radv_query_opaque_metadata(device, image, plane_id, metadata);
810 }
811 
812 void
radv_image_override_offset_stride(struct radv_device * device,struct radv_image * image,uint64_t offset,uint32_t stride)813 radv_image_override_offset_stride(struct radv_device *device, struct radv_image *image, uint64_t offset,
814                                   uint32_t stride)
815 {
816    const struct radv_physical_device *pdev = radv_device_physical(device);
817    ac_surface_override_offset_stride(&pdev->info, &image->planes[0].surface, image->vk.array_layers,
818                                      image->vk.mip_levels, offset, stride);
819 }
820 
821 static void
radv_image_alloc_single_sample_cmask(const struct radv_device * device,const struct radv_image * image,struct radeon_surf * surf)822 radv_image_alloc_single_sample_cmask(const struct radv_device *device, const struct radv_image *image,
823                                      struct radeon_surf *surf)
824 {
825    if (!surf->cmask_size || surf->cmask_offset || surf->bpe > 8 || image->vk.mip_levels > 1 ||
826        image->vk.extent.depth > 1 || radv_image_has_dcc(image) || !radv_image_use_fast_clear_for_image(device, image) ||
827        (image->vk.create_flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT))
828       return;
829 
830    assert(image->vk.samples == 1);
831 
832    surf->cmask_offset = align64(surf->total_size, 1ull << surf->cmask_alignment_log2);
833    surf->total_size = surf->cmask_offset + surf->cmask_size;
834    surf->alignment_log2 = MAX2(surf->alignment_log2, surf->cmask_alignment_log2);
835 }
836 
837 static void
radv_image_alloc_values(const struct radv_device * device,struct radv_image * image)838 radv_image_alloc_values(const struct radv_device *device, struct radv_image *image)
839 {
840    const struct radv_physical_device *pdev = radv_device_physical(device);
841 
842    /* images with modifiers can be potentially imported */
843    if (image->vk.tiling == VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT)
844       return;
845 
846    if (radv_image_has_cmask(image) || (radv_image_has_dcc(image) && !image->support_comp_to_single)) {
847       image->fce_pred_offset = image->size;
848       image->size += 8 * image->vk.mip_levels;
849    }
850 
851    if (radv_image_use_dcc_predication(device, image)) {
852       image->dcc_pred_offset = image->size;
853       image->size += 8 * image->vk.mip_levels;
854    }
855 
856    if ((radv_image_has_dcc(image) && !image->support_comp_to_single) || radv_image_has_cmask(image) ||
857        radv_image_has_htile(image)) {
858       image->clear_value_offset = image->size;
859       image->size += 8 * image->vk.mip_levels;
860    }
861 
862    if (radv_image_is_tc_compat_htile(image) && pdev->info.has_tc_compat_zrange_bug) {
863       /* Metadata for the TC-compatible HTILE hardware bug which
864        * have to be fixed by updating ZRANGE_PRECISION when doing
865        * fast depth clears to 0.0f.
866        */
867       image->tc_compat_zrange_offset = image->size;
868       image->size += image->vk.mip_levels * 4;
869    }
870 }
871 
/* Determine if the image is affected by the pipe misaligned metadata issue
 * which requires to invalidate L2.
 */
static bool
radv_image_is_pipe_misaligned(const struct radv_device *device, const struct radv_image *image)
{
   const struct radv_physical_device *pdev = radv_device_physical(device);
   const struct radeon_info *gpu_info = &pdev->info;
   int log2_samples = util_logbase2(image->vk.samples);

   /* This check is only meaningful on GFX10+ (GFX12 callers return early). */
   assert(gpu_info->gfx_level >= GFX10);

   for (unsigned i = 0; i < image->plane_count; ++i) {
      VkFormat fmt = radv_image_get_plane_format(pdev, image, i);
      int log2_bpp = util_logbase2(vk_format_get_blocksize(fmt));
      int log2_bpp_and_samples;

      if (gpu_info->gfx_level >= GFX10_3) {
         log2_bpp_and_samples = log2_bpp + log2_samples;
      } else {
         /* NOTE(review): GFX10.0/10.1 special case — depth images with >= 8
          * layers are treated as 4 bpp; presumably matches hw metadata
          * addressing, confirm against the equivalent radeonsi logic.
          */
         if (vk_format_has_depth(image->vk.format) && image->vk.array_layers >= 8) {
            log2_bpp = 2;
         }

         log2_bpp_and_samples = MIN2(6, log2_bpp + log2_samples);
      }

      /* Pipe/fragment configuration is decoded from GB_ADDR_CONFIG. */
      int num_pipes = G_0098F8_NUM_PIPES(gpu_info->gb_addr_config);
      int overlap = MAX2(0, log2_bpp_and_samples + num_pipes - 8);

      if (vk_format_has_depth(image->vk.format)) {
         /* Depth: only TC-compatible HTILE metadata can be misaligned. */
         if (radv_image_is_tc_compat_htile(image) && overlap) {
            return true;
         }
      } else {
         int max_compressed_frags = G_0098F8_MAX_COMPRESSED_FRAGS(gpu_info->gb_addr_config);
         int log2_samples_frag_diff = MAX2(0, log2_samples - max_compressed_frags);
         int samples_overlap = MIN2(log2_samples, overlap);

         /* TODO: It shouldn't be necessary if the image has DCC but
          * not readable by shader.
          */
         if ((radv_image_has_dcc(image) || radv_image_is_tc_compat_cmask(image)) &&
             (samples_overlap > log2_samples_frag_diff)) {
            return true;
         }
      }
   }

   return false;
}
923 
924 static bool
radv_image_is_l2_coherent(const struct radv_device * device,const struct radv_image * image)925 radv_image_is_l2_coherent(const struct radv_device *device, const struct radv_image *image)
926 {
927    const struct radv_physical_device *pdev = radv_device_physical(device);
928 
929    if (pdev->info.gfx_level >= GFX12) {
930       return true; /* Everything is coherent with TC L2. */
931    } else if (pdev->info.gfx_level >= GFX10) {
932       return !pdev->info.tcc_rb_non_coherent && !radv_image_is_pipe_misaligned(device, image);
933    } else if (pdev->info.gfx_level == GFX9) {
934       if (image->vk.samples == 1 &&
935           (image->vk.usage & (VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT)) &&
936           !vk_format_has_stencil(image->vk.format)) {
937          /* Single-sample color and single-sample depth
938           * (not stencil) are coherent with shaders on
939           * GFX9.
940           */
941          return true;
942       }
943    }
944 
945    return false;
946 }
947 
948 /**
949  * Determine if the given image can be fast cleared.
950  */
951 bool
radv_image_can_fast_clear(const struct radv_device * device,const struct radv_image * image)952 radv_image_can_fast_clear(const struct radv_device *device, const struct radv_image *image)
953 {
954    const struct radv_physical_device *pdev = radv_device_physical(device);
955    const struct radv_instance *instance = radv_physical_device_instance(pdev);
956 
957    if (instance->debug_flags & RADV_DEBUG_NO_FAST_CLEARS)
958       return false;
959 
960    if (vk_format_is_color(image->vk.format)) {
961       if (!radv_image_has_cmask(image) && !radv_image_has_dcc(image))
962          return false;
963 
964       /* RB+ doesn't work with CMASK fast clear on Stoney. */
965       if (!radv_image_has_dcc(image) && pdev->info.family == CHIP_STONEY)
966          return false;
967 
968       /* Fast-clears with CMASK aren't supported for 128-bit formats. */
969       if (radv_image_has_cmask(image) && vk_format_get_blocksizebits(image->vk.format) > 64)
970          return false;
971    } else {
972       if (!radv_image_has_htile(image))
973          return false;
974    }
975 
976    /* Do not fast clears 3D images. */
977    if (image->vk.image_type == VK_IMAGE_TYPE_3D)
978       return false;
979 
980    return true;
981 }
982 
983 /**
984  * Determine if the given image can be fast cleared using comp-to-single.
985  */
986 static bool
radv_image_use_comp_to_single(const struct radv_device * device,const struct radv_image * image)987 radv_image_use_comp_to_single(const struct radv_device *device, const struct radv_image *image)
988 {
989    const struct radv_physical_device *pdev = radv_device_physical(device);
990 
991    /* comp-to-single is only available for GFX10+. */
992    if (pdev->info.gfx_level < GFX10)
993       return false;
994 
995    /* If the image can't be fast cleared, comp-to-single can't be used. */
996    if (!radv_image_can_fast_clear(device, image))
997       return false;
998 
999    /* If the image doesn't have DCC, it can't be fast cleared using comp-to-single */
1000    if (!radv_image_has_dcc(image))
1001       return false;
1002 
1003    /* It seems 8bpp and 16bpp require RB+ to work. */
1004    unsigned bytes_per_pixel = vk_format_get_blocksize(image->vk.format);
1005    if (bytes_per_pixel <= 2 && !pdev->info.rbplus_allowed)
1006       return false;
1007 
1008    return true;
1009 }
1010 
1011 static unsigned
radv_get_internal_plane_count(const struct radv_physical_device * pdev,VkFormat fmt)1012 radv_get_internal_plane_count(const struct radv_physical_device *pdev, VkFormat fmt)
1013 {
1014    if (radv_is_format_emulated(pdev, fmt))
1015       return 2;
1016    return vk_format_get_plane_count(fmt);
1017 }
1018 
1019 static void
radv_image_reset_layout(const struct radv_physical_device * pdev,struct radv_image * image)1020 radv_image_reset_layout(const struct radv_physical_device *pdev, struct radv_image *image)
1021 {
1022    image->size = 0;
1023    image->alignment = 1;
1024 
1025    image->tc_compatible_cmask = 0;
1026    image->fce_pred_offset = image->dcc_pred_offset = 0;
1027    image->clear_value_offset = image->tc_compat_zrange_offset = 0;
1028 
1029    unsigned plane_count = radv_get_internal_plane_count(pdev, image->vk.format);
1030    for (unsigned i = 0; i < plane_count; ++i) {
1031       VkFormat format = radv_image_get_plane_format(pdev, image, i);
1032       if (vk_format_has_depth(format))
1033          format = vk_format_depth_only(format);
1034 
1035       uint64_t flags = image->planes[i].surface.flags;
1036       uint64_t modifier = image->planes[i].surface.modifier;
1037       memset(image->planes + i, 0, sizeof(image->planes[i]));
1038 
1039       image->planes[i].surface.flags = flags;
1040       image->planes[i].surface.modifier = modifier;
1041       image->planes[i].surface.blk_w = vk_format_get_blockwidth(format);
1042       image->planes[i].surface.blk_h = vk_format_get_blockheight(format);
1043       image->planes[i].surface.bpe = vk_format_get_blocksize(format);
1044 
1045       /* align byte per element on dword */
1046       if (image->planes[i].surface.bpe == 3) {
1047          image->planes[i].surface.bpe = 4;
1048       }
1049    }
1050 }
1051 
1052 struct ac_surf_info
radv_get_ac_surf_info(struct radv_device * device,const struct radv_image * image)1053 radv_get_ac_surf_info(struct radv_device *device, const struct radv_image *image)
1054 {
1055    struct ac_surf_info info;
1056 
1057    memset(&info, 0, sizeof(info));
1058 
1059    info.width = image->vk.extent.width;
1060    info.height = image->vk.extent.height;
1061    info.depth = image->vk.extent.depth;
1062    info.samples = image->vk.samples;
1063    info.storage_samples = image->vk.samples;
1064    info.array_size = image->vk.array_layers;
1065    info.levels = image->vk.mip_levels;
1066    info.num_channels = vk_format_get_nr_components(image->vk.format);
1067 
1068    if (!vk_format_is_depth_or_stencil(image->vk.format) && !image->shareable &&
1069        !(image->vk.create_flags & (VK_IMAGE_CREATE_SPARSE_ALIASED_BIT | VK_IMAGE_CREATE_ALIAS_BIT)) &&
1070        image->vk.tiling != VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT) {
1071       info.surf_index = &device->image_mrt_offset_counter;
1072    }
1073 
1074    return info;
1075 }
1076 
/* Compute the memory layout of the image: per-plane surfaces, total size,
 * alignment, and the trailing driver-internal metadata values.  Validates
 * explicit DRM-modifier layouts and imported BO metadata along the way.
 *
 * Returns VK_SUCCESS, or an error when imported/explicit layout data is
 * inconsistent with what the hardware layout requires.
 */
VkResult
radv_image_create_layout(struct radv_device *device, struct radv_image_create_info create_info,
                         const struct VkImageDrmFormatModifierExplicitCreateInfoEXT *mod_info,
                         const struct VkVideoProfileListInfoKHR *profile_list, struct radv_image *image)
{
   struct radv_physical_device *pdev = radv_device_physical(device);

   /* Clear the pCreateInfo pointer so we catch issues in the delayed case when we test in the
    * common internal case. */
   create_info.vk_info = NULL;

   struct ac_surf_info image_info = radv_get_ac_surf_info(device, image);
   VkResult result = radv_patch_image_from_extra_info(device, image, &create_info, &image_info);
   if (result != VK_SUCCESS)
      return result;

   assert(!mod_info || mod_info->drmFormatModifierPlaneCount >= image->plane_count);

   radv_image_reset_layout(pdev, image);

   /*
    * Due to how the decoder works, the user can't supply an oversized image, because if it attempts
    * to sample it later with a linear filter, it will get garbage after the height it wants,
    * so we let the user specify the width/height unaligned, and align them before allocation.
    */
   if (image->vk.usage & (VK_IMAGE_USAGE_VIDEO_DECODE_DST_BIT_KHR |
                          VK_IMAGE_USAGE_VIDEO_DECODE_DPB_BIT_KHR |
                          VK_IMAGE_USAGE_VIDEO_ENCODE_DPB_BIT_KHR)) {
      /* Without videoMaintenance1, a profile list is mandatory for these usages. */
      if (!device->vk.enabled_features.videoMaintenance1)
         assert(profile_list);
      uint32_t width_align, height_align;
      radv_video_get_profile_alignments(pdev, profile_list, &width_align, &height_align);
      image_info.width = align(image_info.width, width_align);
      image_info.height = align(image_info.height, height_align);

      if (radv_has_uvd(pdev) && image->vk.usage & VK_IMAGE_USAGE_VIDEO_DECODE_DPB_BIT_KHR) {
         /* UVD and kernel demand a full DPB allocation. */
         /* NOTE(review): MIN2 caps the array size at 16 layers, while the
          * comment above suggests a *full* DPB is required — confirm this
          * shouldn't be MAX2. */
         image_info.array_size = MIN2(16, image_info.array_size);
      }
   }

   unsigned plane_count = radv_get_internal_plane_count(pdev, image->vk.format);
   for (unsigned plane = 0; plane < plane_count; ++plane) {
      struct ac_surf_info info = image_info;
      uint64_t offset;
      unsigned stride;

      /* Subsampled planes (e.g. chroma) may be smaller than the image. */
      info.width = vk_format_get_plane_width(image->vk.format, plane, info.width);
      info.height = vk_format_get_plane_height(image->vk.format, plane, info.height);

      /* Multi-planar and metadata-less images get no DCC/FMASK/HTILE. */
      if (create_info.no_metadata_planes || plane_count > 1) {
         image->planes[plane].surface.flags |= RADEON_SURF_DISABLE_DCC | RADEON_SURF_NO_FMASK | RADEON_SURF_NO_HTILE;
      }

      device->ws->surface_init(device->ws, &info, &image->planes[plane].surface);

      if (plane == 0) {
         if (!radv_use_dcc_for_image_late(device, image))
            ac_surface_zero_dcc_fields(&image->planes[0].surface);
      }

      /* Imported BO metadata (non-modifier path) overrides the layout;
       * reject it if it doesn't fit the computed surface. */
      if (create_info.bo_metadata && !mod_info &&
          !ac_surface_apply_umd_metadata(&pdev->info, &image->planes[plane].surface, image->vk.samples,
                                         image->vk.mip_levels, create_info.bo_metadata->size_metadata,
                                         create_info.bo_metadata->metadata))
         return VK_ERROR_INVALID_EXTERNAL_HANDLE;

      /* Only internally laid-out, single-plane images may get a late CMASK. */
      if (!create_info.no_metadata_planes && !create_info.bo_metadata && plane_count == 1 && !mod_info)
         radv_image_alloc_single_sample_cmask(device, image, &image->planes[plane].surface);

      if (mod_info) {
         /* Explicit modifier layout: pitch must be a non-zero multiple of
          * the bytes-per-element. */
         if (mod_info->pPlaneLayouts[plane].rowPitch % image->planes[plane].surface.bpe ||
             !mod_info->pPlaneLayouts[plane].rowPitch)
            return VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT;

         offset = mod_info->pPlaneLayouts[plane].offset;
         stride = mod_info->pPlaneLayouts[plane].rowPitch / image->planes[plane].surface.bpe;
      } else {
         /* Disjoint planes are bound separately and start at offset 0. */
         offset = image->disjoint ? 0 : align64(image->size, 1ull << image->planes[plane].surface.alignment_log2);
         stride = 0; /* 0 means no override */
      }

      if (!ac_surface_override_offset_stride(&pdev->info, &image->planes[plane].surface, image->vk.array_layers,
                                             image->vk.mip_levels, offset, stride))
         return VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT;

      /* Validate DCC offsets in modifier layout. */
      if (plane_count == 1 && mod_info) {
         unsigned mem_planes = ac_surface_get_nplanes(&image->planes[plane].surface);
         if (mod_info->drmFormatModifierPlaneCount != mem_planes)
            return VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT;

         for (unsigned i = 1; i < mem_planes; ++i) {
            if (ac_surface_get_plane_offset(pdev->info.gfx_level, &image->planes[plane].surface, i, 0) !=
                mod_info->pPlaneLayouts[i].offset)
               return VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT;
         }
      }

      image->size = MAX2(image->size, offset + image->planes[plane].surface.total_size);
      image->alignment = MAX2(image->alignment, 1 << image->planes[plane].surface.alignment_log2);

      image->planes[plane].format = radv_image_get_plane_format(pdev, image, plane);
   }

   image->tc_compatible_cmask = radv_image_has_cmask(image) && radv_use_tc_compat_cmask_for_image(device, image);

   image->l2_coherent = radv_image_is_l2_coherent(device, image);

   image->support_comp_to_single = radv_image_use_comp_to_single(device, image);

   /* Append driver-internal metadata values (clear values, predicates). */
   radv_image_alloc_values(device, image);

   assert(image->planes[0].surface.surf_size);
   assert(image->planes[0].surface.modifier == DRM_FORMAT_MOD_INVALID ||
          ac_modifier_has_dcc(image->planes[0].surface.modifier) == radv_image_has_dcc(image));
   return VK_SUCCESS;
}
1195 
1196 static void
radv_destroy_image(struct radv_device * device,const VkAllocationCallbacks * pAllocator,struct radv_image * image)1197 radv_destroy_image(struct radv_device *device, const VkAllocationCallbacks *pAllocator, struct radv_image *image)
1198 {
1199    struct radv_physical_device *pdev = radv_device_physical(device);
1200    struct radv_instance *instance = radv_physical_device_instance(pdev);
1201 
1202    if ((image->vk.create_flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT) && image->bindings[0].bo)
1203       radv_bo_destroy(device, &image->vk.base, image->bindings[0].bo);
1204 
1205    if (image->owned_memory != VK_NULL_HANDLE) {
1206       VK_FROM_HANDLE(radv_device_memory, mem, image->owned_memory);
1207       radv_free_memory(device, pAllocator, mem);
1208    }
1209 
1210    for (uint32_t i = 0; i < ARRAY_SIZE(image->bindings); i++) {
1211       if (!image->bindings[i].bo_va)
1212          continue;
1213 
1214       vk_address_binding_report(&instance->vk, &image->vk.base, image->bindings[i].bo_va + image->bindings[i].offset,
1215                                 image->bindings[i].range, VK_DEVICE_ADDRESS_BINDING_TYPE_UNBIND_EXT);
1216    }
1217 
1218    radv_rmv_log_resource_destroy(device, (uint64_t)radv_image_to_handle(image));
1219    vk_image_finish(&image->vk);
1220    vk_free2(&device->vk.alloc, pAllocator, image);
1221 }
1222 
1223 static void
radv_image_print_info(struct radv_device * device,struct radv_image * image)1224 radv_image_print_info(struct radv_device *device, struct radv_image *image)
1225 {
1226    const struct radv_physical_device *pdev = radv_device_physical(device);
1227 
1228    fprintf(stderr, "Image:\n");
1229    fprintf(stderr,
1230            "  Info: size=%" PRIu64 ", alignment=%" PRIu32 ", "
1231            "width=%" PRIu32 ", height=%" PRIu32 ", depth=%" PRIu32 ", "
1232            "array_size=%" PRIu32 ", levels=%" PRIu32 "\n",
1233            image->size, image->alignment, image->vk.extent.width, image->vk.extent.height, image->vk.extent.depth,
1234            image->vk.array_layers, image->vk.mip_levels);
1235    for (unsigned i = 0; i < image->plane_count; ++i) {
1236       const struct radv_image_plane *plane = &image->planes[i];
1237       const struct radeon_surf *surf = &plane->surface;
1238       const struct util_format_description *desc = vk_format_description(plane->format);
1239       uint64_t offset = ac_surface_get_plane_offset(pdev->info.gfx_level, &plane->surface, 0, 0);
1240 
1241       fprintf(stderr, "  Plane[%u]: vkformat=%s, offset=%" PRIu64 "\n", i, desc->name, offset);
1242 
1243       ac_surface_print_info(stderr, &pdev->info, surf);
1244    }
1245 }
1246 
1247 static uint64_t
radv_select_modifier(const struct radv_device * dev,VkFormat format,const struct VkImageDrmFormatModifierListCreateInfoEXT * mod_list)1248 radv_select_modifier(const struct radv_device *dev, VkFormat format,
1249                      const struct VkImageDrmFormatModifierListCreateInfoEXT *mod_list)
1250 {
1251    const struct radv_physical_device *pdev = radv_device_physical(dev);
1252    unsigned mod_count;
1253 
1254    assert(mod_list->drmFormatModifierCount);
1255 
1256    /* We can allow everything here as it does not affect order and the application
1257     * is only allowed to specify modifiers that we support. */
1258    const struct ac_modifier_options modifier_options = {
1259       .dcc = true,
1260       .dcc_retile = true,
1261    };
1262 
1263    ac_get_supported_modifiers(&pdev->info, &modifier_options, vk_format_to_pipe_format(format), &mod_count, NULL);
1264 
1265    uint64_t *mods = calloc(mod_count, sizeof(*mods));
1266 
1267    /* If allocations fail, fall back to a dumber solution. */
1268    if (!mods)
1269       return mod_list->pDrmFormatModifiers[0];
1270 
1271    ac_get_supported_modifiers(&pdev->info, &modifier_options, vk_format_to_pipe_format(format), &mod_count, mods);
1272 
1273    for (unsigned i = 0; i < mod_count; ++i) {
1274       for (uint32_t j = 0; j < mod_list->drmFormatModifierCount; ++j) {
1275          if (mods[i] == mod_list->pDrmFormatModifiers[j]) {
1276             free(mods);
1277             return mod_list->pDrmFormatModifiers[j];
1278          }
1279       }
1280    }
1281    unreachable("App specified an invalid modifier");
1282 }
1283 
1284 VkResult
radv_image_create(VkDevice _device,const struct radv_image_create_info * create_info,const VkAllocationCallbacks * alloc,VkImage * pImage,bool is_internal)1285 radv_image_create(VkDevice _device, const struct radv_image_create_info *create_info,
1286                   const VkAllocationCallbacks *alloc, VkImage *pImage, bool is_internal)
1287 {
1288    VK_FROM_HANDLE(radv_device, device, _device);
1289    const struct radv_physical_device *pdev = radv_device_physical(device);
1290    const struct radv_instance *instance = radv_physical_device_instance(pdev);
1291    const VkImageCreateInfo *pCreateInfo = create_info->vk_info;
1292    uint64_t modifier = DRM_FORMAT_MOD_INVALID;
1293    struct radv_image *image = NULL;
1294    VkFormat format = radv_select_android_external_format(pCreateInfo->pNext, pCreateInfo->format);
1295    const struct VkImageDrmFormatModifierListCreateInfoEXT *mod_list =
1296       vk_find_struct_const(pCreateInfo->pNext, IMAGE_DRM_FORMAT_MODIFIER_LIST_CREATE_INFO_EXT);
1297    const struct VkImageDrmFormatModifierExplicitCreateInfoEXT *explicit_mod =
1298       vk_find_struct_const(pCreateInfo->pNext, IMAGE_DRM_FORMAT_MODIFIER_EXPLICIT_CREATE_INFO_EXT);
1299    assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO);
1300    const struct VkVideoProfileListInfoKHR *profile_list =
1301       vk_find_struct_const(pCreateInfo->pNext, VIDEO_PROFILE_LIST_INFO_KHR);
1302 
1303    unsigned plane_count = radv_get_internal_plane_count(pdev, format);
1304 
1305    const size_t image_struct_size = sizeof(*image) + sizeof(struct radv_image_plane) * plane_count;
1306 
1307    image = vk_zalloc2(&device->vk.alloc, alloc, image_struct_size, 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
1308    if (!image)
1309       return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
1310 
1311    vk_image_init(&device->vk, &image->vk, pCreateInfo);
1312 
1313    image->plane_count = vk_format_get_plane_count(format);
1314    image->disjoint = image->plane_count > 1 && pCreateInfo->flags & VK_IMAGE_CREATE_DISJOINT_BIT;
1315 
1316    image->exclusive = pCreateInfo->sharingMode == VK_SHARING_MODE_EXCLUSIVE;
1317    if (pCreateInfo->sharingMode == VK_SHARING_MODE_CONCURRENT) {
1318       for (uint32_t i = 0; i < pCreateInfo->queueFamilyIndexCount; ++i)
1319          if (pCreateInfo->pQueueFamilyIndices[i] == VK_QUEUE_FAMILY_EXTERNAL ||
1320              pCreateInfo->pQueueFamilyIndices[i] == VK_QUEUE_FAMILY_FOREIGN_EXT)
1321             image->queue_family_mask |= (1u << RADV_MAX_QUEUE_FAMILIES) - 1u;
1322          else
1323             image->queue_family_mask |= 1u << vk_queue_to_radv(pdev, pCreateInfo->pQueueFamilyIndices[i]);
1324 
1325       /* This queue never really accesses the image. */
1326       image->queue_family_mask &= ~(1u << RADV_QUEUE_SPARSE);
1327    }
1328 
1329    const VkExternalMemoryImageCreateInfo *external_info =
1330       vk_find_struct_const(pCreateInfo->pNext, EXTERNAL_MEMORY_IMAGE_CREATE_INFO);
1331 
1332    image->shareable = external_info;
1333 
1334    if (mod_list)
1335       modifier = radv_select_modifier(device, format, mod_list);
1336    else if (explicit_mod)
1337       modifier = explicit_mod->drmFormatModifier;
1338 
1339    for (unsigned plane = 0; plane < plane_count; ++plane) {
1340       image->planes[plane].surface.flags = radv_get_surface_flags(device, image, plane, pCreateInfo, format);
1341       image->planes[plane].surface.modifier = modifier;
1342    }
1343 
1344    if (image->vk.external_handle_types & VK_EXTERNAL_MEMORY_HANDLE_TYPE_ANDROID_HARDWARE_BUFFER_BIT_ANDROID) {
1345 #if DETECT_OS_ANDROID
1346       image->vk.ahb_format = radv_ahb_format_for_vk_format(image->vk.format);
1347 #endif
1348 
1349       *pImage = radv_image_to_handle(image);
1350       assert(!(image->vk.create_flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT));
1351       return VK_SUCCESS;
1352    }
1353 
1354    VkResult result = radv_image_create_layout(device, *create_info, explicit_mod, profile_list, image);
1355    if (result != VK_SUCCESS) {
1356       radv_destroy_image(device, alloc, image);
1357       return result;
1358    }
1359 
1360    if (image->vk.create_flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT) {
1361       image->alignment = MAX2(image->alignment, 4096);
1362       image->size = align64(image->size, image->alignment);
1363       image->bindings[0].offset = 0;
1364 
1365       result = radv_bo_create(device, &image->vk.base, image->size, image->alignment, 0, RADEON_FLAG_VIRTUAL,
1366                               RADV_BO_PRIORITY_VIRTUAL, 0, true, &image->bindings[0].bo);
1367       if (result != VK_SUCCESS) {
1368          radv_destroy_image(device, alloc, image);
1369          return vk_error(device, result);
1370       }
1371    }
1372 
1373    if (instance->debug_flags & RADV_DEBUG_IMG) {
1374       radv_image_print_info(device, image);
1375    }
1376 
1377    *pImage = radv_image_to_handle(image);
1378 
1379    radv_rmv_log_image_create(device, pCreateInfo, is_internal, *pImage);
1380    if (image->bindings[0].bo)
1381       radv_rmv_log_image_bind(device, 0, *pImage);
1382    return VK_SUCCESS;
1383 }
1384 
1385 unsigned
radv_plane_from_aspect(VkImageAspectFlags mask)1386 radv_plane_from_aspect(VkImageAspectFlags mask)
1387 {
1388    switch (mask) {
1389    case VK_IMAGE_ASPECT_PLANE_1_BIT:
1390    case VK_IMAGE_ASPECT_MEMORY_PLANE_1_BIT_EXT:
1391       return 1;
1392    case VK_IMAGE_ASPECT_PLANE_2_BIT:
1393    case VK_IMAGE_ASPECT_MEMORY_PLANE_2_BIT_EXT:
1394       return 2;
1395    case VK_IMAGE_ASPECT_MEMORY_PLANE_3_BIT_EXT:
1396       return 3;
1397    default:
1398       return 0;
1399    }
1400 }
1401 
1402 VkFormat
radv_get_aspect_format(struct radv_image * image,VkImageAspectFlags mask)1403 radv_get_aspect_format(struct radv_image *image, VkImageAspectFlags mask)
1404 {
1405    switch (mask) {
1406    case VK_IMAGE_ASPECT_PLANE_0_BIT:
1407       return image->planes[0].format;
1408    case VK_IMAGE_ASPECT_PLANE_1_BIT:
1409       return image->planes[1].format;
1410    case VK_IMAGE_ASPECT_PLANE_2_BIT:
1411       return image->planes[2].format;
1412    case VK_IMAGE_ASPECT_STENCIL_BIT:
1413       return vk_format_stencil_only(image->vk.format);
1414    case VK_IMAGE_ASPECT_DEPTH_BIT:
1415       return vk_format_depth_only(image->vk.format);
1416    case VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT:
1417       return vk_format_depth_only(image->vk.format);
1418    default:
1419       return image->vk.format;
1420    }
1421 }
1422 
1423 bool
radv_layout_is_htile_compressed(const struct radv_device * device,const struct radv_image * image,VkImageLayout layout,unsigned queue_mask)1424 radv_layout_is_htile_compressed(const struct radv_device *device, const struct radv_image *image, VkImageLayout layout,
1425                                 unsigned queue_mask)
1426 {
1427    const struct radv_physical_device *pdev = radv_device_physical(device);
1428    const struct radv_instance *instance = radv_physical_device_instance(pdev);
1429 
1430    /* Don't compress exclusive images used on transfer queues when SDMA doesn't support HTILE.
1431     * Note that HTILE is already disabled on concurrent images when not supported.
1432     */
1433    if (queue_mask == BITFIELD_BIT(RADV_QUEUE_TRANSFER) && !pdev->info.sdma_supports_compression)
1434       return false;
1435 
1436    switch (layout) {
1437    case VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL:
1438    case VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_OPTIMAL:
1439    case VK_IMAGE_LAYOUT_STENCIL_ATTACHMENT_OPTIMAL:
1440    case VK_IMAGE_LAYOUT_ATTACHMENT_OPTIMAL:
1441       return radv_image_has_htile(image);
1442    case VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL:
1443       return radv_image_is_tc_compat_htile(image) ||
1444              (radv_image_has_htile(image) && queue_mask == (1u << RADV_QUEUE_GENERAL));
1445    case VK_IMAGE_LAYOUT_SHARED_PRESENT_KHR:
1446    case VK_IMAGE_LAYOUT_GENERAL:
1447       /* It should be safe to enable TC-compat HTILE with
1448        * VK_IMAGE_LAYOUT_GENERAL if we are not in a render loop and
1449        * if the image doesn't have the storage bit set. This
1450        * improves performance for apps that use GENERAL for the main
1451        * depth pass because this allows compression and this reduces
1452        * the number of decompressions from/to GENERAL.
1453        */
1454       if (radv_image_is_tc_compat_htile(image) && queue_mask & (1u << RADV_QUEUE_GENERAL) &&
1455           !instance->drirc.disable_tc_compat_htile_in_general) {
1456          return true;
1457       } else {
1458          return false;
1459       }
1460    case VK_IMAGE_LAYOUT_ATTACHMENT_FEEDBACK_LOOP_OPTIMAL_EXT:
1461       /* Do not compress HTILE with feedback loops because we can't read&write it without
1462        * introducing corruption.
1463        */
1464       return false;
1465    case VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL:
1466    case VK_IMAGE_LAYOUT_READ_ONLY_OPTIMAL:
1467       if (radv_image_is_tc_compat_htile(image) ||
1468           (radv_image_has_htile(image) &&
1469            !(image->vk.usage & (VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT)))) {
1470          /* Keep HTILE compressed if the image is only going to
1471           * be used as a depth/stencil read-only attachment.
1472           */
1473          return true;
1474       } else {
1475          return false;
1476       }
1477       break;
1478    default:
1479       return radv_image_is_tc_compat_htile(image);
1480    }
1481 }
1482 
1483 bool
radv_layout_can_fast_clear(const struct radv_device * device,const struct radv_image * image,unsigned level,VkImageLayout layout,unsigned queue_mask)1484 radv_layout_can_fast_clear(const struct radv_device *device, const struct radv_image *image, unsigned level,
1485                            VkImageLayout layout, unsigned queue_mask)
1486 {
1487    if (radv_dcc_enabled(image, level) && !radv_layout_dcc_compressed(device, image, level, layout, queue_mask))
1488       return false;
1489 
1490    if (!(image->vk.usage & RADV_IMAGE_USAGE_WRITE_BITS))
1491       return false;
1492 
1493    if (layout != VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL && layout != VK_IMAGE_LAYOUT_ATTACHMENT_OPTIMAL)
1494       return false;
1495 
1496    /* Exclusive images with CMASK or DCC can always be fast-cleared on the gfx queue. Concurrent
1497     * images can only be fast-cleared if comp-to-single is supported because we don't yet support
1498     * FCE on the compute queue.
1499     */
1500    return queue_mask == (1u << RADV_QUEUE_GENERAL) || radv_image_use_comp_to_single(device, image);
1501 }
1502 
1503 bool
radv_layout_dcc_compressed(const struct radv_device * device,const struct radv_image * image,unsigned level,VkImageLayout layout,unsigned queue_mask)1504 radv_layout_dcc_compressed(const struct radv_device *device, const struct radv_image *image, unsigned level,
1505                            VkImageLayout layout, unsigned queue_mask)
1506 {
1507    const struct radv_physical_device *pdev = radv_device_physical(device);
1508 
1509    if (!radv_dcc_enabled(image, level))
1510       return false;
1511 
1512    if (image->vk.tiling == VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT && queue_mask & (1u << RADV_QUEUE_FOREIGN))
1513       return true;
1514 
1515    /* If the image is read-only, we can always just keep it compressed */
1516    if (!(image->vk.usage & RADV_IMAGE_USAGE_WRITE_BITS))
1517       return true;
1518 
1519    /* Don't compress compute transfer dst when image stores are not supported. */
1520    if ((layout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL || layout == VK_IMAGE_LAYOUT_GENERAL) &&
1521        (queue_mask & (1u << RADV_QUEUE_COMPUTE)) && !radv_image_use_dcc_image_stores(device, image))
1522       return false;
1523 
1524    /* Don't compress exclusive images used on transfer queues when SDMA doesn't support DCC.
1525     * Note that DCC is already disabled on concurrent images when not supported.
1526     */
1527    if (queue_mask == BITFIELD_BIT(RADV_QUEUE_TRANSFER) && !pdev->info.sdma_supports_compression)
1528       return false;
1529 
1530    if (layout == VK_IMAGE_LAYOUT_ATTACHMENT_FEEDBACK_LOOP_OPTIMAL_EXT) {
1531       /* Do not compress DCC with feedback loops because we can't read&write it without introducing
1532        * corruption.
1533        */
1534       return false;
1535    }
1536 
1537    return pdev->info.gfx_level >= GFX10 || layout != VK_IMAGE_LAYOUT_GENERAL;
1538 }
1539 
1540 enum radv_fmask_compression
radv_layout_fmask_compression(const struct radv_device * device,const struct radv_image * image,VkImageLayout layout,unsigned queue_mask)1541 radv_layout_fmask_compression(const struct radv_device *device, const struct radv_image *image, VkImageLayout layout,
1542                               unsigned queue_mask)
1543 {
1544    if (!radv_image_has_fmask(image))
1545       return RADV_FMASK_COMPRESSION_NONE;
1546 
1547    if (layout == VK_IMAGE_LAYOUT_GENERAL)
1548       return RADV_FMASK_COMPRESSION_NONE;
1549 
1550    /* Don't compress compute transfer dst because image stores ignore FMASK and it needs to be
1551     * expanded before.
1552     */
1553    if (layout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL && (queue_mask & (1u << RADV_QUEUE_COMPUTE)))
1554       return RADV_FMASK_COMPRESSION_NONE;
1555 
1556    /* Compress images if TC-compat CMASK is enabled. */
1557    if (radv_image_is_tc_compat_cmask(image))
1558       return RADV_FMASK_COMPRESSION_FULL;
1559 
1560    switch (layout) {
1561    case VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL:
1562    case VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL:
1563       /* Don't compress images but no need to expand FMASK. */
1564       return RADV_FMASK_COMPRESSION_PARTIAL;
1565    case VK_IMAGE_LAYOUT_ATTACHMENT_FEEDBACK_LOOP_OPTIMAL_EXT:
1566       /* Don't compress images that are in feedback loops. */
1567       return RADV_FMASK_COMPRESSION_NONE;
1568    default:
1569       /* Don't compress images that are concurrent. */
1570       return queue_mask == (1u << RADV_QUEUE_GENERAL) ? RADV_FMASK_COMPRESSION_FULL : RADV_FMASK_COMPRESSION_NONE;
1571    }
1572 }
1573 
1574 unsigned
radv_image_queue_family_mask(const struct radv_image * image,enum radv_queue_family family,enum radv_queue_family queue_family)1575 radv_image_queue_family_mask(const struct radv_image *image, enum radv_queue_family family,
1576                              enum radv_queue_family queue_family)
1577 {
1578    if (!image->exclusive)
1579       return image->queue_family_mask;
1580    if (family == RADV_QUEUE_FOREIGN)
1581       return ((1u << RADV_MAX_QUEUE_FAMILIES) - 1u) | (1u << RADV_QUEUE_FOREIGN);
1582    if (family == RADV_QUEUE_IGNORED)
1583       return 1u << queue_family;
1584    return 1u << family;
1585 }
1586 
1587 bool
radv_image_is_renderable(const struct radv_device * device,const struct radv_image * image)1588 radv_image_is_renderable(const struct radv_device *device, const struct radv_image *image)
1589 {
1590    const struct radv_physical_device *pdev = radv_device_physical(device);
1591 
1592    if (image->vk.format == VK_FORMAT_R32G32B32_UINT || image->vk.format == VK_FORMAT_R32G32B32_SINT ||
1593        image->vk.format == VK_FORMAT_R32G32B32_SFLOAT)
1594       return false;
1595 
1596    if (pdev->info.gfx_level >= GFX9 && image->vk.image_type == VK_IMAGE_TYPE_3D &&
1597        vk_format_get_blocksizebits(image->vk.format) == 128 && vk_format_is_compressed(image->vk.format))
1598       return false;
1599 
1600    if (image->planes[0].surface.flags & RADEON_SURF_NO_RENDER_TARGET)
1601       return false;
1602 
1603    return true;
1604 }
1605 
VKAPI_ATTR VkResult VKAPI_CALL
radv_CreateImage(VkDevice _device, const VkImageCreateInfo *pCreateInfo, const VkAllocationCallbacks *pAllocator,
                 VkImage *pImage)
{
#if DETECT_OS_ANDROID
   /* Android gralloc-backed images take a dedicated import path. */
   const VkNativeBufferANDROID *gralloc_info = vk_find_struct_const(pCreateInfo->pNext, NATIVE_BUFFER_ANDROID);

   if (gralloc_info)
      return radv_image_from_gralloc(_device, pCreateInfo, gralloc_info, pAllocator, pImage);
#endif

#ifdef RADV_USE_WSI_PLATFORM
   /* Ignore swapchain creation info on Android. Since we don't have an implementation in Mesa,
    * we're guaranteed to access an Android object incorrectly.
    */
   VK_FROM_HANDLE(radv_device, device, _device);
   const struct radv_physical_device *pdev = radv_device_physical(device);
   const VkImageSwapchainCreateInfoKHR *swapchain_info =
      vk_find_struct_const(pCreateInfo->pNext, IMAGE_SWAPCHAIN_CREATE_INFO_KHR);
   if (swapchain_info && swapchain_info->swapchain != VK_NULL_HANDLE) {
      /* Image belongs to a swapchain: let the common WSI code create it. */
      return wsi_common_create_swapchain_image(pdev->vk.wsi_device, pCreateInfo, swapchain_info->swapchain, pImage);
   }
#endif

   /* WSI may request a scanout-compatible layout and/or mark the image as a
    * prime blit source; forward both flags to the internal create path.
    */
   const struct wsi_image_create_info *wsi_info = vk_find_struct_const(pCreateInfo->pNext, WSI_IMAGE_CREATE_INFO_MESA);
   bool scanout = wsi_info && wsi_info->scanout;
   bool prime_blit_src = wsi_info && wsi_info->blit_src;

   return radv_image_create(_device,
                            &(struct radv_image_create_info){
                               .vk_info = pCreateInfo,
                               .scanout = scanout,
                               .prime_blit_src = prime_blit_src,
                            },
                            pAllocator, pImage, false);
}
1642 
1643 VKAPI_ATTR void VKAPI_CALL
radv_DestroyImage(VkDevice _device,VkImage _image,const VkAllocationCallbacks * pAllocator)1644 radv_DestroyImage(VkDevice _device, VkImage _image, const VkAllocationCallbacks *pAllocator)
1645 {
1646    VK_FROM_HANDLE(radv_device, device, _device);
1647    VK_FROM_HANDLE(radv_image, image, _image);
1648 
1649    if (!image)
1650       return;
1651 
1652    radv_destroy_image(device, pAllocator, image);
1653 }
1654 
/* Record a memory binding in the given binding slot of the image, then report
 * it to the RMV tracer and to VK_EXT_device_address_binding_report listeners.
 */
static void
radv_bind_image_memory(struct radv_device *device, struct radv_image *image, uint32_t bind_idx,
                       struct radeon_winsys_bo *bo, uint64_t offset, uint64_t range)
{
   struct radv_physical_device *pdev = radv_device_physical(device);
   struct radv_instance *instance = radv_physical_device_instance(pdev);

   /* The bindings array holds at most 3 entries (NOTE(review): presumably one
    * per plane of a disjoint multi-planar image — confirm against the struct
    * definition).
    */
   assert(bind_idx < 3);

   /* Store the backing BO, its GPU VA, and the bound offset/range. */
   image->bindings[bind_idx].bo = bo;
   image->bindings[bind_idx].offset = offset;
   image->bindings[bind_idx].bo_va = radv_buffer_get_va(bo);
   image->bindings[bind_idx].range = range;

   /* Log the bind for the Radeon Memory Visualizer. */
   radv_rmv_log_image_bind(device, bind_idx, radv_image_to_handle(image));

   /* Emit an address-binding-report callback with the bound VA range. */
   vk_address_binding_report(&instance->vk, &image->vk.base,
                             image->bindings[bind_idx].bo_va + image->bindings[bind_idx].offset,
                             image->bindings[bind_idx].range, VK_DEVICE_ADDRESS_BINDING_TYPE_BIND_EXT);
}
1675 
VKAPI_ATTR VkResult VKAPI_CALL
radv_BindImageMemory2(VkDevice _device, uint32_t bindInfoCount, const VkBindImageMemoryInfo *pBindInfos)
{
   VK_FROM_HANDLE(radv_device, device, _device);

   for (uint32_t i = 0; i < bindInfoCount; ++i) {
      VK_FROM_HANDLE(radv_device_memory, mem, pBindInfos[i].memory);
      VK_FROM_HANDLE(radv_image, image, pBindInfos[i].image);
      /* Optional per-bind status output (VK_KHR_maintenance6); the const cast
       * is needed because we write through the found struct.
       */
      VkBindMemoryStatusKHR *status = (void *)vk_find_struct_const(&pBindInfos[i], BIND_MEMORY_STATUS_KHR);

      /* Default to success; overwritten below on failure. */
      if (status)
         *status->pResult = VK_SUCCESS;

         /* Ignore this struct on Android, we cannot access swapchain structures there. */
#ifdef RADV_USE_WSI_PLATFORM
      const VkBindImageMemorySwapchainInfoKHR *swapchain_info =
         vk_find_struct_const(pBindInfos[i].pNext, BIND_IMAGE_MEMORY_SWAPCHAIN_INFO_KHR);

      if (swapchain_info && swapchain_info->swapchain != VK_NULL_HANDLE) {
         /* Bind to the BO of the referenced swapchain image instead of `mem`. */
         struct radv_image *swapchain_img =
            radv_image_from_handle(wsi_common_get_image(swapchain_info->swapchain, swapchain_info->imageIndex));

         radv_bind_image_memory(device, image, 0,
                                swapchain_img->bindings[0].bo, swapchain_img->bindings[0].offset,
                                swapchain_img->bindings[0].range);
         continue;
      }
#endif

      const VkBindImagePlaneMemoryInfo *plane_info = NULL;
      uint32_t bind_idx = 0;

      /* Disjoint images bind each plane separately: pick the binding slot from
       * the plane aspect in the pNext chain.
       */
      if (image->disjoint) {
         plane_info = vk_find_struct_const(pBindInfos[i].pNext, BIND_IMAGE_PLANE_MEMORY_INFO);
         bind_idx = radv_plane_from_aspect(plane_info->planeAspect);
      }

      /* Query the (per-plane) memory requirements so the bind range can be
       * validated against the allocation size below.
       */
      VkImagePlaneMemoryRequirementsInfo plane = {
         .sType = VK_STRUCTURE_TYPE_IMAGE_PLANE_MEMORY_REQUIREMENTS_INFO,
         .planeAspect = plane_info ? plane_info->planeAspect : 0,
      };
      VkImageMemoryRequirementsInfo2 info = {
         .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_REQUIREMENTS_INFO_2,
         .pNext = image->disjoint ? &plane : NULL,
         .image = pBindInfos[i].image,
      };
      VkMemoryRequirements2 reqs = {
         .sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2,
      };

      radv_GetImageMemoryRequirements2(_device, &info, &reqs);

      /* Skip the size check when the allocation size is unknown (alloc_size == 0). */
      if (mem->alloc_size) {
         if (pBindInfos[i].memoryOffset + reqs.memoryRequirements.size > mem->alloc_size) {
            if (status)
               *status->pResult = VK_ERROR_UNKNOWN;
            return vk_errorf(device, VK_ERROR_UNKNOWN, "Device memory object too small for the image.\n");
         }
      }

      radv_bind_image_memory(device, image, bind_idx, mem->bo, pBindInfos[i].memoryOffset,
                             reqs.memoryRequirements.size);
   }
   return VK_SUCCESS;
}
1741 
VKAPI_ATTR void VKAPI_CALL
radv_GetImageSubresourceLayout2KHR(VkDevice _device, VkImage _image, const VkImageSubresource2KHR *pSubresource,
                                   VkSubresourceLayout2KHR *pLayout)
{
   VK_FROM_HANDLE(radv_image, image, _image);
   VK_FROM_HANDLE(radv_device, device, _device);
   const struct radv_physical_device *pdev = radv_device_physical(device);
   int level = pSubresource->imageSubresource.mipLevel;
   int layer = pSubresource->imageSubresource.arrayLayer;

   /* For multi-planar formats, the aspect mask selects the format plane. */
   const unsigned plane_count = vk_format_get_plane_count(image->vk.format);
   unsigned plane_id = 0;
   if (plane_count > 1)
      plane_id = radv_plane_from_aspect(pSubresource->imageSubresource.aspectMask);

   struct radv_image_plane *plane = &image->planes[plane_id];
   struct radeon_surf *surface = &plane->surface;

   if (image->vk.tiling == VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT && plane_count == 1) {
      /* Single-format-plane modifier image: the aspect selects a *memory*
       * plane of the modifier layout, and the layout is taken from the
       * ac_surface plane helpers.
       */
      unsigned mem_plane_id = radv_plane_from_aspect(pSubresource->imageSubresource.aspectMask);

      /* Modifier images are single-level, single-layer here. */
      assert(level == 0);
      assert(layer == 0);

      pLayout->subresourceLayout.offset = ac_surface_get_plane_offset(pdev->info.gfx_level, surface, mem_plane_id, 0);
      pLayout->subresourceLayout.rowPitch =
         ac_surface_get_plane_stride(pdev->info.gfx_level, surface, mem_plane_id, level);
      pLayout->subresourceLayout.arrayPitch = 0;
      pLayout->subresourceLayout.depthPitch = 0;
      pLayout->subresourceLayout.size = ac_surface_get_plane_size(surface, mem_plane_id);
   } else if (pdev->info.gfx_level >= GFX9) {
      /* GFX9+ unified surface: linear surfaces carry per-level offsets,
       * otherwise all levels start at the plane offset.
       */
      uint64_t level_offset = surface->is_linear ? surface->u.gfx9.offset[level] : 0;

      pLayout->subresourceLayout.offset =
         ac_surface_get_plane_offset(pdev->info.gfx_level, &plane->surface, 0, layer) + level_offset;
      if (image->vk.format == VK_FORMAT_R32G32B32_UINT || image->vk.format == VK_FORMAT_R32G32B32_SINT ||
          image->vk.format == VK_FORMAT_R32G32B32_SFLOAT) {
         /* Adjust the number of bytes between each row because
          * the pitch is actually the number of components per
          * row.
          */
         pLayout->subresourceLayout.rowPitch = surface->u.gfx9.surf_pitch * surface->bpe / 3;
      } else {
         /* Linear surfaces have per-level pitches; tiled ones share one pitch. */
         uint32_t pitch = surface->is_linear ? surface->u.gfx9.pitch[level] : surface->u.gfx9.surf_pitch;

         assert(util_is_power_of_two_nonzero(surface->bpe));
         pLayout->subresourceLayout.rowPitch = pitch * surface->bpe;
      }

      pLayout->subresourceLayout.arrayPitch = surface->u.gfx9.surf_slice_size;
      pLayout->subresourceLayout.depthPitch = surface->u.gfx9.surf_slice_size;
      pLayout->subresourceLayout.size = surface->u.gfx9.surf_slice_size;
      /* For 3D images, the subresource covers all depth slices of the level. */
      if (image->vk.image_type == VK_IMAGE_TYPE_3D)
         pLayout->subresourceLayout.size *= u_minify(image->vk.extent.depth, level);
   } else {
      /* Pre-GFX9 legacy layout: level offsets are stored in 256-byte units
       * and slice sizes in dwords.
       */
      pLayout->subresourceLayout.offset = (uint64_t)surface->u.legacy.level[level].offset_256B * 256 +
                                          (uint64_t)surface->u.legacy.level[level].slice_size_dw * 4 * layer;
      pLayout->subresourceLayout.rowPitch = surface->u.legacy.level[level].nblk_x * surface->bpe;
      pLayout->subresourceLayout.arrayPitch = (uint64_t)surface->u.legacy.level[level].slice_size_dw * 4;
      pLayout->subresourceLayout.depthPitch = (uint64_t)surface->u.legacy.level[level].slice_size_dw * 4;
      pLayout->subresourceLayout.size = (uint64_t)surface->u.legacy.level[level].slice_size_dw * 4;
      if (image->vk.image_type == VK_IMAGE_TYPE_3D)
         pLayout->subresourceLayout.size *= u_minify(image->vk.extent.depth, level);
   }

   /* VK_EXT_image_compression_control: report whether metadata compression
    * (HTILE for depth images, DCC otherwise) is in use; RADV has no
    * fixed-rate compression.
    */
   VkImageCompressionPropertiesEXT *image_compression_props =
      vk_find_struct(pLayout->pNext, IMAGE_COMPRESSION_PROPERTIES_EXT);
   if (image_compression_props) {
      image_compression_props->imageCompressionFixedRateFlags = VK_IMAGE_COMPRESSION_FIXED_RATE_NONE_EXT;

      if (image->vk.aspects & VK_IMAGE_ASPECT_DEPTH_BIT) {
         image_compression_props->imageCompressionFlags =
            radv_image_has_htile(image) ? VK_IMAGE_COMPRESSION_DEFAULT_EXT : VK_IMAGE_COMPRESSION_DISABLED_EXT;
      } else {
         image_compression_props->imageCompressionFlags =
            radv_image_has_dcc(image) ? VK_IMAGE_COMPRESSION_DEFAULT_EXT : VK_IMAGE_COMPRESSION_DISABLED_EXT;
      }
   }
}
1821 
1822 VKAPI_ATTR VkResult VKAPI_CALL
radv_GetImageDrmFormatModifierPropertiesEXT(VkDevice _device,VkImage _image,VkImageDrmFormatModifierPropertiesEXT * pProperties)1823 radv_GetImageDrmFormatModifierPropertiesEXT(VkDevice _device, VkImage _image,
1824                                             VkImageDrmFormatModifierPropertiesEXT *pProperties)
1825 {
1826    VK_FROM_HANDLE(radv_image, image, _image);
1827 
1828    pProperties->drmFormatModifier = image->planes[0].surface.modifier;
1829    return VK_SUCCESS;
1830 }
1831