xref: /aosp_15_r20/external/mesa3d/src/nouveau/vulkan/nvk_sampler.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright © 2022 Collabora Ltd. and Red Hat Inc.
3  * SPDX-License-Identifier: MIT
4  */
5 #include "nvk_sampler.h"
6 
7 #include "nvk_device.h"
8 #include "nvk_entrypoints.h"
9 #include "nvk_physical_device.h"
10 
11 #include "vk_format.h"
12 #include "vk_sampler.h"
13 
14 #include "util/bitpack_helpers.h"
15 #include "util/format/format_utils.h"
16 #include "util/format_srgb.h"
17 
18 #include "cla097.h"
19 #include "clb197.h"
20 #include "cl9097tex.h"
21 #include "cla097tex.h"
22 #include "clb197tex.h"
23 #include "drf.h"
24 
25 ALWAYS_INLINE static void
__set_u32(uint32_t * o,uint32_t v,unsigned lo,unsigned hi)26 __set_u32(uint32_t *o, uint32_t v, unsigned lo, unsigned hi)
27 {
28    assert(lo <= hi && hi < 32);
29    *o |= util_bitpack_uint(v, lo % 32, hi % 32);
30 }
31 
32 #define FIXED_FRAC_BITS 8
33 
34 ALWAYS_INLINE static void
__set_ufixed(uint32_t * o,float v,unsigned lo,unsigned hi)35 __set_ufixed(uint32_t *o, float v, unsigned lo, unsigned hi)
36 {
37    assert(lo <= hi && hi < 32);
38    *o |= util_bitpack_ufixed_clamp(v, lo % 32, hi % 32, FIXED_FRAC_BITS);
39 }
40 
41 ALWAYS_INLINE static void
__set_sfixed(uint32_t * o,float v,unsigned lo,unsigned hi)42 __set_sfixed(uint32_t *o, float v, unsigned lo, unsigned hi)
43 {
44    assert(lo <= hi && hi < 32);
45    *o |= util_bitpack_sfixed_clamp(v, lo % 32, hi % 32, FIXED_FRAC_BITS);
46 }
47 
48 ALWAYS_INLINE static void
__set_bool(uint32_t * o,bool b,unsigned lo,unsigned hi)49 __set_bool(uint32_t *o, bool b, unsigned lo, unsigned hi)
50 {
51    assert(lo == hi && hi < 32);
52    *o |= util_bitpack_uint(b, lo % 32, hi % 32);
53 }
54 
/* NOTE(review): MW() is not referenced anywhere in this file; presumably the
 * included class headers expand field definitions through it -- confirm
 * before touching it.
 */
#define MW(x) x

/* The SAMP_SET_* helpers below each OR one field of TEXSAMP word `i` into
 * (o)[i].  The field's bit range is obtained by applying DRF_LO()/DRF_HI()
 * to the token NV##_TEXSAMP##i##_##FIELD, so `NV` selects the class prefix
 * (e.g. NV9097), `i` the word index and `FIELD` the field name.
 */

/* Unsigned integer field. */
#define SAMP_SET_U(o, NV, i, FIELD, val) \
   __set_u32(&(o)[i], (val), DRF_LO(NV##_TEXSAMP##i##_##FIELD),\
                             DRF_HI(NV##_TEXSAMP##i##_##FIELD))

/* Float packed through __set_ufixed(). */
#define SAMP_SET_UF(o, NV, i, FIELD, val) \
   __set_ufixed(&(o)[i], (val), DRF_LO(NV##_TEXSAMP##i##_##FIELD),\
                                DRF_HI(NV##_TEXSAMP##i##_##FIELD))

/* Float packed through __set_sfixed(). */
#define SAMP_SET_SF(o, NV, i, FIELD, val) \
   __set_sfixed(&(o)[i], (val), DRF_LO(NV##_TEXSAMP##i##_##FIELD),\
                                DRF_HI(NV##_TEXSAMP##i##_##FIELD))

/* Single-bit boolean field. */
#define SAMP_SET_B(o, NV, i, FIELD, b) \
   __set_bool(&(o)[i], (b), DRF_LO(NV##_TEXSAMP##i##_##FIELD),\
                            DRF_HI(NV##_TEXSAMP##i##_##FIELD))

/* Enumerated field: E is the enumerant suffix, resolved to the constant
 * NV##_TEXSAMP##i##_##FIELD##_##E.
 */
#define SAMP_SET_E(o, NV, i, FIELD, E) \
   SAMP_SET_U((o), NV, i, FIELD, NV##_TEXSAMP##i##_##FIELD##_##E)
75 
76 static inline uint32_t
vk_to_9097_address_mode(VkSamplerAddressMode addr_mode)77 vk_to_9097_address_mode(VkSamplerAddressMode addr_mode)
78 {
79 #define MODE(VK, NV) \
80    [VK_SAMPLER_ADDRESS_MODE_##VK] = NV9097_TEXSAMP0_ADDRESS_U_##NV
81    static const uint8_t vk_to_9097[] = {
82       MODE(REPEAT,               WRAP),
83       MODE(MIRRORED_REPEAT,      MIRROR),
84       MODE(CLAMP_TO_EDGE,        CLAMP_TO_EDGE),
85       MODE(CLAMP_TO_BORDER,      BORDER),
86       MODE(MIRROR_CLAMP_TO_EDGE, MIRROR_ONCE_CLAMP_TO_EDGE),
87    };
88 #undef MODE
89 
90    assert(addr_mode < ARRAY_SIZE(vk_to_9097));
91    return vk_to_9097[addr_mode];
92 }
93 
94 static uint32_t
vk_to_9097_texsamp_compare_op(VkCompareOp op)95 vk_to_9097_texsamp_compare_op(VkCompareOp op)
96 {
97 #define OP(VK, NV) \
98    [VK_COMPARE_OP_##VK] = NV9097_TEXSAMP0_DEPTH_COMPARE_FUNC_##NV
99    ASSERTED static const uint8_t vk_to_9097[] = {
100       OP(NEVER,            ZC_NEVER),
101       OP(LESS,             ZC_LESS),
102       OP(EQUAL,            ZC_EQUAL),
103       OP(LESS_OR_EQUAL,    ZC_LEQUAL),
104       OP(GREATER,          ZC_GREATER),
105       OP(NOT_EQUAL,        ZC_NOTEQUAL),
106       OP(GREATER_OR_EQUAL, ZC_GEQUAL),
107       OP(ALWAYS,           ZC_ALWAYS),
108    };
109 #undef OP
110 
111    assert(op < ARRAY_SIZE(vk_to_9097));
112    assert(op == vk_to_9097[op]);
113 
114    return op;
115 }
116 
117 static uint32_t
vk_to_9097_max_anisotropy(float max_anisotropy)118 vk_to_9097_max_anisotropy(float max_anisotropy)
119 {
120    if (max_anisotropy >= 16)
121       return NV9097_TEXSAMP0_MAX_ANISOTROPY_ANISO_16_TO_1;
122 
123    if (max_anisotropy >= 12)
124       return NV9097_TEXSAMP0_MAX_ANISOTROPY_ANISO_12_TO_1;
125 
126    uint32_t aniso_u32 = MAX2(0.0f, max_anisotropy);
127    return aniso_u32 >> 1;
128 }
129 
/* Pick the value for the TEXSAMP1 TRILIN_OPT field from the maximum
 * anisotropy ratio.
 *
 * Returns 0 for ratios >= 12, 6 for ratios in [4, 12), 4 for ratios in
 * [2, 4) and 0 for anything below 2.
 */
static uint32_t
vk_to_9097_trilin_opt(float max_anisotropy)
{
   /* No idea if we want this but matching nouveau */
   if (max_anisotropy >= 12)
      return 0;

   if (max_anisotropy >= 4)
      return 6;

   if (max_anisotropy >= 2)
      return 4;

   return 0;
}
145 
146 static void
nvk_sampler_fill_header(const struct nvk_physical_device * pdev,const struct VkSamplerCreateInfo * info,const struct vk_sampler * vk_sampler,uint32_t * samp)147 nvk_sampler_fill_header(const struct nvk_physical_device *pdev,
148                         const struct VkSamplerCreateInfo *info,
149                         const struct vk_sampler *vk_sampler,
150                         uint32_t *samp)
151 {
152    SAMP_SET_U(samp, NV9097, 0, ADDRESS_U,
153               vk_to_9097_address_mode(info->addressModeU));
154    SAMP_SET_U(samp, NV9097, 0, ADDRESS_V,
155               vk_to_9097_address_mode(info->addressModeV));
156    SAMP_SET_U(samp, NV9097, 0, ADDRESS_P,
157               vk_to_9097_address_mode(info->addressModeW));
158 
159    if (info->compareEnable) {
160       SAMP_SET_B(samp, NV9097, 0, DEPTH_COMPARE, true);
161       SAMP_SET_U(samp, NV9097, 0, DEPTH_COMPARE_FUNC,
162                  vk_to_9097_texsamp_compare_op(info->compareOp));
163    }
164 
165    SAMP_SET_B(samp, NV9097, 0, S_R_G_B_CONVERSION, true);
166    SAMP_SET_E(samp, NV9097, 0, FONT_FILTER_WIDTH, SIZE_2);
167    SAMP_SET_E(samp, NV9097, 0, FONT_FILTER_HEIGHT, SIZE_2);
168 
169    if (info->anisotropyEnable) {
170       SAMP_SET_U(samp, NV9097, 0, MAX_ANISOTROPY,
171                  vk_to_9097_max_anisotropy(info->maxAnisotropy));
172    }
173 
174    switch (info->magFilter) {
175    case VK_FILTER_NEAREST:
176       SAMP_SET_E(samp, NV9097, 1, MAG_FILTER, MAG_POINT);
177       break;
178    case VK_FILTER_LINEAR:
179       SAMP_SET_E(samp, NV9097, 1, MAG_FILTER, MAG_LINEAR);
180       break;
181    default:
182       unreachable("Invalid filter");
183    }
184 
185    switch (info->minFilter) {
186    case VK_FILTER_NEAREST:
187       SAMP_SET_E(samp, NV9097, 1, MIN_FILTER, MIN_POINT);
188       break;
189    case VK_FILTER_LINEAR:
190       if (info->anisotropyEnable)
191          SAMP_SET_E(samp, NV9097, 1, MIN_FILTER, MIN_ANISO);
192       else
193          SAMP_SET_E(samp, NV9097, 1, MIN_FILTER, MIN_LINEAR);
194       break;
195    default:
196       unreachable("Invalid filter");
197    }
198 
199    switch (info->mipmapMode) {
200    case VK_SAMPLER_MIPMAP_MODE_NEAREST:
201       SAMP_SET_E(samp, NV9097, 1, MIP_FILTER, MIP_POINT);
202       break;
203    case VK_SAMPLER_MIPMAP_MODE_LINEAR:
204       SAMP_SET_E(samp, NV9097, 1, MIP_FILTER, MIP_LINEAR);
205       break;
206    default:
207       unreachable("Invalid mipmap mode");
208    }
209 
210    assert(pdev->info.cls_eng3d >= KEPLER_A);
211    if (info->flags & VK_SAMPLER_CREATE_NON_SEAMLESS_CUBE_MAP_BIT_EXT) {
212       SAMP_SET_E(samp, NVA097, 1, CUBEMAP_INTERFACE_FILTERING, USE_WRAP);
213    } else {
214       SAMP_SET_E(samp, NVA097, 1, CUBEMAP_INTERFACE_FILTERING, AUTO_SPAN_SEAM);
215    }
216 
217    if (pdev->info.cls_eng3d >= MAXWELL_B) {
218       switch (vk_sampler->reduction_mode) {
219       case VK_SAMPLER_REDUCTION_MODE_WEIGHTED_AVERAGE:
220          SAMP_SET_E(samp, NVB197, 1, REDUCTION_FILTER, RED_NONE);
221          break;
222       case VK_SAMPLER_REDUCTION_MODE_MIN:
223          SAMP_SET_E(samp, NVB197, 1, REDUCTION_FILTER, RED_MINIMUM);
224          break;
225       case VK_SAMPLER_REDUCTION_MODE_MAX:
226          SAMP_SET_E(samp, NVB197, 1, REDUCTION_FILTER, RED_MAXIMUM);
227          break;
228       default:
229          unreachable("Invalid reduction mode");
230       }
231    }
232 
233    SAMP_SET_SF(samp, NV9097, 1, MIP_LOD_BIAS, info->mipLodBias);
234 
235    assert(pdev->info.cls_eng3d >= KEPLER_A);
236    if (info->unnormalizedCoordinates) {
237       SAMP_SET_E(samp, NVA097, 1, FLOAT_COORD_NORMALIZATION,
238                                   FORCE_UNNORMALIZED_COORDS);
239    } else {
240       SAMP_SET_E(samp, NVA097, 1, FLOAT_COORD_NORMALIZATION,
241                                   USE_HEADER_SETTING);
242    }
243    SAMP_SET_U(samp, NV9097, 1, TRILIN_OPT,
244               vk_to_9097_trilin_opt(info->maxAnisotropy));
245 
246    SAMP_SET_UF(samp, NV9097, 2, MIN_LOD_CLAMP, info->minLod);
247    SAMP_SET_UF(samp, NV9097, 2, MAX_LOD_CLAMP, info->maxLod);
248 
249    VkClearColorValue bc = vk_sampler->border_color_value;
250    uint8_t bc_srgb[3];
251 
252    const VkSamplerBorderColorComponentMappingCreateInfoEXT *swiz_info =
253       vk_find_struct_const(info->pNext,
254                            SAMPLER_BORDER_COLOR_COMPONENT_MAPPING_CREATE_INFO_EXT);
255    if (swiz_info) {
256       if (swiz_info->srgb) {
257          for (uint32_t i = 0; i < 3; i++)
258             bc.float32[i] = util_format_linear_to_srgb_float(bc.float32[i]);
259       }
260 
261       const bool is_int = vk_border_color_is_int(info->borderColor);
262       bc = vk_swizzle_color_value(bc, swiz_info->components, is_int);
263 
264       for (uint32_t i = 0; i < 3; i++)
265          bc_srgb[i] = _mesa_float_to_unorm(bc.float32[i], 8);
266    } else {
267       /* Otherwise, we can assume no swizzle or that the border color is
268        * transparent black or opaque white and there's nothing to do but
269        * convert the (unswizzled) border color to sRGB.
270        */
271       for (unsigned i = 0; i < 3; i++)
272          bc_srgb[i] = util_format_linear_float_to_srgb_8unorm(bc.float32[i]);
273    }
274 
275    SAMP_SET_U(samp, NV9097, 2, S_R_G_B_BORDER_COLOR_R, bc_srgb[0]);
276    SAMP_SET_U(samp, NV9097, 3, S_R_G_B_BORDER_COLOR_G, bc_srgb[1]);
277    SAMP_SET_U(samp, NV9097, 3, S_R_G_B_BORDER_COLOR_B, bc_srgb[2]);
278 
279    SAMP_SET_U(samp, NV9097, 4, BORDER_COLOR_R, bc.uint32[0]);
280    SAMP_SET_U(samp, NV9097, 5, BORDER_COLOR_G, bc.uint32[1]);
281    SAMP_SET_U(samp, NV9097, 6, BORDER_COLOR_B, bc.uint32[2]);
282    SAMP_SET_U(samp, NV9097, 7, BORDER_COLOR_A, bc.uint32[3]);
283 }
284 
285 VKAPI_ATTR VkResult VKAPI_CALL
nvk_CreateSampler(VkDevice device,const VkSamplerCreateInfo * pCreateInfo,const VkAllocationCallbacks * pAllocator,VkSampler * pSampler)286 nvk_CreateSampler(VkDevice device,
287                   const VkSamplerCreateInfo *pCreateInfo,
288                   const VkAllocationCallbacks *pAllocator,
289                   VkSampler *pSampler)
290 {
291    VK_FROM_HANDLE(nvk_device, dev, device);
292    struct nvk_physical_device *pdev = nvk_device_physical(dev);
293    struct nvk_sampler *sampler;
294    VkResult result;
295 
296    const VkOpaqueCaptureDescriptorDataCreateInfoEXT *cap_info =
297       vk_find_struct_const(pCreateInfo->pNext,
298                            OPAQUE_CAPTURE_DESCRIPTOR_DATA_CREATE_INFO_EXT);
299    struct nvk_sampler_capture cap = {};
300    if (cap_info != NULL)
301       memcpy(&cap, cap_info->opaqueCaptureDescriptorData, sizeof(cap));
302 
303    sampler = vk_sampler_create(&dev->vk, pCreateInfo,
304                                pAllocator, sizeof(*sampler));
305    if (!sampler)
306       return vk_error(dev, VK_ERROR_OUT_OF_HOST_MEMORY);
307 
308    {
309       uint32_t samp[8] = {};
310       sampler->plane_count = 1;
311       nvk_sampler_fill_header(pdev, pCreateInfo, &sampler->vk, samp);
312 
313       uint32_t desc_index = 0;
314       if (cap_info != NULL) {
315          desc_index = cap.planes[0].desc_index;
316          result = nvk_descriptor_table_insert(dev, &dev->samplers,
317                                               desc_index, samp, sizeof(samp));
318       } else {
319          result = nvk_descriptor_table_add(dev, &dev->samplers,
320                                            samp, sizeof(samp), &desc_index);
321       }
322       if (result != VK_SUCCESS) {
323          vk_sampler_destroy(&dev->vk, pAllocator, &sampler->vk);
324          return result;
325       }
326 
327       sampler->planes[0].desc_index = desc_index;
328    }
329 
330    /* In order to support CONVERSION_SEPARATE_RECONSTRUCTION_FILTER_BIT, we
331     * need multiple sampler planes: at minimum we will need one for luminance
332     * (the default), and one for chroma.  Each sampler plane needs its own
333     * sampler table entry.  However, sampler table entries are very rare on
334     * NVIDIA; we only have 4096 entries for the whole VkDevice, and each plane
335     * would burn one of those. So we make sure to allocate only the minimum
336     * amount that we actually need (i.e., either 1 or 2), and then just copy
337     * the last sampler plane out as far as we need to fill the number of image
338     * planes.
339     */
340 
341    if (sampler->vk.ycbcr_conversion) {
342       const VkFilter chroma_filter =
343          sampler->vk.ycbcr_conversion->state.chroma_filter;
344       if (pCreateInfo->magFilter != chroma_filter ||
345           pCreateInfo->minFilter != chroma_filter) {
346          VkSamplerCreateInfo plane2_info = *pCreateInfo;
347          plane2_info.magFilter = chroma_filter;
348          plane2_info.minFilter = chroma_filter;
349 
350          uint32_t samp[8] = {};
351          sampler->plane_count = 2;
352          nvk_sampler_fill_header(pdev, &plane2_info, &sampler->vk, samp);
353 
354          uint32_t desc_index = 0;
355          if (cap_info != NULL) {
356             desc_index = cap.planes[1].desc_index;
357             result = nvk_descriptor_table_insert(dev, &dev->samplers,
358                                                  desc_index,
359                                                  samp, sizeof(samp));
360          } else {
361             result = nvk_descriptor_table_add(dev, &dev->samplers,
362                                               samp, sizeof(samp),
363                                               &desc_index);
364          }
365          if (result != VK_SUCCESS) {
366             nvk_descriptor_table_remove(dev, &dev->samplers,
367                                         sampler->planes[0].desc_index);
368             vk_sampler_destroy(&dev->vk, pAllocator, &sampler->vk);
369             return result;
370          }
371 
372          sampler->planes[1].desc_index = desc_index;
373       }
374    }
375 
376    *pSampler = nvk_sampler_to_handle(sampler);
377 
378    return VK_SUCCESS;
379 }
380 
381 VKAPI_ATTR void VKAPI_CALL
nvk_DestroySampler(VkDevice device,VkSampler _sampler,const VkAllocationCallbacks * pAllocator)382 nvk_DestroySampler(VkDevice device,
383                    VkSampler _sampler,
384                    const VkAllocationCallbacks *pAllocator)
385 {
386    VK_FROM_HANDLE(nvk_device, dev, device);
387    VK_FROM_HANDLE(nvk_sampler, sampler, _sampler);
388 
389    if (!sampler)
390       return;
391 
392    for (uint8_t plane = 0; plane < sampler->plane_count; plane++) {
393       nvk_descriptor_table_remove(dev, &dev->samplers,
394                                   sampler->planes[plane].desc_index);
395    }
396 
397    vk_sampler_destroy(&dev->vk, pAllocator, &sampler->vk);
398 }
399 
400 VKAPI_ATTR VkResult VKAPI_CALL
nvk_GetSamplerOpaqueCaptureDescriptorDataEXT(VkDevice _device,const VkSamplerCaptureDescriptorDataInfoEXT * pInfo,void * pData)401 nvk_GetSamplerOpaqueCaptureDescriptorDataEXT(
402     VkDevice _device,
403     const VkSamplerCaptureDescriptorDataInfoEXT *pInfo,
404     void *pData)
405 {
406    VK_FROM_HANDLE(nvk_sampler, sampler, pInfo->sampler);
407 
408    struct nvk_sampler_capture cap = {};
409    for (uint8_t p = 0; p < sampler->plane_count; p++)
410       cap.planes[p].desc_index = sampler->planes[p].desc_index;
411 
412    memcpy(pData, &cap, sizeof(cap));
413 
414    return VK_SUCCESS;
415 }
416