xref: /aosp_15_r20/external/mesa3d/src/intel/isl/isl_storage_image.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright 2015 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include "isl_priv.h"
25 #include "compiler/intel_shader_enums.h"
26 
27 bool
isl_is_storage_image_format(const struct intel_device_info * devinfo,enum isl_format format)28 isl_is_storage_image_format(const struct intel_device_info *devinfo,
29                             enum isl_format format)
30 {
31    /* XXX: Maybe we should put this in the CSV? */
32 
33    switch (format) {
34    case ISL_FORMAT_R32G32B32A32_UINT:
35    case ISL_FORMAT_R32G32B32A32_SINT:
36    case ISL_FORMAT_R32G32B32A32_FLOAT:
37    case ISL_FORMAT_R32_UINT:
38    case ISL_FORMAT_R32_SINT:
39    case ISL_FORMAT_R32_FLOAT:
40    case ISL_FORMAT_R16G16B16A16_UINT:
41    case ISL_FORMAT_R16G16B16A16_SINT:
42    case ISL_FORMAT_R16G16B16A16_FLOAT:
43    case ISL_FORMAT_R32G32_UINT:
44    case ISL_FORMAT_R32G32_SINT:
45    case ISL_FORMAT_R32G32_FLOAT:
46    case ISL_FORMAT_R8G8B8A8_UINT:
47    case ISL_FORMAT_R8G8B8A8_SINT:
48    case ISL_FORMAT_R16G16_UINT:
49    case ISL_FORMAT_R16G16_SINT:
50    case ISL_FORMAT_R16G16_FLOAT:
51    case ISL_FORMAT_R8G8_UINT:
52    case ISL_FORMAT_R8G8_SINT:
53    case ISL_FORMAT_R16_UINT:
54    case ISL_FORMAT_R16_FLOAT:
55    case ISL_FORMAT_R16_SINT:
56    case ISL_FORMAT_R8_UINT:
57    case ISL_FORMAT_R8_SINT:
58    case ISL_FORMAT_R10G10B10A2_UINT:
59    case ISL_FORMAT_R10G10B10A2_UNORM:
60    case ISL_FORMAT_R11G11B10_FLOAT:
61    case ISL_FORMAT_R16G16B16A16_UNORM:
62    case ISL_FORMAT_R16G16B16A16_SNORM:
63    case ISL_FORMAT_R8G8B8A8_UNORM:
64    case ISL_FORMAT_R8G8B8A8_SNORM:
65    case ISL_FORMAT_R16G16_UNORM:
66    case ISL_FORMAT_R16G16_SNORM:
67    case ISL_FORMAT_R8G8_UNORM:
68    case ISL_FORMAT_R8G8_SNORM:
69    case ISL_FORMAT_R16_UNORM:
70    case ISL_FORMAT_R16_SNORM:
71    case ISL_FORMAT_R8_UNORM:
72    case ISL_FORMAT_R8_SNORM:
73       return true;
74    default:
75       return false;
76    }
77 }
78 
79 enum isl_format
isl_lower_storage_image_format(const struct intel_device_info * devinfo,enum isl_format format)80 isl_lower_storage_image_format(const struct intel_device_info *devinfo,
81                                enum isl_format format)
82 {
83    switch (format) {
84    /* These are never lowered.  Up to BDW we'll have to fall back to untyped
85     * surface access for 128bpp formats.
86     */
87    case ISL_FORMAT_R32G32B32A32_UINT:
88    case ISL_FORMAT_R32G32B32A32_SINT:
89    case ISL_FORMAT_R32G32B32A32_FLOAT:
90    case ISL_FORMAT_R32_UINT:
91    case ISL_FORMAT_R32_SINT:
92       return format;
93 
94    /* The Skylake PRM's "Surface Formats" section says:
95     *
96     *   "The surface format for the typed atomic integer operations must
97     *    be R32_UINT or R32_SINT."
98     *
99     * But checking the BSpec 1706, you find a different restriction. There the
100     * wording is :
101     *
102     *    "The surface format must be one of R32_UINT, R32_SINT or R32_FLOAT"
103     *
104     * The confusion is probably related to atomic integer messages. For
105     * example an IADD instruction would require a R32_UINT/R32_SINT surface.
106     * But a CMPXCHG instruction does not really care about the type, it just
107     * does bit to bit comparison and swap.
108     *
109     * The confusion seems to have propagated to the simulation environment.
110     * Gfx12 has the same restrictions as Gfx11 regarding doing a CMPXCHG on a
111     * R32_FLOAT surface, but the Gfx11 environment will report an error while
112     * Gfx12 passes fine. More importantly HW doesn't seem to mind.
113     */
114    case ISL_FORMAT_R32_FLOAT:
115       return format;
116 
117    /* From HSW to BDW the only 64bpp format supported for typed access is
118     * RGBA_UINT16.  IVB falls back to untyped.
119     */
120    case ISL_FORMAT_R16G16B16A16_UINT:
121    case ISL_FORMAT_R16G16B16A16_SINT:
122    case ISL_FORMAT_R16G16B16A16_FLOAT:
123    case ISL_FORMAT_R32G32_UINT:
124    case ISL_FORMAT_R32G32_SINT:
125    case ISL_FORMAT_R32G32_FLOAT:
126       return (devinfo->ver >= 9 ? format :
127               devinfo->verx10 >= 75 ?
128               ISL_FORMAT_R16G16B16A16_UINT :
129               ISL_FORMAT_R32G32_UINT);
130 
131    /* Up to BDW no SINT or FLOAT formats of less than 32 bits per component
132     * are supported.  IVB doesn't support formats with more than one component
133     * for typed access.  For 8 and 16 bpp formats IVB relies on the
134     * undocumented behavior that typed reads from R_UINT8 and R_UINT16
135     * surfaces actually do a 32-bit misaligned read.  The alternative would be
136     * to use two surface state entries with different formats for each image,
137     * one for reading (using R_UINT32) and another one for writing (using
138     * R_UINT8 or R_UINT16), but that would complicate the shaders we generate
139     * even more.
140     */
141    case ISL_FORMAT_R8G8B8A8_UINT:
142    case ISL_FORMAT_R8G8B8A8_SINT:
143       return (devinfo->ver >= 9 ? format :
144               devinfo->verx10 >= 75 ?
145               ISL_FORMAT_R8G8B8A8_UINT : ISL_FORMAT_R32_UINT);
146 
147    case ISL_FORMAT_R16G16_UINT:
148    case ISL_FORMAT_R16G16_SINT:
149    case ISL_FORMAT_R16G16_FLOAT:
150       return (devinfo->ver >= 9 ? format :
151               devinfo->verx10 >= 75 ?
152               ISL_FORMAT_R16G16_UINT : ISL_FORMAT_R32_UINT);
153 
154    case ISL_FORMAT_R8G8_UINT:
155    case ISL_FORMAT_R8G8_SINT:
156       return (devinfo->ver >= 9 ? format :
157               devinfo->verx10 >= 75 ?
158               ISL_FORMAT_R8G8_UINT : ISL_FORMAT_R16_UINT);
159 
160    case ISL_FORMAT_R16_UINT:
161    case ISL_FORMAT_R16_FLOAT:
162    case ISL_FORMAT_R16_SINT:
163       return (devinfo->ver >= 9 ? format : ISL_FORMAT_R16_UINT);
164 
165    case ISL_FORMAT_R8_UINT:
166    case ISL_FORMAT_R8_SINT:
167       return (devinfo->ver >= 9 ? format : ISL_FORMAT_R8_UINT);
168 
169    /* Here the PRMs are a bit out of date. But according to BSpec 47635
170     * (Gfx12.5), the 2/10/10/10 and the 11/11/10 packed formats are supported
171     * by the hardware.
172     */
173    case ISL_FORMAT_R10G10B10A2_UINT:
174    case ISL_FORMAT_R10G10B10A2_UNORM:
175    case ISL_FORMAT_R11G11B10_FLOAT:
176       return devinfo->verx10 >= 125 ? format : ISL_FORMAT_R32_UINT;
177 
178    /* No normalized fixed-point formats are supported by the hardware until Gfx11. */
179    case ISL_FORMAT_R16G16B16A16_UNORM:
180    case ISL_FORMAT_R16G16B16A16_SNORM:
181       if (devinfo->ver >= 11)
182          return format;
183       if (devinfo->ver >= 9)
184          return ISL_FORMAT_R32G32_UINT;
185       if (devinfo->verx10 >= 75)
186          return ISL_FORMAT_R16G16B16A16_UINT;
187       return ISL_FORMAT_R32G32_UINT;
188 
189    case ISL_FORMAT_R8G8B8A8_UNORM:
190    case ISL_FORMAT_R8G8B8A8_SNORM:
191       if (devinfo->ver >= 11)
192          return format;
193       if (devinfo->ver >= 9)
194          return ISL_FORMAT_R32_UINT;
195       if (devinfo->verx10 >= 75)
196          return ISL_FORMAT_R8G8B8A8_UINT;
197       return ISL_FORMAT_R32_UINT;
198 
199    case ISL_FORMAT_R16G16_UNORM:
200    case ISL_FORMAT_R16G16_SNORM:
201       if (devinfo->ver >= 11)
202          return format;
203       if (devinfo->ver >= 9)
204          return ISL_FORMAT_R32_UINT;
205       if (devinfo->verx10 >= 75)
206          return ISL_FORMAT_R16G16_UINT;
207       return ISL_FORMAT_R32_UINT;
208 
209    case ISL_FORMAT_R8G8_UNORM:
210    case ISL_FORMAT_R8G8_SNORM:
211       if (devinfo->ver >= 11)
212          return format;
213       if (devinfo->ver >= 9)
214          return ISL_FORMAT_R16_UINT;
215       if (devinfo->verx10 >= 75)
216          return ISL_FORMAT_R8G8_UINT;
217       return ISL_FORMAT_R16_UINT;
218 
219    case ISL_FORMAT_R16_UNORM:
220    case ISL_FORMAT_R16_SNORM:
221       return (devinfo->ver >= 11 ? format : ISL_FORMAT_R16_UINT);
222 
223    case ISL_FORMAT_R8_UNORM:
224    case ISL_FORMAT_R8_SNORM:
225       return (devinfo->ver >= 11 ? format : ISL_FORMAT_R8_UINT);
226 
227    default:
228       assert(!"Unknown image format");
229       return ISL_FORMAT_UNSUPPORTED;
230    }
231 }
232 
233 bool
isl_has_matching_typed_storage_image_format(const struct intel_device_info * devinfo,enum isl_format fmt)234 isl_has_matching_typed_storage_image_format(const struct intel_device_info *devinfo,
235                                             enum isl_format fmt)
236 {
237    if (devinfo->ver >= 9) {
238       return true;
239    } else if (devinfo->verx10 >= 75) {
240       return isl_format_get_layout(fmt)->bpb <= 64;
241    } else {
242       return isl_format_get_layout(fmt)->bpb <= 32;
243    }
244 }
245 
246 static const struct isl_image_param image_param_defaults = {
247    /* Set the swizzling shifts to all-ones to effectively disable
248     * swizzling -- See emit_address_calculation() in
249     * brw_fs_surface_builder.cpp for a more detailed explanation of
250     * these parameters.
251     */
252    .swizzling = { 0xff, 0xff },
253 };
254 
255 void
isl_surf_fill_image_param(const struct isl_device * dev,struct isl_image_param * param,const struct isl_surf * surf,const struct isl_view * view)256 isl_surf_fill_image_param(const struct isl_device *dev,
257                           struct isl_image_param *param,
258                           const struct isl_surf *surf,
259                           const struct isl_view *view)
260 {
261    *param = image_param_defaults;
262 
263    if (surf->dim != ISL_SURF_DIM_3D) {
264       assert(view->base_array_layer + view->array_len <=
265              surf->logical_level0_px.array_len);
266    }
267    param->size[0] = isl_minify(surf->logical_level0_px.w, view->base_level);
268    param->size[1] = surf->dim == ISL_SURF_DIM_1D ?
269                     view->array_len :
270                     isl_minify(surf->logical_level0_px.h, view->base_level);
271    param->size[2] = surf->dim == ISL_SURF_DIM_2D ?
272                     view->array_len :
273                     isl_minify(surf->logical_level0_px.d, view->base_level);
274 
275    uint32_t tile_z_el, phys_array_layer;
276    isl_surf_get_image_offset_el(surf, view->base_level,
277                                 surf->dim == ISL_SURF_DIM_3D ?
278                                    0 : view->base_array_layer,
279                                 surf->dim == ISL_SURF_DIM_3D ?
280                                    view->base_array_layer : 0,
281                                 &param->offset[0],  &param->offset[1],
282                                 &tile_z_el, &phys_array_layer);
283    assert(tile_z_el == 0);
284    assert(phys_array_layer == 0);
285 
286    const int cpp = isl_format_get_layout(surf->format)->bpb / 8;
287    param->stride[0] = cpp;
288    param->stride[1] = surf->row_pitch_B / cpp;
289 
290    const struct isl_extent3d image_align_sa =
291       isl_surf_get_image_alignment_sa(surf);
292    if (ISL_GFX_VER(dev) < 9 && surf->dim == ISL_SURF_DIM_3D) {
293       param->stride[2] = isl_align_npot(param->size[0], image_align_sa.w);
294       param->stride[3] = isl_align_npot(param->size[1], image_align_sa.h);
295    } else {
296       param->stride[2] = 0;
297       param->stride[3] = isl_surf_get_array_pitch_el_rows(surf);
298    }
299 
300    switch (surf->tiling) {
301    case ISL_TILING_LINEAR:
302       /* image_param_defaults is good enough */
303       break;
304 
305    case ISL_TILING_X:
306       /* An X tile is a rectangular block of 512x8 bytes. */
307       param->tiling[0] = isl_log2u(512 / cpp);
308       param->tiling[1] = isl_log2u(8);
309 
310       if (dev->has_bit6_swizzling) {
311          /* Right shifts required to swizzle bits 9 and 10 of the memory
312           * address with bit 6.
313           */
314          param->swizzling[0] = 3;
315          param->swizzling[1] = 4;
316       }
317       break;
318 
319    case ISL_TILING_Y0:
320       /* The layout of a Y-tiled surface in memory isn't really fundamentally
321        * different to the layout of an X-tiled surface, we simply pretend that
322        * the surface is broken up in a number of smaller 16Bx32 tiles, each
323        * one arranged in X-major order just like is the case for X-tiling.
324        */
325       param->tiling[0] = isl_log2u(16 / cpp);
326       param->tiling[1] = isl_log2u(32);
327 
328       if (dev->has_bit6_swizzling) {
329          /* Right shift required to swizzle bit 9 of the memory address with
330           * bit 6.
331           */
332          param->swizzling[0] = 3;
333          param->swizzling[1] = 0xff;
334       }
335       break;
336 
337    default:
338       assert(!"Unhandled storage image tiling");
339    }
340 
341    /* 3D textures are arranged in 2D in memory with 2^lod slices per row.  The
342     * address calculation algorithm (emit_address_calculation() in
343     * brw_fs_surface_builder.cpp) handles this as a sort of tiling with
344     * modulus equal to the LOD.
345     */
346    param->tiling[2] = (ISL_GFX_VER(dev) < 9 && surf->dim == ISL_SURF_DIM_3D ?
347                        view->base_level : 0);
348 }
349 
350 void
isl_buffer_fill_image_param(const struct isl_device * dev,struct isl_image_param * param,enum isl_format format,uint64_t size)351 isl_buffer_fill_image_param(const struct isl_device *dev,
352                             struct isl_image_param *param,
353                             enum isl_format format,
354                             uint64_t size)
355 {
356    *param = image_param_defaults;
357 
358    param->stride[0] = isl_format_get_layout(format)->bpb / 8;
359    param->size[0] = size / param->stride[0];
360 }
361