1 /*
2 * Copyright 2015 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "isl_priv.h"
25 #include "compiler/intel_shader_enums.h"
26
27 bool
isl_is_storage_image_format(const struct intel_device_info * devinfo,enum isl_format format)28 isl_is_storage_image_format(const struct intel_device_info *devinfo,
29 enum isl_format format)
30 {
31 /* XXX: Maybe we should put this in the CSV? */
32
33 switch (format) {
34 case ISL_FORMAT_R32G32B32A32_UINT:
35 case ISL_FORMAT_R32G32B32A32_SINT:
36 case ISL_FORMAT_R32G32B32A32_FLOAT:
37 case ISL_FORMAT_R32_UINT:
38 case ISL_FORMAT_R32_SINT:
39 case ISL_FORMAT_R32_FLOAT:
40 case ISL_FORMAT_R16G16B16A16_UINT:
41 case ISL_FORMAT_R16G16B16A16_SINT:
42 case ISL_FORMAT_R16G16B16A16_FLOAT:
43 case ISL_FORMAT_R32G32_UINT:
44 case ISL_FORMAT_R32G32_SINT:
45 case ISL_FORMAT_R32G32_FLOAT:
46 case ISL_FORMAT_R8G8B8A8_UINT:
47 case ISL_FORMAT_R8G8B8A8_SINT:
48 case ISL_FORMAT_R16G16_UINT:
49 case ISL_FORMAT_R16G16_SINT:
50 case ISL_FORMAT_R16G16_FLOAT:
51 case ISL_FORMAT_R8G8_UINT:
52 case ISL_FORMAT_R8G8_SINT:
53 case ISL_FORMAT_R16_UINT:
54 case ISL_FORMAT_R16_FLOAT:
55 case ISL_FORMAT_R16_SINT:
56 case ISL_FORMAT_R8_UINT:
57 case ISL_FORMAT_R8_SINT:
58 case ISL_FORMAT_R10G10B10A2_UINT:
59 case ISL_FORMAT_R10G10B10A2_UNORM:
60 case ISL_FORMAT_R11G11B10_FLOAT:
61 case ISL_FORMAT_R16G16B16A16_UNORM:
62 case ISL_FORMAT_R16G16B16A16_SNORM:
63 case ISL_FORMAT_R8G8B8A8_UNORM:
64 case ISL_FORMAT_R8G8B8A8_SNORM:
65 case ISL_FORMAT_R16G16_UNORM:
66 case ISL_FORMAT_R16G16_SNORM:
67 case ISL_FORMAT_R8G8_UNORM:
68 case ISL_FORMAT_R8G8_SNORM:
69 case ISL_FORMAT_R16_UNORM:
70 case ISL_FORMAT_R16_SNORM:
71 case ISL_FORMAT_R8_UNORM:
72 case ISL_FORMAT_R8_SNORM:
73 return true;
74 default:
75 return false;
76 }
77 }
78
79 enum isl_format
isl_lower_storage_image_format(const struct intel_device_info * devinfo,enum isl_format format)80 isl_lower_storage_image_format(const struct intel_device_info *devinfo,
81 enum isl_format format)
82 {
83 switch (format) {
84 /* These are never lowered. Up to BDW we'll have to fall back to untyped
85 * surface access for 128bpp formats.
86 */
87 case ISL_FORMAT_R32G32B32A32_UINT:
88 case ISL_FORMAT_R32G32B32A32_SINT:
89 case ISL_FORMAT_R32G32B32A32_FLOAT:
90 case ISL_FORMAT_R32_UINT:
91 case ISL_FORMAT_R32_SINT:
92 return format;
93
94 /* The Skylake PRM's "Surface Formats" section says:
95 *
96 * "The surface format for the typed atomic integer operations must
97 * be R32_UINT or R32_SINT."
98 *
99 * But checking the BSpec 1706, you find a different restriction. There the
100 * wording is :
101 *
102 * "The surface format must be one of R32_UINT, R32_SINT or R32_FLOAT"
103 *
104 * The confusion is probably related to atomic integer messages. For
105 * example an IADD instruction would require a R32_UINT/R32_SINT surface.
106 * But a CMPXCHG instruction does not really care about the type, it just
107 * does bit to bit comparison and swap.
108 *
109 * The confusion seems to have propagated to the simulation environment.
110 * Gfx12 has the same restrictions as Gfx11 regarding doing a CMPXCHG on a
111 * R32_FLOAT surface, but the Gfx11 environment will report an error while
112 * Gfx12 passes fine. More importantly HW doesn't seem to mind.
113 */
114 case ISL_FORMAT_R32_FLOAT:
115 return format;
116
117 /* From HSW to BDW the only 64bpp format supported for typed access is
118 * RGBA_UINT16. IVB falls back to untyped.
119 */
120 case ISL_FORMAT_R16G16B16A16_UINT:
121 case ISL_FORMAT_R16G16B16A16_SINT:
122 case ISL_FORMAT_R16G16B16A16_FLOAT:
123 case ISL_FORMAT_R32G32_UINT:
124 case ISL_FORMAT_R32G32_SINT:
125 case ISL_FORMAT_R32G32_FLOAT:
126 return (devinfo->ver >= 9 ? format :
127 devinfo->verx10 >= 75 ?
128 ISL_FORMAT_R16G16B16A16_UINT :
129 ISL_FORMAT_R32G32_UINT);
130
131 /* Up to BDW no SINT or FLOAT formats of less than 32 bits per component
132 * are supported. IVB doesn't support formats with more than one component
133 * for typed access. For 8 and 16 bpp formats IVB relies on the
134 * undocumented behavior that typed reads from R_UINT8 and R_UINT16
135 * surfaces actually do a 32-bit misaligned read. The alternative would be
136 * to use two surface state entries with different formats for each image,
137 * one for reading (using R_UINT32) and another one for writing (using
138 * R_UINT8 or R_UINT16), but that would complicate the shaders we generate
139 * even more.
140 */
141 case ISL_FORMAT_R8G8B8A8_UINT:
142 case ISL_FORMAT_R8G8B8A8_SINT:
143 return (devinfo->ver >= 9 ? format :
144 devinfo->verx10 >= 75 ?
145 ISL_FORMAT_R8G8B8A8_UINT : ISL_FORMAT_R32_UINT);
146
147 case ISL_FORMAT_R16G16_UINT:
148 case ISL_FORMAT_R16G16_SINT:
149 case ISL_FORMAT_R16G16_FLOAT:
150 return (devinfo->ver >= 9 ? format :
151 devinfo->verx10 >= 75 ?
152 ISL_FORMAT_R16G16_UINT : ISL_FORMAT_R32_UINT);
153
154 case ISL_FORMAT_R8G8_UINT:
155 case ISL_FORMAT_R8G8_SINT:
156 return (devinfo->ver >= 9 ? format :
157 devinfo->verx10 >= 75 ?
158 ISL_FORMAT_R8G8_UINT : ISL_FORMAT_R16_UINT);
159
160 case ISL_FORMAT_R16_UINT:
161 case ISL_FORMAT_R16_FLOAT:
162 case ISL_FORMAT_R16_SINT:
163 return (devinfo->ver >= 9 ? format : ISL_FORMAT_R16_UINT);
164
165 case ISL_FORMAT_R8_UINT:
166 case ISL_FORMAT_R8_SINT:
167 return (devinfo->ver >= 9 ? format : ISL_FORMAT_R8_UINT);
168
169 /* Here the PRMs are a bit out of date. But according to BSpec 47635
170 * (Gfx12.5), the 2/10/10/10 and the 11/11/10 packed formats are supported
171 * by the hardware.
172 */
173 case ISL_FORMAT_R10G10B10A2_UINT:
174 case ISL_FORMAT_R10G10B10A2_UNORM:
175 case ISL_FORMAT_R11G11B10_FLOAT:
176 return devinfo->verx10 >= 125 ? format : ISL_FORMAT_R32_UINT;
177
178 /* No normalized fixed-point formats are supported by the hardware until Gfx11. */
179 case ISL_FORMAT_R16G16B16A16_UNORM:
180 case ISL_FORMAT_R16G16B16A16_SNORM:
181 if (devinfo->ver >= 11)
182 return format;
183 if (devinfo->ver >= 9)
184 return ISL_FORMAT_R32G32_UINT;
185 if (devinfo->verx10 >= 75)
186 return ISL_FORMAT_R16G16B16A16_UINT;
187 return ISL_FORMAT_R32G32_UINT;
188
189 case ISL_FORMAT_R8G8B8A8_UNORM:
190 case ISL_FORMAT_R8G8B8A8_SNORM:
191 if (devinfo->ver >= 11)
192 return format;
193 if (devinfo->ver >= 9)
194 return ISL_FORMAT_R32_UINT;
195 if (devinfo->verx10 >= 75)
196 return ISL_FORMAT_R8G8B8A8_UINT;
197 return ISL_FORMAT_R32_UINT;
198
199 case ISL_FORMAT_R16G16_UNORM:
200 case ISL_FORMAT_R16G16_SNORM:
201 if (devinfo->ver >= 11)
202 return format;
203 if (devinfo->ver >= 9)
204 return ISL_FORMAT_R32_UINT;
205 if (devinfo->verx10 >= 75)
206 return ISL_FORMAT_R16G16_UINT;
207 return ISL_FORMAT_R32_UINT;
208
209 case ISL_FORMAT_R8G8_UNORM:
210 case ISL_FORMAT_R8G8_SNORM:
211 if (devinfo->ver >= 11)
212 return format;
213 if (devinfo->ver >= 9)
214 return ISL_FORMAT_R16_UINT;
215 if (devinfo->verx10 >= 75)
216 return ISL_FORMAT_R8G8_UINT;
217 return ISL_FORMAT_R16_UINT;
218
219 case ISL_FORMAT_R16_UNORM:
220 case ISL_FORMAT_R16_SNORM:
221 return (devinfo->ver >= 11 ? format : ISL_FORMAT_R16_UINT);
222
223 case ISL_FORMAT_R8_UNORM:
224 case ISL_FORMAT_R8_SNORM:
225 return (devinfo->ver >= 11 ? format : ISL_FORMAT_R8_UINT);
226
227 default:
228 assert(!"Unknown image format");
229 return ISL_FORMAT_UNSUPPORTED;
230 }
231 }
232
233 bool
isl_has_matching_typed_storage_image_format(const struct intel_device_info * devinfo,enum isl_format fmt)234 isl_has_matching_typed_storage_image_format(const struct intel_device_info *devinfo,
235 enum isl_format fmt)
236 {
237 if (devinfo->ver >= 9) {
238 return true;
239 } else if (devinfo->verx10 >= 75) {
240 return isl_format_get_layout(fmt)->bpb <= 64;
241 } else {
242 return isl_format_get_layout(fmt)->bpb <= 32;
243 }
244 }
245
246 static const struct isl_image_param image_param_defaults = {
247 /* Set the swizzling shifts to all-ones to effectively disable
248 * swizzling -- See emit_address_calculation() in
249 * brw_fs_surface_builder.cpp for a more detailed explanation of
250 * these parameters.
251 */
252 .swizzling = { 0xff, 0xff },
253 };
254
255 void
isl_surf_fill_image_param(const struct isl_device * dev,struct isl_image_param * param,const struct isl_surf * surf,const struct isl_view * view)256 isl_surf_fill_image_param(const struct isl_device *dev,
257 struct isl_image_param *param,
258 const struct isl_surf *surf,
259 const struct isl_view *view)
260 {
261 *param = image_param_defaults;
262
263 if (surf->dim != ISL_SURF_DIM_3D) {
264 assert(view->base_array_layer + view->array_len <=
265 surf->logical_level0_px.array_len);
266 }
267 param->size[0] = isl_minify(surf->logical_level0_px.w, view->base_level);
268 param->size[1] = surf->dim == ISL_SURF_DIM_1D ?
269 view->array_len :
270 isl_minify(surf->logical_level0_px.h, view->base_level);
271 param->size[2] = surf->dim == ISL_SURF_DIM_2D ?
272 view->array_len :
273 isl_minify(surf->logical_level0_px.d, view->base_level);
274
275 uint32_t tile_z_el, phys_array_layer;
276 isl_surf_get_image_offset_el(surf, view->base_level,
277 surf->dim == ISL_SURF_DIM_3D ?
278 0 : view->base_array_layer,
279 surf->dim == ISL_SURF_DIM_3D ?
280 view->base_array_layer : 0,
281 ¶m->offset[0], ¶m->offset[1],
282 &tile_z_el, &phys_array_layer);
283 assert(tile_z_el == 0);
284 assert(phys_array_layer == 0);
285
286 const int cpp = isl_format_get_layout(surf->format)->bpb / 8;
287 param->stride[0] = cpp;
288 param->stride[1] = surf->row_pitch_B / cpp;
289
290 const struct isl_extent3d image_align_sa =
291 isl_surf_get_image_alignment_sa(surf);
292 if (ISL_GFX_VER(dev) < 9 && surf->dim == ISL_SURF_DIM_3D) {
293 param->stride[2] = isl_align_npot(param->size[0], image_align_sa.w);
294 param->stride[3] = isl_align_npot(param->size[1], image_align_sa.h);
295 } else {
296 param->stride[2] = 0;
297 param->stride[3] = isl_surf_get_array_pitch_el_rows(surf);
298 }
299
300 switch (surf->tiling) {
301 case ISL_TILING_LINEAR:
302 /* image_param_defaults is good enough */
303 break;
304
305 case ISL_TILING_X:
306 /* An X tile is a rectangular block of 512x8 bytes. */
307 param->tiling[0] = isl_log2u(512 / cpp);
308 param->tiling[1] = isl_log2u(8);
309
310 if (dev->has_bit6_swizzling) {
311 /* Right shifts required to swizzle bits 9 and 10 of the memory
312 * address with bit 6.
313 */
314 param->swizzling[0] = 3;
315 param->swizzling[1] = 4;
316 }
317 break;
318
319 case ISL_TILING_Y0:
320 /* The layout of a Y-tiled surface in memory isn't really fundamentally
321 * different to the layout of an X-tiled surface, we simply pretend that
322 * the surface is broken up in a number of smaller 16Bx32 tiles, each
323 * one arranged in X-major order just like is the case for X-tiling.
324 */
325 param->tiling[0] = isl_log2u(16 / cpp);
326 param->tiling[1] = isl_log2u(32);
327
328 if (dev->has_bit6_swizzling) {
329 /* Right shift required to swizzle bit 9 of the memory address with
330 * bit 6.
331 */
332 param->swizzling[0] = 3;
333 param->swizzling[1] = 0xff;
334 }
335 break;
336
337 default:
338 assert(!"Unhandled storage image tiling");
339 }
340
341 /* 3D textures are arranged in 2D in memory with 2^lod slices per row. The
342 * address calculation algorithm (emit_address_calculation() in
343 * brw_fs_surface_builder.cpp) handles this as a sort of tiling with
344 * modulus equal to the LOD.
345 */
346 param->tiling[2] = (ISL_GFX_VER(dev) < 9 && surf->dim == ISL_SURF_DIM_3D ?
347 view->base_level : 0);
348 }
349
350 void
isl_buffer_fill_image_param(const struct isl_device * dev,struct isl_image_param * param,enum isl_format format,uint64_t size)351 isl_buffer_fill_image_param(const struct isl_device *dev,
352 struct isl_image_param *param,
353 enum isl_format format,
354 uint64_t size)
355 {
356 *param = image_param_defaults;
357
358 param->stride[0] = isl_format_get_layout(format)->bpb / 8;
359 param->size[0] = size / param->stride[0];
360 }
361