xref: /aosp_15_r20/external/mesa3d/src/freedreno/fdl/fd6_layout.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright © 2018 Rob Clark <[email protected]>
3  * Copyright © 2018-2019 Google, Inc.
4  * SPDX-License-Identifier: MIT
5  *
6  * Authors:
7  *    Rob Clark <[email protected]>
8  */
9 
10 #include <stdio.h>
11 
12 #include "freedreno_layout.h"
13 
14 static bool
is_r8g8(const struct fdl_layout * layout)15 is_r8g8(const struct fdl_layout *layout)
16 {
17    return layout->cpp == 2 &&
18           util_format_get_nr_components(layout->format) == 2;
19 }
20 
21 void
fdl6_get_ubwc_blockwidth(const struct fdl_layout * layout,uint32_t * blockwidth,uint32_t * blockheight)22 fdl6_get_ubwc_blockwidth(const struct fdl_layout *layout,
23                          uint32_t *blockwidth, uint32_t *blockheight)
24 {
25    static const struct {
26       uint8_t width;
27       uint8_t height;
28    } blocksize[] = {
29       { 16, 4 }, /* cpp = 1 */
30       { 16, 4 }, /* cpp = 2 */
31       { 16, 4 }, /* cpp = 4 */
32       {  8, 4 }, /* cpp = 8 */
33       {  4, 4 }, /* cpp = 16 */
34       {  4, 2 }, /* cpp = 32 */
35       {  0, 0 }, /* cpp = 64 (TODO) */
36    };
37 
38    /* special case for r8g8: */
39    if (is_r8g8(layout)) {
40       *blockwidth = 16;
41       *blockheight = 8;
42       return;
43    }
44 
45    if (layout->format == PIPE_FORMAT_Y8_UNORM) {
46       *blockwidth = 32;
47       *blockheight = 8;
48       return;
49    }
50 
51    /* special case for 2bpp + MSAA (not layout->cpp is already
52     * pre-multiplied by nr_samples):
53     */
54    if ((layout->cpp / layout->nr_samples == 2) && (layout->nr_samples > 1)) {
55       if (layout->nr_samples == 2) {
56          *blockwidth = 8;
57          *blockheight = 4;
58       } else if (layout->nr_samples == 4) {
59          *blockwidth = 4;
60          *blockheight = 4;
61       } else {
62          unreachable("bad nr_samples");
63       }
64       return;
65    }
66 
67    uint32_t cpp = fdl_cpp_shift(layout);
68    assert(cpp < ARRAY_SIZE(blocksize));
69    *blockwidth = blocksize[cpp].width;
70    *blockheight = blocksize[cpp].height;
71 }
72 
73 static void
fdl6_tile_alignment(struct fdl_layout * layout,uint32_t * heightalign)74 fdl6_tile_alignment(struct fdl_layout *layout, uint32_t *heightalign)
75 {
76    layout->pitchalign = fdl_cpp_shift(layout);
77    *heightalign = 16;
78 
79    if (is_r8g8(layout) || layout->cpp == 1) {
80       layout->pitchalign = 1;
81       *heightalign = 32;
82    } else if (layout->cpp == 2) {
83       layout->pitchalign = 2;
84    }
85 
86    /* Empirical evidence suggests that images with UBWC could have much
87     * looser alignment requirements, however the validity of alignment is
88     * heavily undertested and the "officially" supported alignment is 4096b.
89     */
90    if (layout->ubwc || util_format_is_depth_or_stencil(layout->format))
91       layout->base_align = 4096;
92    else if (layout->cpp == 1)
93       layout->base_align = 64;
94    else if (layout->cpp == 2)
95       layout->base_align = 128;
96    else
97       layout->base_align = 256;
98 }
99 
100 /* NOTE: good way to test this is:  (for example)
101  *  piglit/bin/texelFetch fs sampler3D 100x100x8
102  */
103 bool
fdl6_layout(struct fdl_layout * layout,enum pipe_format format,uint32_t nr_samples,uint32_t width0,uint32_t height0,uint32_t depth0,uint32_t mip_levels,uint32_t array_size,bool is_3d,struct fdl_explicit_layout * explicit_layout)104 fdl6_layout(struct fdl_layout *layout, enum pipe_format format,
105             uint32_t nr_samples, uint32_t width0, uint32_t height0,
106             uint32_t depth0, uint32_t mip_levels, uint32_t array_size,
107             bool is_3d, struct fdl_explicit_layout *explicit_layout)
108 {
109    uint32_t offset = 0, heightalign;
110    uint32_t ubwc_blockwidth, ubwc_blockheight;
111 
112    assert(nr_samples > 0);
113    layout->width0 = width0;
114    layout->height0 = height0;
115    layout->depth0 = depth0;
116    layout->mip_levels = mip_levels;
117 
118    layout->cpp = util_format_get_blocksize(format);
119    layout->cpp *= nr_samples;
120    layout->cpp_shift = ffs(layout->cpp) - 1;
121 
122    layout->format = format;
123    layout->nr_samples = nr_samples;
124    layout->layer_first = !is_3d;
125 
126    fdl6_get_ubwc_blockwidth(layout, &ubwc_blockwidth, &ubwc_blockheight);
127 
128    if (depth0 > 1 || ubwc_blockwidth == 0)
129       layout->ubwc = false;
130 
131    if (layout->ubwc || util_format_is_depth_or_stencil(format))
132       layout->tile_all = true;
133 
134    /* in layer_first layout, the level (slice) contains just one
135     * layer (since in fact the layer contains the slices)
136     */
137    uint32_t layers_in_level = layout->layer_first ? 1 : array_size;
138 
139    /* note: for tiled+noubwc layouts, we can use a lower pitchalign
140     * which will affect the linear levels only, (the hardware will still
141     * expect the tiled alignment on the tiled levels)
142     */
143    if (layout->tile_mode) {
144       fdl6_tile_alignment(layout, &heightalign);
145    } else {
146       layout->base_align = 64;
147       layout->pitchalign = 0;
148       /* align pitch to at least 16 pixels:
149        * both turnip and galium assume there is enough alignment for 16x4
150        * aligned gmem store. turnip can use CP_BLIT to work without this
151        * extra alignment, but gallium driver doesn't implement it yet
152        */
153       if (layout->cpp > 4)
154          layout->pitchalign = fdl_cpp_shift(layout) - 2;
155 
156       /* when possible, use a bit more alignment than necessary
157        * presumably this is better for performance?
158        */
159       if (!explicit_layout)
160          layout->pitchalign = fdl_cpp_shift(layout);
161 
162       /* not used, avoid "may be used uninitialized" warning */
163       heightalign = 1;
164    }
165 
166    fdl_set_pitchalign(layout, layout->pitchalign + 6);
167 
168    if (explicit_layout) {
169       offset = explicit_layout->offset;
170       layout->pitch0 = explicit_layout->pitch;
171       if (align(layout->pitch0, 1 << layout->pitchalign) != layout->pitch0)
172          return false;
173    }
174 
175    uint32_t ubwc_width0 = width0;
176    uint32_t ubwc_height0 = height0;
177    uint32_t ubwc_tile_height_alignment = RGB_TILE_HEIGHT_ALIGNMENT;
178    if (mip_levels > 1) {
179       /* With mipmapping enabled, UBWC layout is power-of-two sized,
180        * specified in log2 width/height in the descriptors.  The height
181        * alignment is 64 for mipmapping, but for buffer sharing (always
182        * single level) other participants expect 16.
183        */
184       ubwc_width0 = util_next_power_of_two(width0);
185       ubwc_height0 = util_next_power_of_two(height0);
186       ubwc_tile_height_alignment = 64;
187    }
188    layout->ubwc_width0 = align(DIV_ROUND_UP(ubwc_width0, ubwc_blockwidth),
189                                RGB_TILE_WIDTH_ALIGNMENT);
190    ubwc_height0 = align(DIV_ROUND_UP(ubwc_height0, ubwc_blockheight),
191                         ubwc_tile_height_alignment);
192 
193    uint32_t min_3d_layer_size = 0;
194 
195    for (uint32_t level = 0; level < mip_levels; level++) {
196       uint32_t depth = u_minify(depth0, level);
197       struct fdl_slice *slice = &layout->slices[level];
198       struct fdl_slice *ubwc_slice = &layout->ubwc_slices[level];
199       uint32_t tile_mode = fdl_tile_mode(layout, level);
200       uint32_t pitch = fdl_pitch(layout, level);
201       uint32_t height = u_minify(height0, level);
202 
203       uint32_t nblocksy = util_format_get_nblocksy(format, height);
204       if (tile_mode)
205          nblocksy = align(nblocksy, heightalign);
206 
207       /* The blits used for mem<->gmem work at a granularity of
208        * 16x4, which can cause faults due to over-fetch on the
209        * last level.  The simple solution is to over-allocate a
210        * bit the last level to ensure any over-fetch is harmless.
211        * The pitch is already sufficiently aligned, but height
212        * may not be. note this only matters if last level is linear
213        */
214       if (level == mip_levels - 1)
215          nblocksy = align(nblocksy, 4);
216 
217       slice->offset = offset + layout->size;
218 
219       /* 1d array and 2d array textures must all have the same layer size for
220        * each miplevel on a6xx.  For 3D, the layer size automatically reduces
221        * until the value we specify in TEX_CONST_3_MIN_LAYERSZ, which is used to
222        * make sure that we follow alignment requirements after minification.
223        */
224       if (is_3d) {
225          if (level == 0) {
226             slice->size0 = align(nblocksy * pitch, 4096);
227          } else if (min_3d_layer_size) {
228             slice->size0 = min_3d_layer_size;
229          } else {
230             /* Note: level * 2 for minifying in both X and Y. */
231             slice->size0 = u_minify(layout->slices[0].size0, level * 2);
232 
233             /* If this level didn't reduce the pitch by half, then fix it up,
234              * and this is the end of layer size reduction.
235              */
236             uint32_t pitch = fdl_pitch(layout, level);
237             if (pitch != fdl_pitch(layout, level - 1) / 2)
238                min_3d_layer_size = slice->size0 = nblocksy * pitch;
239 
240             /* If the height is now less than the alignment requirement, then
241              * scale it up and let this be the minimum layer size.
242              */
243             if (tile_mode && util_format_get_nblocksy(format, height) < heightalign)
244                min_3d_layer_size = slice->size0 = nblocksy * pitch;
245 
246             /* If the size would become un-page-aligned, stay aligned instead. */
247             if (align(slice->size0, 4096) != slice->size0)
248                min_3d_layer_size = slice->size0 = align(slice->size0, 4096);
249          }
250       } else {
251          slice->size0 = nblocksy * pitch;
252       }
253 
254       layout->size += slice->size0 * depth * layers_in_level;
255 
256       if (layout->ubwc) {
257          /* with UBWC every level is aligned to 4K */
258          layout->size = align64(layout->size, 4096);
259 
260          uint32_t meta_pitch = fdl_ubwc_pitch(layout, level);
261          uint32_t meta_height =
262             align(u_minify(ubwc_height0, level), ubwc_tile_height_alignment);
263 
264          ubwc_slice->size0 =
265             align(meta_pitch * meta_height, UBWC_PLANE_SIZE_ALIGNMENT);
266          ubwc_slice->offset = offset + layout->ubwc_layer_size;
267          layout->ubwc_layer_size += ubwc_slice->size0;
268       }
269    }
270 
271    if (layout->layer_first) {
272       layout->layer_size = align64(layout->size, 4096);
273       layout->size = layout->layer_size * array_size;
274    }
275 
276    /* Place the UBWC slices before the uncompressed slices, because the
277     * kernel expects UBWC to be at the start of the buffer.  In the HW, we
278     * get to program the UBWC and non-UBWC offset/strides
279     * independently.
280     */
281    if (layout->ubwc) {
282       for (uint32_t level = 0; level < mip_levels; level++)
283          layout->slices[level].offset += layout->ubwc_layer_size * array_size;
284       layout->size += layout->ubwc_layer_size * array_size;
285    }
286 
287    /* include explicit offset in size */
288    layout->size += offset;
289 
290    return true;
291 }
292