/*
 * Copyright (C) 2021 Collabora, Ltd.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *   Alyssa Rosenzweig <[email protected]>
 *   Boris Brezillon <[email protected]>
 */

#include "util/macros.h"

#include "genxml/gen_macros.h"

#include "pan_desc.h"
#include "pan_encoder.h"
#include "pan_texture.h"

static unsigned
mod_to_block_fmt(uint64_t mod)
{
   switch (mod) {
   case DRM_FORMAT_MOD_LINEAR:
      return MALI_BLOCK_FORMAT_LINEAR;
   case DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED:
      return MALI_BLOCK_FORMAT_TILED_U_INTERLEAVED;
   default:
#if PAN_ARCH >= 5
      if (drm_is_afbc(mod) && !(mod & AFBC_FORMAT_MOD_TILED))
         return MALI_BLOCK_FORMAT_AFBC;
#endif
#if PAN_ARCH >= 7
      if (drm_is_afbc(mod) && (mod & AFBC_FORMAT_MOD_TILED))
         return MALI_BLOCK_FORMAT_AFBC_TILED;
#endif
#if PAN_ARCH >= 10
      if (drm_is_afrc(mod))
         return 0; /* Reserved field for AFRC state */
#endif

      unreachable("Unsupported modifier");
   }
}

static enum mali_msaa
mali_sampling_mode(const struct pan_image_view *view)
{
   unsigned nr_samples = pan_image_view_get_nr_samples(view);

   if (nr_samples > 1) {
      assert(view->nr_samples == nr_samples);
      assert(view->planes[0]->layout.slices[0].surface_stride != 0);
      return MALI_MSAA_LAYERED;
   }

   if (view->nr_samples > nr_samples) {
      assert(nr_samples == 1);
      return MALI_MSAA_AVERAGE;
   }

   assert(view->nr_samples == nr_samples);
   assert(view->nr_samples == 1);

   return MALI_MSAA_SINGLE;
}
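
/* Illustrative cases for mali_sampling_mode above: a 4-sample image rendered
 * through a matching 4-sample view uses MALI_MSAA_LAYERED, while a 4-sample
 * view over a single-sampled image resolves on writeback with
 * MALI_MSAA_AVERAGE. */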

int
GENX(pan_select_crc_rt)(const struct pan_fb_info *fb, unsigned tile_size)
{
   /* Disable CRC when the tile size is not 16x16. In the hardware, CRC
    * tiles are the same size as the tiles of the framebuffer. However,
    * our code only handles 16x16 tiles. Therefore under the current
    * implementation, we must disable CRC when 16x16 tiles are not used.
    *
    * This may hurt performance. However, smaller tile sizes are rare, and
    * CRCs are more expensive at smaller tile sizes, reducing the benefit.
    * Restricting CRC to 16x16 should work in practice.
    */
   if (tile_size != 16 * 16) {
      assert(tile_size < 16 * 16);
      return -1;
   }

#if PAN_ARCH <= 6
   if (fb->rt_count == 1 && fb->rts[0].view && !fb->rts[0].discard &&
       pan_image_view_has_crc(fb->rts[0].view))
      return 0;

   return -1;
#else
   bool best_rt_valid = false;
   int best_rt = -1;

   for (unsigned i = 0; i < fb->rt_count; i++) {
      if (!fb->rts[i].view || fb->rts[i].discard ||
          !pan_image_view_has_crc(fb->rts[i].view))
         continue;

      bool valid = *(fb->rts[i].crc_valid);
      bool full = !fb->extent.minx && !fb->extent.miny &&
                  fb->extent.maxx == (fb->width - 1) &&
                  fb->extent.maxy == (fb->height - 1);
      if (!full && !valid)
         continue;

      if (best_rt < 0 || (valid && !best_rt_valid)) {
         best_rt = i;
         best_rt_valid = valid;
      }

      if (valid)
         break;
   }

   return best_rt;
#endif
}

static enum mali_zs_format
translate_zs_format(enum pipe_format in)
{
   switch (in) {
   case PIPE_FORMAT_Z16_UNORM:
      return MALI_ZS_FORMAT_D16;
   case PIPE_FORMAT_Z24_UNORM_S8_UINT:
      return MALI_ZS_FORMAT_D24S8;
   case PIPE_FORMAT_Z24X8_UNORM:
      return MALI_ZS_FORMAT_D24X8;
   case PIPE_FORMAT_Z32_FLOAT:
      return MALI_ZS_FORMAT_D32;
#if PAN_ARCH <= 7
   case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
      return MALI_ZS_FORMAT_D32_S8X24;
#endif
   default:
      unreachable("Unsupported depth/stencil format.");
   }
}

#if PAN_ARCH >= 5
static enum mali_s_format
translate_s_format(enum pipe_format in)
{
   switch (in) {
   case PIPE_FORMAT_S8_UINT:
      return MALI_S_FORMAT_S8;
   case PIPE_FORMAT_Z24_UNORM_S8_UINT:
   case PIPE_FORMAT_X24S8_UINT:
      return MALI_S_FORMAT_X24S8;

#if PAN_ARCH <= 7
   case PIPE_FORMAT_S8_UINT_Z24_UNORM:
   case PIPE_FORMAT_S8X24_UINT:
      return MALI_S_FORMAT_S8X24;
   case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
   case PIPE_FORMAT_X32_S8X24_UINT:
      return MALI_S_FORMAT_X32_S8X24;
#endif

   default:
      unreachable("Unsupported stencil format.");
   }
}

static void
pan_prepare_s(const struct pan_fb_info *fb, unsigned layer_idx,
              struct MALI_ZS_CRC_EXTENSION *ext)
{
   const struct pan_image_view *s = fb->zs.view.s;

   if (!s)
      return;

   const struct pan_image *image = pan_image_view_get_zs_image(s);
   unsigned level = s->first_level;

   ext->s_msaa = mali_sampling_mode(s);

   struct pan_surface surf;
   pan_iview_get_surface(s, 0, layer_idx, 0, &surf);

   assert(image->layout.modifier ==
             DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED ||
          image->layout.modifier == DRM_FORMAT_MOD_LINEAR);
   ext->s_writeback_base = surf.data;
   ext->s_writeback_row_stride = image->layout.slices[level].row_stride;
   ext->s_writeback_surface_stride =
      (pan_image_view_get_nr_samples(s) > 1)
         ? image->layout.slices[level].surface_stride
         : 0;
   ext->s_block_format = mod_to_block_fmt(image->layout.modifier);
   ext->s_write_format = translate_s_format(s->format);
}

static void
pan_prepare_zs(const struct pan_fb_info *fb, unsigned layer_idx,
               struct MALI_ZS_CRC_EXTENSION *ext)
{
   const struct pan_image_view *zs = fb->zs.view.zs;

   if (!zs)
      return;

   const struct pan_image *image = pan_image_view_get_zs_image(zs);
   unsigned level = zs->first_level;

   ext->zs_msaa = mali_sampling_mode(zs);

   struct pan_surface surf;
   pan_iview_get_surface(zs, 0, layer_idx, 0, &surf);
   UNUSED const struct pan_image_slice_layout *slice =
      &image->layout.slices[level];

   if (drm_is_afbc(image->layout.modifier)) {
#if PAN_ARCH >= 9
      ext->zs_writeback_base = surf.afbc.header;
      ext->zs_writeback_row_stride = slice->row_stride;
      /* TODO: surface stride? */
      ext->zs_afbc_body_offset = surf.afbc.body - surf.afbc.header;

      /* TODO: stencil AFBC? */
#else
#if PAN_ARCH >= 6
      ext->zs_afbc_row_stride =
         pan_afbc_stride_blocks(image->layout.modifier, slice->row_stride);
#else
      ext->zs_block_format = MALI_BLOCK_FORMAT_AFBC;
      ext->zs_afbc_body_size = 0x1000;
      ext->zs_afbc_chunk_size = 9;
      ext->zs_afbc_sparse = true;
#endif

      ext->zs_afbc_header = surf.afbc.header;
      ext->zs_afbc_body = surf.afbc.body;
#endif
   } else {
      assert(image->layout.modifier ==
                DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED ||
             image->layout.modifier == DRM_FORMAT_MOD_LINEAR);

      /* TODO: Z32F(S8) support, which is always linear */

      ext->zs_writeback_base = surf.data;
      ext->zs_writeback_row_stride = image->layout.slices[level].row_stride;
      ext->zs_writeback_surface_stride =
         (pan_image_view_get_nr_samples(zs) > 1)
            ? image->layout.slices[level].surface_stride
            : 0;
   }

   ext->zs_block_format = mod_to_block_fmt(image->layout.modifier);
   ext->zs_write_format = translate_zs_format(zs->format);
   if (ext->zs_write_format == MALI_ZS_FORMAT_D24S8)
      ext->s_writeback_base = ext->zs_writeback_base;
}

static void
pan_prepare_crc(const struct pan_fb_info *fb, int rt_crc,
                struct MALI_ZS_CRC_EXTENSION *ext)
{
   if (rt_crc < 0)
      return;

   assert(rt_crc < fb->rt_count);

   const struct pan_image_view *rt = fb->rts[rt_crc].view;
   const struct pan_image *image = pan_image_view_get_rt_image(rt);
   const struct pan_image_slice_layout *slice =
      &image->layout.slices[rt->first_level];

   ext->crc_base =
      image->data.base + image->data.offset + slice->crc.offset;
   ext->crc_row_stride = slice->crc.stride;

#if PAN_ARCH >= 7
   ext->crc_render_target = rt_crc;

   if (fb->rts[rt_crc].clear) {
      uint32_t clear_val = fb->rts[rt_crc].clear_value[0];
      ext->crc_clear_color = clear_val | 0xc000000000000000 |
                             (((uint64_t)clear_val & 0xffff) << 32);
   }
#endif
}

static void
pan_emit_zs_crc_ext(const struct pan_fb_info *fb, unsigned layer_idx,
                    int rt_crc, void *zs_crc_ext)
{
   pan_pack(zs_crc_ext, ZS_CRC_EXTENSION, cfg) {
      pan_prepare_crc(fb, rt_crc, &cfg);
      cfg.zs_clean_pixel_write_enable = fb->zs.clear.z || fb->zs.clear.s;
      pan_prepare_zs(fb, layer_idx, &cfg);
      pan_prepare_s(fb, layer_idx, &cfg);
   }
}

/* Measure format as it appears in the tile buffer */

static unsigned
pan_bytes_per_pixel_tib(enum pipe_format format)
{
   const struct pan_blendable_format *bf =
     GENX(panfrost_blendable_format_from_pipe_format)(format);

   if (bf->internal) {
      /* Blendable formats are always 32 bits in the tile buffer;
       * extra bits are used as padding or to dither */
      return 4;
   } else {
      /* Non-blendable formats are raw, rounded up to the nearest
       * power-of-two size */
      unsigned bytes = util_format_get_blocksize(format);
      return util_next_power_of_two(bytes);
   }
}
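
/* Illustrative examples for pan_bytes_per_pixel_tib (assuming RGBA8 UNORM maps
 * to a blendable internal format): PIPE_FORMAT_R8G8B8A8_UNORM takes 4 bytes
 * per sample in the tile buffer, while raw PIPE_FORMAT_R32G32B32_UINT
 * (12 bytes) rounds up to 16. */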

static unsigned
pan_cbuf_bytes_per_pixel(const struct pan_fb_info *fb)
{
   unsigned sum = 0;

   for (int cb = 0; cb < fb->rt_count; ++cb) {
      const struct pan_image_view *rt = fb->rts[cb].view;

      if (!rt)
         continue;

      sum += pan_bytes_per_pixel_tib(rt->format) * rt->nr_samples;
   }

   return sum;
}

/*
 * Select the largest tile size that fits within the tilebuffer budget.
 * Formally, maximize (pixels per tile) such that it is a power of two and
 *
 *      (bytes per pixel) (pixels per tile) <= (max bytes per tile)
 *
 * A bit of algebra gives the following formula.
 */
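/* Worked example with illustrative numbers: a 16384-byte tile buffer and
 * 20 bytes per pixel give util_logbase2_ceil(20) = 5, so the largest tile is
 * 16384 >> 5 = 512 pixels. */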
static unsigned
pan_select_max_tile_size(unsigned tile_buffer_bytes, unsigned bytes_per_pixel)
{
   assert(util_is_power_of_two_nonzero(tile_buffer_bytes));
   assert(tile_buffer_bytes >= 1024);

   return tile_buffer_bytes >> util_logbase2_ceil(bytes_per_pixel);
}

static enum mali_color_format
pan_mfbd_raw_format(unsigned bits)
{
   /* clang-format off */
   switch (bits) {
   case    8: return MALI_COLOR_FORMAT_RAW8;
   case   16: return MALI_COLOR_FORMAT_RAW16;
   case   24: return MALI_COLOR_FORMAT_RAW24;
   case   32: return MALI_COLOR_FORMAT_RAW32;
   case   48: return MALI_COLOR_FORMAT_RAW48;
   case   64: return MALI_COLOR_FORMAT_RAW64;
   case   96: return MALI_COLOR_FORMAT_RAW96;
   case  128: return MALI_COLOR_FORMAT_RAW128;
   case  192: return MALI_COLOR_FORMAT_RAW192;
   case  256: return MALI_COLOR_FORMAT_RAW256;
   case  384: return MALI_COLOR_FORMAT_RAW384;
   case  512: return MALI_COLOR_FORMAT_RAW512;
   case  768: return MALI_COLOR_FORMAT_RAW768;
   case 1024: return MALI_COLOR_FORMAT_RAW1024;
   case 1536: return MALI_COLOR_FORMAT_RAW1536;
   case 2048: return MALI_COLOR_FORMAT_RAW2048;
   default: unreachable("invalid raw bpp");
   }
   /* clang-format on */
}

static void
pan_rt_init_format(const struct pan_image_view *rt,
                   struct MALI_RENDER_TARGET *cfg)
{
   /* Explode details on the format */

   const struct util_format_description *desc =
      util_format_description(rt->format);

   /* The swizzle for rendering is inverted from texturing */

   unsigned char swizzle[4] = {
      PIPE_SWIZZLE_X,
      PIPE_SWIZZLE_Y,
      PIPE_SWIZZLE_Z,
      PIPE_SWIZZLE_W,
   };

   /* Fill in accordingly, defaulting to 8-bit UNORM */

   if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB)
      cfg->srgb = true;

   struct pan_blendable_format fmt =
      *GENX(panfrost_blendable_format_from_pipe_format)(rt->format);
   enum mali_color_format writeback_format;

   if (fmt.internal) {
      cfg->internal_format = fmt.internal;
      writeback_format = fmt.writeback;
      panfrost_invert_swizzle(desc->swizzle, swizzle);
   } else {
      /* Construct RAW internal/writeback, where internal is
       * specified logarithmically (round to next power-of-two).
       * Offset specified from RAW8, where 8 = 2^3 */

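      /* Worked example (illustrative): a 96-bit raw block gives
       * util_logbase2_ceil(96) = 7, so offset = 7 - 3 = 4; the internal
       * format rounds up to the 128-bit RAW entry (RAW8 + 4) while the
       * writeback format stays RAW96. */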
      unsigned bits = desc->block.bits;
      unsigned offset = util_logbase2_ceil(bits) - 3;
      assert(offset <= 4);

      cfg->internal_format = MALI_COLOR_BUFFER_INTERNAL_FORMAT_RAW8 + offset;
      writeback_format = pan_mfbd_raw_format(bits);
   }

#if PAN_ARCH >= 10
   const struct pan_image *image = pan_image_view_get_rt_image(rt);

   if (drm_is_afrc(image->layout.modifier))
      cfg->afrc.writeback_format = writeback_format;
   else
      cfg->writeback_format = writeback_format;
#else
   cfg->writeback_format = writeback_format;
#endif

   cfg->swizzle = panfrost_translate_swizzle_4(swizzle);
}

static void
pan_prepare_rt(const struct pan_fb_info *fb, unsigned layer_idx,
               unsigned rt_idx, unsigned cbuf_offset,
               struct MALI_RENDER_TARGET *cfg)
{
   cfg->clean_pixel_write_enable = fb->rts[rt_idx].clear;
   cfg->internal_buffer_offset = cbuf_offset;
   if (fb->rts[rt_idx].clear) {
      cfg->clear.color_0 = fb->rts[rt_idx].clear_value[0];
      cfg->clear.color_1 = fb->rts[rt_idx].clear_value[1];
      cfg->clear.color_2 = fb->rts[rt_idx].clear_value[2];
      cfg->clear.color_3 = fb->rts[rt_idx].clear_value[3];
   }

   const struct pan_image_view *rt = fb->rts[rt_idx].view;
   if (!rt || fb->rts[rt_idx].discard) {
      cfg->internal_format = MALI_COLOR_BUFFER_INTERNAL_FORMAT_R8G8B8A8;
      cfg->internal_buffer_offset = cbuf_offset;
#if PAN_ARCH >= 7
      cfg->writeback_block_format = MALI_BLOCK_FORMAT_TILED_U_INTERLEAVED;
      cfg->dithering_enable = true;
#endif
      return;
   }

   const struct pan_image *image = pan_image_view_get_rt_image(rt);

   if (!drm_is_afrc(image->layout.modifier))
      cfg->write_enable = true;

   cfg->dithering_enable = true;

   unsigned level = rt->first_level;
   ASSERTED unsigned layer_count = rt->dim == MALI_TEXTURE_DIMENSION_3D
                                      ? rt->planes[0]->layout.depth
                                      : rt->last_layer - rt->first_layer + 1;

   assert(rt->last_level == rt->first_level);
   assert(layer_idx < layer_count);

   int row_stride = image->layout.slices[level].row_stride;

   /* Only set layer_stride for layered MSAA rendering */

   unsigned layer_stride = (pan_image_view_get_nr_samples(rt) > 1)
                              ? image->layout.slices[level].surface_stride
                              : 0;

   cfg->writeback_msaa = mali_sampling_mode(rt);

   pan_rt_init_format(rt, cfg);

   cfg->writeback_block_format = mod_to_block_fmt(image->layout.modifier);

   struct pan_surface surf;
   pan_iview_get_surface(rt, 0, layer_idx, 0, &surf);

   if (drm_is_afbc(image->layout.modifier)) {
#if PAN_ARCH >= 9
      if (image->layout.modifier & AFBC_FORMAT_MOD_YTR)
         cfg->afbc.yuv_transform = true;

      cfg->afbc.wide_block = panfrost_afbc_is_wide(image->layout.modifier);
      cfg->afbc.header = surf.afbc.header;
      cfg->afbc.body_offset = surf.afbc.body - surf.afbc.header;
      assert(surf.afbc.body >= surf.afbc.header);

      cfg->afbc.compression_mode = GENX(pan_afbc_compression_mode)(rt->format);
      cfg->afbc.row_stride = row_stride;
#else
      const struct pan_image_slice_layout *slice = &image->layout.slices[level];

#if PAN_ARCH >= 6
      cfg->afbc.row_stride =
         pan_afbc_stride_blocks(image->layout.modifier, slice->row_stride);
      cfg->afbc.afbc_wide_block_enable =
         panfrost_afbc_is_wide(image->layout.modifier);
#else
      cfg->afbc.chunk_size = 9;
      cfg->afbc.sparse = true;
      cfg->afbc.body_size = slice->afbc.body_size;
#endif

      cfg->afbc.header = surf.afbc.header;
      cfg->afbc.body = surf.afbc.body;

      if (image->layout.modifier & AFBC_FORMAT_MOD_YTR)
         cfg->afbc.yuv_transform_enable = true;
#endif
#if PAN_ARCH >= 10
   } else if (drm_is_afrc(image->layout.modifier)) {
      struct pan_afrc_format_info finfo =
         panfrost_afrc_get_format_info(image->layout.format);

      cfg->writeback_mode = MALI_WRITEBACK_MODE_AFRC_RGB;
      cfg->afrc.block_size =
         GENX(pan_afrc_block_size)(image->layout.modifier, 0);
      cfg->afrc.format =
         GENX(pan_afrc_format)(finfo, image->layout.modifier, 0);

      cfg->rgb.base = surf.data;
      cfg->rgb.row_stride = row_stride;
      cfg->rgb.surface_stride = layer_stride;
#endif
   } else {
      assert(image->layout.modifier == DRM_FORMAT_MOD_LINEAR ||
             image->layout.modifier ==
                DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED);
      cfg->rgb.base = surf.data;
      cfg->rgb.row_stride = row_stride;
      cfg->rgb.surface_stride = layer_stride;
   }
}
#endif

void
GENX(pan_emit_tls)(const struct pan_tls_info *info, void *out)
{
   pan_pack(out, LOCAL_STORAGE, cfg) {
      if (info->tls.size) {
         unsigned shift = panfrost_get_stack_shift(info->tls.size);

         cfg.tls_size = shift;
#if PAN_ARCH >= 9
         /* For now, always use packed TLS addressing. This is
          * better for the cache and requires no fix up code in
          * the shader. We may need to revisit this someday for
          * OpenCL generic pointer support.
          */
         cfg.tls_address_mode = MALI_ADDRESS_MODE_PACKED;

         assert((info->tls.ptr & 4095) == 0);
         cfg.tls_base_pointer = info->tls.ptr >> 8;
#else
         cfg.tls_base_pointer = info->tls.ptr;
#endif
      }

      if (info->wls.size) {
         assert(!(info->wls.ptr & 4095));
         assert((info->wls.ptr & 0xffffffff00000000ULL) ==
                ((info->wls.ptr + info->wls.size - 1) & 0xffffffff00000000ULL));
         cfg.wls_base_pointer = info->wls.ptr;
         unsigned wls_size = pan_wls_adjust_size(info->wls.size);
         cfg.wls_instances = info->wls.instances;
         cfg.wls_size_scale = util_logbase2(wls_size) + 1;
      } else {
         cfg.wls_instances = MALI_LOCAL_STORAGE_NO_WORKGROUP_MEM;
      }
   }
}

#if PAN_ARCH <= 5
static void
pan_emit_midgard_tiler(const struct pan_fb_info *fb,
                       const struct pan_tiler_context *tiler_ctx, void *out)
{
   bool hierarchy = !tiler_ctx->midgard.no_hierarchical_tiling;

   assert(tiler_ctx->midgard.polygon_list);

   pan_pack(out, TILER_CONTEXT, cfg) {
      unsigned header_size;

      if (tiler_ctx->midgard.disable) {
         cfg.hierarchy_mask =
            hierarchy ? MALI_MIDGARD_TILER_DISABLED : MALI_MIDGARD_TILER_USER;
         header_size = MALI_MIDGARD_TILER_MINIMUM_HEADER_SIZE;
         cfg.polygon_list_size = header_size + (hierarchy ? 0 : 4);
         cfg.heap_start = tiler_ctx->midgard.polygon_list;
         cfg.heap_end = tiler_ctx->midgard.polygon_list;
      } else {
         cfg.hierarchy_mask = panfrost_choose_hierarchy_mask(
            fb->width, fb->height, tiler_ctx->midgard.vertex_count, hierarchy);
         header_size = panfrost_tiler_header_size(
            fb->width, fb->height, cfg.hierarchy_mask, hierarchy);
         cfg.polygon_list_size = panfrost_tiler_full_size(
            fb->width, fb->height, cfg.hierarchy_mask, hierarchy);
         cfg.heap_start = tiler_ctx->midgard.heap.start;
         cfg.heap_end = cfg.heap_start + tiler_ctx->midgard.heap.size;
      }

      cfg.polygon_list = tiler_ctx->midgard.polygon_list;
      cfg.polygon_list_body = cfg.polygon_list + header_size;
   }
}
#endif

#if PAN_ARCH >= 5
static void
pan_emit_rt(const struct pan_fb_info *fb, unsigned layer_idx,
            unsigned idx, unsigned cbuf_offset, void *out)
{
   pan_pack(out, RENDER_TARGET, cfg) {
      pan_prepare_rt(fb, layer_idx, idx, cbuf_offset, &cfg);
   }
}

#if PAN_ARCH >= 6
/* All Bifrost and Valhall GPUs are affected by issue TSIX-2033:
 *
 *      Forcing clean_tile_writes breaks INTERSECT readbacks
 *
 * To work around this, use the frame shader mode ALWAYS instead of INTERSECT
 * when clean tile writes are forced. Since INTERSECT is a hint that the
 * hardware may ignore, this cannot affect correctness, only performance */

static enum mali_pre_post_frame_shader_mode
pan_fix_frame_shader_mode(enum mali_pre_post_frame_shader_mode mode,
                          bool force_clean_tile)
{
   if (force_clean_tile && mode == MALI_PRE_POST_FRAME_SHADER_MODE_INTERSECT)
      return MALI_PRE_POST_FRAME_SHADER_MODE_ALWAYS;
   else
      return mode;
}

/* Regardless of clean_tile_write_enable, the hardware writes clean tiles if
 * the effective tile size differs from the superblock size of any enabled AFBC
 * render target. Check this condition. */

static bool
pan_force_clean_write_rt(const struct pan_image_view *rt, unsigned tile_size)
{
   const struct pan_image *image = pan_image_view_get_rt_image(rt);
   if (!drm_is_afbc(image->layout.modifier))
      return false;

   unsigned superblock = panfrost_afbc_superblock_width(image->layout.modifier);

   assert(superblock >= 16);
   assert(tile_size <= 16 * 16);

   /* Tile size and superblock differ unless they are both 16x16 */
   return !(superblock == 16 && tile_size == 16 * 16);
}
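
/* For example, an AFBC render target with 32x8 superblocks forces clean tile
 * writes, and so does any AFBC target once the effective tile size has been
 * shrunk below 16x16 (say to 8x8) by a large per-pixel tile-buffer footprint. */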

static bool
pan_force_clean_write(const struct pan_fb_info *fb, unsigned tile_size)
{
   /* Maximum tile size */
   assert(tile_size <= 16 * 16);

   for (unsigned i = 0; i < fb->rt_count; ++i) {
      if (fb->rts[i].view && !fb->rts[i].discard &&
          pan_force_clean_write_rt(fb->rts[i].view, tile_size))
         return true;
   }

   if (fb->zs.view.zs && !fb->zs.discard.z &&
       pan_force_clean_write_rt(fb->zs.view.zs, tile_size))
      return true;

   if (fb->zs.view.s && !fb->zs.discard.s &&
       pan_force_clean_write_rt(fb->zs.view.s, tile_size))
      return true;

   return false;
}

#endif

unsigned
GENX(pan_emit_fbd)(const struct pan_fb_info *fb, unsigned layer_idx,
                   const struct pan_tls_info *tls,
                   const struct pan_tiler_context *tiler_ctx, void *out)
{
   void *fbd = out;
   void *rtd = out + pan_size(FRAMEBUFFER);

#if PAN_ARCH <= 5
   GENX(pan_emit_tls)(tls, pan_section_ptr(fbd, FRAMEBUFFER, LOCAL_STORAGE));
#endif

   unsigned bytes_per_pixel = pan_cbuf_bytes_per_pixel(fb);
   unsigned tile_size =
      pan_select_max_tile_size(fb->tile_buf_budget, bytes_per_pixel);

   /* Clamp tile size to hardware limits */
   tile_size = MIN2(tile_size, 16 * 16);
   assert(tile_size >= 4 * 4);

   /* Colour buffer allocations must be 1K aligned. */
   unsigned cbuf_allocation = ALIGN_POT(bytes_per_pixel * tile_size, 1024);
   assert(cbuf_allocation <= fb->tile_buf_budget && "tile too big");
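
   /* Illustrative numbers: with an 8-byte-per-pixel colour footprint and a
    * 16384-byte budget, the formula above yields 2048 pixels, clamped to the
    * 16x16 (256-pixel) maximum, and the allocation becomes
    * ALIGN_POT(8 * 256, 1024) = 2048 bytes. */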

   int crc_rt = GENX(pan_select_crc_rt)(fb, tile_size);
   bool has_zs_crc_ext = (fb->zs.view.zs || fb->zs.view.s || crc_rt >= 0);

   pan_section_pack(fbd, FRAMEBUFFER, PARAMETERS, cfg) {
#if PAN_ARCH >= 6
      bool force_clean_write = pan_force_clean_write(fb, tile_size);

      cfg.sample_locations = fb->sample_positions;
      cfg.pre_frame_0 = pan_fix_frame_shader_mode(fb->bifrost.pre_post.modes[0],
                                                  force_clean_write);
      cfg.pre_frame_1 = pan_fix_frame_shader_mode(fb->bifrost.pre_post.modes[1],
                                                  force_clean_write);
      cfg.post_frame = pan_fix_frame_shader_mode(fb->bifrost.pre_post.modes[2],
                                                 force_clean_write);
      cfg.frame_shader_dcds = fb->bifrost.pre_post.dcds.gpu;
      cfg.tiler =
         PAN_ARCH >= 9 ? tiler_ctx->valhall.desc : tiler_ctx->bifrost.desc;
#endif
      cfg.width = fb->width;
      cfg.height = fb->height;
      cfg.bound_max_x = fb->width - 1;
      cfg.bound_max_y = fb->height - 1;

      cfg.effective_tile_size = tile_size;
      cfg.tie_break_rule = MALI_TIE_BREAK_RULE_MINUS_180_IN_0_OUT;
      cfg.render_target_count = MAX2(fb->rt_count, 1);

      /* Default to 24 bit depth if there's no surface. */
      cfg.z_internal_format =
         fb->zs.view.zs ? panfrost_get_z_internal_format(fb->zs.view.zs->format)
                        : MALI_Z_INTERNAL_FORMAT_D24;

      cfg.z_clear = fb->zs.clear_value.depth;
      cfg.s_clear = fb->zs.clear_value.stencil;
      cfg.color_buffer_allocation = cbuf_allocation;

      /* The force_samples setting dictates the sample count that is used
       * for rasterization, and works like D3D11's ForcedSampleCount feature:
       *
       * - If force_samples == 0: Let nr_samples dictate sample count
       * - If force_samples == 1: force single-sampled rasterization
       * - If force_samples > 1: force multi-sampled rasterization
       *
       * This can be used to read SYSTEM_VALUE_SAMPLE_MASK_IN from the
       * fragment shader, even when performing single-sampled rendering.
       */
      if (!fb->force_samples) {
         cfg.sample_count = fb->nr_samples;
         cfg.sample_pattern = pan_sample_pattern(fb->nr_samples);
      } else if (fb->force_samples == 1) {
         cfg.sample_count = fb->nr_samples;
         cfg.sample_pattern = pan_sample_pattern(1);
      } else {
         cfg.sample_count = 1;
         cfg.sample_pattern = pan_sample_pattern(fb->force_samples);
      }

      cfg.z_write_enable = (fb->zs.view.zs && !fb->zs.discard.z);
      cfg.s_write_enable = (fb->zs.view.s && !fb->zs.discard.s);
      cfg.has_zs_crc_extension = has_zs_crc_ext;

      if (crc_rt >= 0) {
         bool *valid = fb->rts[crc_rt].crc_valid;
         bool full = !fb->extent.minx && !fb->extent.miny &&
                     fb->extent.maxx == (fb->width - 1) &&
                     fb->extent.maxy == (fb->height - 1);

         cfg.crc_read_enable = *valid;

         /* If the data is currently invalid, still write CRC
          * data if we are doing a full write, so that it is
          * valid for next time. */
         cfg.crc_write_enable = *valid || full;

         *valid |= full;
      }

#if PAN_ARCH >= 9
      cfg.point_sprite_coord_origin_max_y = fb->sprite_coord_origin;
      cfg.first_provoking_vertex = fb->first_provoking_vertex;

      /* internal_layer_index is used to select the right primitive list in the
       * tiler context, and frame_arg is the value that's passed to the fragment
       * shader through r62-r63, which we use to pass gl_Layer. Since the
       * layer_idx only takes 8 bits, we might use the extra 56 bits we have
       * in frame_argument to pass other information to the fragment shader at
       * some point. */
      cfg.internal_layer_index = layer_idx;
      cfg.frame_argument = layer_idx;
#endif
   }

#if PAN_ARCH >= 6
   pan_section_pack(fbd, FRAMEBUFFER, PADDING, padding)
      ;
#else
   pan_emit_midgard_tiler(fb, tiler_ctx,
                          pan_section_ptr(fbd, FRAMEBUFFER, TILER));

   /* All weights set to 0, nothing to do here */
   pan_section_pack(fbd, FRAMEBUFFER, TILER_WEIGHTS, w)
      ;
#endif

   if (has_zs_crc_ext) {
      pan_emit_zs_crc_ext(fb, layer_idx, crc_rt, out + pan_size(FRAMEBUFFER));
      rtd += pan_size(ZS_CRC_EXTENSION);
   }

   unsigned rt_count = MAX2(fb->rt_count, 1);
   unsigned cbuf_offset = 0;
   for (unsigned i = 0; i < rt_count; i++) {
      pan_emit_rt(fb, layer_idx, i, cbuf_offset, rtd);
      rtd += pan_size(RENDER_TARGET);
      if (!fb->rts[i].view)
         continue;

      cbuf_offset += pan_bytes_per_pixel_tib(fb->rts[i].view->format) *
                     tile_size * pan_image_view_get_nr_samples(fb->rts[i].view);

      if (i != crc_rt)
         *(fb->rts[i].crc_valid) = false;
   }

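   /* The framebuffer pointer tag encodes the render-target count and the
    * presence of the ZS/CRC extension in the low bits of the descriptor
    * address; callers are expected to OR the returned value into the GPU
    * address of this descriptor (illustrative: fbd_gpu_va | tag). */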
   struct mali_framebuffer_pointer_packed tag;
   pan_pack(tag.opaque, FRAMEBUFFER_POINTER, cfg) {
      cfg.zs_crc_extension_present = has_zs_crc_ext;
      cfg.render_target_count = MAX2(fb->rt_count, 1);
   }
   return tag.opaque[0];
}
#else /* PAN_ARCH == 4 */
static enum mali_color_format
pan_sfbd_raw_format(unsigned bits)
{
   /* clang-format off */
   switch (bits) {
   case   16: return MALI_COLOR_FORMAT_1_16B_CHANNEL;
   case   32: return MALI_COLOR_FORMAT_1_32B_CHANNEL;
   case   48: return MALI_COLOR_FORMAT_3_16B_CHANNELS;
   case   64: return MALI_COLOR_FORMAT_2_32B_CHANNELS;
   case   96: return MALI_COLOR_FORMAT_3_32B_CHANNELS;
   case  128: return MALI_COLOR_FORMAT_4_32B_CHANNELS;
   default: unreachable("invalid raw bpp");
   }
   /* clang-format on */
}
unsigned
GENX(pan_emit_fbd)(const struct pan_fb_info *fb, unsigned layer_idx,
                   const struct pan_tls_info *tls,
                   const struct pan_tiler_context *tiler_ctx, void *fbd)
{
   assert(fb->rt_count <= 1);

   GENX(pan_emit_tls)(tls, pan_section_ptr(fbd, FRAMEBUFFER, LOCAL_STORAGE));
   pan_section_pack(fbd, FRAMEBUFFER, PARAMETERS, cfg) {
      cfg.bound_max_x = fb->width - 1;
      cfg.bound_max_y = fb->height - 1;
      cfg.dithering_enable = true;
      cfg.clean_pixel_write_enable = true;
      cfg.tie_break_rule = MALI_TIE_BREAK_RULE_MINUS_180_IN_0_OUT;
      if (fb->rts[0].clear) {
         cfg.clear_color_0 = fb->rts[0].clear_value[0];
         cfg.clear_color_1 = fb->rts[0].clear_value[1];
         cfg.clear_color_2 = fb->rts[0].clear_value[2];
         cfg.clear_color_3 = fb->rts[0].clear_value[3];
      }

      if (fb->zs.clear.z)
         cfg.z_clear = fb->zs.clear_value.depth;

      if (fb->zs.clear.s)
         cfg.s_clear = fb->zs.clear_value.stencil;

      if (fb->rt_count && fb->rts[0].view) {
         const struct pan_image_view *rt = fb->rts[0].view;
         const struct pan_image *image = pan_image_view_get_rt_image(rt);

         const struct util_format_description *desc =
            util_format_description(rt->format);

         /* The swizzle for rendering is inverted from texturing */
         unsigned char swizzle[4];
         panfrost_invert_swizzle(desc->swizzle, swizzle);
         cfg.swizzle = panfrost_translate_swizzle_4(swizzle);

         struct pan_blendable_format fmt =
            *GENX(panfrost_blendable_format_from_pipe_format)(rt->format);

         if (fmt.internal) {
            cfg.internal_format = fmt.internal;
            cfg.color_writeback_format = fmt.writeback;
         } else {
            /* Construct RAW internal/writeback */
            unsigned bits = desc->block.bits;

            cfg.internal_format = MALI_COLOR_BUFFER_INTERNAL_FORMAT_RAW_VALUE;
            cfg.color_writeback_format = pan_sfbd_raw_format(bits);
         }

         unsigned level = rt->first_level;
         struct pan_surface surf;

         pan_iview_get_surface(rt, 0, 0, 0, &surf);

         cfg.color_write_enable = !fb->rts[0].discard;
         cfg.color_writeback.base = surf.data;
         cfg.color_writeback.row_stride =
            image->layout.slices[level].row_stride;

         cfg.color_block_format = mod_to_block_fmt(image->layout.modifier);
         assert(cfg.color_block_format == MALI_BLOCK_FORMAT_LINEAR ||
                cfg.color_block_format ==
                   MALI_BLOCK_FORMAT_TILED_U_INTERLEAVED);

         if (pan_image_view_has_crc(rt)) {
            const struct pan_image_slice_layout *slice =
               &image->layout.slices[level];

            cfg.crc_buffer.row_stride = slice->crc.stride;
            cfg.crc_buffer.base =
               image->data.base + image->data.offset + slice->crc.offset;
         }
      }

      if (fb->zs.view.zs) {
         const struct pan_image_view *zs = fb->zs.view.zs;
         const struct pan_image *image = pan_image_view_get_zs_image(zs);
         unsigned level = zs->first_level;
         struct pan_surface surf;

         pan_iview_get_surface(zs, 0, 0, 0, &surf);

         cfg.zs_write_enable = !fb->zs.discard.z;
         cfg.zs_writeback.base = surf.data;
         cfg.zs_writeback.row_stride = image->layout.slices[level].row_stride;
         cfg.zs_block_format = mod_to_block_fmt(image->layout.modifier);
         assert(cfg.zs_block_format == MALI_BLOCK_FORMAT_LINEAR ||
                cfg.zs_block_format == MALI_BLOCK_FORMAT_TILED_U_INTERLEAVED);

         cfg.zs_format = translate_zs_format(zs->format);
      }

      cfg.sample_count = fb->nr_samples;

      if (fb->rt_count)
         cfg.msaa = mali_sampling_mode(fb->rts[0].view);
   }

   pan_emit_midgard_tiler(fb, tiler_ctx,
                          pan_section_ptr(fbd, FRAMEBUFFER, TILER));

   /* All weights set to 0, nothing to do here */
   pan_section_pack(fbd, FRAMEBUFFER, TILER_WEIGHTS, w)
      ;

   pan_section_pack(fbd, FRAMEBUFFER, PADDING_1, padding)
      ;
   pan_section_pack(fbd, FRAMEBUFFER, PADDING_2, padding)
      ;
   return 0;
}
#endif

#if PAN_ARCH <= 9
void
GENX(pan_emit_fragment_job_payload)(const struct pan_fb_info *fb, mali_ptr fbd,
                                    void *out)
{
   pan_section_pack(out, FRAGMENT_JOB, PAYLOAD, payload) {
      payload.bound_min_x = fb->extent.minx >> MALI_TILE_SHIFT;
      payload.bound_min_y = fb->extent.miny >> MALI_TILE_SHIFT;
      payload.bound_max_x = fb->extent.maxx >> MALI_TILE_SHIFT;
      payload.bound_max_y = fb->extent.maxy >> MALI_TILE_SHIFT;
      payload.framebuffer = fbd;

#if PAN_ARCH >= 5
      if (fb->tile_map.base) {
         payload.has_tile_enable_map = true;
         payload.tile_enable_map = fb->tile_map.base;
         payload.tile_enable_map_row_stride = fb->tile_map.stride;
      }
#endif
   }
}
#endif