/*
 * Copyright (C) 2021 Collabora, Ltd.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *   Alyssa Rosenzweig <[email protected]>
 *   Boris Brezillon <[email protected]>
 */

#include "util/macros.h"

#include "genxml/gen_macros.h"

#include "pan_desc.h"
#include "pan_encoder.h"
#include "pan_texture.h"

static unsigned
mod_to_block_fmt(uint64_t mod)
{
   switch (mod) {
   case DRM_FORMAT_MOD_LINEAR:
      return MALI_BLOCK_FORMAT_LINEAR;
   case DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED:
      return MALI_BLOCK_FORMAT_TILED_U_INTERLEAVED;
   default:
#if PAN_ARCH >= 5
      if (drm_is_afbc(mod) && !(mod & AFBC_FORMAT_MOD_TILED))
         return MALI_BLOCK_FORMAT_AFBC;
#endif
#if PAN_ARCH >= 7
      if (drm_is_afbc(mod) && (mod & AFBC_FORMAT_MOD_TILED))
         return MALI_BLOCK_FORMAT_AFBC_TILED;
#endif
#if PAN_ARCH >= 10
      if (drm_is_afrc(mod))
         return 0; /* Reserved field for AFRC state */
#endif

      unreachable("Unsupported modifier");
   }
}

static enum mali_msaa
mali_sampling_mode(const struct pan_image_view *view)
{
   unsigned nr_samples = pan_image_view_get_nr_samples(view);

   if (nr_samples > 1) {
      assert(view->nr_samples == nr_samples);
      assert(view->planes[0]->layout.slices[0].surface_stride != 0);
      return MALI_MSAA_LAYERED;
   }

   if (view->nr_samples > nr_samples) {
      assert(nr_samples == 1);
      return MALI_MSAA_AVERAGE;
   }

   assert(view->nr_samples == nr_samples);
   assert(view->nr_samples == 1);

   return MALI_MSAA_SINGLE;
}

int
GENX(pan_select_crc_rt)(const struct pan_fb_info *fb, unsigned tile_size)
{
   /* Disable CRC when the tile size is not 16x16. In the hardware, CRC
    * tiles are the same size as the tiles of the framebuffer. However,
    * our code only handles 16x16 tiles. Therefore under the current
    * implementation, we must disable CRC when 16x16 tiles are not used.
    *
    * This may hurt performance. However, smaller tile sizes are rare, and
    * CRCs are more expensive at smaller tile sizes, reducing the benefit.
    * Restricting CRC to 16x16 should work in practice.
    */
   if (tile_size != 16 * 16) {
      assert(tile_size < 16 * 16);
      return -1;
   }

#if PAN_ARCH <= 6
   if (fb->rt_count == 1 && fb->rts[0].view && !fb->rts[0].discard &&
       pan_image_view_has_crc(fb->rts[0].view))
      return 0;

   return -1;
#else
   bool best_rt_valid = false;
   int best_rt = -1;

   for (unsigned i = 0; i < fb->rt_count; i++) {
      if (!fb->rts[i].view || fb->rts[i].discard ||
          !pan_image_view_has_crc(fb->rts[i].view))
         continue;

      bool valid = *(fb->rts[i].crc_valid);
      bool full = !fb->extent.minx && !fb->extent.miny &&
                  fb->extent.maxx == (fb->width - 1) &&
                  fb->extent.maxy == (fb->height - 1);
      if (!full && !valid)
         continue;

      if (best_rt < 0 || (valid && !best_rt_valid)) {
         best_rt = i;
         best_rt_valid = valid;
      }

      if (valid)
         break;
   }

   return best_rt;
#endif
}

static enum mali_zs_format
translate_zs_format(enum pipe_format in)
{
   switch (in) {
   case PIPE_FORMAT_Z16_UNORM:
      return MALI_ZS_FORMAT_D16;
   case PIPE_FORMAT_Z24_UNORM_S8_UINT:
      return MALI_ZS_FORMAT_D24S8;
   case PIPE_FORMAT_Z24X8_UNORM:
      return MALI_ZS_FORMAT_D24X8;
   case PIPE_FORMAT_Z32_FLOAT:
      return MALI_ZS_FORMAT_D32;
#if PAN_ARCH <= 7
   case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
      return MALI_ZS_FORMAT_D32_S8X24;
#endif
   default:
      unreachable("Unsupported depth/stencil format.");
   }
}

#if PAN_ARCH >= 5
static enum mali_s_format
translate_s_format(enum pipe_format in)
{
   switch (in) {
   case PIPE_FORMAT_S8_UINT:
      return MALI_S_FORMAT_S8;
   case PIPE_FORMAT_Z24_UNORM_S8_UINT:
   case PIPE_FORMAT_X24S8_UINT:
      return MALI_S_FORMAT_X24S8;

#if PAN_ARCH <= 7
   case PIPE_FORMAT_S8_UINT_Z24_UNORM:
   case PIPE_FORMAT_S8X24_UINT:
      return MALI_S_FORMAT_S8X24;
   case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
   case PIPE_FORMAT_X32_S8X24_UINT:
      return MALI_S_FORMAT_X32_S8X24;
#endif

   default:
      unreachable("Unsupported stencil format.");
   }
}

static void
pan_prepare_s(const struct pan_fb_info *fb, unsigned layer_idx,
              struct MALI_ZS_CRC_EXTENSION *ext)
{
   const struct pan_image_view *s = fb->zs.view.s;

   if (!s)
      return;

   const struct pan_image *image = pan_image_view_get_zs_image(s);
   unsigned level = s->first_level;

   ext->s_msaa = mali_sampling_mode(s);

   struct pan_surface surf;
   pan_iview_get_surface(s, 0, layer_idx, 0, &surf);

   assert(image->layout.modifier ==
             DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED ||
          image->layout.modifier == DRM_FORMAT_MOD_LINEAR);
   ext->s_writeback_base = surf.data;
   ext->s_writeback_row_stride = image->layout.slices[level].row_stride;
   ext->s_writeback_surface_stride =
      (pan_image_view_get_nr_samples(s) > 1)
         ? image->layout.slices[level].surface_stride
         : 0;
   ext->s_block_format = mod_to_block_fmt(image->layout.modifier);
   ext->s_write_format = translate_s_format(s->format);
}

static void
pan_prepare_zs(const struct pan_fb_info *fb, unsigned layer_idx,
               struct MALI_ZS_CRC_EXTENSION *ext)
{
   const struct pan_image_view *zs = fb->zs.view.zs;

   if (!zs)
      return;

   const struct pan_image *image = pan_image_view_get_zs_image(zs);
   unsigned level = zs->first_level;

   ext->zs_msaa = mali_sampling_mode(zs);

   struct pan_surface surf;
   pan_iview_get_surface(zs, 0, layer_idx, 0, &surf);
   UNUSED const struct pan_image_slice_layout *slice =
      &image->layout.slices[level];

   if (drm_is_afbc(image->layout.modifier)) {
#if PAN_ARCH >= 9
      ext->zs_writeback_base = surf.afbc.header;
      ext->zs_writeback_row_stride = slice->row_stride;
      /* TODO: surface stride? */
      ext->zs_afbc_body_offset = surf.afbc.body - surf.afbc.header;

      /* TODO: stencil AFBC? */
#else
#if PAN_ARCH >= 6
      ext->zs_afbc_row_stride =
         pan_afbc_stride_blocks(image->layout.modifier, slice->row_stride);
#else
      ext->zs_block_format = MALI_BLOCK_FORMAT_AFBC;
      ext->zs_afbc_body_size = 0x1000;
      ext->zs_afbc_chunk_size = 9;
      ext->zs_afbc_sparse = true;
#endif

      ext->zs_afbc_header = surf.afbc.header;
      ext->zs_afbc_body = surf.afbc.body;
#endif
   } else {
      assert(image->layout.modifier ==
                DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED ||
             image->layout.modifier == DRM_FORMAT_MOD_LINEAR);

      /* TODO: Z32F(S8) support, which is always linear */

      ext->zs_writeback_base = surf.data;
      ext->zs_writeback_row_stride = image->layout.slices[level].row_stride;
      ext->zs_writeback_surface_stride =
         (pan_image_view_get_nr_samples(zs) > 1)
            ? image->layout.slices[level].surface_stride
            : 0;
   }

   ext->zs_block_format = mod_to_block_fmt(image->layout.modifier);
   ext->zs_write_format = translate_zs_format(zs->format);
   if (ext->zs_write_format == MALI_ZS_FORMAT_D24S8)
      ext->s_writeback_base = ext->zs_writeback_base;
}

static void
pan_prepare_crc(const struct pan_fb_info *fb, int rt_crc,
                struct MALI_ZS_CRC_EXTENSION *ext)
{
   if (rt_crc < 0)
      return;

   assert(rt_crc < fb->rt_count);

   const struct pan_image_view *rt = fb->rts[rt_crc].view;
   const struct pan_image *image = pan_image_view_get_rt_image(rt);
   const struct pan_image_slice_layout *slice =
      &image->layout.slices[rt->first_level];

   ext->crc_base =
      image->data.base + image->data.offset + slice->crc.offset;
   ext->crc_row_stride = slice->crc.stride;

#if PAN_ARCH >= 7
   ext->crc_render_target = rt_crc;

   if (fb->rts[rt_crc].clear) {
      uint32_t clear_val = fb->rts[rt_crc].clear_value[0];
      ext->crc_clear_color = clear_val | 0xc000000000000000 |
                             (((uint64_t)clear_val & 0xffff) << 32);
   }
#endif
}

static void
pan_emit_zs_crc_ext(const struct pan_fb_info *fb, unsigned layer_idx,
                    int rt_crc, void *zs_crc_ext)
{
   pan_pack(zs_crc_ext, ZS_CRC_EXTENSION, cfg) {
      pan_prepare_crc(fb, rt_crc, &cfg);
      cfg.zs_clean_pixel_write_enable = fb->zs.clear.z || fb->zs.clear.s;
      pan_prepare_zs(fb, layer_idx, &cfg);
      pan_prepare_s(fb, layer_idx, &cfg);
   }
}

/* Measure format as it appears in the tile buffer */

static unsigned
pan_bytes_per_pixel_tib(enum pipe_format format)
{
   const struct pan_blendable_format *bf =
      GENX(panfrost_blendable_format_from_pipe_format)(format);

   if (bf->internal) {
      /* Blendable formats are always 32-bits in the tile buffer,
       * extra bits are used as padding or to dither */
      return 4;
   } else {
      /* Non-blendable formats are raw, rounded up to the nearest
       * power-of-two size */
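      /* For instance, a 48-bit raw block is 6 bytes and
       * util_next_power_of_two(6) = 8, so it occupies 8 bytes in the tile
       * buffer (illustrative arithmetic only, not an additional case). */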
      unsigned bytes = util_format_get_blocksize(format);
      return util_next_power_of_two(bytes);
   }
}

static unsigned
pan_cbuf_bytes_per_pixel(const struct pan_fb_info *fb)
{
   unsigned sum = 0;

   for (int cb = 0; cb < fb->rt_count; ++cb) {
      const struct pan_image_view *rt = fb->rts[cb].view;

      if (!rt)
         continue;

      sum += pan_bytes_per_pixel_tib(rt->format) * rt->nr_samples;
   }

   return sum;
}

/*
 * Select the largest tile size that fits within the tilebuffer budget.
 * Formally, maximize (pixels per tile) such that it is a power of two and
 *
 *      (bytes per pixel) (pixels per tile) <= (max bytes per tile)
 *
 * A bit of algebra gives the following formula.
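 *
 * As an illustrative example (arbitrary numbers, not hardware limits): with a
 * 16384-byte tile buffer and 24 bytes per pixel, util_logbase2_ceil(24) = 5,
 * so this returns 16384 >> 5 = 512 pixels per tile; the caller further clamps
 * the result to the 16x16 = 256 pixel maximum.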
 */
static unsigned
pan_select_max_tile_size(unsigned tile_buffer_bytes, unsigned bytes_per_pixel)
{
   assert(util_is_power_of_two_nonzero(tile_buffer_bytes));
   assert(tile_buffer_bytes >= 1024);

   return tile_buffer_bytes >> util_logbase2_ceil(bytes_per_pixel);
}

static enum mali_color_format
pan_mfbd_raw_format(unsigned bits)
{
   /* clang-format off */
   switch (bits) {
   case    8: return MALI_COLOR_FORMAT_RAW8;
   case   16: return MALI_COLOR_FORMAT_RAW16;
   case   24: return MALI_COLOR_FORMAT_RAW24;
   case   32: return MALI_COLOR_FORMAT_RAW32;
   case   48: return MALI_COLOR_FORMAT_RAW48;
   case   64: return MALI_COLOR_FORMAT_RAW64;
   case   96: return MALI_COLOR_FORMAT_RAW96;
   case  128: return MALI_COLOR_FORMAT_RAW128;
   case  192: return MALI_COLOR_FORMAT_RAW192;
   case  256: return MALI_COLOR_FORMAT_RAW256;
   case  384: return MALI_COLOR_FORMAT_RAW384;
   case  512: return MALI_COLOR_FORMAT_RAW512;
   case  768: return MALI_COLOR_FORMAT_RAW768;
   case 1024: return MALI_COLOR_FORMAT_RAW1024;
   case 1536: return MALI_COLOR_FORMAT_RAW1536;
   case 2048: return MALI_COLOR_FORMAT_RAW2048;
   default: unreachable("invalid raw bpp");
   }
   /* clang-format on */
}

static void
pan_rt_init_format(const struct pan_image_view *rt,
                   struct MALI_RENDER_TARGET *cfg)
{
   /* Explode details on the format */

   const struct util_format_description *desc =
      util_format_description(rt->format);

   /* The swizzle for rendering is inverted from texturing */

   unsigned char swizzle[4] = {
      PIPE_SWIZZLE_X,
      PIPE_SWIZZLE_Y,
      PIPE_SWIZZLE_Z,
      PIPE_SWIZZLE_W,
   };

   /* Fill in accordingly, defaulting to 8-bit UNORM */

   if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB)
      cfg->srgb = true;

   struct pan_blendable_format fmt =
      *GENX(panfrost_blendable_format_from_pipe_format)(rt->format);
   enum mali_color_format writeback_format;

   if (fmt.internal) {
      cfg->internal_format = fmt.internal;
      writeback_format = fmt.writeback;
      panfrost_invert_swizzle(desc->swizzle, swizzle);
   } else {
      /* Construct RAW internal/writeback, where internal is
       * specified logarithmically (round to next power-of-two).
       * Offset specified from RAW8, where 8 = 2^3 */
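      /* For example, bits = 32 gives offset = util_logbase2_ceil(32) - 3 = 2,
       * i.e. two steps above RAW8 (illustrative arithmetic only). */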

      unsigned bits = desc->block.bits;
      unsigned offset = util_logbase2_ceil(bits) - 3;
      assert(offset <= 4);

      cfg->internal_format = MALI_COLOR_BUFFER_INTERNAL_FORMAT_RAW8 + offset;
      writeback_format = pan_mfbd_raw_format(bits);
   }

#if PAN_ARCH >= 10
   const struct pan_image *image = pan_image_view_get_rt_image(rt);

   if (drm_is_afrc(image->layout.modifier))
      cfg->afrc.writeback_format = writeback_format;
   else
      cfg->writeback_format = writeback_format;
#else
   cfg->writeback_format = writeback_format;
#endif

   cfg->swizzle = panfrost_translate_swizzle_4(swizzle);
}

static void
pan_prepare_rt(const struct pan_fb_info *fb, unsigned layer_idx,
               unsigned rt_idx, unsigned cbuf_offset,
               struct MALI_RENDER_TARGET *cfg)
{
   cfg->clean_pixel_write_enable = fb->rts[rt_idx].clear;
   cfg->internal_buffer_offset = cbuf_offset;
   if (fb->rts[rt_idx].clear) {
      cfg->clear.color_0 = fb->rts[rt_idx].clear_value[0];
      cfg->clear.color_1 = fb->rts[rt_idx].clear_value[1];
      cfg->clear.color_2 = fb->rts[rt_idx].clear_value[2];
      cfg->clear.color_3 = fb->rts[rt_idx].clear_value[3];
   }

   const struct pan_image_view *rt = fb->rts[rt_idx].view;
   if (!rt || fb->rts[rt_idx].discard) {
      cfg->internal_format = MALI_COLOR_BUFFER_INTERNAL_FORMAT_R8G8B8A8;
      cfg->internal_buffer_offset = cbuf_offset;
#if PAN_ARCH >= 7
      cfg->writeback_block_format = MALI_BLOCK_FORMAT_TILED_U_INTERLEAVED;
      cfg->dithering_enable = true;
#endif
      return;
   }

   const struct pan_image *image = pan_image_view_get_rt_image(rt);

   if (!drm_is_afrc(image->layout.modifier))
      cfg->write_enable = true;

   cfg->dithering_enable = true;

   unsigned level = rt->first_level;
   ASSERTED unsigned layer_count = rt->dim == MALI_TEXTURE_DIMENSION_3D
                                      ? rt->planes[0]->layout.depth
                                      : rt->last_layer - rt->first_layer + 1;

   assert(rt->last_level == rt->first_level);
   assert(layer_idx < layer_count);

   int row_stride = image->layout.slices[level].row_stride;

   /* Only set layer_stride for layered MSAA rendering */

   unsigned layer_stride = (pan_image_view_get_nr_samples(rt) > 1)
                              ? image->layout.slices[level].surface_stride
                              : 0;

   cfg->writeback_msaa = mali_sampling_mode(rt);

   pan_rt_init_format(rt, cfg);

   cfg->writeback_block_format = mod_to_block_fmt(image->layout.modifier);

   struct pan_surface surf;
   pan_iview_get_surface(rt, 0, layer_idx, 0, &surf);

   if (drm_is_afbc(image->layout.modifier)) {
#if PAN_ARCH >= 9
      if (image->layout.modifier & AFBC_FORMAT_MOD_YTR)
         cfg->afbc.yuv_transform = true;

      cfg->afbc.wide_block = panfrost_afbc_is_wide(image->layout.modifier);
      cfg->afbc.header = surf.afbc.header;
      cfg->afbc.body_offset = surf.afbc.body - surf.afbc.header;
      assert(surf.afbc.body >= surf.afbc.header);

      cfg->afbc.compression_mode = GENX(pan_afbc_compression_mode)(rt->format);
      cfg->afbc.row_stride = row_stride;
#else
      const struct pan_image_slice_layout *slice = &image->layout.slices[level];

#if PAN_ARCH >= 6
      cfg->afbc.row_stride =
         pan_afbc_stride_blocks(image->layout.modifier, slice->row_stride);
      cfg->afbc.afbc_wide_block_enable =
         panfrost_afbc_is_wide(image->layout.modifier);
#else
      cfg->afbc.chunk_size = 9;
      cfg->afbc.sparse = true;
      cfg->afbc.body_size = slice->afbc.body_size;
#endif

      cfg->afbc.header = surf.afbc.header;
      cfg->afbc.body = surf.afbc.body;

      if (image->layout.modifier & AFBC_FORMAT_MOD_YTR)
         cfg->afbc.yuv_transform_enable = true;
#endif
#if PAN_ARCH >= 10
   } else if (drm_is_afrc(image->layout.modifier)) {
      struct pan_afrc_format_info finfo =
         panfrost_afrc_get_format_info(image->layout.format);

      cfg->writeback_mode = MALI_WRITEBACK_MODE_AFRC_RGB;
      cfg->afrc.block_size =
         GENX(pan_afrc_block_size)(image->layout.modifier, 0);
      cfg->afrc.format =
         GENX(pan_afrc_format)(finfo, image->layout.modifier, 0);

      cfg->rgb.base = surf.data;
      cfg->rgb.row_stride = row_stride;
      cfg->rgb.surface_stride = layer_stride;
#endif
   } else {
      assert(image->layout.modifier == DRM_FORMAT_MOD_LINEAR ||
             image->layout.modifier ==
                DRM_FORMAT_MOD_ARM_16X16_BLOCK_U_INTERLEAVED);
      cfg->rgb.base = surf.data;
      cfg->rgb.row_stride = row_stride;
      cfg->rgb.surface_stride = layer_stride;
   }
}
#endif

void
GENX(pan_emit_tls)(const struct pan_tls_info *info, void *out)
{
   pan_pack(out, LOCAL_STORAGE, cfg) {
      if (info->tls.size) {
         unsigned shift = panfrost_get_stack_shift(info->tls.size);

         cfg.tls_size = shift;
#if PAN_ARCH >= 9
         /* For now, always use packed TLS addressing. This is
          * better for the cache and requires no fix up code in
          * the shader. We may need to revisit this someday for
          * OpenCL generic pointer support.
          */
         cfg.tls_address_mode = MALI_ADDRESS_MODE_PACKED;

         assert((info->tls.ptr & 4095) == 0);
         cfg.tls_base_pointer = info->tls.ptr >> 8;
#else
         cfg.tls_base_pointer = info->tls.ptr;
#endif
      }

      if (info->wls.size) {
         assert(!(info->wls.ptr & 4095));
         assert((info->wls.ptr & 0xffffffff00000000ULL) ==
                ((info->wls.ptr + info->wls.size - 1) & 0xffffffff00000000ULL));
         cfg.wls_base_pointer = info->wls.ptr;
         unsigned wls_size = pan_wls_adjust_size(info->wls.size);
         cfg.wls_instances = info->wls.instances;
         cfg.wls_size_scale = util_logbase2(wls_size) + 1;
      } else {
         cfg.wls_instances = MALI_LOCAL_STORAGE_NO_WORKGROUP_MEM;
      }
   }
}

#if PAN_ARCH <= 5
static void
pan_emit_midgard_tiler(const struct pan_fb_info *fb,
                       const struct pan_tiler_context *tiler_ctx, void *out)
{
   bool hierarchy = !tiler_ctx->midgard.no_hierarchical_tiling;

   assert(tiler_ctx->midgard.polygon_list);

   pan_pack(out, TILER_CONTEXT, cfg) {
      unsigned header_size;

      if (tiler_ctx->midgard.disable) {
         cfg.hierarchy_mask =
            hierarchy ? MALI_MIDGARD_TILER_DISABLED : MALI_MIDGARD_TILER_USER;
         header_size = MALI_MIDGARD_TILER_MINIMUM_HEADER_SIZE;
         cfg.polygon_list_size = header_size + (hierarchy ? 0 : 4);
         cfg.heap_start = tiler_ctx->midgard.polygon_list;
         cfg.heap_end = tiler_ctx->midgard.polygon_list;
      } else {
         cfg.hierarchy_mask = panfrost_choose_hierarchy_mask(
            fb->width, fb->height, tiler_ctx->midgard.vertex_count, hierarchy);
         header_size = panfrost_tiler_header_size(
            fb->width, fb->height, cfg.hierarchy_mask, hierarchy);
         cfg.polygon_list_size = panfrost_tiler_full_size(
            fb->width, fb->height, cfg.hierarchy_mask, hierarchy);
         cfg.heap_start = tiler_ctx->midgard.heap.start;
         cfg.heap_end = cfg.heap_start + tiler_ctx->midgard.heap.size;
      }

      cfg.polygon_list = tiler_ctx->midgard.polygon_list;
      cfg.polygon_list_body = cfg.polygon_list + header_size;
   }
}
#endif

#if PAN_ARCH >= 5
static void
pan_emit_rt(const struct pan_fb_info *fb, unsigned layer_idx, unsigned idx,
            unsigned cbuf_offset, void *out)
{
   pan_pack(out, RENDER_TARGET, cfg) {
      pan_prepare_rt(fb, layer_idx, idx, cbuf_offset, &cfg);
   }
}

#if PAN_ARCH >= 6
/* All Bifrost and Valhall GPUs are affected by issue TSIX-2033:
 *
 *      Forcing clean_tile_writes breaks INTERSECT readbacks
 *
 * To work around this, use the frame shader mode ALWAYS instead of INTERSECT
 * if clean tile writes is forced. Since INTERSECT is a hint that the hardware
 * may ignore, this cannot affect correctness, only performance */

static enum mali_pre_post_frame_shader_mode
pan_fix_frame_shader_mode(enum mali_pre_post_frame_shader_mode mode,
                          bool force_clean_tile)
{
   if (force_clean_tile && mode == MALI_PRE_POST_FRAME_SHADER_MODE_INTERSECT)
      return MALI_PRE_POST_FRAME_SHADER_MODE_ALWAYS;
   else
      return mode;
}

/* Regardless of clean_tile_write_enable, the hardware writes clean tiles if
 * the effective tile size differs from the superblock size of any enabled AFBC
 * render target. Check this condition. */

static bool
pan_force_clean_write_rt(const struct pan_image_view *rt, unsigned tile_size)
{
   const struct pan_image *image = pan_image_view_get_rt_image(rt);
   if (!drm_is_afbc(image->layout.modifier))
      return false;

   unsigned superblock = panfrost_afbc_superblock_width(image->layout.modifier);

   assert(superblock >= 16);
   assert(tile_size <= 16 * 16);

   /* Tile size and superblock differ unless they are both 16x16 */
   return !(superblock == 16 && tile_size == 16 * 16);
}

static bool
pan_force_clean_write(const struct pan_fb_info *fb, unsigned tile_size)
{
   /* Maximum tile size */
   assert(tile_size <= 16 * 16);

   for (unsigned i = 0; i < fb->rt_count; ++i) {
      if (fb->rts[i].view && !fb->rts[i].discard &&
          pan_force_clean_write_rt(fb->rts[i].view, tile_size))
         return true;
   }

   if (fb->zs.view.zs && !fb->zs.discard.z &&
       pan_force_clean_write_rt(fb->zs.view.zs, tile_size))
      return true;

   if (fb->zs.view.s && !fb->zs.discard.s &&
       pan_force_clean_write_rt(fb->zs.view.s, tile_size))
      return true;

   return false;
}

#endif

unsigned
GENX(pan_emit_fbd)(const struct pan_fb_info *fb, unsigned layer_idx,
                   const struct pan_tls_info *tls,
                   const struct pan_tiler_context *tiler_ctx, void *out)
{
   void *fbd = out;
   void *rtd = out + pan_size(FRAMEBUFFER);

#if PAN_ARCH <= 5
   GENX(pan_emit_tls)(tls, pan_section_ptr(fbd, FRAMEBUFFER, LOCAL_STORAGE));
#endif

   unsigned bytes_per_pixel = pan_cbuf_bytes_per_pixel(fb);
   unsigned tile_size =
      pan_select_max_tile_size(fb->tile_buf_budget, bytes_per_pixel);

   /* Clamp tile size to hardware limits */
   tile_size = MIN2(tile_size, 16 * 16);
   assert(tile_size >= 4 * 4);

   /* Colour buffer allocations must be 1K aligned. */
   unsigned cbuf_allocation = ALIGN_POT(bytes_per_pixel * tile_size, 1024);
   assert(cbuf_allocation <= fb->tile_buf_budget && "tile too big");
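   /* Illustrative numbers only: a single blendable RT at 4 samples costs
    * 4 bytes * 4 samples = 16 bytes per pixel; with a 16 KiB budget that
    * allows 16384 >> 4 = 1024 pixels, clamped to the 256-pixel (16x16)
    * maximum, i.e. a 16 * 256 = 4096-byte colour buffer allocation. */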

   int crc_rt = GENX(pan_select_crc_rt)(fb, tile_size);
   bool has_zs_crc_ext = (fb->zs.view.zs || fb->zs.view.s || crc_rt >= 0);

   pan_section_pack(fbd, FRAMEBUFFER, PARAMETERS, cfg) {
#if PAN_ARCH >= 6
      bool force_clean_write = pan_force_clean_write(fb, tile_size);

      cfg.sample_locations = fb->sample_positions;
      cfg.pre_frame_0 = pan_fix_frame_shader_mode(fb->bifrost.pre_post.modes[0],
                                                  force_clean_write);
      cfg.pre_frame_1 = pan_fix_frame_shader_mode(fb->bifrost.pre_post.modes[1],
                                                  force_clean_write);
      cfg.post_frame = pan_fix_frame_shader_mode(fb->bifrost.pre_post.modes[2],
                                                 force_clean_write);
      cfg.frame_shader_dcds = fb->bifrost.pre_post.dcds.gpu;
      cfg.tiler =
         PAN_ARCH >= 9 ? tiler_ctx->valhall.desc : tiler_ctx->bifrost.desc;
#endif
      cfg.width = fb->width;
      cfg.height = fb->height;
      cfg.bound_max_x = fb->width - 1;
      cfg.bound_max_y = fb->height - 1;

      cfg.effective_tile_size = tile_size;
      cfg.tie_break_rule = MALI_TIE_BREAK_RULE_MINUS_180_IN_0_OUT;
      cfg.render_target_count = MAX2(fb->rt_count, 1);

      /* Default to 24 bit depth if there's no surface. */
      cfg.z_internal_format =
         fb->zs.view.zs ? panfrost_get_z_internal_format(fb->zs.view.zs->format)
                        : MALI_Z_INTERNAL_FORMAT_D24;

      cfg.z_clear = fb->zs.clear_value.depth;
      cfg.s_clear = fb->zs.clear_value.stencil;
      cfg.color_buffer_allocation = cbuf_allocation;

      /* The force_samples setting dictates the sample-count that is used
       * for rasterization, and works like D3D11's ForcedSampleCount feature:
       *
       * - If force_samples == 0: Let nr_samples dictate sample count
       * - If force_samples == 1: force single-sampled rasterization
       * - If force_samples >= 2: force multi-sampled rasterization
       *
       * This can be used to read SYSTEM_VALUE_SAMPLE_MASK_IN from the
       * fragment shader, even when performing single-sampled rendering.
       */
      if (!fb->force_samples) {
         cfg.sample_count = fb->nr_samples;
         cfg.sample_pattern = pan_sample_pattern(fb->nr_samples);
      } else if (fb->force_samples == 1) {
         cfg.sample_count = fb->nr_samples;
         cfg.sample_pattern = pan_sample_pattern(1);
      } else {
         cfg.sample_count = 1;
         cfg.sample_pattern = pan_sample_pattern(fb->force_samples);
      }

      cfg.z_write_enable = (fb->zs.view.zs && !fb->zs.discard.z);
      cfg.s_write_enable = (fb->zs.view.s && !fb->zs.discard.s);
      cfg.has_zs_crc_extension = has_zs_crc_ext;

      if (crc_rt >= 0) {
         bool *valid = fb->rts[crc_rt].crc_valid;
         bool full = !fb->extent.minx && !fb->extent.miny &&
                     fb->extent.maxx == (fb->width - 1) &&
                     fb->extent.maxy == (fb->height - 1);

         cfg.crc_read_enable = *valid;

         /* If the data is currently invalid, still write CRC
          * data if we are doing a full write, so that it is
          * valid for next time. */
         cfg.crc_write_enable = *valid || full;

         *valid |= full;
      }

#if PAN_ARCH >= 9
      cfg.point_sprite_coord_origin_max_y = fb->sprite_coord_origin;
      cfg.first_provoking_vertex = fb->first_provoking_vertex;

      /* internal_layer_index is used to select the right primitive list in the
       * tiler context, and frame_arg is the value that's passed to the fragment
       * shader through r62-r63, which we use to pass gl_Layer. Since layer_idx
       * only takes 8 bits, we might use the extra 56 bits we have in
       * frame_argument to pass other information to the fragment shader at
       * some point. */
      cfg.internal_layer_index = layer_idx;
      cfg.frame_argument = layer_idx;
#endif
   }

#if PAN_ARCH >= 6
   pan_section_pack(fbd, FRAMEBUFFER, PADDING, padding)
      ;
#else
   pan_emit_midgard_tiler(fb, tiler_ctx,
                          pan_section_ptr(fbd, FRAMEBUFFER, TILER));

   /* All weights set to 0, nothing to do here */
   pan_section_pack(fbd, FRAMEBUFFER, TILER_WEIGHTS, w)
      ;
#endif

   if (has_zs_crc_ext) {
      pan_emit_zs_crc_ext(fb, layer_idx, crc_rt, out + pan_size(FRAMEBUFFER));
      rtd += pan_size(ZS_CRC_EXTENSION);
   }

   unsigned rt_count = MAX2(fb->rt_count, 1);
   unsigned cbuf_offset = 0;
   for (unsigned i = 0; i < rt_count; i++) {
      pan_emit_rt(fb, layer_idx, i, cbuf_offset, rtd);
      rtd += pan_size(RENDER_TARGET);
      if (!fb->rts[i].view)
         continue;

      cbuf_offset += pan_bytes_per_pixel_tib(fb->rts[i].view->format) *
                     tile_size * pan_image_view_get_nr_samples(fb->rts[i].view);

      if (i != crc_rt)
         *(fb->rts[i].crc_valid) = false;
   }

   struct mali_framebuffer_pointer_packed tag;
   pan_pack(tag.opaque, FRAMEBUFFER_POINTER, cfg) {
      cfg.zs_crc_extension_present = has_zs_crc_ext;
      cfg.render_target_count = MAX2(fb->rt_count, 1);
   }
   return tag.opaque[0];
}
#else /* PAN_ARCH == 4 */
static enum mali_color_format
pan_sfbd_raw_format(unsigned bits)
{
   /* clang-format off */
   switch (bits) {
   case  16: return MALI_COLOR_FORMAT_1_16B_CHANNEL;
   case  32: return MALI_COLOR_FORMAT_1_32B_CHANNEL;
   case  48: return MALI_COLOR_FORMAT_3_16B_CHANNELS;
   case  64: return MALI_COLOR_FORMAT_2_32B_CHANNELS;
   case  96: return MALI_COLOR_FORMAT_3_32B_CHANNELS;
   case 128: return MALI_COLOR_FORMAT_4_32B_CHANNELS;
   default: unreachable("invalid raw bpp");
   }
   /* clang-format on */
}

unsigned
GENX(pan_emit_fbd)(const struct pan_fb_info *fb, unsigned layer_idx,
                   const struct pan_tls_info *tls,
                   const struct pan_tiler_context *tiler_ctx, void *fbd)
{
   assert(fb->rt_count <= 1);

   GENX(pan_emit_tls)(tls, pan_section_ptr(fbd, FRAMEBUFFER, LOCAL_STORAGE));
   pan_section_pack(fbd, FRAMEBUFFER, PARAMETERS, cfg) {
      cfg.bound_max_x = fb->width - 1;
      cfg.bound_max_y = fb->height - 1;
      cfg.dithering_enable = true;
      cfg.clean_pixel_write_enable = true;
      cfg.tie_break_rule = MALI_TIE_BREAK_RULE_MINUS_180_IN_0_OUT;
      if (fb->rts[0].clear) {
         cfg.clear_color_0 = fb->rts[0].clear_value[0];
         cfg.clear_color_1 = fb->rts[0].clear_value[1];
         cfg.clear_color_2 = fb->rts[0].clear_value[2];
         cfg.clear_color_3 = fb->rts[0].clear_value[3];
      }

      if (fb->zs.clear.z)
         cfg.z_clear = fb->zs.clear_value.depth;

      if (fb->zs.clear.s)
         cfg.s_clear = fb->zs.clear_value.stencil;

      if (fb->rt_count && fb->rts[0].view) {
         const struct pan_image_view *rt = fb->rts[0].view;
         const struct pan_image *image = pan_image_view_get_rt_image(rt);

         const struct util_format_description *desc =
            util_format_description(rt->format);

         /* The swizzle for rendering is inverted from texturing */
         unsigned char swizzle[4];
         panfrost_invert_swizzle(desc->swizzle, swizzle);
         cfg.swizzle = panfrost_translate_swizzle_4(swizzle);

         struct pan_blendable_format fmt =
            *GENX(panfrost_blendable_format_from_pipe_format)(rt->format);

         if (fmt.internal) {
            cfg.internal_format = fmt.internal;
            cfg.color_writeback_format = fmt.writeback;
         } else {
            /* Construct RAW internal/writeback */
            unsigned bits = desc->block.bits;

            cfg.internal_format = MALI_COLOR_BUFFER_INTERNAL_FORMAT_RAW_VALUE;
            cfg.color_writeback_format = pan_sfbd_raw_format(bits);
         }

         unsigned level = rt->first_level;
         struct pan_surface surf;

         pan_iview_get_surface(rt, 0, 0, 0, &surf);

         cfg.color_write_enable = !fb->rts[0].discard;
         cfg.color_writeback.base = surf.data;
         cfg.color_writeback.row_stride =
            image->layout.slices[level].row_stride;

         cfg.color_block_format = mod_to_block_fmt(image->layout.modifier);
         assert(cfg.color_block_format == MALI_BLOCK_FORMAT_LINEAR ||
                cfg.color_block_format ==
                   MALI_BLOCK_FORMAT_TILED_U_INTERLEAVED);

         if (pan_image_view_has_crc(rt)) {
            const struct pan_image_slice_layout *slice =
               &image->layout.slices[level];

            cfg.crc_buffer.row_stride = slice->crc.stride;
            cfg.crc_buffer.base =
               image->data.base + image->data.offset + slice->crc.offset;
         }
      }

      if (fb->zs.view.zs) {
         const struct pan_image_view *zs = fb->zs.view.zs;
         const struct pan_image *image = pan_image_view_get_zs_image(zs);
         unsigned level = zs->first_level;
         struct pan_surface surf;

         pan_iview_get_surface(zs, 0, 0, 0, &surf);

         cfg.zs_write_enable = !fb->zs.discard.z;
         cfg.zs_writeback.base = surf.data;
         cfg.zs_writeback.row_stride = image->layout.slices[level].row_stride;
         cfg.zs_block_format = mod_to_block_fmt(image->layout.modifier);
         assert(cfg.zs_block_format == MALI_BLOCK_FORMAT_LINEAR ||
                cfg.zs_block_format == MALI_BLOCK_FORMAT_TILED_U_INTERLEAVED);

         cfg.zs_format = translate_zs_format(zs->format);
      }

      cfg.sample_count = fb->nr_samples;

      if (fb->rt_count)
         cfg.msaa = mali_sampling_mode(fb->rts[0].view);
   }

   pan_emit_midgard_tiler(fb, tiler_ctx,
                          pan_section_ptr(fbd, FRAMEBUFFER, TILER));

   /* All weights set to 0, nothing to do here */
   pan_section_pack(fbd, FRAMEBUFFER, TILER_WEIGHTS, w)
      ;

   pan_section_pack(fbd, FRAMEBUFFER, PADDING_1, padding)
      ;
   pan_section_pack(fbd, FRAMEBUFFER, PADDING_2, padding)
      ;
   return 0;
}
#endif

#if PAN_ARCH <= 9
void
GENX(pan_emit_fragment_job_payload)(const struct pan_fb_info *fb, mali_ptr fbd,
                                    void *out)
{
   pan_section_pack(out, FRAGMENT_JOB, PAYLOAD, payload) {
      payload.bound_min_x = fb->extent.minx >> MALI_TILE_SHIFT;
      payload.bound_min_y = fb->extent.miny >> MALI_TILE_SHIFT;
      payload.bound_max_x = fb->extent.maxx >> MALI_TILE_SHIFT;
      payload.bound_max_y = fb->extent.maxy >> MALI_TILE_SHIFT;
      payload.framebuffer = fbd;

#if PAN_ARCH >= 5
      if (fb->tile_map.base) {
         payload.has_tile_enable_map = true;
         payload.tile_enable_map = fb->tile_map.base;
         payload.tile_enable_map_row_stride = fb->tile_map.stride;
      }
#endif
   }
}
#endif