1 /*
2 * Copyright (C) 2020-2021 Collabora, Ltd.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 *
23 * Authors:
24 * Alyssa Rosenzweig <[email protected]>
25 * Boris Brezillon <[email protected]>
26 */
27
28 #include "pan_blitter.h"
29 #include <math.h>
30 #include <stdio.h>
31 #include "compiler/nir/nir_builder.h"
32 #include "util/u_math.h"
33 #include "pan_blend.h"
34 #include "pan_desc.h"
35 #include "pan_encoder.h"
36 #include "pan_jc.h"
37 #include "pan_pool.h"
38 #include "pan_shader.h"
39 #include "pan_texture.h"
40
41 #if PAN_ARCH >= 6
42 /* On Midgard, the native blit infrastructure (via MFBD preloads) is broken or
43 * missing in many cases. We instead use software paths as fallbacks to
44 * implement blits, which are done as TILER jobs. No vertex shader is
45 * necessary since we can supply screen-space coordinates directly.
46 *
47 * This is primarily designed as a fallback for preloads but could be extended
48 * for other clears/blits if needed in the future. */
49
50 static enum mali_register_file_format
51 blit_type_to_reg_fmt(nir_alu_type in)
52 {
53 switch (in) {
54 case nir_type_float32:
55 return MALI_REGISTER_FILE_FORMAT_F32;
56 case nir_type_int32:
57 return MALI_REGISTER_FILE_FORMAT_I32;
58 case nir_type_uint32:
59 return MALI_REGISTER_FILE_FORMAT_U32;
60 default:
61 unreachable("Invalid blit type");
62 }
63 }
64 #endif
65
66 /* On Valhall, the driver gives the hardware a table of resource tables.
67 * Resources are addressed as the index of the table together with the index of
68 * the resource within the table. For simplicity, we put one type of resource
69 * in each table and fix the numbering of the tables.
70 *
71 * This numbering is arbitrary.
72 */
73 enum pan_blit_resource_table {
74 PAN_BLIT_TABLE_ATTRIBUTE = 0,
75 PAN_BLIT_TABLE_ATTRIBUTE_BUFFER,
76 PAN_BLIT_TABLE_SAMPLER,
77 PAN_BLIT_TABLE_TEXTURE,
78
79 PAN_BLIT_NUM_RESOURCE_TABLES
80 };
81
82 struct pan_blit_surface {
83 gl_frag_result loc : 4;
84 nir_alu_type type : 8;
85 enum mali_texture_dimension dim : 2;
86 bool array : 1;
87 unsigned src_samples : 5;
88 unsigned dst_samples : 5;
89 };
90
91 struct pan_blit_shader_key {
92 struct pan_blit_surface surfaces[8];
93 };
94
95 struct pan_blit_shader_data {
96 struct pan_blit_shader_key key;
97 struct pan_shader_info info;
98 mali_ptr address;
99 unsigned blend_ret_offsets[8];
100 nir_alu_type blend_types[8];
101 };
102
103 struct pan_blit_blend_shader_key {
104 enum pipe_format format;
105 nir_alu_type type;
106 unsigned rt : 3;
107 unsigned nr_samples : 5;
108 unsigned pad : 24;
109 };
110
111 struct pan_blit_blend_shader_data {
112 struct pan_blit_blend_shader_key key;
113 mali_ptr address;
114 };
115
116 struct pan_blit_rsd_key {
117 struct {
118 enum pipe_format format;
119 nir_alu_type type : 8;
120 unsigned src_samples : 5;
121 unsigned dst_samples : 5;
122 enum mali_texture_dimension dim : 2;
123 bool array : 1;
124 } rts[8], z, s;
125 };
126
127 struct pan_blit_rsd_data {
128 struct pan_blit_rsd_key key;
129 mali_ptr address;
130 };
131
132 #if PAN_ARCH >= 5
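/* Emit one BLEND descriptor for render target 'rt': disabled if there is no
 * destination view, otherwise a plain source-copy equation, or (on Midgard)
 * a pre-compiled blend shader when the format is not fixed-function
 * blendable. */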
133 static void
134 pan_blitter_emit_blend(unsigned rt,
135 const struct pan_image_view *iview,
136 const struct pan_blit_shader_data *blit_shader,
137 mali_ptr blend_shader, void *out)
138 {
139 assert(blend_shader == 0 || PAN_ARCH <= 5);
140
141 pan_pack(out, BLEND, cfg) {
142 if (!iview) {
143 cfg.enable = false;
144 #if PAN_ARCH >= 6
145 cfg.internal.mode = MALI_BLEND_MODE_OFF;
146 #endif
147 continue;
148 }
149
150 cfg.round_to_fb_precision = true;
151 cfg.srgb = util_format_is_srgb(iview->format);
152
153 #if PAN_ARCH >= 6
154 cfg.internal.mode = MALI_BLEND_MODE_OPAQUE;
155 #endif
156
157 if (!blend_shader) {
158 cfg.equation.rgb.a = MALI_BLEND_OPERAND_A_SRC;
159 cfg.equation.rgb.b = MALI_BLEND_OPERAND_B_SRC;
160 cfg.equation.rgb.c = MALI_BLEND_OPERAND_C_ZERO;
161 cfg.equation.alpha.a = MALI_BLEND_OPERAND_A_SRC;
162 cfg.equation.alpha.b = MALI_BLEND_OPERAND_B_SRC;
163 cfg.equation.alpha.c = MALI_BLEND_OPERAND_C_ZERO;
164 cfg.equation.color_mask = 0xf;
165
166 #if PAN_ARCH >= 6
167 nir_alu_type type = blit_shader->key.surfaces[rt].type;
168
169 cfg.internal.fixed_function.num_comps = 4;
170 cfg.internal.fixed_function.conversion.memory_format = GENX(
171 panfrost_dithered_format_from_pipe_format)(iview->format, false);
172 cfg.internal.fixed_function.conversion.register_format =
173 blit_type_to_reg_fmt(type);
174
175 cfg.internal.fixed_function.rt = rt;
176 #endif
177 } else {
178 #if PAN_ARCH <= 5
179 cfg.blend_shader = true;
180 cfg.shader_pc = blend_shader;
181 #endif
182 }
183 }
184 }
185 #endif
186
187 struct pan_blitter_views {
188 unsigned rt_count;
189 const struct pan_image_view *src_rts[8];
190 const struct pan_image_view *dst_rts[8];
191 const struct pan_image_view *src_z;
192 const struct pan_image_view *dst_z;
193 const struct pan_image_view *src_s;
194 const struct pan_image_view *dst_s;
195 };
196
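/* Returns true if any destination view (colour, depth or stencil) is
 * multisampled. */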
197 static bool
198 pan_blitter_is_ms(struct pan_blitter_views *views)
199 {
200 for (unsigned i = 0; i < views->rt_count; i++) {
201 if (views->dst_rts[i]) {
202 if (pan_image_view_get_nr_samples(views->dst_rts[i]) > 1)
203 return true;
204 }
205 }
206
207 if (views->dst_z && pan_image_view_get_nr_samples(views->dst_z) > 1)
208 return true;
209
210 if (views->dst_s && pan_image_view_get_nr_samples(views->dst_s) > 1)
211 return true;
212
213 return false;
214 }
215
216 #if PAN_ARCH >= 5
217 static void
218 pan_blitter_emit_blends(const struct pan_blit_shader_data *blit_shader,
219 struct pan_blitter_views *views,
220 mali_ptr *blend_shaders, void *out)
221 {
222 for (unsigned i = 0; i < MAX2(views->rt_count, 1); ++i) {
223 void *dest = out + pan_size(BLEND) * i;
224 const struct pan_image_view *rt_view = views->dst_rts[i];
225 mali_ptr blend_shader = blend_shaders ? blend_shaders[i] : 0;
226
227 pan_blitter_emit_blend(i, rt_view, blit_shader, blend_shader, dest);
228 }
229 }
230 #endif
231
232 #if PAN_ARCH <= 7
233 static void
234 pan_blitter_emit_rsd(const struct pan_blit_shader_data *blit_shader,
235 struct pan_blitter_views *views, mali_ptr *blend_shaders,
236 void *out)
237 {
238 UNUSED bool zs = (views->dst_z || views->dst_s);
239 bool ms = pan_blitter_is_ms(views);
240
241 pan_pack(out, RENDERER_STATE, cfg) {
242 assert(blit_shader->address);
243 pan_shader_prepare_rsd(&blit_shader->info, blit_shader->address, &cfg);
244
245 cfg.multisample_misc.sample_mask = 0xFFFF;
246 cfg.multisample_misc.multisample_enable = ms;
247 cfg.multisample_misc.evaluate_per_sample = ms;
248 cfg.multisample_misc.depth_write_mask = views->dst_z != NULL;
249 cfg.multisample_misc.depth_function = MALI_FUNC_ALWAYS;
250
251 cfg.stencil_mask_misc.stencil_enable = views->dst_s != NULL;
252 cfg.stencil_mask_misc.stencil_mask_front = 0xFF;
253 cfg.stencil_mask_misc.stencil_mask_back = 0xFF;
254 cfg.stencil_front.compare_function = MALI_FUNC_ALWAYS;
255 cfg.stencil_front.stencil_fail = MALI_STENCIL_OP_REPLACE;
256 cfg.stencil_front.depth_fail = MALI_STENCIL_OP_REPLACE;
257 cfg.stencil_front.depth_pass = MALI_STENCIL_OP_REPLACE;
258 cfg.stencil_front.mask = 0xFF;
259 cfg.stencil_back = cfg.stencil_front;
260
261 #if PAN_ARCH >= 6
262 if (zs) {
263 /* Writing Z/S requires late updates */
264 cfg.properties.zs_update_operation = MALI_PIXEL_KILL_FORCE_LATE;
265 cfg.properties.pixel_kill_operation = MALI_PIXEL_KILL_FORCE_LATE;
266 } else {
267 /* Skipping ATEST requires forcing Z/S */
268 cfg.properties.zs_update_operation = MALI_PIXEL_KILL_STRONG_EARLY;
269 cfg.properties.pixel_kill_operation = MALI_PIXEL_KILL_FORCE_EARLY;
270 }
271
272 /* However, while shaders writing Z/S can normally be killed, on v6
273 * for frame shaders it can cause GPU timeouts, so only allow colour
274 * blit shaders to be killed. */
275 cfg.properties.allow_forward_pixel_to_kill = !zs;
276
277 if (PAN_ARCH == 6)
278 cfg.properties.allow_forward_pixel_to_be_killed = !zs;
279 #else
280
281 mali_ptr blend_shader =
282 blend_shaders
283 ? panfrost_last_nonnull(blend_shaders, MAX2(views->rt_count, 1))
284 : 0;
285
286 cfg.properties.work_register_count = 4;
287 cfg.properties.force_early_z = !zs;
288 cfg.stencil_mask_misc.alpha_test_compare_function = MALI_FUNC_ALWAYS;
289
290 /* Set even on v5 for erratum workaround */
291 #if PAN_ARCH == 5
292 cfg.legacy_blend_shader = blend_shader;
293 #else
294 cfg.blend_shader = blend_shader;
295 cfg.stencil_mask_misc.write_enable = true;
296 cfg.stencil_mask_misc.dither_disable = true;
297 cfg.multisample_misc.blend_shader = !!blend_shader;
298 cfg.blend_shader = blend_shader;
299 if (!cfg.multisample_misc.blend_shader) {
300 cfg.blend_equation.rgb.a = MALI_BLEND_OPERAND_A_SRC;
301 cfg.blend_equation.rgb.b = MALI_BLEND_OPERAND_B_SRC;
302 cfg.blend_equation.rgb.c = MALI_BLEND_OPERAND_C_ZERO;
303 cfg.blend_equation.alpha.a = MALI_BLEND_OPERAND_A_SRC;
304 cfg.blend_equation.alpha.b = MALI_BLEND_OPERAND_B_SRC;
305 cfg.blend_equation.alpha.c = MALI_BLEND_OPERAND_C_ZERO;
306 cfg.blend_constant = 0;
307
308 if (views->dst_rts[0] != NULL) {
309 cfg.stencil_mask_misc.srgb =
310 util_format_is_srgb(views->dst_rts[0]->format);
311 cfg.blend_equation.color_mask = 0xf;
312 }
313 }
314 #endif
315 #endif
316 }
317
318 #if PAN_ARCH >= 5
319 pan_blitter_emit_blends(blit_shader, views, blend_shaders,
320 out + pan_size(RENDERER_STATE));
321 #endif
322 }
323 #endif
324
325 #if PAN_ARCH <= 5
326 static void
327 pan_blitter_get_blend_shaders(struct pan_blitter_cache *cache,
328 unsigned rt_count,
329 const struct pan_image_view **rts,
330 const struct pan_blit_shader_data *blit_shader,
331 mali_ptr *blend_shaders)
332 {
333 if (!rt_count)
334 return;
335
336 struct pan_blend_state blend_state = {
337 .rt_count = rt_count,
338 };
339
340 for (unsigned i = 0; i < rt_count; i++) {
341 if (!rts[i] || panfrost_blendable_formats_v7[rts[i]->format].internal)
342 continue;
343
344 struct pan_blit_blend_shader_key key = {
345 .format = rts[i]->format,
346 .rt = i,
347 .nr_samples = pan_image_view_get_nr_samples(rts[i]),
348 .type = blit_shader->blend_types[i],
349 };
350
351 pthread_mutex_lock(&cache->shaders.lock);
352 struct hash_entry *he =
353 _mesa_hash_table_search(cache->shaders.blend, &key);
354 struct pan_blit_blend_shader_data *blend_shader = he ? he->data : NULL;
355 if (blend_shader) {
356 blend_shaders[i] = blend_shader->address;
357 pthread_mutex_unlock(&cache->shaders.lock);
358 continue;
359 }
360
361 blend_shader =
362 rzalloc(cache->shaders.blend, struct pan_blit_blend_shader_data);
363 blend_shader->key = key;
364
365 blend_state.rts[i] = (struct pan_blend_rt_state){
366 .format = rts[i]->format,
367 .nr_samples = pan_image_view_get_nr_samples(rts[i]),
368 .equation =
369 {
370 .blend_enable = false,
371 .color_mask = 0xf,
372 },
373 };
374
375 pthread_mutex_lock(&cache->blend_shader_cache->lock);
376 struct pan_blend_shader_variant *b = GENX(pan_blend_get_shader_locked)(
377 cache->blend_shader_cache, &blend_state, blit_shader->blend_types[i],
378 nir_type_float32, /* unused */
379 i);
380
381 assert(b->work_reg_count <= 4);
382 struct panfrost_ptr bin =
383 pan_pool_alloc_aligned(cache->shaders.pool, b->binary.size, 64);
384 memcpy(bin.cpu, b->binary.data, b->binary.size);
385
386 blend_shader->address = bin.gpu | b->first_tag;
387 pthread_mutex_unlock(&cache->blend_shader_cache->lock);
388 _mesa_hash_table_insert(cache->shaders.blend, &blend_shader->key,
389 blend_shader);
390 pthread_mutex_unlock(&cache->shaders.lock);
391 blend_shaders[i] = blend_shader->address;
392 }
393 }
394 #endif
395
396 /*
397 * Early Mali GPUs did not respect sampler LOD clamps or bias, so the Midgard
398 * compiler inserts lowering code with a load_sampler_lod_parameters_pan sysval
399 * that we need to lower. Our samplers do not use LOD clamps or bias, so we
400 * lower to the identity settings and let constant folding get rid of the
401 * unnecessary lowering.
402 */
403 static bool
404 lower_sampler_parameters(nir_builder *b, nir_intrinsic_instr *intr,
405 UNUSED void *data)
406 {
407 if (intr->intrinsic != nir_intrinsic_load_sampler_lod_parameters_pan)
408 return false;
409
410 const nir_const_value constants[4] = {
411 nir_const_value_for_float(0.0f, 32), /* min_lod */
412 nir_const_value_for_float(INFINITY, 32), /* max_lod */
413 nir_const_value_for_float(0.0f, 32), /* lod_bias */
414 };
415
416 b->cursor = nir_after_instr(&intr->instr);
417 nir_def_rewrite_uses(&intr->def, nir_build_imm(b, 3, 32, constants));
418 return true;
419 }
420
421 static uint32_t
422 sampler_hw_index(uint32_t index)
423 {
424 return PAN_ARCH >= 9 ? pan_res_handle(PAN_BLIT_TABLE_SAMPLER, index) : index;
425 }
426
427 static uint32_t
428 tex_hw_index(uint32_t index)
429 {
430 return PAN_ARCH >= 9 ? pan_res_handle(PAN_BLIT_TABLE_TEXTURE, index) : index;
431 }
432
433 static uint32_t
434 attr_hw_index(uint32_t index)
435 {
436 return PAN_ARCH >= 9 ? pan_res_handle(PAN_BLIT_TABLE_ATTRIBUTE, index)
437 : index;
438 }
439
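/* Look up the blit fragment shader matching 'key' in the cache, building,
 * compiling and uploading a new NIR shader if there is no hit. */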
440 static const struct pan_blit_shader_data *
441 pan_blitter_get_blit_shader(struct pan_blitter_cache *cache,
442 const struct pan_blit_shader_key *key)
443 {
444 pthread_mutex_lock(&cache->shaders.lock);
445 struct hash_entry *he =
446 _mesa_hash_table_search(cache->shaders.blit, key);
447 struct pan_blit_shader_data *shader = he ? he->data : NULL;
448
449 if (shader)
450 goto out;
451
452 unsigned coord_comps = 0;
453 unsigned sig_offset = 0;
454 char sig[256];
455 bool first = true;
456 for (unsigned i = 0; i < ARRAY_SIZE(key->surfaces); i++) {
457 const char *type_str, *dim_str;
458 if (key->surfaces[i].type == nir_type_invalid)
459 continue;
460
461 switch (key->surfaces[i].type) {
462 case nir_type_float32:
463 type_str = "float";
464 break;
465 case nir_type_uint32:
466 type_str = "uint";
467 break;
468 case nir_type_int32:
469 type_str = "int";
470 break;
471 default:
472 unreachable("Invalid type\n");
473 }
474
475 switch (key->surfaces[i].dim) {
476 case MALI_TEXTURE_DIMENSION_CUBE:
477 dim_str = "cube";
478 break;
479 case MALI_TEXTURE_DIMENSION_1D:
480 dim_str = "1D";
481 break;
482 case MALI_TEXTURE_DIMENSION_2D:
483 dim_str = "2D";
484 break;
485 case MALI_TEXTURE_DIMENSION_3D:
486 dim_str = "3D";
487 break;
488 default:
489 unreachable("Invalid dim\n");
490 }
491
492 coord_comps = MAX2(coord_comps, (key->surfaces[i].dim ?: 3) +
493 (key->surfaces[i].array ? 1 : 0));
494
495 if (sig_offset >= sizeof(sig)) {
496 first = false;
497 continue;
498 }
499
500 sig_offset +=
501 snprintf(sig + sig_offset, sizeof(sig) - sig_offset,
502 "%s[%s;%s;%s%s;src_samples=%d,dst_samples=%d]",
503 first ? "" : ",", gl_frag_result_name(key->surfaces[i].loc),
504 type_str, dim_str, key->surfaces[i].array ? "[]" : "",
505 key->surfaces[i].src_samples, key->surfaces[i].dst_samples);
506
507 first = false;
508 }
509
510 nir_builder b = nir_builder_init_simple_shader(
511 MESA_SHADER_FRAGMENT, GENX(pan_shader_get_compiler_options)(),
512 "pan_blit(%s)", sig);
513
514 nir_def *barycentric = nir_load_barycentric(
515 &b, nir_intrinsic_load_barycentric_pixel, INTERP_MODE_SMOOTH);
516 nir_def *coord = nir_load_interpolated_input(
517 &b, coord_comps, 32, barycentric, nir_imm_int(&b, 0),
518 .base = attr_hw_index(0), .dest_type = nir_type_float32,
519 .io_semantics.location = VARYING_SLOT_VAR0, .io_semantics.num_slots = 1);
520
521 unsigned active_count = 0;
522 for (unsigned i = 0; i < ARRAY_SIZE(key->surfaces); i++) {
523 if (key->surfaces[i].type == nir_type_invalid)
524 continue;
525
526 /* Resolve operations only work for N -> 1 samples. */
527 assert(key->surfaces[i].dst_samples == 1 ||
528 key->surfaces[i].src_samples == key->surfaces[i].dst_samples);
529
530 bool resolve =
531 key->surfaces[i].src_samples > key->surfaces[i].dst_samples;
532 bool ms = key->surfaces[i].src_samples > 1;
533 enum glsl_sampler_dim sampler_dim;
534
535 switch (key->surfaces[i].dim) {
536 case MALI_TEXTURE_DIMENSION_1D:
537 sampler_dim = GLSL_SAMPLER_DIM_1D;
538 break;
539 case MALI_TEXTURE_DIMENSION_2D:
540 sampler_dim = ms ? GLSL_SAMPLER_DIM_MS : GLSL_SAMPLER_DIM_2D;
541 break;
542 case MALI_TEXTURE_DIMENSION_3D:
543 sampler_dim = GLSL_SAMPLER_DIM_3D;
544 break;
545 case MALI_TEXTURE_DIMENSION_CUBE:
546 sampler_dim = GLSL_SAMPLER_DIM_CUBE;
547 break;
548 }
549
550 nir_def *res = NULL;
551
552 if (resolve) {
553 /* When resolving a float type, we need to calculate
554 * the average of all samples. For integer resolve, GL
555 * and Vulkan say that one sample should be chosen
556 * without telling which. Let's just pick the first one
557 * in that case.
558 */
559 nir_alu_type base_type =
560 nir_alu_type_get_base_type(key->surfaces[i].type);
561 unsigned nsamples =
562 base_type == nir_type_float ? key->surfaces[i].src_samples : 1;
563
564 for (unsigned s = 0; s < nsamples; s++) {
565 nir_tex_instr *tex = nir_tex_instr_create(b.shader, 3);
566
567 tex->op = nir_texop_txf_ms;
568 tex->dest_type = key->surfaces[i].type;
569 tex->texture_index = tex_hw_index(active_count);
570 tex->sampler_index = sampler_hw_index(0);
571 tex->is_array = key->surfaces[i].array;
572 tex->sampler_dim = sampler_dim;
573
574 tex->src[0] =
575 nir_tex_src_for_ssa(nir_tex_src_coord, nir_f2i32(&b, coord));
576 tex->coord_components = coord_comps;
577
578 tex->src[1] =
579 nir_tex_src_for_ssa(nir_tex_src_ms_index, nir_imm_int(&b, s));
580
581 tex->src[2] =
582 nir_tex_src_for_ssa(nir_tex_src_lod, nir_imm_int(&b, 0));
583 nir_def_init(&tex->instr, &tex->def, 4, 32);
584 nir_builder_instr_insert(&b, &tex->instr);
585
586 res = res ? nir_fadd(&b, res, &tex->def) : &tex->def;
587 }
588
589 if (base_type == nir_type_float)
590 res = nir_fmul_imm(&b, res, 1.0f / nsamples);
591 } else {
592 nir_tex_instr *tex = nir_tex_instr_create(b.shader, ms ? 3 : 1);
593
594 tex->dest_type = key->surfaces[i].type;
595 tex->texture_index = tex_hw_index(active_count);
596 tex->sampler_index = sampler_hw_index(0);
597 tex->is_array = key->surfaces[i].array;
598 tex->sampler_dim = sampler_dim;
599
600 if (ms) {
601 tex->op = nir_texop_txf_ms;
602
603 tex->src[0] =
604 nir_tex_src_for_ssa(nir_tex_src_coord, nir_f2i32(&b, coord));
605 tex->coord_components = coord_comps;
606
607 tex->src[1] = nir_tex_src_for_ssa(nir_tex_src_ms_index,
608 nir_load_sample_id(&b));
609
610 tex->src[2] =
611 nir_tex_src_for_ssa(nir_tex_src_lod, nir_imm_int(&b, 0));
612 } else {
613 tex->op = nir_texop_txl;
614
615 tex->src[0] = nir_tex_src_for_ssa(nir_tex_src_coord, coord);
616 tex->coord_components = coord_comps;
617 }
618
619 nir_def_init(&tex->instr, &tex->def, 4, 32);
620 nir_builder_instr_insert(&b, &tex->instr);
621 res = &tex->def;
622 }
623
624 assert(res);
625
626 if (key->surfaces[i].loc >= FRAG_RESULT_DATA0) {
627 nir_store_output(
628 &b, res, nir_imm_int(&b, 0), .base = active_count,
629 .src_type = key->surfaces[i].type,
630 .io_semantics.location = key->surfaces[i].loc,
631 .io_semantics.num_slots = 1,
632 .write_mask = nir_component_mask(res->num_components));
633 } else {
634 unsigned c = key->surfaces[i].loc == FRAG_RESULT_STENCIL ? 1 : 0;
635 nir_store_output(
636 &b, nir_channel(&b, res, c), nir_imm_int(&b, 0),
637 .base = active_count, .src_type = key->surfaces[i].type,
638 .io_semantics.location = key->surfaces[i].loc,
639 .io_semantics.num_slots = 1, .write_mask = nir_component_mask(1));
640 }
641 active_count++;
642 }
643
644 struct panfrost_compile_inputs inputs = {
645 .gpu_id = cache->gpu_id,
646 .is_blit = true,
647 .no_idvs = true,
648 };
649 struct util_dynarray binary;
650
651 util_dynarray_init(&binary, NULL);
652
653 shader = rzalloc(cache->shaders.blit, struct pan_blit_shader_data);
654
655 nir_shader_gather_info(b.shader, nir_shader_get_entrypoint(b.shader));
656
657 for (unsigned i = 0; i < active_count; ++i)
658 BITSET_SET(b.shader->info.textures_used, i);
659
660 pan_shader_preprocess(b.shader, inputs.gpu_id);
661
662 if (PAN_ARCH == 4) {
663 NIR_PASS_V(b.shader, nir_shader_intrinsics_pass, lower_sampler_parameters,
664 nir_metadata_control_flow, NULL);
665 }
666
667 GENX(pan_shader_compile)(b.shader, &inputs, &binary, &shader->info);
668
669 shader->key = *key;
670 shader->address =
671 pan_pool_upload_aligned(cache->shaders.pool, binary.data,
672 binary.size, PAN_ARCH >= 6 ? 128 : 64);
673
674 util_dynarray_fini(&binary);
675 ralloc_free(b.shader);
676
677 #if PAN_ARCH >= 6
678 for (unsigned i = 0; i < ARRAY_SIZE(shader->blend_ret_offsets); i++) {
679 shader->blend_ret_offsets[i] =
680 shader->info.bifrost.blend[i].return_offset;
681 shader->blend_types[i] = shader->info.bifrost.blend[i].type;
682 }
683 #endif
684
685 _mesa_hash_table_insert(cache->shaders.blit, &shader->key, shader);
686
687 out:
688 pthread_mutex_unlock(&cache->shaders.lock);
689 return shader;
690 }
691
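/* Derive the blit shader key from the source/destination views: depth goes in
 * surface 0, stencil in surface 1, and colour targets in their RT slots. */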
692 static struct pan_blit_shader_key
693 pan_blitter_get_key(struct pan_blitter_views *views)
694 {
695 struct pan_blit_shader_key key = {0};
696
697 if (views->src_z) {
698 assert(views->dst_z);
699 key.surfaces[0].loc = FRAG_RESULT_DEPTH;
700 key.surfaces[0].type = nir_type_float32;
701 key.surfaces[0].src_samples = pan_image_view_get_nr_samples(views->src_z);
702 key.surfaces[0].dst_samples = pan_image_view_get_nr_samples(views->dst_z);
703 key.surfaces[0].dim = views->src_z->dim;
704 key.surfaces[0].array =
705 views->src_z->first_layer != views->src_z->last_layer;
706 }
707
708 if (views->src_s) {
709 assert(views->dst_s);
710 key.surfaces[1].loc = FRAG_RESULT_STENCIL;
711 key.surfaces[1].type = nir_type_uint32;
712 key.surfaces[1].src_samples = pan_image_view_get_nr_samples(views->src_s);
713 key.surfaces[1].dst_samples = pan_image_view_get_nr_samples(views->dst_s);
714 key.surfaces[1].dim = views->src_s->dim;
715 key.surfaces[1].array =
716 views->src_s->first_layer != views->src_s->last_layer;
717 }
718
719 for (unsigned i = 0; i < views->rt_count; i++) {
720 if (!views->src_rts[i])
721 continue;
722
723 assert(views->dst_rts[i]);
724 key.surfaces[i].loc = FRAG_RESULT_DATA0 + i;
725 key.surfaces[i].type =
726 util_format_is_pure_uint(views->src_rts[i]->format) ? nir_type_uint32
727 : util_format_is_pure_sint(views->src_rts[i]->format)
728 ? nir_type_int32
729 : nir_type_float32;
730 key.surfaces[i].src_samples =
731 pan_image_view_get_nr_samples(views->src_rts[i]);
732 key.surfaces[i].dst_samples =
733 pan_image_view_get_nr_samples(views->dst_rts[i]);
734 key.surfaces[i].dim = views->src_rts[i]->dim;
735 key.surfaces[i].array =
736 views->src_rts[i]->first_layer != views->src_rts[i]->last_layer;
737 }
738
739 return key;
740 }
741
742 #if PAN_ARCH <= 7
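/* Look up (or create and cache) the renderer state descriptor and trailing
 * blend descriptors matching the given set of views. */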
743 static mali_ptr
744 pan_blitter_get_rsd(struct pan_blitter_cache *cache,
745 struct pan_blitter_views *views)
746 {
747 struct pan_blit_rsd_key rsd_key = {0};
748
749 assert(!views->rt_count || (!views->src_z && !views->src_s));
750
751 struct pan_blit_shader_key blit_key = pan_blitter_get_key(views);
752
753 if (views->src_z) {
754 assert(views->dst_z);
755 rsd_key.z.format = views->dst_z->format;
756 rsd_key.z.type = blit_key.surfaces[0].type;
757 rsd_key.z.src_samples = blit_key.surfaces[0].src_samples;
758 rsd_key.z.dst_samples = blit_key.surfaces[0].dst_samples;
759 rsd_key.z.dim = blit_key.surfaces[0].dim;
760 rsd_key.z.array = blit_key.surfaces[0].array;
761 }
762
763 if (views->src_s) {
764 assert(views->dst_s);
765 rsd_key.s.format = views->dst_s->format;
766 rsd_key.s.type = blit_key.surfaces[1].type;
767 rsd_key.s.src_samples = blit_key.surfaces[1].src_samples;
768 rsd_key.s.dst_samples = blit_key.surfaces[1].dst_samples;
769 rsd_key.s.dim = blit_key.surfaces[1].dim;
770 rsd_key.s.array = blit_key.surfaces[1].array;
771 }
772
773 for (unsigned i = 0; i < views->rt_count; i++) {
774 if (!views->src_rts[i])
775 continue;
776
777 assert(views->dst_rts[i]);
778 rsd_key.rts[i].format = views->dst_rts[i]->format;
779 rsd_key.rts[i].type = blit_key.surfaces[i].type;
780 rsd_key.rts[i].src_samples = blit_key.surfaces[i].src_samples;
781 rsd_key.rts[i].dst_samples = blit_key.surfaces[i].dst_samples;
782 rsd_key.rts[i].dim = blit_key.surfaces[i].dim;
783 rsd_key.rts[i].array = blit_key.surfaces[i].array;
784 }
785
786 pthread_mutex_lock(&cache->rsds.lock);
787 struct hash_entry *he =
788 _mesa_hash_table_search(cache->rsds.rsds, &rsd_key);
789 struct pan_blit_rsd_data *rsd = he ? he->data : NULL;
790 if (rsd)
791 goto out;
792
793 rsd = rzalloc(cache->rsds.rsds, struct pan_blit_rsd_data);
794 rsd->key = rsd_key;
795
796 #if PAN_ARCH == 4
797 struct panfrost_ptr rsd_ptr =
798 pan_pool_alloc_desc(cache->rsds.pool, RENDERER_STATE);
799 #else
800 unsigned bd_count = PAN_ARCH >= 5 ? MAX2(views->rt_count, 1) : 0;
801 struct panfrost_ptr rsd_ptr = pan_pool_alloc_desc_aggregate(
802 cache->rsds.pool, PAN_DESC(RENDERER_STATE),
803 PAN_DESC_ARRAY(bd_count, BLEND));
804 #endif
805
806 mali_ptr blend_shaders[8] = {0};
807
808 const struct pan_blit_shader_data *blit_shader =
809 pan_blitter_get_blit_shader(cache, &blit_key);
810
811 #if PAN_ARCH <= 5
812 pan_blitter_get_blend_shaders(cache,
813 views->rt_count, views->dst_rts, blit_shader,
814 blend_shaders);
815 #endif
816
817 pan_blitter_emit_rsd(blit_shader, views, blend_shaders, rsd_ptr.cpu);
818 rsd->address = rsd_ptr.gpu;
819 _mesa_hash_table_insert(cache->rsds.rsds, &rsd->key, rsd);
820
821 out:
822 pthread_mutex_unlock(&cache->rsds.lock);
823 return rsd->address;
824 }
825 #endif
826
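/* Collect the views to preload for either the Z/S pass or the colour pass.
 * 'patched_s' provides storage for a re-typed view when the stencil component
 * of a combined depth/stencil format has to be sampled on its own. */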
827 static struct pan_blitter_views
828 pan_preload_get_views(const struct pan_fb_info *fb, bool zs,
829 struct pan_image_view *patched_s)
830 {
831 struct pan_blitter_views views = {0};
832
833 if (zs) {
834 if (fb->zs.preload.z)
835 views.src_z = views.dst_z = fb->zs.view.zs;
836
837 if (fb->zs.preload.s) {
838 const struct pan_image_view *view = fb->zs.view.s ?: fb->zs.view.zs;
839 enum pipe_format fmt = util_format_get_depth_only(view->format);
840
841 switch (view->format) {
842 case PIPE_FORMAT_Z24_UNORM_S8_UINT:
843 fmt = PIPE_FORMAT_X24S8_UINT;
844 break;
845 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
846 fmt = PIPE_FORMAT_X32_S8X24_UINT;
847 break;
848 default:
849 fmt = view->format;
850 break;
851 }
852
853 if (fmt != view->format) {
854 *patched_s = *view;
855 patched_s->format = fmt;
856 views.src_s = views.dst_s = patched_s;
857 } else {
858 views.src_s = views.dst_s = view;
859 }
860 }
861 } else {
862 for (unsigned i = 0; i < fb->rt_count; i++) {
863 if (fb->rts[i].preload) {
864 views.src_rts[i] = fb->rts[i].view;
865 views.dst_rts[i] = fb->rts[i].view;
866 }
867 }
868
869 views.rt_count = fb->rt_count;
870 }
871
872 return views;
873 }
874
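/* Returns true if anything in the given class (Z/S or colour) is flagged for
 * preload. */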
875 static bool
876 pan_preload_needed(const struct pan_fb_info *fb, bool zs)
877 {
878 if (zs) {
879 if (fb->zs.preload.z || fb->zs.preload.s)
880 return true;
881 } else {
882 for (unsigned i = 0; i < fb->rt_count; i++) {
883 if (fb->rts[i].preload)
884 return true;
885 }
886 }
887
888 return false;
889 }
890
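/* Emit the attribute descriptor used to fetch the interpolated blit
 * coordinates in the fragment shader. */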
891 static mali_ptr
892 pan_blitter_emit_varying(struct pan_pool *pool)
893 {
894 struct panfrost_ptr varying = pan_pool_alloc_desc(pool, ATTRIBUTE);
895
896 pan_pack(varying.cpu, ATTRIBUTE, cfg) {
897 cfg.buffer_index = 0;
898 cfg.offset_enable = PAN_ARCH <= 5;
899 cfg.format =
900 GENX(panfrost_format_from_pipe_format)(PIPE_FORMAT_R32G32B32_FLOAT)->hw;
901
902 #if PAN_ARCH >= 9
903 cfg.attribute_type = MALI_ATTRIBUTE_TYPE_1D;
904 cfg.table = PAN_BLIT_TABLE_ATTRIBUTE_BUFFER;
905 cfg.frequency = MALI_ATTRIBUTE_FREQUENCY_VERTEX;
906 cfg.stride = 4 * sizeof(float);
907 #endif
908 }
909
910 return varying.gpu;
911 }
912
913 static mali_ptr
914 pan_blitter_emit_varying_buffer(struct pan_pool *pool, mali_ptr coordinates)
915 {
916 #if PAN_ARCH >= 9
917 struct panfrost_ptr varying_buffer = pan_pool_alloc_desc(pool, BUFFER);
918
919 pan_pack(varying_buffer.cpu, BUFFER, cfg) {
920 cfg.address = coordinates;
921 cfg.size = 4 * sizeof(float) * 4;
922 }
923 #else
924 /* Bifrost needs an empty desc to mark end of prefetching */
925 bool padding_buffer = PAN_ARCH >= 6;
926
927 struct panfrost_ptr varying_buffer = pan_pool_alloc_desc_array(
928 pool, (padding_buffer ? 2 : 1), ATTRIBUTE_BUFFER);
929
930 pan_pack(varying_buffer.cpu, ATTRIBUTE_BUFFER, cfg) {
931 cfg.pointer = coordinates;
932 cfg.stride = 4 * sizeof(float);
933 cfg.size = cfg.stride * 4;
934 }
935
936 if (padding_buffer) {
937 pan_pack(varying_buffer.cpu + pan_size(ATTRIBUTE_BUFFER),
938 ATTRIBUTE_BUFFER, cfg)
939 ;
940 }
941 #endif
942
943 return varying_buffer.gpu;
944 }
945
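/* Emit a sampler with unnormalized coordinates, using nearest or linear
 * filtering as requested. */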
946 static mali_ptr
947 pan_blitter_emit_sampler(struct pan_pool *pool, bool nearest_filter)
948 {
949 struct panfrost_ptr sampler = pan_pool_alloc_desc(pool, SAMPLER);
950
951 pan_pack(sampler.cpu, SAMPLER, cfg) {
952 cfg.seamless_cube_map = false;
953 cfg.normalized_coordinates = false;
954 cfg.minify_nearest = nearest_filter;
955 cfg.magnify_nearest = nearest_filter;
956 }
957
958 return sampler.gpu;
959 }
960
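/* Emit one texture descriptor per view. On v6+ the descriptors are packed
 * contiguously; on earlier GPUs a table of pointers to the individual
 * descriptors is uploaded instead. */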
961 static mali_ptr
962 pan_blitter_emit_textures(struct pan_pool *pool, unsigned tex_count,
963 const struct pan_image_view **views)
964 {
965 #if PAN_ARCH >= 6
966 struct panfrost_ptr textures =
967 pan_pool_alloc_desc_array(pool, tex_count, TEXTURE);
968
969 for (unsigned i = 0; i < tex_count; i++) {
970 void *texture = textures.cpu + (pan_size(TEXTURE) * i);
971 size_t payload_size =
972 GENX(panfrost_estimate_texture_payload_size)(views[i]);
973 struct panfrost_ptr surfaces =
974 pan_pool_alloc_aligned(pool, payload_size, 64);
975
976 GENX(panfrost_new_texture)(views[i], texture, &surfaces);
977 }
978
979 return textures.gpu;
980 #else
981 mali_ptr textures[8] = {0};
982
983 for (unsigned i = 0; i < tex_count; i++) {
984 size_t sz = pan_size(TEXTURE) +
985 GENX(panfrost_estimate_texture_payload_size)(views[i]);
986 struct panfrost_ptr texture =
987 pan_pool_alloc_aligned(pool, sz, pan_alignment(TEXTURE));
988 struct panfrost_ptr surfaces = {
989 .cpu = texture.cpu + pan_size(TEXTURE),
990 .gpu = texture.gpu + pan_size(TEXTURE),
991 };
992
993 GENX(panfrost_new_texture)(views[i], texture.cpu, &surfaces);
994 textures[i] = texture.gpu;
995 }
996
997 return pan_pool_upload_aligned(pool, textures, tex_count * sizeof(mali_ptr),
998 sizeof(mali_ptr));
999 #endif
1000 }
1001
1002 static mali_ptr
1003 pan_preload_emit_textures(struct pan_pool *pool, const struct pan_fb_info *fb,
1004 bool zs, unsigned *tex_count_out)
1005 {
1006 const struct pan_image_view *views[8];
1007 struct pan_image_view patched_s_view;
1008 unsigned tex_count = 0;
1009
1010 if (zs) {
1011 if (fb->zs.preload.z)
1012 views[tex_count++] = fb->zs.view.zs;
1013
1014 if (fb->zs.preload.s) {
1015 const struct pan_image_view *view = fb->zs.view.s ?: fb->zs.view.zs;
1016 enum pipe_format fmt = util_format_get_depth_only(view->format);
1017
1018 switch (view->format) {
1019 case PIPE_FORMAT_Z24_UNORM_S8_UINT:
1020 fmt = PIPE_FORMAT_X24S8_UINT;
1021 break;
1022 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
1023 fmt = PIPE_FORMAT_X32_S8X24_UINT;
1024 break;
1025 default:
1026 fmt = view->format;
1027 break;
1028 }
1029
1030 if (fmt != view->format) {
1031 patched_s_view = *view;
1032 patched_s_view.format = fmt;
1033 view = &patched_s_view;
1034 }
1035 views[tex_count++] = view;
1036 }
1037 } else {
1038 for (unsigned i = 0; i < fb->rt_count; i++) {
1039 if (fb->rts[i].preload)
1040 views[tex_count++] = fb->rts[i].view;
1041 }
1042 }
1043
1044 *tex_count_out = tex_count;
1045
1046 return pan_blitter_emit_textures(pool, tex_count, views);
1047 }
1048
1049 #if PAN_ARCH >= 8
1050 /* TODO: cache */
1051 static mali_ptr
1052 pan_blitter_emit_zs(struct pan_pool *pool, bool z, bool s)
1053 {
1054 struct panfrost_ptr zsd = pan_pool_alloc_desc(pool, DEPTH_STENCIL);
1055
1056 pan_pack(zsd.cpu, DEPTH_STENCIL, cfg) {
1057 cfg.depth_function = MALI_FUNC_ALWAYS;
1058 cfg.depth_write_enable = z;
1059
1060 if (z)
1061 cfg.depth_source = MALI_DEPTH_SOURCE_SHADER;
1062
1063 cfg.stencil_test_enable = s;
1064 cfg.stencil_from_shader = s;
1065
1066 cfg.front_compare_function = MALI_FUNC_ALWAYS;
1067 cfg.front_stencil_fail = MALI_STENCIL_OP_REPLACE;
1068 cfg.front_depth_fail = MALI_STENCIL_OP_REPLACE;
1069 cfg.front_depth_pass = MALI_STENCIL_OP_REPLACE;
1070 cfg.front_write_mask = 0xFF;
1071 cfg.front_value_mask = 0xFF;
1072
1073 cfg.back_compare_function = MALI_FUNC_ALWAYS;
1074 cfg.back_stencil_fail = MALI_STENCIL_OP_REPLACE;
1075 cfg.back_depth_fail = MALI_STENCIL_OP_REPLACE;
1076 cfg.back_depth_pass = MALI_STENCIL_OP_REPLACE;
1077 cfg.back_write_mask = 0xFF;
1078 cfg.back_value_mask = 0xFF;
1079
1080 cfg.depth_cull_enable = false;
1081 }
1082
1083 return zsd.gpu;
1084 }
1085 #else
1086 static mali_ptr
1087 pan_blitter_emit_viewport(struct pan_pool *pool, uint16_t minx, uint16_t miny,
1088 uint16_t maxx, uint16_t maxy)
1089 {
1090 struct panfrost_ptr vp = pan_pool_alloc_desc(pool, VIEWPORT);
1091
1092 pan_pack(vp.cpu, VIEWPORT, cfg) {
1093 cfg.scissor_minimum_x = minx;
1094 cfg.scissor_minimum_y = miny;
1095 cfg.scissor_maximum_x = maxx;
1096 cfg.scissor_maximum_y = maxy;
1097 }
1098
1099 return vp.gpu;
1100 }
1101 #endif
1102
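/* Emit the draw descriptor (and all associated state) for a preload of either
 * the Z/S buffers or the colour render targets. */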
1103 static void
1104 pan_preload_emit_dcd(struct pan_blitter_cache *cache,
1105 struct pan_pool *pool, struct pan_fb_info *fb, bool zs,
1106 mali_ptr coordinates, mali_ptr tsd, void *out,
1107 bool always_write)
1108 {
1109 unsigned tex_count = 0;
1110 mali_ptr textures = pan_preload_emit_textures(pool, fb, zs, &tex_count);
1111 mali_ptr samplers = pan_blitter_emit_sampler(pool, true);
1112 mali_ptr varyings = pan_blitter_emit_varying(pool);
1113 mali_ptr varying_buffers =
1114 pan_blitter_emit_varying_buffer(pool, coordinates);
1115
1116 /* Tiles updated by blit shaders are still considered clean (separate
1117 * for colour and Z/S), allowing us to suppress unnecessary writeback
1118 */
1119 UNUSED bool clean_fragment_write = !always_write;
1120
1121 /* Image view used when patching stencil formats for combined
1122 * depth/stencil preloads.
1123 */
1124 struct pan_image_view patched_s;
1125
1126 struct pan_blitter_views views = pan_preload_get_views(fb, zs, &patched_s);
1127
1128 #if PAN_ARCH <= 7
1129 pan_pack(out, DRAW, cfg) {
1130 uint16_t minx = 0, miny = 0, maxx, maxy;
1131
1132 if (PAN_ARCH == 4) {
1133 maxx = fb->width - 1;
1134 maxy = fb->height - 1;
1135 } else {
1136 /* Align on 32x32 tiles */
1137 minx = fb->extent.minx & ~31;
1138 miny = fb->extent.miny & ~31;
1139 maxx = MIN2(ALIGN_POT(fb->extent.maxx + 1, 32), fb->width) - 1;
1140 maxy = MIN2(ALIGN_POT(fb->extent.maxy + 1, 32), fb->height) - 1;
1141 }
1142
1143 cfg.thread_storage = tsd;
1144 cfg.state = pan_blitter_get_rsd(cache, &views);
1145
1146 cfg.position = coordinates;
1147 cfg.viewport = pan_blitter_emit_viewport(pool, minx, miny, maxx, maxy);
1148
1149 cfg.varyings = varyings;
1150 cfg.varying_buffers = varying_buffers;
1151 cfg.textures = textures;
1152 cfg.samplers = samplers;
1153
1154 #if PAN_ARCH >= 6
1155 cfg.clean_fragment_write = clean_fragment_write;
1156 #endif
1157 }
1158 #else
1159 struct panfrost_ptr T;
1160 unsigned nr_tables = PAN_BLIT_NUM_RESOURCE_TABLES;
1161
1162 /* Although individual resources need only 16 byte alignment, the
1163 * resource table as a whole must be 64-byte aligned.
1164 */
1165 T = pan_pool_alloc_aligned(pool, nr_tables * pan_size(RESOURCE), 64);
1166 memset(T.cpu, 0, nr_tables * pan_size(RESOURCE));
1167
1168 panfrost_make_resource_table(T, PAN_BLIT_TABLE_TEXTURE, textures, tex_count);
1169 panfrost_make_resource_table(T, PAN_BLIT_TABLE_SAMPLER, samplers, 1);
1170 panfrost_make_resource_table(T, PAN_BLIT_TABLE_ATTRIBUTE, varyings, 1);
1171 panfrost_make_resource_table(T, PAN_BLIT_TABLE_ATTRIBUTE_BUFFER,
1172 varying_buffers, 1);
1173
1174 struct pan_blit_shader_key key = pan_blitter_get_key(&views);
1175 const struct pan_blit_shader_data *blit_shader =
1176 pan_blitter_get_blit_shader(cache, &key);
1177
1178 bool z = fb->zs.preload.z;
1179 bool s = fb->zs.preload.s;
1180 bool ms = pan_blitter_is_ms(&views);
1181
1182 struct panfrost_ptr spd = pan_pool_alloc_desc(pool, SHADER_PROGRAM);
1183 pan_pack(spd.cpu, SHADER_PROGRAM, cfg) {
1184 cfg.stage = MALI_SHADER_STAGE_FRAGMENT;
1185 cfg.fragment_coverage_bitmask_type = MALI_COVERAGE_BITMASK_TYPE_GL;
1186 cfg.register_allocation = MALI_SHADER_REGISTER_ALLOCATION_32_PER_THREAD;
1187 cfg.binary = blit_shader->address;
1188 cfg.preload.r48_r63 = blit_shader->info.preload >> 48;
1189 }
1190
1191 unsigned bd_count = views.rt_count;
1192 struct panfrost_ptr blend = pan_pool_alloc_desc_array(pool, bd_count, BLEND);
1193
1194 if (!zs) {
1195 pan_blitter_emit_blends(blit_shader, &views, NULL, blend.cpu);
1196 }
1197
1198 pan_pack(out, DRAW, cfg) {
1199 if (zs) {
1200 /* ZS_EMIT requires late update/kill */
1201 cfg.zs_update_operation = MALI_PIXEL_KILL_FORCE_LATE;
1202 cfg.pixel_kill_operation = MALI_PIXEL_KILL_FORCE_LATE;
1203 cfg.blend_count = 0;
1204 } else {
1205 /* Skipping ATEST requires forcing Z/S */
1206 cfg.zs_update_operation = MALI_PIXEL_KILL_STRONG_EARLY;
1207 cfg.pixel_kill_operation = MALI_PIXEL_KILL_FORCE_EARLY;
1208
1209 cfg.blend = blend.gpu;
1210 cfg.blend_count = bd_count;
1211 cfg.render_target_mask = 0x1;
1212 }
1213
1214 cfg.allow_forward_pixel_to_kill = !zs;
1215 cfg.allow_forward_pixel_to_be_killed = true;
1216 cfg.depth_stencil = pan_blitter_emit_zs(pool, z, s);
1217 cfg.sample_mask = 0xFFFF;
1218 cfg.multisample_enable = ms;
1219 cfg.evaluate_per_sample = ms;
1220 cfg.maximum_z = 1.0;
1221 cfg.clean_fragment_write = clean_fragment_write;
1222 cfg.shader.resources = T.gpu | nr_tables;
1223 cfg.shader.shader = spd.gpu;
1224 cfg.shader.thread_storage = tsd;
1225 }
1226 #endif
1227 }
1228
1229 #if PAN_ARCH >= 6
1230 static void
1231 pan_preload_fb_alloc_pre_post_dcds(struct pan_pool *desc_pool,
1232 struct pan_fb_info *fb)
1233 {
1234 if (fb->bifrost.pre_post.dcds.gpu)
1235 return;
1236
1237 fb->bifrost.pre_post.dcds = pan_pool_alloc_desc_array(desc_pool, 3, DRAW);
1238 }
1239
1240 static void
1241 pan_preload_emit_pre_frame_dcd(struct pan_blitter_cache *cache,
1242 struct pan_pool *desc_pool,
1243 struct pan_fb_info *fb, bool zs, mali_ptr coords,
1244 mali_ptr tsd)
1245 {
1246 unsigned dcd_idx = zs ? 1 : 0;
1247 pan_preload_fb_alloc_pre_post_dcds(desc_pool, fb);
1248 assert(fb->bifrost.pre_post.dcds.cpu);
1249 void *dcd = fb->bifrost.pre_post.dcds.cpu + (dcd_idx * pan_size(DRAW));
1250
1251 /* We only use crc_rt to determine whether to force writes for updating
1252 * the CRCs, so use a conservative tile size (16x16).
1253 */
1254 int crc_rt = GENX(pan_select_crc_rt)(fb, 16 * 16);
1255
1256 bool always_write = false;
1257
1258 /* If CRC data is currently invalid and this batch will make it valid,
1259 * write even clean tiles to make sure CRC data is updated. */
1260 if (crc_rt >= 0) {
1261 bool *valid = fb->rts[crc_rt].crc_valid;
1262 bool full = !fb->extent.minx && !fb->extent.miny &&
1263 fb->extent.maxx == (fb->width - 1) &&
1264 fb->extent.maxy == (fb->height - 1);
1265
1266 if (full && !(*valid))
1267 always_write = true;
1268 }
1269
1270 pan_preload_emit_dcd(cache, desc_pool, fb, zs, coords, tsd, dcd,
1271 always_write);
1272 if (zs) {
1273 enum pipe_format fmt = fb->zs.view.zs
1274 ? fb->zs.view.zs->planes[0]->layout.format
1275 : fb->zs.view.s->planes[0]->layout.format;
1276 bool always = false;
1277
1278 /* If we're dealing with a combined ZS resource and only one
1279 * component is cleared, we need to reload the whole surface
1280 * because the zs_clean_pixel_write_enable flag is set in that
1281 * case.
1282 */
1283 if (util_format_is_depth_and_stencil(fmt) &&
1284 fb->zs.clear.z != fb->zs.clear.s)
1285 always = true;
1286
1287 /* We could use INTERSECT on Bifrost v7 too, but
1288 * EARLY_ZS_ALWAYS has the advantage of reloading the ZS tile
1289 * buffer one or more tiles ahead, making ZS data immediately
1290 * available for any ZS tests taking place in other shaders.
1291 * Things haven't been benchmarked to determine what's
1292 * preferable (saving bandwidth vs having ZS preloaded
1293 * earlier), so let's leave it like that for now.
1294 */
1295 fb->bifrost.pre_post.modes[dcd_idx] =
1296 PAN_ARCH > 6
1297 ? MALI_PRE_POST_FRAME_SHADER_MODE_EARLY_ZS_ALWAYS
1298 : always ? MALI_PRE_POST_FRAME_SHADER_MODE_ALWAYS
1299 : MALI_PRE_POST_FRAME_SHADER_MODE_INTERSECT;
1300 } else {
1301 fb->bifrost.pre_post.modes[dcd_idx] =
1302 always_write ? MALI_PRE_POST_FRAME_SHADER_MODE_ALWAYS
1303 : MALI_PRE_POST_FRAME_SHADER_MODE_INTERSECT;
1304 }
1305 }
1306 #else
1307 static struct panfrost_ptr
1308 pan_preload_emit_tiler_job(struct pan_blitter_cache *cache, struct pan_pool *desc_pool,
1309 struct pan_fb_info *fb, bool zs, mali_ptr coords,
1310 mali_ptr tsd)
1311 {
1312 struct panfrost_ptr job = pan_pool_alloc_desc(desc_pool, TILER_JOB);
1313
1314 pan_preload_emit_dcd(cache, desc_pool, fb, zs, coords, tsd,
1315 pan_section_ptr(job.cpu, TILER_JOB, DRAW), false);
1316
1317 pan_section_pack(job.cpu, TILER_JOB, PRIMITIVE, cfg) {
1318 cfg.draw_mode = MALI_DRAW_MODE_TRIANGLE_STRIP;
1319 cfg.index_count = 4;
1320 cfg.job_task_split = 6;
1321 }
1322
1323 pan_section_pack(job.cpu, TILER_JOB, PRIMITIVE_SIZE, cfg) {
1324 cfg.constant = 1.0f;
1325 }
1326
1327 void *invoc = pan_section_ptr(job.cpu, TILER_JOB, INVOCATION);
1328 panfrost_pack_work_groups_compute(invoc, 1, 4, 1, 1, 1, 1, true, false);
1329
1330 return job;
1331 }
1332 #endif
1333
1334 static struct panfrost_ptr
1335 pan_preload_fb_part(struct pan_blitter_cache *cache, struct pan_pool *pool,
1336 struct pan_fb_info *fb, bool zs, mali_ptr coords,
1337 mali_ptr tsd)
1338 {
1339 struct panfrost_ptr job = {0};
1340
1341 #if PAN_ARCH >= 6
1342 pan_preload_emit_pre_frame_dcd(cache, pool, fb, zs, coords, tsd);
1343 #else
1344 job = pan_preload_emit_tiler_job(cache, pool, fb, zs, coords, tsd);
1345 #endif
1346 return job;
1347 }
1348
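/* Emit the work needed to preload the framebuffer contents for one layer:
 * pre-frame DCDs on v6+, or standalone tiler jobs on earlier GPUs. Returns
 * the number of tiler jobs written to 'jobs'. */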
1349 unsigned
1350 GENX(pan_preload_fb)(struct pan_blitter_cache *cache, struct pan_pool *pool,
1351 struct pan_fb_info *fb, unsigned layer_idx, mali_ptr tsd,
1352 struct panfrost_ptr *jobs)
1353 {
1354 bool preload_zs = pan_preload_needed(fb, true);
1355 bool preload_rts = pan_preload_needed(fb, false);
1356 mali_ptr coords;
1357
1358 if (!preload_zs && !preload_rts)
1359 return 0;
1360
1361 float rect[] = {
1362 0.0, 0.0, layer_idx, 1.0,
1363 fb->width, 0.0, layer_idx, 1.0,
1364 0.0, fb->height, layer_idx, 1.0,
1365 fb->width, fb->height, layer_idx, 1.0,
1366 };
1367
1368 coords = pan_pool_upload_aligned(pool, rect, sizeof(rect), 64);
1369
1370 unsigned njobs = 0;
1371 if (preload_zs) {
1372 struct panfrost_ptr job =
1373 pan_preload_fb_part(cache, pool, fb, true, coords, tsd);
1374 if (jobs && job.cpu)
1375 jobs[njobs++] = job;
1376 }
1377
1378 if (preload_rts) {
1379 struct panfrost_ptr job =
1380 pan_preload_fb_part(cache, pool, fb, false, coords, tsd);
1381 if (jobs && job.cpu)
1382 jobs[njobs++] = job;
1383 }
1384
1385 return njobs;
1386 }
1387
1388 DERIVE_HASH_TABLE(pan_blit_shader_key);
1389 DERIVE_HASH_TABLE(pan_blit_blend_shader_key);
1390 DERIVE_HASH_TABLE(pan_blit_rsd_key);
1391
1392 static void
1393 pan_blitter_prefill_blit_shader_cache(struct pan_blitter_cache *cache)
1394 {
1395 static const struct pan_blit_shader_key prefill[] = {
1396 {
1397 .surfaces[0] =
1398 {
1399 .loc = FRAG_RESULT_DEPTH,
1400 .type = nir_type_float32,
1401 .dim = MALI_TEXTURE_DIMENSION_2D,
1402 .src_samples = 1,
1403 .dst_samples = 1,
1404 },
1405 },
1406 {
1407 .surfaces[1] =
1408 {
1409 .loc = FRAG_RESULT_STENCIL,
1410 .type = nir_type_uint32,
1411 .dim = MALI_TEXTURE_DIMENSION_2D,
1412 .src_samples = 1,
1413 .dst_samples = 1,
1414 },
1415 },
1416 {
1417 .surfaces[0] =
1418 {
1419 .loc = FRAG_RESULT_DATA0,
1420 .type = nir_type_float32,
1421 .dim = MALI_TEXTURE_DIMENSION_2D,
1422 .src_samples = 1,
1423 .dst_samples = 1,
1424 },
1425 },
1426 };
1427
1428 for (unsigned i = 0; i < ARRAY_SIZE(prefill); i++)
1429 pan_blitter_get_blit_shader(cache, &prefill[i]);
1430 }
1431
1432 void
1433 GENX(pan_blitter_cache_init)(struct pan_blitter_cache *cache,
1434 unsigned gpu_id,
1435 struct pan_blend_shader_cache *blend_shader_cache,
1436 struct pan_pool *bin_pool,
1437 struct pan_pool *desc_pool)
1438 {
1439 cache->gpu_id = gpu_id;
1440 cache->shaders.blit = pan_blit_shader_key_table_create(NULL);
1441 cache->shaders.blend = pan_blit_blend_shader_key_table_create(NULL);
1442 cache->shaders.pool = bin_pool;
1443 pthread_mutex_init(&cache->shaders.lock, NULL);
1444 pan_blitter_prefill_blit_shader_cache(cache);
1445
1446 cache->rsds.pool = desc_pool;
1447 cache->rsds.rsds = pan_blit_rsd_key_table_create(NULL);
1448 pthread_mutex_init(&cache->rsds.lock, NULL);
1449 cache->blend_shader_cache = blend_shader_cache;
1450 }
1451
1452 void
1453 GENX(pan_blitter_cache_cleanup)(struct pan_blitter_cache *cache)
1454 {
1455 _mesa_hash_table_destroy(cache->shaders.blit, NULL);
1456 _mesa_hash_table_destroy(cache->shaders.blend, NULL);
1457 pthread_mutex_destroy(&cache->shaders.lock);
1458 _mesa_hash_table_destroy(cache->rsds.rsds, NULL);
1459 pthread_mutex_destroy(&cache->rsds.lock);
1460 }
1461