xref: /aosp_15_r20/external/mesa3d/src/panfrost/lib/pan_blitter.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright (C) 2020-2021 Collabora, Ltd.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21  * SOFTWARE.
22  *
23  * Authors:
24  *   Alyssa Rosenzweig <[email protected]>
25  *   Boris Brezillon <[email protected]>
26  */
27 
28 #include "pan_blitter.h"
29 #include <math.h>
30 #include <stdio.h>
31 #include "compiler/nir/nir_builder.h"
32 #include "util/u_math.h"
33 #include "pan_blend.h"
34 #include "pan_desc.h"
35 #include "pan_encoder.h"
36 #include "pan_jc.h"
37 #include "pan_pool.h"
38 #include "pan_shader.h"
39 #include "pan_texture.h"
40 
41 #if PAN_ARCH >= 6
42 /* On Midgard, the native blit infrastructure (via MFBD preloads) is broken or
43  * missing in many cases. We instead use software paths as fallbacks to
44  * implement blits, which are done as TILER jobs. No vertex shader is
45  * necessary since we can supply screen-space coordinates directly.
46  *
47  * This is primarily designed as a fallback for preloads but could be extended
48  * for other clears/blits if needed in the future. */
49 
50 static enum mali_register_file_format
51 blit_type_to_reg_fmt(nir_alu_type in)
52 {
53    switch (in) {
54    case nir_type_float32:
55       return MALI_REGISTER_FILE_FORMAT_F32;
56    case nir_type_int32:
57       return MALI_REGISTER_FILE_FORMAT_I32;
58    case nir_type_uint32:
59       return MALI_REGISTER_FILE_FORMAT_U32;
60    default:
61       unreachable("Invalid blit type");
62    }
63 }
64 #endif
65 
66 /* On Valhall, the driver gives the hardware a table of resource tables.
67  * Resources are addressed as the index of the table together with the index of
68  * the resource within the table. For simplicity, we put one type of resource
69  * in each table and fix the numbering of the tables.
70  *
71  * This numbering is arbitrary.
72  */
73 enum pan_blit_resource_table {
74    PAN_BLIT_TABLE_ATTRIBUTE = 0,
75    PAN_BLIT_TABLE_ATTRIBUTE_BUFFER,
76    PAN_BLIT_TABLE_SAMPLER,
77    PAN_BLIT_TABLE_TEXTURE,
78 
79    PAN_BLIT_NUM_RESOURCE_TABLES
80 };
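/* For example, sampler 0 in the sampler table is addressed as
 * pan_res_handle(PAN_BLIT_TABLE_SAMPLER, 0); see the sampler_hw_index(),
 * tex_hw_index() and attr_hw_index() helpers further down. */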
81 
82 struct pan_blit_surface {
83    gl_frag_result loc              : 4;
84    nir_alu_type type               : 8;
85    enum mali_texture_dimension dim : 2;
86    bool array                      : 1;
87    unsigned src_samples            : 5;
88    unsigned dst_samples            : 5;
89 };
90 
91 struct pan_blit_shader_key {
92    struct pan_blit_surface surfaces[8];
93 };
94 
95 struct pan_blit_shader_data {
96    struct pan_blit_shader_key key;
97    struct pan_shader_info info;
98    mali_ptr address;
99    unsigned blend_ret_offsets[8];
100    nir_alu_type blend_types[8];
101 };
102 
103 struct pan_blit_blend_shader_key {
104    enum pipe_format format;
105    nir_alu_type type;
106    unsigned rt         : 3;
107    unsigned nr_samples : 5;
108    unsigned pad        : 24;
109 };
110 
111 struct pan_blit_blend_shader_data {
112    struct pan_blit_blend_shader_key key;
113    mali_ptr address;
114 };
115 
116 struct pan_blit_rsd_key {
117    struct {
118       enum pipe_format format;
119       nir_alu_type type               : 8;
120       unsigned src_samples            : 5;
121       unsigned dst_samples            : 5;
122       enum mali_texture_dimension dim : 2;
123       bool array                      : 1;
124    } rts[8], z, s;
125 };
126 
127 struct pan_blit_rsd_data {
128    struct pan_blit_rsd_key key;
129    mali_ptr address;
130 };
131 
132 #if PAN_ARCH >= 5
133 static void
134 pan_blitter_emit_blend(unsigned rt,
135                        const struct pan_image_view *iview,
136                        const struct pan_blit_shader_data *blit_shader,
137                        mali_ptr blend_shader, void *out)
138 {
139    assert(blend_shader == 0 || PAN_ARCH <= 5);
140 
141    pan_pack(out, BLEND, cfg) {
142       if (!iview) {
143          cfg.enable = false;
144 #if PAN_ARCH >= 6
145          cfg.internal.mode = MALI_BLEND_MODE_OFF;
146 #endif
147          continue;
148       }
149 
150       cfg.round_to_fb_precision = true;
151       cfg.srgb = util_format_is_srgb(iview->format);
152 
153 #if PAN_ARCH >= 6
154       cfg.internal.mode = MALI_BLEND_MODE_OPAQUE;
155 #endif
156 
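      /* The blitter never actually blends: either program a fixed-function
       * equation that simply replaces the destination with the source, or,
       * on Midgard, point at a pre-compiled "replace" blend shader. */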
157       if (!blend_shader) {
158          cfg.equation.rgb.a = MALI_BLEND_OPERAND_A_SRC;
159          cfg.equation.rgb.b = MALI_BLEND_OPERAND_B_SRC;
160          cfg.equation.rgb.c = MALI_BLEND_OPERAND_C_ZERO;
161          cfg.equation.alpha.a = MALI_BLEND_OPERAND_A_SRC;
162          cfg.equation.alpha.b = MALI_BLEND_OPERAND_B_SRC;
163          cfg.equation.alpha.c = MALI_BLEND_OPERAND_C_ZERO;
164          cfg.equation.color_mask = 0xf;
165 
166 #if PAN_ARCH >= 6
167          nir_alu_type type = blit_shader->key.surfaces[rt].type;
168 
169          cfg.internal.fixed_function.num_comps = 4;
170          cfg.internal.fixed_function.conversion.memory_format = GENX(
171             panfrost_dithered_format_from_pipe_format)(iview->format, false);
172          cfg.internal.fixed_function.conversion.register_format =
173             blit_type_to_reg_fmt(type);
174 
175          cfg.internal.fixed_function.rt = rt;
176 #endif
177       } else {
178 #if PAN_ARCH <= 5
179          cfg.blend_shader = true;
180          cfg.shader_pc = blend_shader;
181 #endif
182       }
183    }
184 }
185 #endif
186 
187 struct pan_blitter_views {
188    unsigned rt_count;
189    const struct pan_image_view *src_rts[8];
190    const struct pan_image_view *dst_rts[8];
191    const struct pan_image_view *src_z;
192    const struct pan_image_view *dst_z;
193    const struct pan_image_view *src_s;
194    const struct pan_image_view *dst_s;
195 };
196 
197 static bool
198 pan_blitter_is_ms(struct pan_blitter_views *views)
199 {
200    for (unsigned i = 0; i < views->rt_count; i++) {
201       if (views->dst_rts[i]) {
202          if (pan_image_view_get_nr_samples(views->dst_rts[i]) > 1)
203             return true;
204       }
205    }
206 
207    if (views->dst_z && pan_image_view_get_nr_samples(views->dst_z) > 1)
208       return true;
209 
210    if (views->dst_s && pan_image_view_get_nr_samples(views->dst_s) > 1)
211       return true;
212 
213    return false;
214 }
215 
216 #if PAN_ARCH >= 5
217 static void
218 pan_blitter_emit_blends(const struct pan_blit_shader_data *blit_shader,
219                         struct pan_blitter_views *views,
220                         mali_ptr *blend_shaders, void *out)
221 {
222    for (unsigned i = 0; i < MAX2(views->rt_count, 1); ++i) {
223       void *dest = out + pan_size(BLEND) * i;
224       const struct pan_image_view *rt_view = views->dst_rts[i];
225       mali_ptr blend_shader = blend_shaders ? blend_shaders[i] : 0;
226 
227       pan_blitter_emit_blend(i, rt_view, blit_shader, blend_shader, dest);
228    }
229 }
230 #endif
231 
232 #if PAN_ARCH <= 7
233 static void
234 pan_blitter_emit_rsd(const struct pan_blit_shader_data *blit_shader,
235                      struct pan_blitter_views *views, mali_ptr *blend_shaders,
236                      void *out)
237 {
238    UNUSED bool zs = (views->dst_z || views->dst_s);
239    bool ms = pan_blitter_is_ms(views);
240 
241    pan_pack(out, RENDERER_STATE, cfg) {
242       assert(blit_shader->address);
243       pan_shader_prepare_rsd(&blit_shader->info, blit_shader->address, &cfg);
244 
245       cfg.multisample_misc.sample_mask = 0xFFFF;
246       cfg.multisample_misc.multisample_enable = ms;
247       cfg.multisample_misc.evaluate_per_sample = ms;
248       cfg.multisample_misc.depth_write_mask = views->dst_z != NULL;
249       cfg.multisample_misc.depth_function = MALI_FUNC_ALWAYS;
250 
251       cfg.stencil_mask_misc.stencil_enable = views->dst_s != NULL;
252       cfg.stencil_mask_misc.stencil_mask_front = 0xFF;
253       cfg.stencil_mask_misc.stencil_mask_back = 0xFF;
254       cfg.stencil_front.compare_function = MALI_FUNC_ALWAYS;
255       cfg.stencil_front.stencil_fail = MALI_STENCIL_OP_REPLACE;
256       cfg.stencil_front.depth_fail = MALI_STENCIL_OP_REPLACE;
257       cfg.stencil_front.depth_pass = MALI_STENCIL_OP_REPLACE;
258       cfg.stencil_front.mask = 0xFF;
259       cfg.stencil_back = cfg.stencil_front;
260 
261 #if PAN_ARCH >= 6
262       if (zs) {
263          /* Writing Z/S requires late updates */
264          cfg.properties.zs_update_operation = MALI_PIXEL_KILL_FORCE_LATE;
265          cfg.properties.pixel_kill_operation = MALI_PIXEL_KILL_FORCE_LATE;
266       } else {
267          /* Skipping ATEST requires forcing Z/S */
268          cfg.properties.zs_update_operation = MALI_PIXEL_KILL_STRONG_EARLY;
269          cfg.properties.pixel_kill_operation = MALI_PIXEL_KILL_FORCE_EARLY;
270       }
271 
272       /* However, while shaders writing Z/S can normally be killed, doing
273        * so for frame shaders on v6 can cause GPU timeouts, so only allow
274        * colour blit shaders to be killed. */
275       cfg.properties.allow_forward_pixel_to_kill = !zs;
276 
277       if (PAN_ARCH == 6)
278          cfg.properties.allow_forward_pixel_to_be_killed = !zs;
279 #else
280 
281       mali_ptr blend_shader =
282          blend_shaders
283             ? panfrost_last_nonnull(blend_shaders, MAX2(views->rt_count, 1))
284             : 0;
285 
286       cfg.properties.work_register_count = 4;
287       cfg.properties.force_early_z = !zs;
288       cfg.stencil_mask_misc.alpha_test_compare_function = MALI_FUNC_ALWAYS;
289 
290       /* Set even on v5 for erratum workaround */
291 #if PAN_ARCH == 5
292       cfg.legacy_blend_shader = blend_shader;
293 #else
294       cfg.blend_shader = blend_shader;
295       cfg.stencil_mask_misc.write_enable = true;
296       cfg.stencil_mask_misc.dither_disable = true;
297       cfg.multisample_misc.blend_shader = !!blend_shader;
298       cfg.blend_shader = blend_shader;
299       if (!cfg.multisample_misc.blend_shader) {
300          cfg.blend_equation.rgb.a = MALI_BLEND_OPERAND_A_SRC;
301          cfg.blend_equation.rgb.b = MALI_BLEND_OPERAND_B_SRC;
302          cfg.blend_equation.rgb.c = MALI_BLEND_OPERAND_C_ZERO;
303          cfg.blend_equation.alpha.a = MALI_BLEND_OPERAND_A_SRC;
304          cfg.blend_equation.alpha.b = MALI_BLEND_OPERAND_B_SRC;
305          cfg.blend_equation.alpha.c = MALI_BLEND_OPERAND_C_ZERO;
306          cfg.blend_constant = 0;
307 
308          if (views->dst_rts[0] != NULL) {
309             cfg.stencil_mask_misc.srgb =
310                util_format_is_srgb(views->dst_rts[0]->format);
311             cfg.blend_equation.color_mask = 0xf;
312          }
313       }
314 #endif
315 #endif
316    }
317 
318 #if PAN_ARCH >= 5
319    pan_blitter_emit_blends(blit_shader, views, blend_shaders,
320                            out + pan_size(RENDERER_STATE));
321 #endif
322 }
323 #endif
324 
325 #if PAN_ARCH <= 5
326 static void
327 pan_blitter_get_blend_shaders(struct pan_blitter_cache *cache,
328                               unsigned rt_count,
329                               const struct pan_image_view **rts,
330                               const struct pan_blit_shader_data *blit_shader,
331                               mali_ptr *blend_shaders)
332 {
333    if (!rt_count)
334       return;
335 
336    struct pan_blend_state blend_state = {
337       .rt_count = rt_count,
338    };
339 
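   /* Fixed-function blending can only target formats the hardware considers
    * blendable; anything else needs a small per-RT "replace" blend shader on
    * v4/v5, keyed by format, register type, RT index and sample count. */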
340    for (unsigned i = 0; i < rt_count; i++) {
341       if (!rts[i] || panfrost_blendable_formats_v7[rts[i]->format].internal)
342          continue;
343 
344       struct pan_blit_blend_shader_key key = {
345          .format = rts[i]->format,
346          .rt = i,
347          .nr_samples = pan_image_view_get_nr_samples(rts[i]),
348          .type = blit_shader->blend_types[i],
349       };
350 
351       pthread_mutex_lock(&cache->shaders.lock);
352       struct hash_entry *he =
353          _mesa_hash_table_search(cache->shaders.blend, &key);
354       struct pan_blit_blend_shader_data *blend_shader = he ? he->data : NULL;
355       if (blend_shader) {
356          blend_shaders[i] = blend_shader->address;
357          pthread_mutex_unlock(&cache->shaders.lock);
358          continue;
359       }
360 
361       blend_shader =
362          rzalloc(cache->shaders.blend, struct pan_blit_blend_shader_data);
363       blend_shader->key = key;
364 
365       blend_state.rts[i] = (struct pan_blend_rt_state){
366          .format = rts[i]->format,
367          .nr_samples = pan_image_view_get_nr_samples(rts[i]),
368          .equation =
369             {
370                .blend_enable = false,
371                .color_mask = 0xf,
372             },
373       };
374 
375       pthread_mutex_lock(&cache->blend_shader_cache->lock);
376       struct pan_blend_shader_variant *b = GENX(pan_blend_get_shader_locked)(
377          cache->blend_shader_cache, &blend_state, blit_shader->blend_types[i],
378          nir_type_float32, /* unused */
379          i);
380 
381       assert(b->work_reg_count <= 4);
382       struct panfrost_ptr bin =
383          pan_pool_alloc_aligned(cache->shaders.pool, b->binary.size, 64);
384       memcpy(bin.cpu, b->binary.data, b->binary.size);
385 
386       blend_shader->address = bin.gpu | b->first_tag;
387       pthread_mutex_unlock(&cache->blend_shader_cache->lock);
388       _mesa_hash_table_insert(cache->shaders.blend, &blend_shader->key,
389                               blend_shader);
390       pthread_mutex_unlock(&cache->shaders.lock);
391       blend_shaders[i] = blend_shader->address;
392    }
393 }
394 #endif
395 
396 /*
397  * Early Mali GPUs did not respect sampler LOD clamps or bias, so the Midgard
398  * compiler inserts lowering code with a load_sampler_lod_parameters_pan sysval
399  * that we need to lower. Our samplers do not use LOD clamps or bias, so we
400  * lower to the identity settings and let constant folding get rid of the
401  * unnecessary lowering.
402  */
403 static bool
404 lower_sampler_parameters(nir_builder *b, nir_intrinsic_instr *intr,
405                          UNUSED void *data)
406 {
407    if (intr->intrinsic != nir_intrinsic_load_sampler_lod_parameters_pan)
408       return false;
409 
410    const nir_const_value constants[4] = {
411       nir_const_value_for_float(0.0f, 32),     /* min_lod */
412       nir_const_value_for_float(INFINITY, 32), /* max_lod */
413       nir_const_value_for_float(0.0f, 32),     /* lod_bias */
414    };
415 
416    b->cursor = nir_after_instr(&intr->instr);
417    nir_def_rewrite_uses(&intr->def, nir_build_imm(b, 3, 32, constants));
418    return true;
419 }
420 
421 static uint32_t
422 sampler_hw_index(uint32_t index)
423 {
424    return PAN_ARCH >= 9 ? pan_res_handle(PAN_BLIT_TABLE_SAMPLER, index) : index;
425 }
426 
427 static uint32_t
428 tex_hw_index(uint32_t index)
429 {
430    return PAN_ARCH >= 9 ? pan_res_handle(PAN_BLIT_TABLE_TEXTURE, index) : index;
431 }
432 
433 static uint32_t
434 attr_hw_index(uint32_t index)
435 {
436    return PAN_ARCH >= 9 ? pan_res_handle(PAN_BLIT_TABLE_ATTRIBUTE, index)
437                         : index;
438 }
439 
440 static const struct pan_blit_shader_data *
441 pan_blitter_get_blit_shader(struct pan_blitter_cache *cache,
442                             const struct pan_blit_shader_key *key)
443 {
444    pthread_mutex_lock(&cache->shaders.lock);
445    struct hash_entry *he =
446       _mesa_hash_table_search(cache->shaders.blit, key);
447    struct pan_blit_shader_data *shader = he ? he->data : NULL;
448 
449    if (shader)
450       goto out;
451 
452    unsigned coord_comps = 0;
453    unsigned sig_offset = 0;
454    char sig[256];
455    bool first = true;
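   /* Build a human-readable signature describing every active surface
    * (location, register type, dimension, sample counts); it is only used to
    * name the generated NIR shader for debugging. */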
456    for (unsigned i = 0; i < ARRAY_SIZE(key->surfaces); i++) {
457       const char *type_str, *dim_str;
458       if (key->surfaces[i].type == nir_type_invalid)
459          continue;
460 
461       switch (key->surfaces[i].type) {
462       case nir_type_float32:
463          type_str = "float";
464          break;
465       case nir_type_uint32:
466          type_str = "uint";
467          break;
468       case nir_type_int32:
469          type_str = "int";
470          break;
471       default:
472          unreachable("Invalid type\n");
473       }
474 
475       switch (key->surfaces[i].dim) {
476       case MALI_TEXTURE_DIMENSION_CUBE:
477          dim_str = "cube";
478          break;
479       case MALI_TEXTURE_DIMENSION_1D:
480          dim_str = "1D";
481          break;
482       case MALI_TEXTURE_DIMENSION_2D:
483          dim_str = "2D";
484          break;
485       case MALI_TEXTURE_DIMENSION_3D:
486          dim_str = "3D";
487          break;
488       default:
489          unreachable("Invalid dim\n");
490       }
491 
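      /* MALI_TEXTURE_DIMENSION_CUBE is the zero value, so `dim ?: 3` gives
       * the coordinate component count (cube maps need three, like 3D), plus
       * one extra component for the array layer. */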
492       coord_comps = MAX2(coord_comps, (key->surfaces[i].dim ?: 3) +
493                                          (key->surfaces[i].array ? 1 : 0));
494 
495       if (sig_offset >= sizeof(sig)) {
496          first = false;
497          continue;
498       }
499 
500       sig_offset +=
501          snprintf(sig + sig_offset, sizeof(sig) - sig_offset,
502                   "%s[%s;%s;%s%s;src_samples=%d,dst_samples=%d]",
503                   first ? "" : ",", gl_frag_result_name(key->surfaces[i].loc),
504                   type_str, dim_str, key->surfaces[i].array ? "[]" : "",
505                   key->surfaces[i].src_samples, key->surfaces[i].dst_samples);
506 
507       first = false;
508    }
509 
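   /* Build the blit fragment shader: interpolate the screen-space coordinate
    * varying, texel-fetch (or resolve) each active source surface, and store
    * the result to the matching colour, depth or stencil output. */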
510    nir_builder b = nir_builder_init_simple_shader(
511       MESA_SHADER_FRAGMENT, GENX(pan_shader_get_compiler_options)(),
512       "pan_blit(%s)", sig);
513 
514    nir_def *barycentric = nir_load_barycentric(
515       &b, nir_intrinsic_load_barycentric_pixel, INTERP_MODE_SMOOTH);
516    nir_def *coord = nir_load_interpolated_input(
517       &b, coord_comps, 32, barycentric, nir_imm_int(&b, 0),
518       .base = attr_hw_index(0), .dest_type = nir_type_float32,
519       .io_semantics.location = VARYING_SLOT_VAR0, .io_semantics.num_slots = 1);
520 
521    unsigned active_count = 0;
522    for (unsigned i = 0; i < ARRAY_SIZE(key->surfaces); i++) {
523       if (key->surfaces[i].type == nir_type_invalid)
524          continue;
525 
526       /* Resolve operations only work for N -> 1 samples. */
527       assert(key->surfaces[i].dst_samples == 1 ||
528              key->surfaces[i].src_samples == key->surfaces[i].dst_samples);
529 
530       bool resolve =
531          key->surfaces[i].src_samples > key->surfaces[i].dst_samples;
532       bool ms = key->surfaces[i].src_samples > 1;
533       enum glsl_sampler_dim sampler_dim;
534 
535       switch (key->surfaces[i].dim) {
536       case MALI_TEXTURE_DIMENSION_1D:
537          sampler_dim = GLSL_SAMPLER_DIM_1D;
538          break;
539       case MALI_TEXTURE_DIMENSION_2D:
540          sampler_dim = ms ? GLSL_SAMPLER_DIM_MS : GLSL_SAMPLER_DIM_2D;
541          break;
542       case MALI_TEXTURE_DIMENSION_3D:
543          sampler_dim = GLSL_SAMPLER_DIM_3D;
544          break;
545       case MALI_TEXTURE_DIMENSION_CUBE:
546          sampler_dim = GLSL_SAMPLER_DIM_CUBE;
547          break;
548       }
549 
550       nir_def *res = NULL;
551 
552       if (resolve) {
553          /* When resolving a float type, we need to calculate
554           * the average of all samples. For integer resolve, GL
555           * and Vulkan say that one sample should be chosen
556           * without telling which. Let's just pick the first one
557           * in that case.
558           */
559          nir_alu_type base_type =
560             nir_alu_type_get_base_type(key->surfaces[i].type);
561          unsigned nsamples =
562             base_type == nir_type_float ? key->surfaces[i].src_samples : 1;
563 
564          for (unsigned s = 0; s < nsamples; s++) {
565             nir_tex_instr *tex = nir_tex_instr_create(b.shader, 3);
566 
567             tex->op = nir_texop_txf_ms;
568             tex->dest_type = key->surfaces[i].type;
569             tex->texture_index = tex_hw_index(active_count);
570             tex->sampler_index = sampler_hw_index(0);
571             tex->is_array = key->surfaces[i].array;
572             tex->sampler_dim = sampler_dim;
573 
574             tex->src[0] =
575                nir_tex_src_for_ssa(nir_tex_src_coord, nir_f2i32(&b, coord));
576             tex->coord_components = coord_comps;
577 
578             tex->src[1] =
579                nir_tex_src_for_ssa(nir_tex_src_ms_index, nir_imm_int(&b, s));
580 
581             tex->src[2] =
582                nir_tex_src_for_ssa(nir_tex_src_lod, nir_imm_int(&b, 0));
583             nir_def_init(&tex->instr, &tex->def, 4, 32);
584             nir_builder_instr_insert(&b, &tex->instr);
585 
586             res = res ? nir_fadd(&b, res, &tex->def) : &tex->def;
587          }
588 
589          if (base_type == nir_type_float)
590             res = nir_fmul_imm(&b, res, 1.0f / nsamples);
591       } else {
592          nir_tex_instr *tex = nir_tex_instr_create(b.shader, ms ? 3 : 1);
593 
594          tex->dest_type = key->surfaces[i].type;
595          tex->texture_index = tex_hw_index(active_count);
596          tex->sampler_index = sampler_hw_index(0);
597          tex->is_array = key->surfaces[i].array;
598          tex->sampler_dim = sampler_dim;
599 
600          if (ms) {
601             tex->op = nir_texop_txf_ms;
602 
603             tex->src[0] =
604                nir_tex_src_for_ssa(nir_tex_src_coord, nir_f2i32(&b, coord));
605             tex->coord_components = coord_comps;
606 
607             tex->src[1] = nir_tex_src_for_ssa(nir_tex_src_ms_index,
608                                               nir_load_sample_id(&b));
609 
610             tex->src[2] =
611                nir_tex_src_for_ssa(nir_tex_src_lod, nir_imm_int(&b, 0));
612          } else {
613             tex->op = nir_texop_txl;
614 
615             tex->src[0] = nir_tex_src_for_ssa(nir_tex_src_coord, coord);
616             tex->coord_components = coord_comps;
617          }
618 
619          nir_def_init(&tex->instr, &tex->def, 4, 32);
620          nir_builder_instr_insert(&b, &tex->instr);
621          res = &tex->def;
622       }
623 
624       assert(res);
625 
626       if (key->surfaces[i].loc >= FRAG_RESULT_DATA0) {
627          nir_store_output(
628             &b, res, nir_imm_int(&b, 0), .base = active_count,
629             .src_type = key->surfaces[i].type,
630             .io_semantics.location = key->surfaces[i].loc,
631             .io_semantics.num_slots = 1,
632             .write_mask = nir_component_mask(res->num_components));
633       } else {
634          unsigned c = key->surfaces[i].loc == FRAG_RESULT_STENCIL ? 1 : 0;
635          nir_store_output(
636             &b, nir_channel(&b, res, c), nir_imm_int(&b, 0),
637             .base = active_count, .src_type = key->surfaces[i].type,
638             .io_semantics.location = key->surfaces[i].loc,
639             .io_semantics.num_slots = 1, .write_mask = nir_component_mask(1));
640       }
641       active_count++;
642    }
643 
644    struct panfrost_compile_inputs inputs = {
645       .gpu_id = cache->gpu_id,
646       .is_blit = true,
647       .no_idvs = true,
648    };
649    struct util_dynarray binary;
650 
651    util_dynarray_init(&binary, NULL);
652 
653    shader = rzalloc(cache->shaders.blit, struct pan_blit_shader_data);
654 
655    nir_shader_gather_info(b.shader, nir_shader_get_entrypoint(b.shader));
656 
657    for (unsigned i = 0; i < active_count; ++i)
658       BITSET_SET(b.shader->info.textures_used, i);
659 
660    pan_shader_preprocess(b.shader, inputs.gpu_id);
661 
662    if (PAN_ARCH == 4) {
663       NIR_PASS_V(b.shader, nir_shader_intrinsics_pass, lower_sampler_parameters,
664                  nir_metadata_control_flow, NULL);
665    }
666 
667    GENX(pan_shader_compile)(b.shader, &inputs, &binary, &shader->info);
668 
669    shader->key = *key;
670    shader->address =
671       pan_pool_upload_aligned(cache->shaders.pool, binary.data,
672                               binary.size, PAN_ARCH >= 6 ? 128 : 64);
673 
674    util_dynarray_fini(&binary);
675    ralloc_free(b.shader);
676 
677 #if PAN_ARCH >= 6
678    for (unsigned i = 0; i < ARRAY_SIZE(shader->blend_ret_offsets); i++) {
679       shader->blend_ret_offsets[i] =
680          shader->info.bifrost.blend[i].return_offset;
681       shader->blend_types[i] = shader->info.bifrost.blend[i].type;
682    }
683 #endif
684 
685    _mesa_hash_table_insert(cache->shaders.blit, &shader->key, shader);
686 
687 out:
688    pthread_mutex_unlock(&cache->shaders.lock);
689    return shader;
690 }
691 
692 static struct pan_blit_shader_key
693 pan_blitter_get_key(struct pan_blitter_views *views)
694 {
695    struct pan_blit_shader_key key = {0};
696 
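   /* Z/S and colour are never blitted with the same key (pan_blitter_get_rsd()
    * asserts this), so surfaces[0]/[1] can describe depth and stencil without
    * clashing with the colour render-target slots filled below. */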
697    if (views->src_z) {
698       assert(views->dst_z);
699       key.surfaces[0].loc = FRAG_RESULT_DEPTH;
700       key.surfaces[0].type = nir_type_float32;
701       key.surfaces[0].src_samples = pan_image_view_get_nr_samples(views->src_z);
702       key.surfaces[0].dst_samples = pan_image_view_get_nr_samples(views->dst_z);
703       key.surfaces[0].dim = views->src_z->dim;
704       key.surfaces[0].array =
705          views->src_z->first_layer != views->src_z->last_layer;
706    }
707 
708    if (views->src_s) {
709       assert(views->dst_s);
710       key.surfaces[1].loc = FRAG_RESULT_STENCIL;
711       key.surfaces[1].type = nir_type_uint32;
712       key.surfaces[1].src_samples = pan_image_view_get_nr_samples(views->src_s);
713       key.surfaces[1].dst_samples = pan_image_view_get_nr_samples(views->dst_s);
714       key.surfaces[1].dim = views->src_s->dim;
715       key.surfaces[1].array =
716          views->src_s->first_layer != views->src_s->last_layer;
717    }
718 
719    for (unsigned i = 0; i < views->rt_count; i++) {
720       if (!views->src_rts[i])
721          continue;
722 
723       assert(views->dst_rts[i]);
724       key.surfaces[i].loc = FRAG_RESULT_DATA0 + i;
725       key.surfaces[i].type =
726          util_format_is_pure_uint(views->src_rts[i]->format) ? nir_type_uint32
727          : util_format_is_pure_sint(views->src_rts[i]->format)
728             ? nir_type_int32
729             : nir_type_float32;
730       key.surfaces[i].src_samples =
731          pan_image_view_get_nr_samples(views->src_rts[i]);
732       key.surfaces[i].dst_samples =
733          pan_image_view_get_nr_samples(views->dst_rts[i]);
734       key.surfaces[i].dim = views->src_rts[i]->dim;
735       key.surfaces[i].array =
736          views->src_rts[i]->first_layer != views->src_rts[i]->last_layer;
737    }
738 
739    return key;
740 }
741 
742 #if PAN_ARCH <= 7
743 static mali_ptr
744 pan_blitter_get_rsd(struct pan_blitter_cache *cache,
745                     struct pan_blitter_views *views)
746 {
747    struct pan_blit_rsd_key rsd_key = {0};
748 
749    assert(!views->rt_count || (!views->src_z && !views->src_s));
750 
751    struct pan_blit_shader_key blit_key = pan_blitter_get_key(views);
752 
753    if (views->src_z) {
754       assert(views->dst_z);
755       rsd_key.z.format = views->dst_z->format;
756       rsd_key.z.type = blit_key.surfaces[0].type;
757       rsd_key.z.src_samples = blit_key.surfaces[0].src_samples;
758       rsd_key.z.dst_samples = blit_key.surfaces[0].dst_samples;
759       rsd_key.z.dim = blit_key.surfaces[0].dim;
760       rsd_key.z.array = blit_key.surfaces[0].array;
761    }
762 
763    if (views->src_s) {
764       assert(views->dst_s);
765       rsd_key.s.format = views->dst_s->format;
766       rsd_key.s.type = blit_key.surfaces[1].type;
767       rsd_key.s.src_samples = blit_key.surfaces[1].src_samples;
768       rsd_key.s.dst_samples = blit_key.surfaces[1].dst_samples;
769       rsd_key.s.dim = blit_key.surfaces[1].dim;
770       rsd_key.s.array = blit_key.surfaces[1].array;
771    }
772 
773    for (unsigned i = 0; i < views->rt_count; i++) {
774       if (!views->src_rts[i])
775          continue;
776 
777       assert(views->dst_rts[i]);
778       rsd_key.rts[i].format = views->dst_rts[i]->format;
779       rsd_key.rts[i].type = blit_key.surfaces[i].type;
780       rsd_key.rts[i].src_samples = blit_key.surfaces[i].src_samples;
781       rsd_key.rts[i].dst_samples = blit_key.surfaces[i].dst_samples;
782       rsd_key.rts[i].dim = blit_key.surfaces[i].dim;
783       rsd_key.rts[i].array = blit_key.surfaces[i].array;
784    }
785 
786    pthread_mutex_lock(&cache->rsds.lock);
787    struct hash_entry *he =
788       _mesa_hash_table_search(cache->rsds.rsds, &rsd_key);
789    struct pan_blit_rsd_data *rsd = he ? he->data : NULL;
790    if (rsd)
791       goto out;
792 
793    rsd = rzalloc(cache->rsds.rsds, struct pan_blit_rsd_data);
794    rsd->key = rsd_key;
795 
796 #if PAN_ARCH == 4
797    struct panfrost_ptr rsd_ptr =
798       pan_pool_alloc_desc(cache->rsds.pool, RENDERER_STATE);
799 #else
800    unsigned bd_count = PAN_ARCH >= 5 ? MAX2(views->rt_count, 1) : 0;
801    struct panfrost_ptr rsd_ptr = pan_pool_alloc_desc_aggregate(
802       cache->rsds.pool, PAN_DESC(RENDERER_STATE),
803       PAN_DESC_ARRAY(bd_count, BLEND));
804 #endif
805 
806    mali_ptr blend_shaders[8] = {0};
807 
808    const struct pan_blit_shader_data *blit_shader =
809       pan_blitter_get_blit_shader(cache, &blit_key);
810 
811 #if PAN_ARCH <= 5
812    pan_blitter_get_blend_shaders(cache,
813                                  views->rt_count, views->dst_rts, blit_shader,
814                                  blend_shaders);
815 #endif
816 
817    pan_blitter_emit_rsd(blit_shader, views, blend_shaders, rsd_ptr.cpu);
818    rsd->address = rsd_ptr.gpu;
819    _mesa_hash_table_insert(cache->rsds.rsds, &rsd->key, rsd);
820 
821 out:
822    pthread_mutex_unlock(&cache->rsds.lock);
823    return rsd->address;
824 }
825 #endif
826 
827 static struct pan_blitter_views
828 pan_preload_get_views(const struct pan_fb_info *fb, bool zs,
829                       struct pan_image_view *patched_s)
830 {
831    struct pan_blitter_views views = {0};
832 
833    if (zs) {
834       if (fb->zs.preload.z)
835          views.src_z = views.dst_z = fb->zs.view.zs;
836 
837       if (fb->zs.preload.s) {
838          const struct pan_image_view *view = fb->zs.view.s ?: fb->zs.view.zs;
839          enum pipe_format fmt = util_format_get_depth_only(view->format);
840 
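         /* Stencil is preloaded by sampling the combined depth/stencil
          * texture with a stencil-only format, so packed Z/S formats are
          * re-typed to their X*S* equivalents below. */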
841          switch (view->format) {
842          case PIPE_FORMAT_Z24_UNORM_S8_UINT:
843             fmt = PIPE_FORMAT_X24S8_UINT;
844             break;
845          case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
846             fmt = PIPE_FORMAT_X32_S8X24_UINT;
847             break;
848          default:
849             fmt = view->format;
850             break;
851          }
852 
853          if (fmt != view->format) {
854             *patched_s = *view;
855             patched_s->format = fmt;
856             views.src_s = views.dst_s = patched_s;
857          } else {
858             views.src_s = views.dst_s = view;
859          }
860       }
861    } else {
862       for (unsigned i = 0; i < fb->rt_count; i++) {
863          if (fb->rts[i].preload) {
864             views.src_rts[i] = fb->rts[i].view;
865             views.dst_rts[i] = fb->rts[i].view;
866          }
867       }
868 
869       views.rt_count = fb->rt_count;
870    }
871 
872    return views;
873 }
874 
875 static bool
876 pan_preload_needed(const struct pan_fb_info *fb, bool zs)
877 {
878    if (zs) {
879       if (fb->zs.preload.z || fb->zs.preload.s)
880          return true;
881    } else {
882       for (unsigned i = 0; i < fb->rt_count; i++) {
883          if (fb->rts[i].preload)
884             return true;
885       }
886    }
887 
888    return false;
889 }
890 
891 static mali_ptr
892 pan_blitter_emit_varying(struct pan_pool *pool)
893 {
894    struct panfrost_ptr varying = pan_pool_alloc_desc(pool, ATTRIBUTE);
895 
896    pan_pack(varying.cpu, ATTRIBUTE, cfg) {
897       cfg.buffer_index = 0;
898       cfg.offset_enable = PAN_ARCH <= 5;
899       cfg.format =
900          GENX(panfrost_format_from_pipe_format)(PIPE_FORMAT_R32G32B32_FLOAT)->hw;
901 
902 #if PAN_ARCH >= 9
903       cfg.attribute_type = MALI_ATTRIBUTE_TYPE_1D;
904       cfg.table = PAN_BLIT_TABLE_ATTRIBUTE_BUFFER;
905       cfg.frequency = MALI_ATTRIBUTE_FREQUENCY_VERTEX;
906       cfg.stride = 4 * sizeof(float);
907 #endif
908    }
909 
910    return varying.gpu;
911 }
912 
913 static mali_ptr
914 pan_blitter_emit_varying_buffer(struct pan_pool *pool, mali_ptr coordinates)
915 {
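   /* The coordinate buffer uploaded by GENX(pan_preload_fb) holds four
    * (x, y, layer, 1.0) vertices covering the framebuffer, hence the
    * 4 * sizeof(float) stride and the count of four used below. */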
916 #if PAN_ARCH >= 9
917    struct panfrost_ptr varying_buffer = pan_pool_alloc_desc(pool, BUFFER);
918 
919    pan_pack(varying_buffer.cpu, BUFFER, cfg) {
920       cfg.address = coordinates;
921       cfg.size = 4 * sizeof(float) * 4;
922    }
923 #else
924    /* Bifrost needs an empty desc to mark end of prefetching */
925    bool padding_buffer = PAN_ARCH >= 6;
926 
927    struct panfrost_ptr varying_buffer = pan_pool_alloc_desc_array(
928       pool, (padding_buffer ? 2 : 1), ATTRIBUTE_BUFFER);
929 
930    pan_pack(varying_buffer.cpu, ATTRIBUTE_BUFFER, cfg) {
931       cfg.pointer = coordinates;
932       cfg.stride = 4 * sizeof(float);
933       cfg.size = cfg.stride * 4;
934    }
935 
936    if (padding_buffer) {
937       pan_pack(varying_buffer.cpu + pan_size(ATTRIBUTE_BUFFER),
938                ATTRIBUTE_BUFFER, cfg)
939          ;
940    }
941 #endif
942 
943    return varying_buffer.gpu;
944 }
945 
946 static mali_ptr
947 pan_blitter_emit_sampler(struct pan_pool *pool, bool nearest_filter)
948 {
949    struct panfrost_ptr sampler = pan_pool_alloc_desc(pool, SAMPLER);
950 
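   /* Blit shaders address the source in unnormalized texel coordinates;
    * nearest_filter selects point sampling for both minification and
    * magnification (preloads always use it, since texels map 1:1). */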
951    pan_pack(sampler.cpu, SAMPLER, cfg) {
952       cfg.seamless_cube_map = false;
953       cfg.normalized_coordinates = false;
954       cfg.minify_nearest = nearest_filter;
955       cfg.magnify_nearest = nearest_filter;
956    }
957 
958    return sampler.gpu;
959 }
960 
961 static mali_ptr
962 pan_blitter_emit_textures(struct pan_pool *pool, unsigned tex_count,
963                           const struct pan_image_view **views)
964 {
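   /* Bifrost and newer consume a contiguous array of TEXTURE descriptors,
    * while Midgard expects an array of pointers to individually allocated
    * descriptors, hence the two layouts below. */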
965 #if PAN_ARCH >= 6
966    struct panfrost_ptr textures =
967       pan_pool_alloc_desc_array(pool, tex_count, TEXTURE);
968 
969    for (unsigned i = 0; i < tex_count; i++) {
970       void *texture = textures.cpu + (pan_size(TEXTURE) * i);
971       size_t payload_size =
972          GENX(panfrost_estimate_texture_payload_size)(views[i]);
973       struct panfrost_ptr surfaces =
974          pan_pool_alloc_aligned(pool, payload_size, 64);
975 
976       GENX(panfrost_new_texture)(views[i], texture, &surfaces);
977    }
978 
979    return textures.gpu;
980 #else
981    mali_ptr textures[8] = {0};
982 
983    for (unsigned i = 0; i < tex_count; i++) {
984       size_t sz = pan_size(TEXTURE) +
985                   GENX(panfrost_estimate_texture_payload_size)(views[i]);
986       struct panfrost_ptr texture =
987          pan_pool_alloc_aligned(pool, sz, pan_alignment(TEXTURE));
988       struct panfrost_ptr surfaces = {
989          .cpu = texture.cpu + pan_size(TEXTURE),
990          .gpu = texture.gpu + pan_size(TEXTURE),
991       };
992 
993       GENX(panfrost_new_texture)(views[i], texture.cpu, &surfaces);
994       textures[i] = texture.gpu;
995    }
996 
997    return pan_pool_upload_aligned(pool, textures, tex_count * sizeof(mali_ptr),
998                                   sizeof(mali_ptr));
999 #endif
1000 }
1001 
1002 static mali_ptr
1003 pan_preload_emit_textures(struct pan_pool *pool, const struct pan_fb_info *fb,
1004                           bool zs, unsigned *tex_count_out)
1005 {
1006    const struct pan_image_view *views[8];
1007    struct pan_image_view patched_s_view;
1008    unsigned tex_count = 0;
1009 
1010    if (zs) {
1011       if (fb->zs.preload.z)
1012          views[tex_count++] = fb->zs.view.zs;
1013 
1014       if (fb->zs.preload.s) {
1015          const struct pan_image_view *view = fb->zs.view.s ?: fb->zs.view.zs;
1016          enum pipe_format fmt = util_format_get_depth_only(view->format);
1017 
1018          switch (view->format) {
1019          case PIPE_FORMAT_Z24_UNORM_S8_UINT:
1020             fmt = PIPE_FORMAT_X24S8_UINT;
1021             break;
1022          case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
1023             fmt = PIPE_FORMAT_X32_S8X24_UINT;
1024             break;
1025          default:
1026             fmt = view->format;
1027             break;
1028          }
1029 
1030          if (fmt != view->format) {
1031             patched_s_view = *view;
1032             patched_s_view.format = fmt;
1033             view = &patched_s_view;
1034          }
1035          views[tex_count++] = view;
1036       }
1037    } else {
1038       for (unsigned i = 0; i < fb->rt_count; i++) {
1039          if (fb->rts[i].preload)
1040             views[tex_count++] = fb->rts[i].view;
1041       }
1042    }
1043 
1044    *tex_count_out = tex_count;
1045 
1046    return pan_blitter_emit_textures(pool, tex_count, views);
1047 }
1048 
1049 #if PAN_ARCH >= 8
1050 /* TODO: cache */
1051 static mali_ptr
1052 pan_blitter_emit_zs(struct pan_pool *pool, bool z, bool s)
1053 {
1054    struct panfrost_ptr zsd = pan_pool_alloc_desc(pool, DEPTH_STENCIL);
1055 
1056    pan_pack(zsd.cpu, DEPTH_STENCIL, cfg) {
1057       cfg.depth_function = MALI_FUNC_ALWAYS;
1058       cfg.depth_write_enable = z;
1059 
1060       if (z)
1061          cfg.depth_source = MALI_DEPTH_SOURCE_SHADER;
1062 
1063       cfg.stencil_test_enable = s;
1064       cfg.stencil_from_shader = s;
1065 
1066       cfg.front_compare_function = MALI_FUNC_ALWAYS;
1067       cfg.front_stencil_fail = MALI_STENCIL_OP_REPLACE;
1068       cfg.front_depth_fail = MALI_STENCIL_OP_REPLACE;
1069       cfg.front_depth_pass = MALI_STENCIL_OP_REPLACE;
1070       cfg.front_write_mask = 0xFF;
1071       cfg.front_value_mask = 0xFF;
1072 
1073       cfg.back_compare_function = MALI_FUNC_ALWAYS;
1074       cfg.back_stencil_fail = MALI_STENCIL_OP_REPLACE;
1075       cfg.back_depth_fail = MALI_STENCIL_OP_REPLACE;
1076       cfg.back_depth_pass = MALI_STENCIL_OP_REPLACE;
1077       cfg.back_write_mask = 0xFF;
1078       cfg.back_value_mask = 0xFF;
1079 
1080       cfg.depth_cull_enable = false;
1081    }
1082 
1083    return zsd.gpu;
1084 }
1085 #else
1086 static mali_ptr
1087 pan_blitter_emit_viewport(struct pan_pool *pool, uint16_t minx, uint16_t miny,
1088                           uint16_t maxx, uint16_t maxy)
1089 {
1090    struct panfrost_ptr vp = pan_pool_alloc_desc(pool, VIEWPORT);
1091 
1092    pan_pack(vp.cpu, VIEWPORT, cfg) {
1093       cfg.scissor_minimum_x = minx;
1094       cfg.scissor_minimum_y = miny;
1095       cfg.scissor_maximum_x = maxx;
1096       cfg.scissor_maximum_y = maxy;
1097    }
1098 
1099    return vp.gpu;
1100 }
1101 #endif
1102 
1103 static void
1104 pan_preload_emit_dcd(struct pan_blitter_cache *cache,
1105                      struct pan_pool *pool, struct pan_fb_info *fb, bool zs,
1106                      mali_ptr coordinates, mali_ptr tsd, void *out,
1107                      bool always_write)
1108 {
1109    unsigned tex_count = 0;
1110    mali_ptr textures = pan_preload_emit_textures(pool, fb, zs, &tex_count);
1111    mali_ptr samplers = pan_blitter_emit_sampler(pool, true);
1112    mali_ptr varyings = pan_blitter_emit_varying(pool);
1113    mali_ptr varying_buffers =
1114       pan_blitter_emit_varying_buffer(pool, coordinates);
1115 
1116    /* Tiles updated by blit shaders are still considered clean (separate
1117     * for colour and Z/S), allowing us to suppress unnecessary writeback
1118     */
1119    UNUSED bool clean_fragment_write = !always_write;
1120 
1121    /* Image view used when patching stencil formats for combined
1122     * depth/stencil preloads.
1123     */
1124    struct pan_image_view patched_s;
1125 
1126    struct pan_blitter_views views = pan_preload_get_views(fb, zs, &patched_s);
1127 
1128 #if PAN_ARCH <= 7
1129    pan_pack(out, DRAW, cfg) {
1130       uint16_t minx = 0, miny = 0, maxx, maxy;
1131 
1132       if (PAN_ARCH == 4) {
1133          maxx = fb->width - 1;
1134          maxy = fb->height - 1;
1135       } else {
1136          /* Align on 32x32 tiles */
1137          minx = fb->extent.minx & ~31;
1138          miny = fb->extent.miny & ~31;
1139          maxx = MIN2(ALIGN_POT(fb->extent.maxx + 1, 32), fb->width) - 1;
1140          maxy = MIN2(ALIGN_POT(fb->extent.maxy + 1, 32), fb->height) - 1;
1141       }
1142 
1143       cfg.thread_storage = tsd;
1144       cfg.state = pan_blitter_get_rsd(cache, &views);
1145 
1146       cfg.position = coordinates;
1147       cfg.viewport = pan_blitter_emit_viewport(pool, minx, miny, maxx, maxy);
1148 
1149       cfg.varyings = varyings;
1150       cfg.varying_buffers = varying_buffers;
1151       cfg.textures = textures;
1152       cfg.samplers = samplers;
1153 
1154 #if PAN_ARCH >= 6
1155       cfg.clean_fragment_write = clean_fragment_write;
1156 #endif
1157    }
1158 #else
1159    struct panfrost_ptr T;
1160    unsigned nr_tables = PAN_BLIT_NUM_RESOURCE_TABLES;
1161 
1162    /* Although individual resources need only 16 byte alignment, the
1163     * resource table as a whole must be 64-byte aligned.
1164     */
1165    T = pan_pool_alloc_aligned(pool, nr_tables * pan_size(RESOURCE), 64);
1166    memset(T.cpu, 0, nr_tables * pan_size(RESOURCE));
1167 
1168    panfrost_make_resource_table(T, PAN_BLIT_TABLE_TEXTURE, textures, tex_count);
1169    panfrost_make_resource_table(T, PAN_BLIT_TABLE_SAMPLER, samplers, 1);
1170    panfrost_make_resource_table(T, PAN_BLIT_TABLE_ATTRIBUTE, varyings, 1);
1171    panfrost_make_resource_table(T, PAN_BLIT_TABLE_ATTRIBUTE_BUFFER,
1172                                 varying_buffers, 1);
1173 
1174    struct pan_blit_shader_key key = pan_blitter_get_key(&views);
1175    const struct pan_blit_shader_data *blit_shader =
1176       pan_blitter_get_blit_shader(cache, &key);
1177 
1178    bool z = fb->zs.preload.z;
1179    bool s = fb->zs.preload.s;
1180    bool ms = pan_blitter_is_ms(&views);
1181 
1182    struct panfrost_ptr spd = pan_pool_alloc_desc(pool, SHADER_PROGRAM);
1183    pan_pack(spd.cpu, SHADER_PROGRAM, cfg) {
1184       cfg.stage = MALI_SHADER_STAGE_FRAGMENT;
1185       cfg.fragment_coverage_bitmask_type = MALI_COVERAGE_BITMASK_TYPE_GL;
1186       cfg.register_allocation = MALI_SHADER_REGISTER_ALLOCATION_32_PER_THREAD;
1187       cfg.binary = blit_shader->address;
1188       cfg.preload.r48_r63 = blit_shader->info.preload >> 48;
1189    }
1190 
1191    unsigned bd_count = views.rt_count;
1192    struct panfrost_ptr blend = pan_pool_alloc_desc_array(pool, bd_count, BLEND);
1193 
1194    if (!zs) {
1195       pan_blitter_emit_blends(blit_shader, &views, NULL, blend.cpu);
1196    }
1197 
1198    pan_pack(out, DRAW, cfg) {
1199       if (zs) {
1200          /* ZS_EMIT requires late update/kill */
1201          cfg.zs_update_operation = MALI_PIXEL_KILL_FORCE_LATE;
1202          cfg.pixel_kill_operation = MALI_PIXEL_KILL_FORCE_LATE;
1203          cfg.blend_count = 0;
1204       } else {
1205          /* Skipping ATEST requires forcing Z/S */
1206          cfg.zs_update_operation = MALI_PIXEL_KILL_STRONG_EARLY;
1207          cfg.pixel_kill_operation = MALI_PIXEL_KILL_FORCE_EARLY;
1208 
1209          cfg.blend = blend.gpu;
1210          cfg.blend_count = bd_count;
1211          cfg.render_target_mask = 0x1;
1212       }
1213 
1214       cfg.allow_forward_pixel_to_kill = !zs;
1215       cfg.allow_forward_pixel_to_be_killed = true;
1216       cfg.depth_stencil = pan_blitter_emit_zs(pool, z, s);
1217       cfg.sample_mask = 0xFFFF;
1218       cfg.multisample_enable = ms;
1219       cfg.evaluate_per_sample = ms;
1220       cfg.maximum_z = 1.0;
1221       cfg.clean_fragment_write = clean_fragment_write;
1222       cfg.shader.resources = T.gpu | nr_tables;
1223       cfg.shader.shader = spd.gpu;
1224       cfg.shader.thread_storage = tsd;
1225    }
1226 #endif
1227 }
1228 
1229 #if PAN_ARCH >= 6
1230 static void
1231 pan_preload_fb_alloc_pre_post_dcds(struct pan_pool *desc_pool,
1232                                    struct pan_fb_info *fb)
1233 {
1234    if (fb->bifrost.pre_post.dcds.gpu)
1235       return;
1236 
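   /* Three DRAW descriptors: slot 0 is the colour pre-frame shader, slot 1
    * the Z/S pre-frame shader (see dcd_idx in the caller); the third slot is
    * reserved for a post-frame shader, which is not emitted here. */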
1237    fb->bifrost.pre_post.dcds = pan_pool_alloc_desc_array(desc_pool, 3, DRAW);
1238 }
1239 
1240 static void
1241 pan_preload_emit_pre_frame_dcd(struct pan_blitter_cache *cache,
1242                                struct pan_pool *desc_pool,
1243                                struct pan_fb_info *fb, bool zs, mali_ptr coords,
1244                                mali_ptr tsd)
1245 {
1246    unsigned dcd_idx = zs ? 1 : 0;
1247    pan_preload_fb_alloc_pre_post_dcds(desc_pool, fb);
1248    assert(fb->bifrost.pre_post.dcds.cpu);
1249    void *dcd = fb->bifrost.pre_post.dcds.cpu + (dcd_idx * pan_size(DRAW));
1250 
1251    /* We only use crc_rt to determine whether to force writes for updating
1252     * the CRCs, so use a conservative tile size (16x16).
1253     */
1254    int crc_rt = GENX(pan_select_crc_rt)(fb, 16 * 16);
1255 
1256    bool always_write = false;
1257 
1258    /* If CRC data is currently invalid and this batch will make it valid,
1259     * write even clean tiles to make sure CRC data is updated. */
1260    if (crc_rt >= 0) {
1261       bool *valid = fb->rts[crc_rt].crc_valid;
1262       bool full = !fb->extent.minx && !fb->extent.miny &&
1263                   fb->extent.maxx == (fb->width - 1) &&
1264                   fb->extent.maxy == (fb->height - 1);
1265 
1266       if (full && !(*valid))
1267          always_write = true;
1268    }
1269 
1270    pan_preload_emit_dcd(cache, desc_pool, fb, zs, coords, tsd, dcd,
1271                         always_write);
1272    if (zs) {
1273       enum pipe_format fmt = fb->zs.view.zs
1274                                 ? fb->zs.view.zs->planes[0]->layout.format
1275                                 : fb->zs.view.s->planes[0]->layout.format;
1276       bool always = false;
1277 
1278       /* If we're dealing with a combined ZS resource and only one
1279        * component is cleared, we need to reload the whole surface
1280        * because the zs_clean_pixel_write_enable flag is set in that
1281        * case.
1282        */
1283       if (util_format_is_depth_and_stencil(fmt) &&
1284           fb->zs.clear.z != fb->zs.clear.s)
1285          always = true;
1286 
1287       /* We could use INTERSECT on Bifrost v7 too, but
1288        * EARLY_ZS_ALWAYS has the advantage of reloading the ZS tile
1289        * buffer one or more tiles ahead, making ZS data immediately
1290        * available for any ZS tests taking place in other shaders.
1291        * Things haven't been benchmarked to determine what's
1292        * preferable (saving bandwidth vs having ZS preloaded
1293        * earlier), so let's leave it like that for now.
1294        */
1295       fb->bifrost.pre_post.modes[dcd_idx] =
1296          PAN_ARCH > 6
1297             ? MALI_PRE_POST_FRAME_SHADER_MODE_EARLY_ZS_ALWAYS
1298          : always ? MALI_PRE_POST_FRAME_SHADER_MODE_ALWAYS
1299                   : MALI_PRE_POST_FRAME_SHADER_MODE_INTERSECT;
1300    } else {
1301       fb->bifrost.pre_post.modes[dcd_idx] =
1302          always_write ? MALI_PRE_POST_FRAME_SHADER_MODE_ALWAYS
1303                       : MALI_PRE_POST_FRAME_SHADER_MODE_INTERSECT;
1304    }
1305 }
1306 #else
1307 static struct panfrost_ptr
1308 pan_preload_emit_tiler_job(struct pan_blitter_cache *cache, struct pan_pool *desc_pool,
1309                            struct pan_fb_info *fb, bool zs, mali_ptr coords,
1310                            mali_ptr tsd)
1311 {
1312    struct panfrost_ptr job = pan_pool_alloc_desc(desc_pool, TILER_JOB);
1313 
1314    pan_preload_emit_dcd(cache, desc_pool, fb, zs, coords, tsd,
1315                         pan_section_ptr(job.cpu, TILER_JOB, DRAW), false);
1316 
1317    pan_section_pack(job.cpu, TILER_JOB, PRIMITIVE, cfg) {
1318       cfg.draw_mode = MALI_DRAW_MODE_TRIANGLE_STRIP;
1319       cfg.index_count = 4;
1320       cfg.job_task_split = 6;
1321    }
1322 
1323    pan_section_pack(job.cpu, TILER_JOB, PRIMITIVE_SIZE, cfg) {
1324       cfg.constant = 1.0f;
1325    }
1326 
1327    void *invoc = pan_section_ptr(job.cpu, TILER_JOB, INVOCATION);
1328    panfrost_pack_work_groups_compute(invoc, 1, 4, 1, 1, 1, 1, true, false);
1329 
1330    return job;
1331 }
1332 #endif
1333 
1334 static struct panfrost_ptr
1335 pan_preload_fb_part(struct pan_blitter_cache *cache, struct pan_pool *pool,
1336                     struct pan_fb_info *fb, bool zs, mali_ptr coords,
1337                     mali_ptr tsd)
1338 {
1339    struct panfrost_ptr job = {0};
1340 
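   /* On Bifrost and newer, preloads become pre-frame shaders hooked into the
    * framebuffer descriptor, so no job is returned; on Midgard they are
    * standalone TILER jobs that the caller must submit. */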
1341 #if PAN_ARCH >= 6
1342    pan_preload_emit_pre_frame_dcd(cache, pool, fb, zs, coords, tsd);
1343 #else
1344    job = pan_preload_emit_tiler_job(cache, pool, fb, zs, coords, tsd);
1345 #endif
1346    return job;
1347 }
1348 
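/* Sketch of how a driver might use this entry point (names such as
 * dev->blitter, batch->pool.base and the job-chaining step are illustrative
 * assumptions, not part of this file):
 *
 *    struct panfrost_ptr jobs[2];
 *    unsigned n = GENX(pan_preload_fb)(&dev->blitter, &batch->pool.base,
 *                                      &fb, layer, tsd, jobs);
 *    // On Midgard, chain the n returned TILER jobs into the job chain;
 *    // on Bifrost+, n is 0 and the preload runs as pre-frame shaders.
 */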
1349 unsigned
1350 GENX(pan_preload_fb)(struct pan_blitter_cache *cache, struct pan_pool *pool,
1351                      struct pan_fb_info *fb, unsigned layer_idx, mali_ptr tsd,
1352                      struct panfrost_ptr *jobs)
1353 {
1354    bool preload_zs = pan_preload_needed(fb, true);
1355    bool preload_rts = pan_preload_needed(fb, false);
1356    mali_ptr coords;
1357 
1358    if (!preload_zs && !preload_rts)
1359       return 0;
1360 
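   /* Full-framebuffer quad emitted as a four-vertex triangle strip (see
    * pan_preload_emit_tiler_job() on Midgard); the third component carries
    * the destination layer so layered framebuffers can be preloaded. */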
1361    float rect[] = {
1362       0.0,       0.0,        layer_idx, 1.0,
1363       fb->width, 0.0,        layer_idx, 1.0,
1364       0.0,       fb->height, layer_idx, 1.0,
1365       fb->width, fb->height, layer_idx, 1.0,
1366    };
1367 
1368    coords = pan_pool_upload_aligned(pool, rect, sizeof(rect), 64);
1369 
1370    unsigned njobs = 0;
1371    if (preload_zs) {
1372       struct panfrost_ptr job =
1373          pan_preload_fb_part(cache, pool, fb, true, coords, tsd);
1374       if (jobs && job.cpu)
1375          jobs[njobs++] = job;
1376    }
1377 
1378    if (preload_rts) {
1379       struct panfrost_ptr job =
1380          pan_preload_fb_part(cache, pool, fb, false, coords, tsd);
1381       if (jobs && job.cpu)
1382          jobs[njobs++] = job;
1383    }
1384 
1385    return njobs;
1386 }
1387 
1388 DERIVE_HASH_TABLE(pan_blit_shader_key);
1389 DERIVE_HASH_TABLE(pan_blit_blend_shader_key);
1390 DERIVE_HASH_TABLE(pan_blit_rsd_key);
1391 
1392 static void
1393 pan_blitter_prefill_blit_shader_cache(struct pan_blitter_cache *cache)
1394 {
1395    static const struct pan_blit_shader_key prefill[] = {
1396       {
1397          .surfaces[0] =
1398             {
1399                .loc = FRAG_RESULT_DEPTH,
1400                .type = nir_type_float32,
1401                .dim = MALI_TEXTURE_DIMENSION_2D,
1402                .src_samples = 1,
1403                .dst_samples = 1,
1404             },
1405       },
1406       {
1407          .surfaces[1] =
1408             {
1409                .loc = FRAG_RESULT_STENCIL,
1410                .type = nir_type_uint32,
1411                .dim = MALI_TEXTURE_DIMENSION_2D,
1412                .src_samples = 1,
1413                .dst_samples = 1,
1414             },
1415       },
1416       {
1417          .surfaces[0] =
1418             {
1419                .loc = FRAG_RESULT_DATA0,
1420                .type = nir_type_float32,
1421                .dim = MALI_TEXTURE_DIMENSION_2D,
1422                .src_samples = 1,
1423                .dst_samples = 1,
1424             },
1425       },
1426    };
1427 
1428    for (unsigned i = 0; i < ARRAY_SIZE(prefill); i++)
1429       pan_blitter_get_blit_shader(cache, &prefill[i]);
1430 }
1431 
1432 void
1433 GENX(pan_blitter_cache_init)(struct pan_blitter_cache *cache,
1434                              unsigned gpu_id,
1435                              struct pan_blend_shader_cache *blend_shader_cache,
1436                              struct pan_pool *bin_pool,
1437                              struct pan_pool *desc_pool)
1438 {
1439    cache->gpu_id = gpu_id;
1440    cache->shaders.blit = pan_blit_shader_key_table_create(NULL);
1441    cache->shaders.blend = pan_blit_blend_shader_key_table_create(NULL);
1442    cache->shaders.pool = bin_pool;
1443    pthread_mutex_init(&cache->shaders.lock, NULL);
1444    pan_blitter_prefill_blit_shader_cache(cache);
1445 
1446    cache->rsds.pool = desc_pool;
1447    cache->rsds.rsds = pan_blit_rsd_key_table_create(NULL);
1448    pthread_mutex_init(&cache->rsds.lock, NULL);
1449    cache->blend_shader_cache = blend_shader_cache;
1450 }
1451 
1452 void
1453 GENX(pan_blitter_cache_cleanup)(struct pan_blitter_cache *cache)
1454 {
1455    _mesa_hash_table_destroy(cache->shaders.blit, NULL);
1456    _mesa_hash_table_destroy(cache->shaders.blend, NULL);
1457    pthread_mutex_destroy(&cache->shaders.lock);
1458    _mesa_hash_table_destroy(cache->rsds.rsds, NULL);
1459    pthread_mutex_destroy(&cache->rsds.lock);
1460 }
1461