xref: /aosp_15_r20/external/mesa3d/src/panfrost/vulkan/jm/panvk_vX_cmd_draw.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright © 2024 Collabora Ltd.
3  *
4  * Derived from tu_cmd_buffer.c which is:
5  * Copyright © 2016 Red Hat.
6  * Copyright © 2016 Bas Nieuwenhuizen
7  * Copyright © 2015 Intel Corporation
8  *
9  * SPDX-License-Identifier: MIT
10  */
11 
12 #include "genxml/gen_macros.h"
13 
14 #include "panvk_buffer.h"
15 #include "panvk_cmd_alloc.h"
16 #include "panvk_cmd_buffer.h"
17 #include "panvk_cmd_desc_state.h"
18 #include "panvk_cmd_meta.h"
19 #include "panvk_device.h"
20 #include "panvk_entrypoints.h"
21 #include "panvk_image.h"
22 #include "panvk_image_view.h"
23 #include "panvk_instance.h"
24 #include "panvk_priv_bo.h"
25 #include "panvk_shader.h"
26 
27 #include "pan_desc.h"
28 #include "pan_earlyzs.h"
29 #include "pan_encoder.h"
30 #include "pan_format.h"
31 #include "pan_jc.h"
32 #include "pan_props.h"
33 #include "pan_shader.h"
34 
35 #include "vk_format.h"
36 #include "vk_meta.h"
37 #include "vk_pipeline_layout.h"
38 
/* Per-draw state gathered by the draw path and consumed when emitting the
 * vertex/tiler (or IDVS) jobs. Fields of type mali_ptr are GPU addresses;
 * panfrost_ptr carries a CPU mapping alongside the GPU address. */
struct panvk_draw_info {
   unsigned first_index;         /* first index (indexed draws) */
   unsigned index_count;
   unsigned index_size;          /* index size in bytes; 0 for non-indexed draws */
   unsigned first_vertex;
   unsigned vertex_count;
   unsigned vertex_range;
   unsigned padded_vertex_count; /* vertex count padded for the HW instancing divisors */
   unsigned first_instance;
   unsigned instance_count;
   int vertex_offset;            /* vertexOffset of vkCmdDrawIndexed (can be negative) */
   unsigned offset_start;
   uint32_t layer_id;            /* layer being drawn (layered rendering) */
   struct mali_invocation_packed invocation;
   struct {
      mali_ptr varyings;         /* VS varying (output) descriptors */
      mali_ptr attributes;       /* VS attribute descriptors */
      mali_ptr attribute_bufs;   /* VS attribute buffer descriptors */
   } vs;
   struct {
      mali_ptr rsd;              /* fragment RENDERER_STATE descriptor */
      mali_ptr varyings;         /* FS varying (input) descriptors */
   } fs;
   mali_ptr push_uniforms;
   mali_ptr varying_bufs;
   mali_ptr position;            /* position varying buffer */
   mali_ptr indices;
   union {
      /* Which member is valid depends on the primitive topology:
       * psiz for point lists, line_width otherwise
       * (see panvk_draw_prepare_varyings). */
      mali_ptr psiz;
      float line_width;
   };
   mali_ptr tls;                 /* thread-local storage descriptor */
   mali_ptr fb;                  /* framebuffer descriptor */
   const struct pan_tiler_context *tiler_ctx;
   mali_ptr viewport;
   struct {
      struct panfrost_ptr vertex_copy_desc;
      struct panfrost_ptr frag_copy_desc;
      union {
         /* Separate vertex + tiler jobs, or a single IDVS job —
          * presumably depending on HW support; confirm against the
          * job-emission code. */
         struct {
            struct panfrost_ptr vertex;
            struct panfrost_ptr tiler;
         };
         struct panfrost_ptr idvs;
      };
   } jobs;
};
86 
/* True if the given piece of dynamic graphics state (MESA_VK_DYNAMIC_<name>)
 * has been modified since the dirty bitset was last cleared. */
#define is_dirty(__cmdbuf, __name)                                             \
   BITSET_TEST((__cmdbuf)->vk.dynamic_graphics_state.dirty,                    \
               MESA_VK_DYNAMIC_##__name)
90 
91 static VkResult
panvk_cmd_prepare_draw_sysvals(struct panvk_cmd_buffer * cmdbuf,struct panvk_draw_info * draw)92 panvk_cmd_prepare_draw_sysvals(struct panvk_cmd_buffer *cmdbuf,
93                                struct panvk_draw_info *draw)
94 {
95    const struct panvk_shader *vs = cmdbuf->state.gfx.vs.shader;
96    const struct panvk_shader *fs = cmdbuf->state.gfx.fs.shader;
97 
98    struct panvk_descriptor_state *desc_state = &cmdbuf->state.gfx.desc_state;
99    struct panvk_shader_desc_state *vs_desc_state = &cmdbuf->state.gfx.vs.desc;
100    struct panvk_shader_desc_state *fs_desc_state = &cmdbuf->state.gfx.fs.desc;
101    struct panvk_graphics_sysvals *sysvals = &cmdbuf->state.gfx.sysvals;
102    struct vk_color_blend_state *cb = &cmdbuf->vk.dynamic_graphics_state.cb;
103    struct pan_fb_info *fbinfo = &cmdbuf->state.gfx.render.fb.info;
104 
105    unsigned base_vertex = draw->index_size ? draw->vertex_offset : 0;
106    if (sysvals->vs.first_vertex != draw->offset_start ||
107        sysvals->vs.base_vertex != base_vertex ||
108        sysvals->vs.base_instance != draw->first_instance ||
109        sysvals->layer_id != draw->layer_id ||
110        sysvals->fs.multisampled != (fbinfo->nr_samples > 1)) {
111       sysvals->vs.first_vertex = draw->offset_start;
112       sysvals->vs.base_vertex = base_vertex;
113       sysvals->vs.base_instance = draw->first_instance;
114       sysvals->layer_id = draw->layer_id;
115       sysvals->fs.multisampled = fbinfo->nr_samples > 1;
116       cmdbuf->state.gfx.push_uniforms = 0;
117    }
118 
119    if (is_dirty(cmdbuf, CB_BLEND_CONSTANTS)) {
120       for (unsigned i = 0; i < ARRAY_SIZE(cb->blend_constants); i++)
121          sysvals->blend.constants[i] =
122             CLAMP(cb->blend_constants[i], 0.0f, 1.0f);
123       cmdbuf->state.gfx.push_uniforms = 0;
124    }
125 
126    if (is_dirty(cmdbuf, VP_VIEWPORTS)) {
127       VkViewport *viewport = &cmdbuf->vk.dynamic_graphics_state.vp.viewports[0];
128 
129       /* Upload the viewport scale. Defined as (px/2, py/2, pz) at the start of
130        * section 24.5 ("Controlling the Viewport") of the Vulkan spec. At the
131        * end of the section, the spec defines:
132        *
133        * px = width
134        * py = height
135        * pz = maxDepth - minDepth
136        */
137       sysvals->viewport.scale.x = 0.5f * viewport->width;
138       sysvals->viewport.scale.y = 0.5f * viewport->height;
139       sysvals->viewport.scale.z = (viewport->maxDepth - viewport->minDepth);
140 
141       /* Upload the viewport offset. Defined as (ox, oy, oz) at the start of
142        * section 24.5 ("Controlling the Viewport") of the Vulkan spec. At the
143        * end of the section, the spec defines:
144        *
145        * ox = x + width/2
146        * oy = y + height/2
147        * oz = minDepth
148        */
149       sysvals->viewport.offset.x = (0.5f * viewport->width) + viewport->x;
150       sysvals->viewport.offset.y = (0.5f * viewport->height) + viewport->y;
151       sysvals->viewport.offset.z = viewport->minDepth;
152       cmdbuf->state.gfx.push_uniforms = 0;
153    }
154 
155    VkResult result = panvk_per_arch(cmd_prepare_dyn_ssbos)(cmdbuf, desc_state,
156                                                            vs, vs_desc_state);
157    if (result != VK_SUCCESS)
158       return result;
159 
160    sysvals->desc.vs_dyn_ssbos = vs_desc_state->dyn_ssbos;
161    result = panvk_per_arch(cmd_prepare_dyn_ssbos)(cmdbuf, desc_state, fs,
162                                                   fs_desc_state);
163    if (result != VK_SUCCESS)
164       return result;
165 
166    sysvals->desc.fs_dyn_ssbos = fs_desc_state->dyn_ssbos;
167 
168    for (uint32_t i = 0; i < MAX_SETS; i++) {
169       uint32_t used_set_mask =
170          vs->desc_info.used_set_mask | (fs ? fs->desc_info.used_set_mask : 0);
171 
172       if (used_set_mask & BITFIELD_BIT(i))
173          sysvals->desc.sets[i] = desc_state->sets[i]->descs.dev;
174    }
175 
176    return VK_SUCCESS;
177 }
178 
179 static bool
has_depth_att(struct panvk_cmd_buffer * cmdbuf)180 has_depth_att(struct panvk_cmd_buffer *cmdbuf)
181 {
182    return (cmdbuf->state.gfx.render.bound_attachments &
183            MESA_VK_RP_ATTACHMENT_DEPTH_BIT) != 0;
184 }
185 
186 static bool
has_stencil_att(struct panvk_cmd_buffer * cmdbuf)187 has_stencil_att(struct panvk_cmd_buffer *cmdbuf)
188 {
189    return (cmdbuf->state.gfx.render.bound_attachments &
190            MESA_VK_RP_ATTACHMENT_STENCIL_BIT) != 0;
191 }
192 
193 static bool
writes_depth(struct panvk_cmd_buffer * cmdbuf)194 writes_depth(struct panvk_cmd_buffer *cmdbuf)
195 {
196    const struct vk_depth_stencil_state *ds =
197       &cmdbuf->vk.dynamic_graphics_state.ds;
198 
199    return has_depth_att(cmdbuf) && ds->depth.test_enable &&
200           ds->depth.write_enable && ds->depth.compare_op != VK_COMPARE_OP_NEVER;
201 }
202 
203 static bool
writes_stencil(struct panvk_cmd_buffer * cmdbuf)204 writes_stencil(struct panvk_cmd_buffer *cmdbuf)
205 {
206    const struct vk_depth_stencil_state *ds =
207       &cmdbuf->vk.dynamic_graphics_state.ds;
208 
209    return has_stencil_att(cmdbuf) && ds->stencil.test_enable &&
210           ((ds->stencil.front.write_mask &&
211             (ds->stencil.front.op.fail != VK_STENCIL_OP_KEEP ||
212              ds->stencil.front.op.pass != VK_STENCIL_OP_KEEP ||
213              ds->stencil.front.op.depth_fail != VK_STENCIL_OP_KEEP)) ||
214            (ds->stencil.back.write_mask &&
215             (ds->stencil.back.op.fail != VK_STENCIL_OP_KEEP ||
216              ds->stencil.back.op.pass != VK_STENCIL_OP_KEEP ||
217              ds->stencil.back.op.depth_fail != VK_STENCIL_OP_KEEP)));
218 }
219 
220 static bool
ds_test_always_passes(struct panvk_cmd_buffer * cmdbuf)221 ds_test_always_passes(struct panvk_cmd_buffer *cmdbuf)
222 {
223    const struct vk_depth_stencil_state *ds =
224       &cmdbuf->vk.dynamic_graphics_state.ds;
225 
226    if (!has_depth_att(cmdbuf))
227       return true;
228 
229    if (ds->depth.test_enable && ds->depth.compare_op != VK_COMPARE_OP_ALWAYS)
230       return false;
231 
232    if (ds->stencil.test_enable &&
233        (ds->stencil.front.op.compare != VK_COMPARE_OP_ALWAYS ||
234         ds->stencil.back.op.compare != VK_COMPARE_OP_ALWAYS))
235       return false;
236 
237    return true;
238 }
239 
240 static inline enum mali_func
translate_compare_func(VkCompareOp comp)241 translate_compare_func(VkCompareOp comp)
242 {
243    STATIC_ASSERT(VK_COMPARE_OP_NEVER == (VkCompareOp)MALI_FUNC_NEVER);
244    STATIC_ASSERT(VK_COMPARE_OP_LESS == (VkCompareOp)MALI_FUNC_LESS);
245    STATIC_ASSERT(VK_COMPARE_OP_EQUAL == (VkCompareOp)MALI_FUNC_EQUAL);
246    STATIC_ASSERT(VK_COMPARE_OP_LESS_OR_EQUAL == (VkCompareOp)MALI_FUNC_LEQUAL);
247    STATIC_ASSERT(VK_COMPARE_OP_GREATER == (VkCompareOp)MALI_FUNC_GREATER);
248    STATIC_ASSERT(VK_COMPARE_OP_NOT_EQUAL == (VkCompareOp)MALI_FUNC_NOT_EQUAL);
249    STATIC_ASSERT(VK_COMPARE_OP_GREATER_OR_EQUAL ==
250                  (VkCompareOp)MALI_FUNC_GEQUAL);
251    STATIC_ASSERT(VK_COMPARE_OP_ALWAYS == (VkCompareOp)MALI_FUNC_ALWAYS);
252 
253    return (enum mali_func)comp;
254 }
255 
256 static enum mali_stencil_op
translate_stencil_op(VkStencilOp in)257 translate_stencil_op(VkStencilOp in)
258 {
259    switch (in) {
260    case VK_STENCIL_OP_KEEP:
261       return MALI_STENCIL_OP_KEEP;
262    case VK_STENCIL_OP_ZERO:
263       return MALI_STENCIL_OP_ZERO;
264    case VK_STENCIL_OP_REPLACE:
265       return MALI_STENCIL_OP_REPLACE;
266    case VK_STENCIL_OP_INCREMENT_AND_CLAMP:
267       return MALI_STENCIL_OP_INCR_SAT;
268    case VK_STENCIL_OP_DECREMENT_AND_CLAMP:
269       return MALI_STENCIL_OP_DECR_SAT;
270    case VK_STENCIL_OP_INCREMENT_AND_WRAP:
271       return MALI_STENCIL_OP_INCR_WRAP;
272    case VK_STENCIL_OP_DECREMENT_AND_WRAP:
273       return MALI_STENCIL_OP_DECR_WRAP;
274    case VK_STENCIL_OP_INVERT:
275       return MALI_STENCIL_OP_INVERT;
276    default:
277       unreachable("Invalid stencil op");
278    }
279 }
280 
281 static bool
fs_required(struct panvk_cmd_buffer * cmdbuf)282 fs_required(struct panvk_cmd_buffer *cmdbuf)
283 {
284    const struct pan_shader_info *fs_info =
285       cmdbuf->state.gfx.fs.shader ? &cmdbuf->state.gfx.fs.shader->info : NULL;
286    const struct vk_dynamic_graphics_state *dyns =
287       &cmdbuf->vk.dynamic_graphics_state;
288    const struct vk_color_blend_state *cb = &dyns->cb;
289 
290    if (!fs_info)
291       return false;
292 
293    /* If we generally have side effects */
294    if (fs_info->fs.sidefx)
295       return true;
296 
297    /* If colour is written we need to execute */
298    for (unsigned i = 0; i < cb->attachment_count; ++i) {
299       if ((cb->color_write_enables & BITFIELD_BIT(i)) &&
300           cb->attachments[i].write_mask)
301          return true;
302    }
303 
304    /* If alpha-to-coverage is enabled, we need to run the fragment shader even
305     * if we don't have a color attachment, so depth/stencil updates can be
306     * discarded if alpha, and thus coverage, is 0. */
307    if (dyns->ms.alpha_to_coverage_enable)
308       return true;
309 
310    /* If depth is written and not implied we need to execute.
311     * TODO: Predicate on Z/S writes being enabled */
312    return (fs_info->fs.writes_depth || fs_info->fs.writes_stencil);
313 }
314 
315 static VkResult
panvk_draw_prepare_fs_rsd(struct panvk_cmd_buffer * cmdbuf,struct panvk_draw_info * draw)316 panvk_draw_prepare_fs_rsd(struct panvk_cmd_buffer *cmdbuf,
317                           struct panvk_draw_info *draw)
318 {
319    bool dirty =
320       is_dirty(cmdbuf, RS_RASTERIZER_DISCARD_ENABLE) ||
321       is_dirty(cmdbuf, RS_DEPTH_CLAMP_ENABLE) ||
322       is_dirty(cmdbuf, RS_DEPTH_BIAS_ENABLE) ||
323       is_dirty(cmdbuf, RS_DEPTH_BIAS_FACTORS) ||
324       is_dirty(cmdbuf, CB_LOGIC_OP_ENABLE) || is_dirty(cmdbuf, CB_LOGIC_OP) ||
325       is_dirty(cmdbuf, CB_ATTACHMENT_COUNT) ||
326       is_dirty(cmdbuf, CB_COLOR_WRITE_ENABLES) ||
327       is_dirty(cmdbuf, CB_BLEND_ENABLES) ||
328       is_dirty(cmdbuf, CB_BLEND_EQUATIONS) ||
329       is_dirty(cmdbuf, CB_WRITE_MASKS) ||
330       is_dirty(cmdbuf, CB_BLEND_CONSTANTS) ||
331       is_dirty(cmdbuf, DS_DEPTH_TEST_ENABLE) ||
332       is_dirty(cmdbuf, DS_DEPTH_WRITE_ENABLE) ||
333       is_dirty(cmdbuf, DS_DEPTH_COMPARE_OP) ||
334       is_dirty(cmdbuf, DS_DEPTH_COMPARE_OP) ||
335       is_dirty(cmdbuf, DS_STENCIL_TEST_ENABLE) ||
336       is_dirty(cmdbuf, DS_STENCIL_OP) ||
337       is_dirty(cmdbuf, DS_STENCIL_COMPARE_MASK) ||
338       is_dirty(cmdbuf, DS_STENCIL_WRITE_MASK) ||
339       is_dirty(cmdbuf, DS_STENCIL_REFERENCE) ||
340       is_dirty(cmdbuf, MS_RASTERIZATION_SAMPLES) ||
341       is_dirty(cmdbuf, MS_SAMPLE_MASK) ||
342       is_dirty(cmdbuf, MS_ALPHA_TO_COVERAGE_ENABLE) ||
343       is_dirty(cmdbuf, MS_ALPHA_TO_ONE_ENABLE) || !cmdbuf->state.gfx.fs.rsd;
344 
345    if (!dirty) {
346       draw->fs.rsd = cmdbuf->state.gfx.fs.rsd;
347       return VK_SUCCESS;
348    }
349 
350    struct panvk_device *dev = to_panvk_device(cmdbuf->vk.base.device);
351    const struct vk_dynamic_graphics_state *dyns =
352       &cmdbuf->vk.dynamic_graphics_state;
353    const struct vk_rasterization_state *rs = &dyns->rs;
354    const struct vk_color_blend_state *cb = &dyns->cb;
355    const struct vk_depth_stencil_state *ds = &dyns->ds;
356    const struct panvk_shader *fs = cmdbuf->state.gfx.fs.shader;
357    const struct pan_shader_info *fs_info = fs ? &fs->info : NULL;
358    unsigned bd_count = MAX2(cb->attachment_count, 1);
359    bool test_s = has_stencil_att(cmdbuf) && ds->stencil.test_enable;
360    bool test_z = has_depth_att(cmdbuf) && ds->depth.test_enable;
361    bool writes_z = writes_depth(cmdbuf);
362    bool writes_s = writes_stencil(cmdbuf);
363    bool needs_fs = fs_required(cmdbuf);
364 
365    struct panfrost_ptr ptr = panvk_cmd_alloc_desc_aggregate(
366       cmdbuf, PAN_DESC(RENDERER_STATE), PAN_DESC_ARRAY(bd_count, BLEND));
367    if (!ptr.gpu)
368       return VK_ERROR_OUT_OF_DEVICE_MEMORY;
369 
370    struct mali_renderer_state_packed *rsd = ptr.cpu;
371    struct mali_blend_packed *bds = ptr.cpu + pan_size(RENDERER_STATE);
372    struct panvk_blend_info binfo = {0};
373 
374    mali_ptr fs_code = panvk_shader_get_dev_addr(fs);
375 
376    if (fs_info != NULL) {
377       panvk_per_arch(blend_emit_descs)(
378          dev, cb, cmdbuf->state.gfx.render.color_attachments.fmts,
379          cmdbuf->state.gfx.render.color_attachments.samples, fs_info, fs_code,
380          bds, &binfo);
381    } else {
382       for (unsigned i = 0; i < bd_count; i++) {
383          pan_pack(&bds[i], BLEND, cfg) {
384             cfg.enable = false;
385             cfg.internal.mode = MALI_BLEND_MODE_OFF;
386          }
387       }
388    }
389 
390    pan_pack(rsd, RENDERER_STATE, cfg) {
391       bool alpha_to_coverage = dyns->ms.alpha_to_coverage_enable;
392 
393       if (needs_fs) {
394          pan_shader_prepare_rsd(fs_info, fs_code, &cfg);
395 
396          if (binfo.shader_loads_blend_const) {
397             /* Preload the blend constant if the blend shader depends on it. */
398             cfg.preload.uniform_count = MAX2(
399                cfg.preload.uniform_count,
400                DIV_ROUND_UP(256 + sizeof(struct panvk_graphics_sysvals), 8));
401          }
402 
403          uint8_t rt_written = fs_info->outputs_written >> FRAG_RESULT_DATA0;
404          uint8_t rt_mask = cmdbuf->state.gfx.render.bound_attachments &
405                            MESA_VK_RP_ATTACHMENT_ANY_COLOR_BITS;
406          cfg.properties.allow_forward_pixel_to_kill =
407             fs_info->fs.can_fpk && !(rt_mask & ~rt_written) &&
408             !alpha_to_coverage && !binfo.any_dest_read;
409 
410          bool writes_zs = writes_z || writes_s;
411          bool zs_always_passes = ds_test_always_passes(cmdbuf);
412          bool oq = false; /* TODO: Occlusion queries */
413 
414          struct pan_earlyzs_state earlyzs =
415             pan_earlyzs_get(pan_earlyzs_analyze(fs_info), writes_zs || oq,
416                             alpha_to_coverage, zs_always_passes);
417 
418          cfg.properties.pixel_kill_operation = earlyzs.kill;
419          cfg.properties.zs_update_operation = earlyzs.update;
420       } else {
421          cfg.properties.depth_source = MALI_DEPTH_SOURCE_FIXED_FUNCTION;
422          cfg.properties.allow_forward_pixel_to_kill = true;
423          cfg.properties.allow_forward_pixel_to_be_killed = true;
424          cfg.properties.zs_update_operation = MALI_PIXEL_KILL_STRONG_EARLY;
425       }
426 
427       bool msaa = dyns->ms.rasterization_samples > 1;
428       cfg.multisample_misc.multisample_enable = msaa;
429       cfg.multisample_misc.sample_mask =
430          msaa ? dyns->ms.sample_mask : UINT16_MAX;
431 
432       cfg.multisample_misc.depth_function =
433          test_z ? translate_compare_func(ds->depth.compare_op)
434                 : MALI_FUNC_ALWAYS;
435 
436       cfg.multisample_misc.depth_write_mask = writes_z;
437       cfg.multisample_misc.fixed_function_near_discard =
438          !rs->depth_clamp_enable;
439       cfg.multisample_misc.fixed_function_far_discard = !rs->depth_clamp_enable;
440       cfg.multisample_misc.shader_depth_range_fixed = true;
441 
442       cfg.stencil_mask_misc.stencil_enable = test_s;
443       cfg.stencil_mask_misc.alpha_to_coverage = alpha_to_coverage;
444       cfg.stencil_mask_misc.alpha_test_compare_function = MALI_FUNC_ALWAYS;
445       cfg.stencil_mask_misc.front_facing_depth_bias = rs->depth_bias.enable;
446       cfg.stencil_mask_misc.back_facing_depth_bias = rs->depth_bias.enable;
447       cfg.stencil_mask_misc.single_sampled_lines =
448          dyns->ms.rasterization_samples <= 1;
449 
450       cfg.depth_units = rs->depth_bias.constant * 2.0f;
451       cfg.depth_factor = rs->depth_bias.slope;
452       cfg.depth_bias_clamp = rs->depth_bias.clamp;
453 
454       cfg.stencil_front.mask = ds->stencil.front.compare_mask;
455       cfg.stencil_back.mask = ds->stencil.back.compare_mask;
456 
457       cfg.stencil_mask_misc.stencil_mask_front = ds->stencil.front.write_mask;
458       cfg.stencil_mask_misc.stencil_mask_back = ds->stencil.back.write_mask;
459 
460       cfg.stencil_front.reference_value = ds->stencil.front.reference;
461       cfg.stencil_back.reference_value = ds->stencil.back.reference;
462 
463       if (test_s) {
464          cfg.stencil_front.compare_function =
465             translate_compare_func(ds->stencil.front.op.compare);
466          cfg.stencil_front.stencil_fail =
467             translate_stencil_op(ds->stencil.front.op.fail);
468          cfg.stencil_front.depth_fail =
469             translate_stencil_op(ds->stencil.front.op.depth_fail);
470          cfg.stencil_front.depth_pass =
471             translate_stencil_op(ds->stencil.front.op.pass);
472          cfg.stencil_back.compare_function =
473             translate_compare_func(ds->stencil.back.op.compare);
474          cfg.stencil_back.stencil_fail =
475             translate_stencil_op(ds->stencil.back.op.fail);
476          cfg.stencil_back.depth_fail =
477             translate_stencil_op(ds->stencil.back.op.depth_fail);
478          cfg.stencil_back.depth_pass =
479             translate_stencil_op(ds->stencil.back.op.pass);
480       }
481    }
482 
483    cmdbuf->state.gfx.fs.rsd = ptr.gpu;
484    draw->fs.rsd = cmdbuf->state.gfx.fs.rsd;
485    return VK_SUCCESS;
486 }
487 
488 static VkResult
panvk_draw_prepare_tiler_context(struct panvk_cmd_buffer * cmdbuf,struct panvk_draw_info * draw)489 panvk_draw_prepare_tiler_context(struct panvk_cmd_buffer *cmdbuf,
490                                  struct panvk_draw_info *draw)
491 {
492    struct panvk_batch *batch = cmdbuf->cur_batch;
493    VkResult result =
494       panvk_per_arch(cmd_prepare_tiler_context)(cmdbuf, draw->layer_id);
495    if (result != VK_SUCCESS)
496       return result;
497 
498    draw->tiler_ctx = &batch->tiler.ctx;
499    return VK_SUCCESS;
500 }
501 
/* Pick the HW pixel format for a varying slot.
 *
 * Point size/point coord use a single 16-bit float component, gl_Position
 * uses the MALI_SNAP_4 encoding, and every other slot takes the format
 * derived from its pipe_format. A slot with PIPE_FORMAT_NONE resolves to
 * a constant so reads return a defined value. The swizzle/component-order
 * encoding differs between v6 and v7+ hardware, hence the PAN_ARCH
 * conditionals. */
static mali_pixel_format
panvk_varying_hw_format(gl_shader_stage stage, gl_varying_slot loc,
                        enum pipe_format pfmt)
{
   switch (loc) {
   case VARYING_SLOT_PNTC:
   case VARYING_SLOT_PSIZ:
#if PAN_ARCH <= 6
      return (MALI_R16F << 12) | panfrost_get_default_swizzle(1);
#else
      return (MALI_R16F << 12) | MALI_RGB_COMPONENT_ORDER_R000;
#endif
   case VARYING_SLOT_POS:
#if PAN_ARCH <= 6
      return (MALI_SNAP_4 << 12) | panfrost_get_default_swizzle(4);
#else
      return (MALI_SNAP_4 << 12) | MALI_RGB_COMPONENT_ORDER_RGBA;
#endif
   default:
      if (pfmt != PIPE_FORMAT_NONE)
         return GENX(panfrost_format_from_pipe_format)(pfmt)->hw;

      /* No pipe format: emit a constant-zero component order/swizzle. */
#if PAN_ARCH >= 7
      return (MALI_CONSTANT << 12) | MALI_RGB_COMPONENT_ORDER_0000;
#else
      return (MALI_CONSTANT << 12) | PAN_V6_SWIZZLE(0, 0, 0, 0);
#endif
   }
}
531 
/* Allocate the varying buffers for this draw and emit their
 * ATTRIBUTE_BUFFER descriptors. Also fills draw->position, the
 * psiz/line_width union, and the varying-descriptor pointers taken from
 * the current VS/FS link. */
static VkResult
panvk_draw_prepare_varyings(struct panvk_cmd_buffer *cmdbuf,
                            struct panvk_draw_info *draw)
{
   const struct panvk_shader *vs = cmdbuf->state.gfx.vs.shader;
   const struct panvk_shader_link *link = &cmdbuf->state.gfx.link;
   /* +1 for the NULL terminator entry (see the memset below). */
   struct panfrost_ptr bufs = panvk_cmd_alloc_desc_array(
      cmdbuf, PANVK_VARY_BUF_MAX + 1, ATTRIBUTE_BUFFER);
   if (!bufs.gpu)
      return VK_ERROR_OUT_OF_DEVICE_MEMORY;

   struct mali_attribute_buffer_packed *buf_descs = bufs.cpu;
   const struct vk_input_assembly_state *ia =
      &cmdbuf->vk.dynamic_graphics_state.ia;
   /* Point size is only consumed when drawing point lists. */
   bool writes_point_size =
      vs->info.vs.writes_point_size &&
      ia->primitive_topology == VK_PRIMITIVE_TOPOLOGY_POINT_LIST;
   unsigned vertex_count = draw->padded_vertex_count * draw->instance_count;
   mali_ptr psiz_buf = 0;

   for (unsigned i = 0; i < PANVK_VARY_BUF_MAX; i++) {
      /* A zero stride means the buffer is unused; skip the allocation. */
      unsigned buf_size = vertex_count * link->buf_strides[i];
      mali_ptr buf_addr =
         buf_size ? panvk_cmd_alloc_dev_mem(cmdbuf, varying, buf_size, 64).gpu
                  : 0;
      if (buf_size && !buf_addr)
         return VK_ERROR_OUT_OF_DEVICE_MEMORY;

      pan_pack(&buf_descs[i], ATTRIBUTE_BUFFER, cfg) {
         cfg.stride = link->buf_strides[i];
         cfg.size = buf_size;
         cfg.pointer = buf_addr;
      }

      if (i == PANVK_VARY_BUF_POSITION)
         draw->position = buf_addr;

      if (i == PANVK_VARY_BUF_PSIZ)
         psiz_buf = buf_addr;
   }

   /* We need an empty entry to stop prefetching on Bifrost */
   memset(bufs.cpu + (pan_size(ATTRIBUTE_BUFFER) * PANVK_VARY_BUF_MAX), 0,
          pan_size(ATTRIBUTE_BUFFER));

   /* psiz and line_width share a union: points take the point-size buffer,
    * line primitives take the dynamic line width, everything else gets a
    * 1.0 line width. */
   if (writes_point_size)
      draw->psiz = psiz_buf;
   else if (ia->primitive_topology == VK_PRIMITIVE_TOPOLOGY_LINE_LIST ||
            ia->primitive_topology == VK_PRIMITIVE_TOPOLOGY_LINE_STRIP)
      draw->line_width = cmdbuf->vk.dynamic_graphics_state.rs.line.width;
   else
      draw->line_width = 1.0f;

   draw->varying_bufs = bufs.gpu;
   draw->vs.varyings = panvk_priv_mem_dev_addr(link->vs.attribs);
   draw->fs.varyings = panvk_priv_mem_dev_addr(link->fs.attribs);
   return VK_SUCCESS;
}
590 
/* Emit the ATTRIBUTE_BUFFER descriptor (and its continuation slot) for one
 * vertex buffer binding.
 *
 * The buffer address is aligned down to 64 bytes; the dropped low bits are
 * compensated by growing the size here and offsetting the attribute
 * records (see panvk_draw_emit_attrib). The branch ladder picks the
 * cheapest HW addressing mode for the instancing configuration. */
static void
panvk_draw_emit_attrib_buf(const struct panvk_draw_info *draw,
                           const struct vk_vertex_binding_state *buf_info,
                           const struct panvk_attrib_buf *buf, void *desc)
{
   mali_ptr addr = buf->address & ~63ULL;
   unsigned size = buf->size + (buf->address & 63);
   unsigned divisor = draw->padded_vertex_count * buf_info->divisor;
   bool per_instance = buf_info->input_rate == VK_VERTEX_INPUT_RATE_INSTANCE;
   void *buf_ext = desc + pan_size(ATTRIBUTE_BUFFER);

   /* TODO: support instanced arrays */
   if (draw->instance_count <= 1) {
      /* Single instance: plain 1D buffer. Per-instance data degenerates to
       * stride 0 so every vertex reads element 0. */
      pan_pack(desc, ATTRIBUTE_BUFFER, cfg) {
         cfg.type = MALI_ATTRIBUTE_TYPE_1D;
         cfg.stride = per_instance ? 0 : buf_info->stride;
         cfg.pointer = addr;
         cfg.size = size;
      }
   } else if (!per_instance) {
      /* Per-vertex data with instancing: wrap the index modulo the padded
       * vertex count. */
      pan_pack(desc, ATTRIBUTE_BUFFER, cfg) {
         cfg.type = MALI_ATTRIBUTE_TYPE_1D_MODULUS;
         cfg.divisor = draw->padded_vertex_count;
         cfg.stride = buf_info->stride;
         cfg.pointer = addr;
         cfg.size = size;
      }
   } else if (!divisor) {
      /* instance_divisor == 0 means all instances share the same value.
       * Make it a 1D array with a zero stride.
       */
      pan_pack(desc, ATTRIBUTE_BUFFER, cfg) {
         cfg.type = MALI_ATTRIBUTE_TYPE_1D;
         cfg.stride = 0;
         cfg.pointer = addr;
         cfg.size = size;
      }
   } else if (util_is_power_of_two_or_zero(divisor)) {
      /* Power-of-two divisor: the HW can divide with a plain shift. */
      pan_pack(desc, ATTRIBUTE_BUFFER, cfg) {
         cfg.type = MALI_ATTRIBUTE_TYPE_1D_POT_DIVISOR;
         cfg.stride = buf_info->stride;
         cfg.pointer = addr;
         cfg.size = size;
         cfg.divisor_r = __builtin_ctz(divisor);
      }
   } else {
      /* Arbitrary divisor: use the magic-number division encoding, which
       * needs the continuation descriptor for the extra parameters. */
      unsigned divisor_r = 0, divisor_e = 0;
      unsigned divisor_num =
         panfrost_compute_magic_divisor(divisor, &divisor_r, &divisor_e);
      pan_pack(desc, ATTRIBUTE_BUFFER, cfg) {
         cfg.type = MALI_ATTRIBUTE_TYPE_1D_NPOT_DIVISOR;
         cfg.stride = buf_info->stride;
         cfg.pointer = addr;
         cfg.size = size;
         cfg.divisor_r = divisor_r;
         cfg.divisor_e = divisor_e;
      }

      pan_pack(buf_ext, ATTRIBUTE_BUFFER_CONTINUATION_NPOT, cfg) {
         cfg.divisor_numerator = divisor_num;
         cfg.divisor = buf_info->divisor;
      }

      buf_ext = NULL;
   }

   /* If the buffer extension wasn't used, memset(0) */
   if (buf_ext)
      memset(buf_ext, 0, pan_size(ATTRIBUTE_BUFFER));
}
661 
662 static void
panvk_draw_emit_attrib(const struct panvk_draw_info * draw,const struct vk_vertex_attribute_state * attrib_info,const struct vk_vertex_binding_state * buf_info,const struct panvk_attrib_buf * buf,void * desc)663 panvk_draw_emit_attrib(const struct panvk_draw_info *draw,
664                        const struct vk_vertex_attribute_state *attrib_info,
665                        const struct vk_vertex_binding_state *buf_info,
666                        const struct panvk_attrib_buf *buf, void *desc)
667 {
668    bool per_instance = buf_info->input_rate == VK_VERTEX_INPUT_RATE_INSTANCE;
669    enum pipe_format f = vk_format_to_pipe_format(attrib_info->format);
670    unsigned buf_idx = attrib_info->binding;
671 
672    pan_pack(desc, ATTRIBUTE, cfg) {
673       cfg.buffer_index = buf_idx * 2;
674       cfg.offset = attrib_info->offset + (buf->address & 63);
675       cfg.offset_enable = true;
676 
677       if (per_instance)
678          cfg.offset += draw->first_instance * buf_info->stride;
679 
680       cfg.format = GENX(panfrost_format_from_pipe_format)(f)->hw;
681    }
682 }
683 
/* (Re-)emit the vertex-shader ATTRIBUTE and ATTRIBUTE_BUFFER descriptor
 * tables when the vertex-input state changed (or they don't exist yet).
 * Image descriptors accessed by the VS are conveyed through the same
 * attribute path, placed after the MAX_VS_ATTRIBS regular slots. */
static VkResult
panvk_draw_prepare_vs_attribs(struct panvk_cmd_buffer *cmdbuf,
                              struct panvk_draw_info *draw)
{
   const struct panvk_shader *vs = cmdbuf->state.gfx.vs.shader;
   const struct vk_vertex_input_state *vi =
      cmdbuf->vk.dynamic_graphics_state.vi;
   unsigned num_imgs = vs->desc_info.others.count[PANVK_BIFROST_DESC_TABLE_IMG];
   unsigned num_vs_attribs = util_last_bit(vi->attributes_valid);
   unsigned num_vbs = util_last_bit(vi->bindings_valid);
   /* With images in play, the image table starts at MAX_VS_ATTRIBS. */
   unsigned attrib_count =
      num_imgs ? MAX_VS_ATTRIBS + num_imgs : num_vs_attribs;
   bool dirty =
      is_dirty(cmdbuf, VI) || is_dirty(cmdbuf, VI_BINDINGS_VALID) ||
      is_dirty(cmdbuf, VI_BINDING_STRIDES) ||
      (num_imgs && !cmdbuf->state.gfx.vs.desc.img_attrib_table) ||
      (cmdbuf->state.gfx.vb.count && !cmdbuf->state.gfx.vs.attrib_bufs) ||
      (attrib_count && !cmdbuf->state.gfx.vs.attribs);

   if (!dirty)
      return VK_SUCCESS;

   /* Attribute buffers are emitted in pairs (buffer + continuation);
    * +1 for the NULL terminator entry (see the memset below). */
   unsigned attrib_buf_count = (num_vbs + num_imgs) * 2;
   struct panfrost_ptr bufs = panvk_cmd_alloc_desc_array(
      cmdbuf, attrib_buf_count + 1, ATTRIBUTE_BUFFER);
   struct mali_attribute_buffer_packed *attrib_buf_descs = bufs.cpu;
   struct panfrost_ptr attribs =
      panvk_cmd_alloc_desc_array(cmdbuf, attrib_count, ATTRIBUTE);
   struct mali_attribute_packed *attrib_descs = attribs.cpu;

   if (!bufs.gpu || (attrib_count && !attribs.gpu))
      return VK_ERROR_OUT_OF_DEVICE_MEMORY;

   for (unsigned i = 0; i < num_vbs; i++) {
      if (vi->bindings_valid & BITFIELD_BIT(i)) {
         panvk_draw_emit_attrib_buf(draw, &vi->bindings[i],
                                    &cmdbuf->state.gfx.vb.bufs[i],
                                    &attrib_buf_descs[i * 2]);
      } else {
         /* Unbound slot: zero both entries of the pair. */
         memset(&attrib_buf_descs[i * 2], 0, sizeof(*attrib_buf_descs) * 2);
      }
   }

   for (unsigned i = 0; i < num_vs_attribs; i++) {
      if (vi->attributes_valid & BITFIELD_BIT(i)) {
         unsigned buf_idx = vi->attributes[i].binding;
         panvk_draw_emit_attrib(
            draw, &vi->attributes[i], &vi->bindings[buf_idx],
            &cmdbuf->state.gfx.vb.bufs[buf_idx], &attrib_descs[i]);
      } else {
         memset(&attrib_descs[i], 0, sizeof(attrib_descs[0]));
      }
   }

   /* A NULL entry is needed to stop prefetching on Bifrost */
   memset(bufs.cpu + (pan_size(ATTRIBUTE_BUFFER) * attrib_buf_count), 0,
          pan_size(ATTRIBUTE_BUFFER));

   cmdbuf->state.gfx.vs.attrib_bufs = bufs.gpu;
   cmdbuf->state.gfx.vs.attribs = attribs.gpu;

   if (num_imgs) {
      cmdbuf->state.gfx.vs.desc.img_attrib_table =
         attribs.gpu + (MAX_VS_ATTRIBS * pan_size(ATTRIBUTE));
      cmdbuf->state.gfx.vs.desc.tables[PANVK_BIFROST_DESC_TABLE_IMG] =
         bufs.gpu + (num_vbs * pan_size(ATTRIBUTE_BUFFER) * 2);
   }

   return VK_SUCCESS;
}
754 
755 static void
panvk_draw_prepare_attributes(struct panvk_cmd_buffer * cmdbuf,struct panvk_draw_info * draw)756 panvk_draw_prepare_attributes(struct panvk_cmd_buffer *cmdbuf,
757                               struct panvk_draw_info *draw)
758 {
759    panvk_draw_prepare_vs_attribs(cmdbuf, draw);
760    draw->vs.attributes = cmdbuf->state.gfx.vs.attribs;
761    draw->vs.attribute_bufs = cmdbuf->state.gfx.vs.attrib_bufs;
762 }
763 
static void
panvk_emit_viewport(const struct vk_viewport_state *vp, void *vpd)
{
   /* Translate the (single) Vulkan viewport + scissor pair into a Mali
    * VIEWPORT descriptor: an integer scissor box plus an ordered
    * [min, max] depth range. */
   assert(vp->viewport_count == 1);

   const VkViewport *viewport = &vp->viewports[0];
   const VkRect2D *scissor = &vp->scissors[0];

   /* The spec says "width must be greater than 0.0" */
   assert(viewport->width >= 0);
   int minx = (int)viewport->x;
   int maxx = (int)(viewport->x + viewport->width);

   /* Viewport height can be negative */
   int miny = MIN2((int)viewport->y, (int)(viewport->y + viewport->height));
   int maxy = MAX2((int)viewport->y, (int)(viewport->y + viewport->height));

   /* Intersect the viewport box with the scissor rectangle. */
   assert(scissor->offset.x >= 0 && scissor->offset.y >= 0);
   minx = MAX2(scissor->offset.x, minx);
   miny = MAX2(scissor->offset.y, miny);
   maxx = MIN2(scissor->offset.x + scissor->extent.width, maxx);
   maxy = MIN2(scissor->offset.y + scissor->extent.height, maxy);

   /* Make sure we don't end up with a max < min when width/height is 0.
    * The -1 on non-degenerate axes suggests the HW box is inclusive —
    * consistent with the scissor_maximum_* field names below. */
   maxx = maxx > minx ? maxx - 1 : maxx;
   maxy = maxy > miny ? maxy - 1 : maxy;

   /* Clamp viewport scissor to valid range */
   minx = CLAMP(minx, 0, UINT16_MAX);
   maxx = CLAMP(maxx, 0, UINT16_MAX);
   miny = CLAMP(miny, 0, UINT16_MAX);
   maxy = CLAMP(maxy, 0, UINT16_MAX);

   assert(viewport->minDepth >= 0.0f && viewport->minDepth <= 1.0f);
   assert(viewport->maxDepth >= 0.0f && viewport->maxDepth <= 1.0f);

   pan_pack(vpd, VIEWPORT, cfg) {
      cfg.scissor_minimum_x = minx;
      cfg.scissor_minimum_y = miny;
      cfg.scissor_maximum_x = maxx;
      cfg.scissor_maximum_y = maxy;
      /* minDepth may exceed maxDepth (reversed depth range); the
       * descriptor wants an ordered interval. */
      cfg.minimum_z = MIN2(viewport->minDepth, viewport->maxDepth);
      cfg.maximum_z = MAX2(viewport->minDepth, viewport->maxDepth);
   }
}
809 
static VkResult
panvk_draw_prepare_viewport(struct panvk_cmd_buffer *cmdbuf,
                            struct panvk_draw_info *draw)
{
   /* Allocate and fill a VIEWPORT descriptor when none exists yet or when
    * the dynamic viewport/scissor state is dirty, then point the draw at
    * the cached descriptor.
    *
    * When rasterizerDiscardEnable is active, it is allowed to have viewport and
    * scissor disabled.
    * As a result, we define an empty one.
    */
   if (!cmdbuf->state.gfx.vpd || is_dirty(cmdbuf, VP_VIEWPORTS) ||
       is_dirty(cmdbuf, VP_SCISSORS)) {
      struct panfrost_ptr vp = panvk_cmd_alloc_desc(cmdbuf, VIEWPORT);
      if (!vp.gpu)
         return VK_ERROR_OUT_OF_DEVICE_MEMORY;

      const struct vk_viewport_state *vps =
         &cmdbuf->vk.dynamic_graphics_state.vp;

      /* No viewport bound: keep the descriptor as allocated (presumably
       * zero-filled by the allocator — this is the "empty" viewport
       * mentioned above; TODO confirm panvk_cmd_alloc_desc zeroes). */
      if (vps->viewport_count > 0)
         panvk_emit_viewport(vps, vp.cpu);
      cmdbuf->state.gfx.vpd = vp.gpu;
   }

   draw->viewport = cmdbuf->state.gfx.vpd;
   return VK_SUCCESS;
}
835 
static void
panvk_emit_vertex_dcd(struct panvk_cmd_buffer *cmdbuf,
                      const struct panvk_draw_info *draw, void *dcd)
{
   /* Fill the Draw Call Descriptor for the vertex side of the draw:
    * renderer state, attribute/varying tables, descriptor tables and
    * push uniforms for the vertex shader. */
   const struct panvk_shader *vs = cmdbuf->state.gfx.vs.shader;
   const struct panvk_shader_desc_state *vs_desc_state =
      &cmdbuf->state.gfx.vs.desc;

   pan_pack(dcd, DRAW, cfg) {
      cfg.state = panvk_priv_mem_dev_addr(vs->rsd);
      cfg.attributes = draw->vs.attributes;
      cfg.attribute_buffers = draw->vs.attribute_bufs;
      cfg.varyings = draw->vs.varyings;
      cfg.varying_buffers = draw->varying_bufs;
      cfg.thread_storage = draw->tls;
      cfg.offset_start = draw->offset_start;
      /* Non-instanced draws use an instance size of 1; instanced draws
       * use the padded vertex count computed at draw time. */
      cfg.instance_size =
         draw->instance_count > 1 ? draw->padded_vertex_count : 1;
      cfg.uniform_buffers = vs_desc_state->tables[PANVK_BIFROST_DESC_TABLE_UBO];
      cfg.push_uniforms = draw->push_uniforms;
      cfg.textures = vs_desc_state->tables[PANVK_BIFROST_DESC_TABLE_TEXTURE];
      cfg.samplers = vs_desc_state->tables[PANVK_BIFROST_DESC_TABLE_SAMPLER];
   }
}
860 
static VkResult
panvk_draw_prepare_vertex_job(struct panvk_cmd_buffer *cmdbuf,
                              struct panvk_draw_info *draw)
{
   /* Allocate and fill a COMPUTE_JOB descriptor that runs the vertex
    * shader for this draw (non-IDVS path). Returns
    * VK_ERROR_OUT_OF_DEVICE_MEMORY if the descriptor allocation fails. */
   struct panvk_batch *batch = cmdbuf->cur_batch;
   struct panfrost_ptr ptr = panvk_cmd_alloc_desc(cmdbuf, COMPUTE_JOB);
   if (!ptr.gpu)
      return VK_ERROR_OUT_OF_DEVICE_MEMORY;

   util_dynarray_append(&batch->jobs, void *, ptr.cpu);
   draw->jobs.vertex = ptr;

   /* The invocation section was pre-packed by
    * panfrost_pack_work_groups_compute(); copy it in verbatim. */
   memcpy(pan_section_ptr(ptr.cpu, COMPUTE_JOB, INVOCATION), &draw->invocation,
          pan_size(INVOCATION));

   pan_section_pack(ptr.cpu, COMPUTE_JOB, PARAMETERS, cfg) {
      cfg.job_task_split = 5;
   }

   panvk_emit_vertex_dcd(cmdbuf, draw,
                         pan_section_ptr(ptr.cpu, COMPUTE_JOB, DRAW));
   return VK_SUCCESS;
}
884 
885 static enum mali_draw_mode
translate_prim_topology(VkPrimitiveTopology in)886 translate_prim_topology(VkPrimitiveTopology in)
887 {
888    /* Test VK_PRIMITIVE_TOPOLOGY_META_RECT_LIST_MESA separately, as it's not
889     * part of the VkPrimitiveTopology enum.
890     */
891    if (in == VK_PRIMITIVE_TOPOLOGY_META_RECT_LIST_MESA)
892       return MALI_DRAW_MODE_TRIANGLES;
893 
894    switch (in) {
895    case VK_PRIMITIVE_TOPOLOGY_POINT_LIST:
896       return MALI_DRAW_MODE_POINTS;
897    case VK_PRIMITIVE_TOPOLOGY_LINE_LIST:
898       return MALI_DRAW_MODE_LINES;
899    case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP:
900       return MALI_DRAW_MODE_LINE_STRIP;
901    case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST:
902       return MALI_DRAW_MODE_TRIANGLES;
903    case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP:
904       return MALI_DRAW_MODE_TRIANGLE_STRIP;
905    case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN:
906       return MALI_DRAW_MODE_TRIANGLE_FAN;
907    case VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY:
908    case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY:
909    case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY:
910    case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY:
911    case VK_PRIMITIVE_TOPOLOGY_PATCH_LIST:
912    default:
913       unreachable("Invalid primitive type");
914    }
915 }
916 
static void
panvk_emit_tiler_primitive(struct panvk_cmd_buffer *cmdbuf,
                           const struct panvk_draw_info *draw, void *prim)
{
   /* Fill the PRIMITIVE section of a tiler/IDVS job: draw mode, index
    * buffer parameters and point-size/secondary-shader flags. */
   const struct panvk_shader *vs = cmdbuf->state.gfx.vs.shader;
   const struct vk_input_assembly_state *ia =
      &cmdbuf->vk.dynamic_graphics_state.ia;
   /* gl_PointSize output is only meaningful for point topologies. */
   bool writes_point_size =
      vs->info.vs.writes_point_size &&
      ia->primitive_topology == VK_PRIMITIVE_TOPOLOGY_POINT_LIST;
   bool secondary_shader = vs->info.vs.secondary_enable && fs_required(cmdbuf);

   pan_pack(prim, PRIMITIVE, cfg) {
      cfg.draw_mode = translate_prim_topology(ia->primitive_topology);
      if (writes_point_size)
         cfg.point_size_array_format = MALI_POINT_SIZE_ARRAY_FORMAT_FP16;

      cfg.first_provoking_vertex = true;
      if (ia->primitive_restart_enable)
         cfg.primitive_restart = MALI_PRIMITIVE_RESTART_IMPLICIT;
      cfg.job_task_split = 6;

      if (draw->index_size) {
         cfg.index_count = draw->index_count;
         cfg.indices = draw->indices;
         cfg.base_vertex_offset = draw->vertex_offset - draw->offset_start;

         switch (draw->index_size) {
         case 4:
            cfg.index_type = MALI_INDEX_TYPE_UINT32;
            break;
         case 2:
            cfg.index_type = MALI_INDEX_TYPE_UINT16;
            break;
         case 1:
            cfg.index_type = MALI_INDEX_TYPE_UINT8;
            break;
         default:
            unreachable("Invalid index size");
         }
      } else {
         /* Non-indexed draw: the vertex count doubles as the index
          * count and no index buffer is bound. */
         cfg.index_count = draw->vertex_count;
         cfg.index_type = MALI_INDEX_TYPE_NONE;
      }

      cfg.secondary_shader = secondary_shader;
   }
}
965 
966 static void
panvk_emit_tiler_primitive_size(struct panvk_cmd_buffer * cmdbuf,const struct panvk_draw_info * draw,void * primsz)967 panvk_emit_tiler_primitive_size(struct panvk_cmd_buffer *cmdbuf,
968                                 const struct panvk_draw_info *draw,
969                                 void *primsz)
970 {
971    const struct panvk_shader *vs = cmdbuf->state.gfx.vs.shader;
972    const struct vk_input_assembly_state *ia =
973       &cmdbuf->vk.dynamic_graphics_state.ia;
974    bool writes_point_size =
975       vs->info.vs.writes_point_size &&
976       ia->primitive_topology == VK_PRIMITIVE_TOPOLOGY_POINT_LIST;
977 
978    pan_pack(primsz, PRIMITIVE_SIZE, cfg) {
979       if (writes_point_size) {
980          cfg.size_array = draw->psiz;
981       } else {
982          cfg.constant = draw->line_width;
983       }
984    }
985 }
986 
static void
panvk_emit_tiler_dcd(struct panvk_cmd_buffer *cmdbuf,
                     const struct panvk_draw_info *draw, void *dcd)
{
   /* Fill the Draw Call Descriptor for the fragment side of the draw:
    * culling state, position/varying buffers and the FS descriptor
    * tables. */
   struct panvk_shader_desc_state *fs_desc_state = &cmdbuf->state.gfx.fs.desc;
   const struct vk_rasterization_state *rs =
      &cmdbuf->vk.dynamic_graphics_state.rs;
   const struct vk_input_assembly_state *ia =
      &cmdbuf->vk.dynamic_graphics_state.ia;

   pan_pack(dcd, DRAW, cfg) {
      cfg.front_face_ccw = rs->front_face == VK_FRONT_FACE_COUNTER_CLOCKWISE;
      cfg.cull_front_face = (rs->cull_mode & VK_CULL_MODE_FRONT_BIT) != 0;
      cfg.cull_back_face = (rs->cull_mode & VK_CULL_MODE_BACK_BIT) != 0;
      cfg.position = draw->position;
      cfg.state = draw->fs.rsd;
      cfg.attributes = fs_desc_state->img_attrib_table;
      cfg.attribute_buffers =
         fs_desc_state->tables[PANVK_BIFROST_DESC_TABLE_IMG];
      cfg.viewport = draw->viewport;
      cfg.varyings = draw->fs.varyings;
      /* No FS varyings -> no varying buffers either. */
      cfg.varying_buffers = cfg.varyings ? draw->varying_bufs : 0;
      cfg.thread_storage = draw->tls;

      /* For all primitives but lines DRAW.flat_shading_vertex must
       * be set to 0 and the provoking vertex is selected with the
       * PRIMITIVE.first_provoking_vertex field.
       */
      if (ia->primitive_topology == VK_PRIMITIVE_TOPOLOGY_LINE_LIST ||
          ia->primitive_topology == VK_PRIMITIVE_TOPOLOGY_LINE_STRIP)
         cfg.flat_shading_vertex = true;

      cfg.offset_start = draw->offset_start;
      /* Must match the instance size programmed on the vertex side. */
      cfg.instance_size =
         draw->instance_count > 1 ? draw->padded_vertex_count : 1;
      cfg.uniform_buffers = fs_desc_state->tables[PANVK_BIFROST_DESC_TABLE_UBO];
      cfg.push_uniforms = draw->push_uniforms;
      cfg.textures = fs_desc_state->tables[PANVK_BIFROST_DESC_TABLE_TEXTURE];
      cfg.samplers = fs_desc_state->tables[PANVK_BIFROST_DESC_TABLE_SAMPLER];

      /* TODO: occlusion queries */
   }
}
1030 
1031 static VkResult
panvk_draw_prepare_tiler_job(struct panvk_cmd_buffer * cmdbuf,struct panvk_draw_info * draw)1032 panvk_draw_prepare_tiler_job(struct panvk_cmd_buffer *cmdbuf,
1033                              struct panvk_draw_info *draw)
1034 {
1035    struct panvk_batch *batch = cmdbuf->cur_batch;
1036    const struct panvk_shader *fs = cmdbuf->state.gfx.fs.shader;
1037    struct panvk_shader_desc_state *fs_desc_state = &cmdbuf->state.gfx.fs.desc;
1038    struct panfrost_ptr ptr;
1039    VkResult result = panvk_per_arch(meta_get_copy_desc_job)(
1040       cmdbuf, fs, &cmdbuf->state.gfx.desc_state, fs_desc_state, 0, &ptr);
1041 
1042    if (result != VK_SUCCESS)
1043       return result;
1044 
1045    if (ptr.cpu)
1046       util_dynarray_append(&batch->jobs, void *, ptr.cpu);
1047 
1048    draw->jobs.frag_copy_desc = ptr;
1049 
1050    ptr = panvk_cmd_alloc_desc(cmdbuf, TILER_JOB);
1051    util_dynarray_append(&batch->jobs, void *, ptr.cpu);
1052    draw->jobs.tiler = ptr;
1053 
1054    memcpy(pan_section_ptr(ptr.cpu, TILER_JOB, INVOCATION), &draw->invocation,
1055           pan_size(INVOCATION));
1056 
1057    panvk_emit_tiler_primitive(cmdbuf, draw,
1058                               pan_section_ptr(ptr.cpu, TILER_JOB, PRIMITIVE));
1059 
1060    panvk_emit_tiler_primitive_size(
1061       cmdbuf, draw, pan_section_ptr(ptr.cpu, TILER_JOB, PRIMITIVE_SIZE));
1062 
1063    panvk_emit_tiler_dcd(cmdbuf, draw,
1064                         pan_section_ptr(ptr.cpu, TILER_JOB, DRAW));
1065 
1066    pan_section_pack(ptr.cpu, TILER_JOB, TILER, cfg) {
1067       cfg.address = PAN_ARCH >= 9 ? draw->tiler_ctx->valhall.desc
1068                                   : draw->tiler_ctx->bifrost.desc;
1069    }
1070 
1071    pan_section_pack(ptr.cpu, TILER_JOB, PADDING, padding)
1072       ;
1073 
1074    return VK_SUCCESS;
1075 }
1076 
static VkResult
panvk_draw_prepare_idvs_job(struct panvk_cmd_buffer *cmdbuf,
                            struct panvk_draw_info *draw)
{
   /* Allocate and fill an INDEXED_VERTEX_JOB, which runs the vertex
    * shading and tiling stages in a single job (IDVS path). Returns
    * VK_ERROR_OUT_OF_DEVICE_MEMORY if the descriptor allocation fails. */
   struct panvk_batch *batch = cmdbuf->cur_batch;
   struct panfrost_ptr ptr = panvk_cmd_alloc_desc(cmdbuf, INDEXED_VERTEX_JOB);
   if (!ptr.gpu)
      return VK_ERROR_OUT_OF_DEVICE_MEMORY;

   util_dynarray_append(&batch->jobs, void *, ptr.cpu);
   draw->jobs.idvs = ptr;

   /* The invocation section was pre-packed by
    * panfrost_pack_work_groups_compute(); copy it in verbatim. */
   memcpy(pan_section_ptr(ptr.cpu, INDEXED_VERTEX_JOB, INVOCATION),
          &draw->invocation, pan_size(INVOCATION));

   panvk_emit_tiler_primitive(
      cmdbuf, draw, pan_section_ptr(ptr.cpu, INDEXED_VERTEX_JOB, PRIMITIVE));

   panvk_emit_tiler_primitive_size(
      cmdbuf, draw,
      pan_section_ptr(ptr.cpu, INDEXED_VERTEX_JOB, PRIMITIVE_SIZE));

   pan_section_pack(ptr.cpu, INDEXED_VERTEX_JOB, TILER, cfg) {
      cfg.address = PAN_ARCH >= 9 ? draw->tiler_ctx->valhall.desc
                                  : draw->tiler_ctx->bifrost.desc;
   }

   pan_section_pack(ptr.cpu, INDEXED_VERTEX_JOB, PADDING, _) {
   }

   /* An IDVS job carries two DCDs: one for the fragment stage and one
    * for the vertex stage. */
   panvk_emit_tiler_dcd(
      cmdbuf, draw,
      pan_section_ptr(ptr.cpu, INDEXED_VERTEX_JOB, FRAGMENT_DRAW));

   panvk_emit_vertex_dcd(
      cmdbuf, draw, pan_section_ptr(ptr.cpu, INDEXED_VERTEX_JOB, VERTEX_DRAW));
   return VK_SUCCESS;
}
1115 
1116 static VkResult
panvk_draw_prepare_vs_copy_desc_job(struct panvk_cmd_buffer * cmdbuf,struct panvk_draw_info * draw)1117 panvk_draw_prepare_vs_copy_desc_job(struct panvk_cmd_buffer *cmdbuf,
1118                                     struct panvk_draw_info *draw)
1119 {
1120    struct panvk_batch *batch = cmdbuf->cur_batch;
1121    const struct panvk_shader *vs = cmdbuf->state.gfx.vs.shader;
1122    const struct panvk_shader_desc_state *vs_desc_state =
1123       &cmdbuf->state.gfx.vs.desc;
1124    const struct vk_vertex_input_state *vi =
1125       cmdbuf->vk.dynamic_graphics_state.vi;
1126    unsigned num_vbs = util_last_bit(vi->bindings_valid);
1127    struct panfrost_ptr ptr;
1128    VkResult result = panvk_per_arch(meta_get_copy_desc_job)(
1129       cmdbuf, vs, &cmdbuf->state.gfx.desc_state, vs_desc_state,
1130       num_vbs * pan_size(ATTRIBUTE_BUFFER) * 2, &ptr);
1131    if (result != VK_SUCCESS)
1132       return result;
1133 
1134    if (ptr.cpu)
1135       util_dynarray_append(&batch->jobs, void *, ptr.cpu);
1136 
1137    draw->jobs.vertex_copy_desc = ptr;
1138    return VK_SUCCESS;
1139 }
1140 
1141 static VkResult
panvk_draw_prepare_fs_copy_desc_job(struct panvk_cmd_buffer * cmdbuf,struct panvk_draw_info * draw)1142 panvk_draw_prepare_fs_copy_desc_job(struct panvk_cmd_buffer *cmdbuf,
1143                                     struct panvk_draw_info *draw)
1144 {
1145    const struct panvk_shader *fs = cmdbuf->state.gfx.fs.shader;
1146    struct panvk_shader_desc_state *fs_desc_state = &cmdbuf->state.gfx.fs.desc;
1147    struct panvk_batch *batch = cmdbuf->cur_batch;
1148    struct panfrost_ptr ptr;
1149    VkResult result = panvk_per_arch(meta_get_copy_desc_job)(
1150       cmdbuf, fs, &cmdbuf->state.gfx.desc_state, fs_desc_state, 0, &ptr);
1151 
1152    if (result != VK_SUCCESS)
1153       return result;
1154 
1155    if (ptr.cpu)
1156       util_dynarray_append(&batch->jobs, void *, ptr.cpu);
1157 
1158    draw->jobs.frag_copy_desc = ptr;
1159    return VK_SUCCESS;
1160 }
1161 
1162 void
panvk_per_arch(cmd_preload_fb_after_batch_split)1163 panvk_per_arch(cmd_preload_fb_after_batch_split)(struct panvk_cmd_buffer *cmdbuf)
1164 {
1165    for (unsigned i = 0; i < cmdbuf->state.gfx.render.fb.info.rt_count; i++) {
1166       if (cmdbuf->state.gfx.render.fb.info.rts[i].view) {
1167          cmdbuf->state.gfx.render.fb.info.rts[i].clear = false;
1168          cmdbuf->state.gfx.render.fb.info.rts[i].preload = true;
1169       }
1170    }
1171 
1172    if (cmdbuf->state.gfx.render.fb.info.zs.view.zs) {
1173       cmdbuf->state.gfx.render.fb.info.zs.clear.z = false;
1174       cmdbuf->state.gfx.render.fb.info.zs.preload.z = true;
1175    }
1176 
1177    if (cmdbuf->state.gfx.render.fb.info.zs.view.s ||
1178        (cmdbuf->state.gfx.render.fb.info.zs.view.zs &&
1179         util_format_is_depth_and_stencil(
1180            cmdbuf->state.gfx.render.fb.info.zs.view.zs->format))) {
1181       cmdbuf->state.gfx.render.fb.info.zs.clear.s = false;
1182       cmdbuf->state.gfx.render.fb.info.zs.preload.s = true;
1183    }
1184 }
1185 
1186 static VkResult
panvk_cmd_prepare_draw_link_shaders(struct panvk_cmd_buffer * cmd)1187 panvk_cmd_prepare_draw_link_shaders(struct panvk_cmd_buffer *cmd)
1188 {
1189    struct panvk_cmd_graphics_state *gfx = &cmd->state.gfx;
1190 
1191    if (gfx->linked)
1192       return VK_SUCCESS;
1193 
1194    VkResult result = panvk_per_arch(link_shaders)(
1195       &cmd->desc_pool, gfx->vs.shader, gfx->fs.shader, &gfx->link);
1196    if (result != VK_SUCCESS) {
1197       vk_command_buffer_set_error(&cmd->vk, result);
1198       return result;
1199    }
1200 
1201    gfx->linked = true;
1202    return VK_SUCCESS;
1203 }
1204 
1205 static void
panvk_cmd_draw(struct panvk_cmd_buffer * cmdbuf,struct panvk_draw_info * draw)1206 panvk_cmd_draw(struct panvk_cmd_buffer *cmdbuf, struct panvk_draw_info *draw)
1207 {
1208    struct panvk_batch *batch = cmdbuf->cur_batch;
1209    const struct panvk_shader *vs = cmdbuf->state.gfx.vs.shader;
1210    const struct panvk_shader *fs = cmdbuf->state.gfx.fs.shader;
1211    struct panvk_shader_desc_state *vs_desc_state = &cmdbuf->state.gfx.vs.desc;
1212    struct panvk_shader_desc_state *fs_desc_state = &cmdbuf->state.gfx.fs.desc;
1213    struct panvk_descriptor_state *desc_state = &cmdbuf->state.gfx.desc_state;
1214    uint32_t layer_count = cmdbuf->state.gfx.render.layer_count;
1215    const struct vk_rasterization_state *rs =
1216       &cmdbuf->vk.dynamic_graphics_state.rs;
1217    bool idvs = vs->info.vs.idvs;
1218    VkResult result;
1219 
1220    /* If there's no vertex shader, we can skip the draw. */
1221    if (!panvk_priv_mem_dev_addr(vs->rsd))
1222       return;
1223 
1224    /* There are only 16 bits in the descriptor for the job ID. Each job has a
1225     * pilot shader dealing with descriptor copies, and we need one
1226     * <vertex,tiler> pair per draw.
1227     */
1228    if (batch->vtc_jc.job_index + (4 * layer_count) >= UINT16_MAX) {
1229       panvk_per_arch(cmd_close_batch)(cmdbuf);
1230       panvk_per_arch(cmd_preload_fb_after_batch_split)(cmdbuf);
1231       batch = panvk_per_arch(cmd_open_batch)(cmdbuf);
1232    }
1233 
1234    result = panvk_cmd_prepare_draw_link_shaders(cmdbuf);
1235    if (result != VK_SUCCESS)
1236       return;
1237 
1238    if (!rs->rasterizer_discard_enable) {
1239       result = panvk_per_arch(cmd_alloc_fb_desc)(cmdbuf);
1240       if (result != VK_SUCCESS)
1241          return;
1242    }
1243 
1244    result = panvk_per_arch(cmd_alloc_tls_desc)(cmdbuf, true);
1245    if (result != VK_SUCCESS)
1246       return;
1247 
1248    panvk_draw_prepare_attributes(cmdbuf, draw);
1249 
1250    uint32_t used_set_mask =
1251       vs->desc_info.used_set_mask | (fs ? fs->desc_info.used_set_mask : 0);
1252 
1253    result =
1254       panvk_per_arch(cmd_prepare_push_descs)(cmdbuf, desc_state, used_set_mask);
1255    if (result != VK_SUCCESS)
1256       return;
1257 
1258    result = panvk_per_arch(cmd_prepare_shader_desc_tables)(
1259       cmdbuf, &cmdbuf->state.gfx.desc_state, vs, vs_desc_state);
1260    if (result != VK_SUCCESS)
1261       return;
1262 
1263    panvk_draw_prepare_vs_copy_desc_job(cmdbuf, draw);
1264 
1265    unsigned copy_desc_job_id =
1266       draw->jobs.vertex_copy_desc.gpu
1267          ? pan_jc_add_job(&batch->vtc_jc, MALI_JOB_TYPE_COMPUTE, false, false,
1268                           0, 0, &draw->jobs.vertex_copy_desc, false)
1269          : 0;
1270 
1271    bool vs_writes_pos =
1272       cmdbuf->state.gfx.link.buf_strides[PANVK_VARY_BUF_POSITION] > 0;
1273    bool needs_tiling = !rs->rasterizer_discard_enable && vs_writes_pos;
1274 
1275    /* No need to setup the FS desc tables if the FS is not executed. */
1276    if (needs_tiling && fs_required(cmdbuf)) {
1277       result = panvk_per_arch(cmd_prepare_shader_desc_tables)(
1278          cmdbuf, &cmdbuf->state.gfx.desc_state, fs, fs_desc_state);
1279       if (result != VK_SUCCESS)
1280          return;
1281 
1282       result = panvk_draw_prepare_fs_copy_desc_job(cmdbuf, draw);
1283       if (result != VK_SUCCESS)
1284          return;
1285 
1286       if (draw->jobs.frag_copy_desc.gpu) {
1287          /* We don't need to add frag_copy_desc as a dependency because the
1288           * tiler job doesn't execute the fragment shader, the fragment job
1289           * will, and the tiler/fragment synchronization happens at the batch
1290           * level. */
1291          pan_jc_add_job(&batch->vtc_jc, MALI_JOB_TYPE_COMPUTE, false, false, 0,
1292                         0, &draw->jobs.frag_copy_desc, false);
1293       }
1294    }
1295 
1296    /* TODO: indexed draws */
1297    draw->tls = batch->tls.gpu;
1298    draw->fb = batch->fb.desc.gpu;
1299 
1300    panfrost_pack_work_groups_compute(&draw->invocation, 1, draw->vertex_range,
1301                                      draw->instance_count, 1, 1, 1, true,
1302                                      false);
1303 
1304    result = panvk_draw_prepare_fs_rsd(cmdbuf, draw);
1305    if (result != VK_SUCCESS)
1306       return;
1307 
1308    result = panvk_draw_prepare_viewport(cmdbuf, draw);
1309    if (result != VK_SUCCESS)
1310       return;
1311 
1312    batch->tlsinfo.tls.size = MAX3(vs->info.tls_size, fs ? fs->info.tls_size : 0,
1313                                   batch->tlsinfo.tls.size);
1314 
1315    for (uint32_t i = 0; i < layer_count; i++) {
1316       draw->layer_id = i;
1317       result = panvk_draw_prepare_varyings(cmdbuf, draw);
1318       if (result != VK_SUCCESS)
1319          return;
1320 
1321       result = panvk_cmd_prepare_draw_sysvals(cmdbuf, draw);
1322       if (result != VK_SUCCESS)
1323          return;
1324 
1325       cmdbuf->state.gfx.push_uniforms = panvk_per_arch(
1326          cmd_prepare_push_uniforms)(cmdbuf, &cmdbuf->state.gfx.sysvals,
1327                                     sizeof(cmdbuf->state.gfx.sysvals));
1328       if (!cmdbuf->state.gfx.push_uniforms)
1329          return;
1330 
1331       draw->push_uniforms = cmdbuf->state.gfx.push_uniforms;
1332       result = panvk_draw_prepare_tiler_context(cmdbuf, draw);
1333       if (result != VK_SUCCESS)
1334          return;
1335 
1336       if (idvs) {
1337          result = panvk_draw_prepare_idvs_job(cmdbuf, draw);
1338          if (result != VK_SUCCESS)
1339             return;
1340 
1341          pan_jc_add_job(&batch->vtc_jc, MALI_JOB_TYPE_INDEXED_VERTEX, false,
1342                         false, 0, copy_desc_job_id, &draw->jobs.idvs, false);
1343       } else {
1344          result = panvk_draw_prepare_vertex_job(cmdbuf, draw);
1345          if (result != VK_SUCCESS)
1346             return;
1347 
1348          unsigned vjob_id =
1349             pan_jc_add_job(&batch->vtc_jc, MALI_JOB_TYPE_VERTEX, false, false,
1350                            0, copy_desc_job_id, &draw->jobs.vertex, false);
1351 
1352          if (needs_tiling) {
1353             panvk_draw_prepare_tiler_job(cmdbuf, draw);
1354             pan_jc_add_job(&batch->vtc_jc, MALI_JOB_TYPE_TILER, false, false,
1355                            vjob_id, 0, &draw->jobs.tiler, false);
1356          }
1357       }
1358    }
1359 
1360    /* Clear the dirty flags all at once */
1361    vk_dynamic_graphics_state_clear_dirty(&cmdbuf->vk.dynamic_graphics_state);
1362    cmdbuf->state.gfx.dirty = 0;
1363 }
1364 
1365 static unsigned
padded_vertex_count(struct panvk_cmd_buffer * cmdbuf,uint32_t vertex_count,uint32_t instance_count)1366 padded_vertex_count(struct panvk_cmd_buffer *cmdbuf, uint32_t vertex_count,
1367                     uint32_t instance_count)
1368 {
1369    if (instance_count == 1)
1370       return vertex_count;
1371 
1372    bool idvs = cmdbuf->state.gfx.vs.shader->info.vs.idvs;
1373 
1374    /* Index-Driven Vertex Shading requires different instances to
1375     * have different cache lines for position results. Each vertex
1376     * position is 16 bytes and the Mali cache line is 64 bytes, so
1377     * the instance count must be aligned to 4 vertices.
1378     */
1379    if (idvs)
1380       vertex_count = ALIGN_POT(vertex_count, 4);
1381 
1382    return panfrost_padded_vertex_count(vertex_count);
1383 }
1384 
1385 VKAPI_ATTR void VKAPI_CALL
panvk_per_arch(CmdDraw)1386 panvk_per_arch(CmdDraw)(VkCommandBuffer commandBuffer, uint32_t vertexCount,
1387                         uint32_t instanceCount, uint32_t firstVertex,
1388                         uint32_t firstInstance)
1389 {
1390    VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
1391 
1392    if (instanceCount == 0 || vertexCount == 0)
1393       return;
1394 
1395    struct panvk_draw_info draw = {
1396       .first_vertex = firstVertex,
1397       .vertex_count = vertexCount,
1398       .vertex_range = vertexCount,
1399       .first_instance = firstInstance,
1400       .instance_count = instanceCount,
1401       .padded_vertex_count =
1402          padded_vertex_count(cmdbuf, vertexCount, instanceCount),
1403       .offset_start = firstVertex,
1404    };
1405 
1406    panvk_cmd_draw(cmdbuf, &draw);
1407 }
1408 
1409 static void
panvk_index_minmax_search(struct panvk_cmd_buffer * cmdbuf,uint32_t start,uint32_t count,bool restart,uint32_t * min,uint32_t * max)1410 panvk_index_minmax_search(struct panvk_cmd_buffer *cmdbuf, uint32_t start,
1411                           uint32_t count, bool restart, uint32_t *min,
1412                           uint32_t *max)
1413 {
1414    struct panvk_device *dev = to_panvk_device(cmdbuf->vk.base.device);
1415    struct panvk_instance *instance =
1416       to_panvk_instance(dev->vk.physical->instance);
1417    void *ptr =
1418       cmdbuf->state.gfx.ib.buffer->host_ptr + cmdbuf->state.gfx.ib.offset;
1419 
1420    assert(cmdbuf->state.gfx.ib.buffer);
1421    assert(cmdbuf->state.gfx.ib.buffer->bo);
1422    assert(cmdbuf->state.gfx.ib.buffer->host_ptr);
1423 
1424    if (!(instance->debug_flags & PANVK_DEBUG_NO_KNOWN_WARN)) {
1425       fprintf(
1426          stderr,
1427          "WARNING: Crawling index buffers from the CPU isn't valid in Vulkan\n");
1428    }
1429 
1430    *max = 0;
1431 
1432    /* TODO: Use panfrost_minmax_cache */
1433    /* TODO: Read full cacheline of data to mitigate the uncached
1434     * mapping slowness.
1435     */
1436    switch (cmdbuf->state.gfx.ib.index_size * 8) {
1437 #define MINMAX_SEARCH_CASE(sz)                                                 \
1438    case sz: {                                                                  \
1439       uint##sz##_t *indices = ptr;                                             \
1440       *min = UINT##sz##_MAX;                                                   \
1441       for (uint32_t i = 0; i < count; i++) {                                   \
1442          if (restart && indices[i + start] == UINT##sz##_MAX)                  \
1443             continue;                                                          \
1444          *min = MIN2(indices[i + start], *min);                                \
1445          *max = MAX2(indices[i + start], *max);                                \
1446       }                                                                        \
1447       break;                                                                   \
1448    }
1449       MINMAX_SEARCH_CASE(32)
1450       MINMAX_SEARCH_CASE(16)
1451       MINMAX_SEARCH_CASE(8)
1452 #undef MINMAX_SEARCH_CASE
1453    default:
1454       unreachable("Invalid index size");
1455    }
1456 }
1457 
VKAPI_ATTR void VKAPI_CALL
panvk_per_arch(CmdDrawIndexed)(VkCommandBuffer commandBuffer,
                               uint32_t indexCount, uint32_t instanceCount,
                               uint32_t firstIndex, int32_t vertexOffset,
                               uint32_t firstInstance)
{
   VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
   uint32_t min_vertex, max_vertex;

   /* Empty draws are a no-op. */
   if (instanceCount == 0 || indexCount == 0)
      return;

   const struct vk_input_assembly_state *ia =
      &cmdbuf->vk.dynamic_graphics_state.ia;
   bool primitive_restart = ia->primitive_restart_enable;

   /* The HW needs a tight vertex range; crawl the index buffer on the
    * CPU to find it (see the warning in panvk_index_minmax_search). */
   panvk_index_minmax_search(cmdbuf, firstIndex, indexCount, primitive_restart,
                             &min_vertex, &max_vertex);

   unsigned vertex_range = max_vertex - min_vertex + 1;
   struct panvk_draw_info draw = {
      .index_size = cmdbuf->state.gfx.ib.index_size,
      .first_index = firstIndex,
      .index_count = indexCount,
      .vertex_offset = vertexOffset,
      .first_instance = firstInstance,
      .instance_count = instanceCount,
      .vertex_range = vertex_range,
      /* NOTE(review): vertex_count here is indexCount + |vertexOffset|,
       * not the referenced vertex range — presumably an upper bound for
       * the non-indexed invocation sizing; confirm against how
       * draw->vertex_count is consumed. */
      .vertex_count = indexCount + abs(vertexOffset),
      .padded_vertex_count =
         padded_vertex_count(cmdbuf, vertex_range, instanceCount),
      .offset_start = min_vertex + vertexOffset,
      /* GPU address of the first index consumed by this draw. */
      .indices = panvk_buffer_gpu_ptr(cmdbuf->state.gfx.ib.buffer,
                                      cmdbuf->state.gfx.ib.offset) +
                 (firstIndex * cmdbuf->state.gfx.ib.index_size),
   };

   panvk_cmd_draw(cmdbuf, &draw);
}
1497 
VKAPI_ATTR void VKAPI_CALL
panvk_per_arch(CmdDrawIndirect)(VkCommandBuffer commandBuffer, VkBuffer _buffer,
                                VkDeviceSize offset, uint32_t drawCount,
                                uint32_t stride)
{
   /* TODO: indirect draws are not implemented on the JM backend yet. */
   panvk_stub();
}
1505 
VKAPI_ATTR void VKAPI_CALL
panvk_per_arch(CmdDrawIndexedIndirect)(VkCommandBuffer commandBuffer,
                                       VkBuffer _buffer, VkDeviceSize offset,
                                       uint32_t drawCount, uint32_t stride)
{
   /* TODO: indexed indirect draws are not implemented on the JM backend
    * yet. */
   panvk_stub();
}
1513 
/* Translate a VkRenderingInfo into the command buffer's render state and the
 * pan_fb_info used to emit the framebuffer descriptor: attachment formats,
 * clear/preload decisions, resolve requests, sample counts and framebuffer
 * dimensions. When resuming a suspended pass, the previously captured state
 * is kept as-is.
 */
static void
panvk_cmd_begin_rendering_init_state(struct panvk_cmd_buffer *cmdbuf,
                                     const VkRenderingInfo *pRenderingInfo)
{
   struct panvk_device *dev = to_panvk_device(cmdbuf->vk.base.device);
   struct panvk_physical_device *phys_dev =
      to_panvk_physical_device(dev->vk.physical);
   struct pan_fb_info *fbinfo = &cmdbuf->state.gfx.render.fb.info;
   /* Framebuffer dimensions are derived from the bound attachments; only
    * falls back to the render area when no attachment is bound. */
   uint32_t att_width = 0, att_height = 0;

   cmdbuf->state.gfx.render.flags = pRenderingInfo->flags;

   /* Resuming from a suspended pass, the state should be unchanged. */
   if (cmdbuf->state.gfx.render.flags & VK_RENDERING_RESUMING_BIT)
      return;

   /* Fresh pass: reset all per-pass attachment tracking. */
   cmdbuf->state.gfx.render.fb.bo_count = 0;
   memset(cmdbuf->state.gfx.render.fb.bos, 0,
          sizeof(cmdbuf->state.gfx.render.fb.bos));
   memset(cmdbuf->state.gfx.render.fb.crc_valid, 0,
          sizeof(cmdbuf->state.gfx.render.fb.crc_valid));
   memset(&cmdbuf->state.gfx.render.color_attachments, 0,
          sizeof(cmdbuf->state.gfx.render.color_attachments));
   memset(&cmdbuf->state.gfx.render.z_attachment, 0,
          sizeof(cmdbuf->state.gfx.render.z_attachment));
   memset(&cmdbuf->state.gfx.render.s_attachment, 0,
          sizeof(cmdbuf->state.gfx.render.s_attachment));
   cmdbuf->state.gfx.render.bound_attachments = 0;

   cmdbuf->state.gfx.render.layer_count = pRenderingInfo->layerCount;
   *fbinfo = (struct pan_fb_info){
      .tile_buf_budget = panfrost_query_optimal_tib_size(phys_dev->model),
      .nr_samples = 1,
      .rt_count = pRenderingInfo->colorAttachmentCount,
   };

   assert(pRenderingInfo->colorAttachmentCount <= ARRAY_SIZE(fbinfo->rts));

   /* Color attachments: record format/sample info, BOs, and clear or
    * preload policy per render target. */
   for (uint32_t i = 0; i < pRenderingInfo->colorAttachmentCount; i++) {
      const VkRenderingAttachmentInfo *att =
         &pRenderingInfo->pColorAttachments[i];
      VK_FROM_HANDLE(panvk_image_view, iview, att->imageView);

      /* VK_NULL_HANDLE attachment slots are legal and simply unbound. */
      if (!iview)
         continue;

      struct panvk_image *img =
         container_of(iview->vk.image, struct panvk_image, vk);
      const VkExtent3D iview_size =
         vk_image_mip_level_extent(&img->vk, iview->vk.base_mip_level);

      cmdbuf->state.gfx.render.bound_attachments |=
         MESA_VK_RP_ATTACHMENT_COLOR_BIT(i);
      cmdbuf->state.gfx.render.color_attachments.fmts[i] = iview->vk.format;
      cmdbuf->state.gfx.render.color_attachments.samples[i] = img->vk.samples;
      att_width = MAX2(iview_size.width, att_width);
      att_height = MAX2(iview_size.height, att_height);

      cmdbuf->state.gfx.render.fb.bos[cmdbuf->state.gfx.render.fb.bo_count++] =
         img->bo;
      fbinfo->rts[i].view = &iview->pview;
      fbinfo->rts[i].crc_valid = &cmdbuf->state.gfx.render.fb.crc_valid[i];
      fbinfo->nr_samples =
         MAX2(fbinfo->nr_samples, pan_image_view_get_nr_samples(&iview->pview));

      if (att->loadOp == VK_ATTACHMENT_LOAD_OP_CLEAR) {
         enum pipe_format fmt = vk_format_to_pipe_format(iview->vk.format);
         union pipe_color_union *col =
            (union pipe_color_union *)&att->clearValue.color;

         /* Pre-pack the clear color into the HW representation. */
         fbinfo->rts[i].clear = true;
         pan_pack_color(phys_dev->formats.blendable, fbinfo->rts[i].clear_value,
                        col, fmt, false);
      } else if (att->loadOp == VK_ATTACHMENT_LOAD_OP_LOAD) {
         fbinfo->rts[i].preload = true;
      }

      if (att->resolveMode != VK_RESOLVE_MODE_NONE) {
         /* Resolves are deferred to the end of the pass; just record the
          * source/destination views here. */
         struct panvk_resolve_attachment *resolve_info =
            &cmdbuf->state.gfx.render.color_attachments.resolve[i];
         VK_FROM_HANDLE(panvk_image_view, resolve_iview, att->resolveImageView);

         resolve_info->mode = att->resolveMode;
         resolve_info->src_iview = iview;
         resolve_info->dst_iview = resolve_iview;
      }
   }

   /* Depth attachment. */
   if (pRenderingInfo->pDepthAttachment &&
       pRenderingInfo->pDepthAttachment->imageView != VK_NULL_HANDLE) {
      const VkRenderingAttachmentInfo *att = pRenderingInfo->pDepthAttachment;
      VK_FROM_HANDLE(panvk_image_view, iview, att->imageView);
      struct panvk_image *img =
         container_of(iview->vk.image, struct panvk_image, vk);
      const VkExtent3D iview_size =
         vk_image_mip_level_extent(&img->vk, iview->vk.base_mip_level);

      if (iview->vk.aspects & VK_IMAGE_ASPECT_DEPTH_BIT) {
         cmdbuf->state.gfx.render.bound_attachments |=
            MESA_VK_RP_ATTACHMENT_DEPTH_BIT;
         att_width = MAX2(iview_size.width, att_width);
         att_height = MAX2(iview_size.height, att_height);

         cmdbuf->state.gfx.render.fb
            .bos[cmdbuf->state.gfx.render.fb.bo_count++] = img->bo;
         fbinfo->zs.view.zs = &iview->pview;
         fbinfo->nr_samples = MAX2(
            fbinfo->nr_samples, pan_image_view_get_nr_samples(&iview->pview));

         /* Combined depth/stencil format: preserve the stencil half even
          * though only the depth aspect is bound here. May be overridden
          * by the stencil attachment handling below. */
         if (vk_format_has_stencil(img->vk.format))
            fbinfo->zs.preload.s = true;

         if (att->loadOp == VK_ATTACHMENT_LOAD_OP_CLEAR) {
            fbinfo->zs.clear.z = true;
            fbinfo->zs.clear_value.depth = att->clearValue.depthStencil.depth;
         } else if (att->loadOp == VK_ATTACHMENT_LOAD_OP_LOAD) {
            fbinfo->zs.preload.z = true;
         }

         if (att->resolveMode != VK_RESOLVE_MODE_NONE) {
            struct panvk_resolve_attachment *resolve_info =
               &cmdbuf->state.gfx.render.z_attachment.resolve;
            VK_FROM_HANDLE(panvk_image_view, resolve_iview,
                           att->resolveImageView);

            resolve_info->mode = att->resolveMode;
            resolve_info->src_iview = iview;
            resolve_info->dst_iview = resolve_iview;
         }
      }
   }

   /* Stencil attachment. */
   if (pRenderingInfo->pStencilAttachment &&
       pRenderingInfo->pStencilAttachment->imageView != VK_NULL_HANDLE) {
      const VkRenderingAttachmentInfo *att = pRenderingInfo->pStencilAttachment;
      VK_FROM_HANDLE(panvk_image_view, iview, att->imageView);
      struct panvk_image *img =
         container_of(iview->vk.image, struct panvk_image, vk);
      const VkExtent3D iview_size =
         vk_image_mip_level_extent(&img->vk, iview->vk.base_mip_level);

      if (iview->vk.aspects & VK_IMAGE_ASPECT_STENCIL_BIT) {
         cmdbuf->state.gfx.render.bound_attachments |=
            MESA_VK_RP_ATTACHMENT_STENCIL_BIT;
         att_width = MAX2(iview_size.width, att_width);
         att_height = MAX2(iview_size.height, att_height);

         cmdbuf->state.gfx.render.fb
            .bos[cmdbuf->state.gfx.render.fb.bo_count++] = img->bo;

         if (drm_is_afbc(img->pimage.layout.modifier)) {
            /* AFBC stencil is carried by the combined ZS view; it must
             * either match the depth view or stand alone. */
            assert(fbinfo->zs.view.zs == &iview->pview || !fbinfo->zs.view.zs);
            fbinfo->zs.view.zs = &iview->pview;
         } else {
            /* Separate stencil plane, unless it aliases the depth view. */
            fbinfo->zs.view.s =
               &iview->pview != fbinfo->zs.view.zs ? &iview->pview : NULL;
         }

         /* NOTE(review): this repeats the else branch above unconditionally;
          * in the AFBC path it resets zs.view.s to NULL (zs.view.zs was just
          * set to this view). Looks redundant but behavior-neutral — confirm
          * before removing. */
         fbinfo->zs.view.s =
            &iview->pview != fbinfo->zs.view.zs ? &iview->pview : NULL;
         fbinfo->nr_samples = MAX2(
            fbinfo->nr_samples, pan_image_view_get_nr_samples(&iview->pview));

         if (vk_format_has_depth(img->vk.format)) {
            /* Combined format bound through the stencil aspect: route it
             * through the ZS view and preserve the depth half unless it is
             * being cleared. */
            assert(fbinfo->zs.view.zs == NULL ||
                   &iview->pview == fbinfo->zs.view.zs);
            fbinfo->zs.view.zs = &iview->pview;

            fbinfo->zs.preload.s = false;
            fbinfo->zs.clear.s = false;
            if (!fbinfo->zs.clear.z)
               fbinfo->zs.preload.z = true;
         }

         if (att->loadOp == VK_ATTACHMENT_LOAD_OP_CLEAR) {
            fbinfo->zs.clear.s = true;
            fbinfo->zs.clear_value.stencil =
               att->clearValue.depthStencil.stencil;
         } else if (att->loadOp == VK_ATTACHMENT_LOAD_OP_LOAD) {
            fbinfo->zs.preload.s = true;
         }

         if (att->resolveMode != VK_RESOLVE_MODE_NONE) {
            struct panvk_resolve_attachment *resolve_info =
               &cmdbuf->state.gfx.render.s_attachment.resolve;
            VK_FROM_HANDLE(panvk_image_view, resolve_iview,
                           att->resolveImageView);

            resolve_info->mode = att->resolveMode;
            resolve_info->src_iview = iview;
            resolve_info->dst_iview = resolve_iview;
         }
      }
   }

   /* If both depth and stencil are needed but the bound view's format only
    * carries one aspect, retype the view to a combined ZS format of the
    * same block size so the other aspect is addressable. */
   if (fbinfo->zs.view.zs) {
      const struct util_format_description *fdesc =
         util_format_description(fbinfo->zs.view.zs->format);
      bool needs_depth = fbinfo->zs.clear.z | fbinfo->zs.preload.z |
                         util_format_has_depth(fdesc);
      bool needs_stencil = fbinfo->zs.clear.s | fbinfo->zs.preload.s |
                           util_format_has_stencil(fdesc);
      enum pipe_format new_fmt =
         util_format_get_blocksize(fbinfo->zs.view.zs->format) == 4
            ? PIPE_FORMAT_Z24_UNORM_S8_UINT
            : PIPE_FORMAT_Z32_FLOAT_S8X24_UINT;

      if (needs_depth && needs_stencil &&
          fbinfo->zs.view.zs->format != new_fmt) {
         /* Keep a copy on the command buffer so the retyped view outlives
          * this function. */
         cmdbuf->state.gfx.render.zs_pview = *fbinfo->zs.view.zs;
         cmdbuf->state.gfx.render.zs_pview.format = new_fmt;
         fbinfo->zs.view.zs = &cmdbuf->state.gfx.render.zs_pview;
      }
   }

   /* Render area as an inclusive extent. */
   fbinfo->extent.minx = pRenderingInfo->renderArea.offset.x;
   fbinfo->extent.maxx = pRenderingInfo->renderArea.offset.x +
                         pRenderingInfo->renderArea.extent.width - 1;
   fbinfo->extent.miny = pRenderingInfo->renderArea.offset.y;
   fbinfo->extent.maxy = pRenderingInfo->renderArea.offset.y +
                         pRenderingInfo->renderArea.extent.height - 1;

   if (cmdbuf->state.gfx.render.bound_attachments) {
      fbinfo->width = att_width;
      fbinfo->height = att_height;
   } else {
      /* No attachments: size the FB to cover the render area. */
      fbinfo->width = fbinfo->extent.maxx + 1;
      fbinfo->height = fbinfo->extent.maxy + 1;
   }

   assert(fbinfo->width && fbinfo->height);

   /* We need to re-emit the FS RSD when the color attachments change. */
   cmdbuf->state.gfx.fs.rsd = 0;
}
1749 
1750 static void
preload_render_area_border(struct panvk_cmd_buffer * cmdbuf,const VkRenderingInfo * render_info)1751 preload_render_area_border(struct panvk_cmd_buffer *cmdbuf,
1752                            const VkRenderingInfo *render_info)
1753 {
1754    struct pan_fb_info *fbinfo = &cmdbuf->state.gfx.render.fb.info;
1755    bool render_area_is_32x32_aligned =
1756       ((fbinfo->extent.minx | fbinfo->extent.miny) % 32) == 0 &&
1757       (fbinfo->extent.maxx + 1 == fbinfo->width ||
1758        (fbinfo->extent.maxx % 32) == 31) &&
1759       (fbinfo->extent.maxy + 1 == fbinfo->height ||
1760        (fbinfo->extent.maxy % 32) == 31);
1761 
1762    /* If the render area is aligned on a 32x32 section, we're good. */
1763    if (render_area_is_32x32_aligned)
1764       return;
1765 
1766    /* We force preloading for all active attachments to preverse content falling
1767     * outside the render area, but we need to compensate with attachment clears
1768     * for attachments that were initially cleared.
1769     */
1770    uint32_t bound_atts = cmdbuf->state.gfx.render.bound_attachments;
1771    VkClearAttachment clear_atts[MAX_RTS + 2];
1772    uint32_t clear_att_count = 0;
1773 
1774    for (uint32_t i = 0; i < render_info->colorAttachmentCount; i++) {
1775       if (bound_atts & MESA_VK_RP_ATTACHMENT_COLOR_BIT(i)) {
1776          if (fbinfo->rts[i].clear) {
1777             const VkRenderingAttachmentInfo *att =
1778                &render_info->pColorAttachments[i];
1779 
1780             clear_atts[clear_att_count++] = (VkClearAttachment){
1781                .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
1782                .colorAttachment = i,
1783                .clearValue = att->clearValue,
1784             };
1785          }
1786 
1787          fbinfo->rts[i].preload = true;
1788          fbinfo->rts[i].clear = false;
1789       }
1790    }
1791 
1792    if (bound_atts & MESA_VK_RP_ATTACHMENT_DEPTH_BIT) {
1793       if (fbinfo->zs.clear.z) {
1794          const VkRenderingAttachmentInfo *att = render_info->pDepthAttachment;
1795 
1796          clear_atts[clear_att_count++] = (VkClearAttachment){
1797             .aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT,
1798             .clearValue = att->clearValue,
1799          };
1800       }
1801 
1802       fbinfo->zs.preload.z = true;
1803       fbinfo->zs.clear.z = false;
1804    }
1805 
1806    if (bound_atts & MESA_VK_RP_ATTACHMENT_STENCIL_BIT) {
1807       if (fbinfo->zs.clear.s) {
1808          const VkRenderingAttachmentInfo *att = render_info->pStencilAttachment;
1809 
1810          clear_atts[clear_att_count++] = (VkClearAttachment){
1811             .aspectMask = VK_IMAGE_ASPECT_STENCIL_BIT,
1812             .clearValue = att->clearValue,
1813          };
1814       }
1815 
1816       fbinfo->zs.preload.s = true;
1817       fbinfo->zs.clear.s = false;
1818    }
1819 
1820    if (clear_att_count) {
1821       VkClearRect clear_rect = {
1822          .rect = render_info->renderArea,
1823          .baseArrayLayer = 0,
1824          .layerCount = render_info->layerCount,
1825       };
1826 
1827       panvk_per_arch(CmdClearAttachments)(panvk_cmd_buffer_to_handle(cmdbuf),
1828                                           clear_att_count, clear_atts, 1,
1829                                           &clear_rect);
1830    }
1831 }
1832 
1833 VKAPI_ATTR void VKAPI_CALL
panvk_per_arch(CmdBeginRendering)1834 panvk_per_arch(CmdBeginRendering)(VkCommandBuffer commandBuffer,
1835                                   const VkRenderingInfo *pRenderingInfo)
1836 {
1837    VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
1838 
1839    panvk_cmd_begin_rendering_init_state(cmdbuf, pRenderingInfo);
1840 
1841    bool resuming = cmdbuf->state.gfx.render.flags & VK_RENDERING_RESUMING_BIT;
1842 
1843    /* If we're not resuming, cur_batch should be NULL.
1844     * However, this currently isn't true because of how events are implemented.
1845     * XXX: Rewrite events to not close and open batch and add an assert here.
1846     */
1847    if (cmdbuf->cur_batch && !resuming)
1848       panvk_per_arch(cmd_close_batch)(cmdbuf);
1849 
1850    /* The opened batch might have been disrupted by a compute job.
1851     * We need to preload in that case. */
1852    if (resuming && !cmdbuf->cur_batch)
1853       panvk_per_arch(cmd_preload_fb_after_batch_split)(cmdbuf);
1854 
1855    if (!cmdbuf->cur_batch)
1856       panvk_per_arch(cmd_open_batch)(cmdbuf);
1857 
1858    if (!resuming)
1859       preload_render_area_border(cmdbuf, pRenderingInfo);
1860 }
1861 
1862 static void
resolve_attachments(struct panvk_cmd_buffer * cmdbuf)1863 resolve_attachments(struct panvk_cmd_buffer *cmdbuf)
1864 {
1865    struct pan_fb_info *fbinfo = &cmdbuf->state.gfx.render.fb.info;
1866    bool needs_resolve = false;
1867 
1868    unsigned bound_atts = cmdbuf->state.gfx.render.bound_attachments;
1869    unsigned color_att_count =
1870       util_last_bit(bound_atts & MESA_VK_RP_ATTACHMENT_ANY_COLOR_BITS);
1871    VkRenderingAttachmentInfo color_atts[MAX_RTS];
1872    for (uint32_t i = 0; i < color_att_count; i++) {
1873       const struct panvk_resolve_attachment *resolve_info =
1874          &cmdbuf->state.gfx.render.color_attachments.resolve[i];
1875 
1876       color_atts[i] = (VkRenderingAttachmentInfo){
1877          .sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO,
1878          .imageView = panvk_image_view_to_handle(resolve_info->src_iview),
1879          .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
1880          .resolveMode = resolve_info->mode,
1881          .resolveImageView =
1882             panvk_image_view_to_handle(resolve_info->dst_iview),
1883          .resolveImageLayout = VK_IMAGE_LAYOUT_GENERAL,
1884       };
1885 
1886       if (resolve_info->mode != VK_RESOLVE_MODE_NONE)
1887          needs_resolve = true;
1888    }
1889 
1890    const struct panvk_resolve_attachment *resolve_info =
1891       &cmdbuf->state.gfx.render.z_attachment.resolve;
1892    VkRenderingAttachmentInfo z_att = {
1893       .sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO,
1894       .imageView = panvk_image_view_to_handle(resolve_info->src_iview),
1895       .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
1896       .resolveMode = resolve_info->mode,
1897       .resolveImageView = panvk_image_view_to_handle(resolve_info->dst_iview),
1898       .resolveImageLayout = VK_IMAGE_LAYOUT_GENERAL,
1899    };
1900 
1901    if (resolve_info->mode != VK_RESOLVE_MODE_NONE)
1902       needs_resolve = true;
1903 
1904    VkRenderingAttachmentInfo s_att = {
1905       .sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO,
1906       .imageView = panvk_image_view_to_handle(resolve_info->src_iview),
1907       .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
1908       .resolveMode = resolve_info->mode,
1909       .resolveImageView = panvk_image_view_to_handle(resolve_info->dst_iview),
1910       .resolveImageLayout = VK_IMAGE_LAYOUT_GENERAL,
1911    };
1912 
1913    if (resolve_info->mode != VK_RESOLVE_MODE_NONE)
1914       needs_resolve = true;
1915 
1916    if (!needs_resolve)
1917       return;
1918 
1919    const VkRenderingInfo render_info = {
1920       .sType = VK_STRUCTURE_TYPE_RENDERING_INFO,
1921       .renderArea = {
1922          .offset.x = fbinfo->extent.minx,
1923          .offset.y = fbinfo->extent.miny,
1924          .extent.width = fbinfo->extent.maxx - fbinfo->extent.minx + 1,
1925          .extent.height = fbinfo->extent.maxy - fbinfo->extent.miny + 1,
1926       },
1927       .layerCount = cmdbuf->state.gfx.render.layer_count,
1928       .viewMask = 0,
1929       .colorAttachmentCount = color_att_count,
1930       .pColorAttachments = color_atts,
1931       .pDepthAttachment = &z_att,
1932       .pStencilAttachment = &s_att,
1933    };
1934 
1935    struct panvk_device *dev = to_panvk_device(cmdbuf->vk.base.device);
1936    struct panvk_cmd_meta_graphics_save_ctx save = {0};
1937 
1938    panvk_per_arch(cmd_meta_gfx_start)(cmdbuf, &save);
1939    vk_meta_resolve_rendering(&cmdbuf->vk, &dev->meta, &render_info);
1940    panvk_per_arch(cmd_meta_gfx_end)(cmdbuf, &save);
1941 }
1942 
1943 VKAPI_ATTR void VKAPI_CALL
panvk_per_arch(CmdEndRendering)1944 panvk_per_arch(CmdEndRendering)(VkCommandBuffer commandBuffer)
1945 {
1946    VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
1947 
1948    if (!(cmdbuf->state.gfx.render.flags & VK_RENDERING_SUSPENDING_BIT)) {
1949       struct pan_fb_info *fbinfo = &cmdbuf->state.gfx.render.fb.info;
1950       bool clear = fbinfo->zs.clear.z | fbinfo->zs.clear.s;
1951       for (unsigned i = 0; i < fbinfo->rt_count; i++)
1952          clear |= fbinfo->rts[i].clear;
1953 
1954       if (clear)
1955          panvk_per_arch(cmd_alloc_fb_desc)(cmdbuf);
1956 
1957       panvk_per_arch(cmd_close_batch)(cmdbuf);
1958       cmdbuf->cur_batch = NULL;
1959       resolve_attachments(cmdbuf);
1960    }
1961 }
1962 
1963 VKAPI_ATTR void VKAPI_CALL
panvk_per_arch(CmdBindVertexBuffers)1964 panvk_per_arch(CmdBindVertexBuffers)(VkCommandBuffer commandBuffer,
1965                                      uint32_t firstBinding,
1966                                      uint32_t bindingCount,
1967                                      const VkBuffer *pBuffers,
1968                                      const VkDeviceSize *pOffsets)
1969 {
1970    VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
1971 
1972    assert(firstBinding + bindingCount <= MAX_VBS);
1973 
1974    for (uint32_t i = 0; i < bindingCount; i++) {
1975       VK_FROM_HANDLE(panvk_buffer, buffer, pBuffers[i]);
1976 
1977       cmdbuf->state.gfx.vb.bufs[firstBinding + i].address =
1978          panvk_buffer_gpu_ptr(buffer, pOffsets[i]);
1979       cmdbuf->state.gfx.vb.bufs[firstBinding + i].size =
1980          panvk_buffer_range(buffer, pOffsets[i], VK_WHOLE_SIZE);
1981    }
1982 
1983    cmdbuf->state.gfx.vb.count =
1984       MAX2(cmdbuf->state.gfx.vb.count, firstBinding + bindingCount);
1985    cmdbuf->state.gfx.vs.attrib_bufs = 0;
1986 }
1987 
1988 VKAPI_ATTR void VKAPI_CALL
panvk_per_arch(CmdBindIndexBuffer)1989 panvk_per_arch(CmdBindIndexBuffer)(VkCommandBuffer commandBuffer,
1990                                    VkBuffer buffer, VkDeviceSize offset,
1991                                    VkIndexType indexType)
1992 {
1993    VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
1994    VK_FROM_HANDLE(panvk_buffer, buf, buffer);
1995 
1996    cmdbuf->state.gfx.ib.buffer = buf;
1997    cmdbuf->state.gfx.ib.offset = offset;
1998    cmdbuf->state.gfx.ib.index_size = vk_index_type_to_bytes(indexType);
1999 }
2000