/*
 * Copyright © 2022 Imagination Technologies Ltd.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <assert.h>
#include <limits.h>
#include <stdbool.h>
#include <stddef.h>
#include <string.h>
#include <vulkan/vulkan.h>

#include "hwdef/rogue_hw_defs.h"
#include "hwdef/rogue_hw_utils.h"
#include "pvr_hw_pass.h"
#include "pvr_private.h"
#include "util/bitset.h"
#include "util/list.h"
#include "util/macros.h"
#include "util/u_math.h"
#include "vk_alloc.h"
#include "vk_format.h"
#include "vk_log.h"

struct pvr_render_int_subpass {
   /* Points to the input subpass. This is set to NULL when the subpass is
    * unscheduled.
    */
   struct pvr_render_subpass *subpass;

   /* Count of other subpasses which have this subpass as a dependency. */
   uint32_t out_subpass_count;

   /* Pointers to the other subpasses which have this subpass as a dependency.
    */
   struct pvr_render_int_subpass **out_subpasses;

   /* Count of subpasses on which this subpass is dependent and which haven't
    * been scheduled yet.
    */
   uint32_t in_subpass_count;
};

struct pvr_renderpass_resource {
   /* Resource type allocated for render target. */
   enum usc_mrt_resource_type type;

   union {
      /* If type == USC_MRT_RESOURCE_TYPE_OUTPUT_REG. */
      struct {
         /* The output register to use. */
         uint32_t output_reg;

         /* The offset in bytes within the output register. */
         uint32_t offset;
      } reg;

      /* If type == USC_MRT_RESOURCE_TYPE_MEMORY. */
      struct {
         /* The index of the tile buffer to use. */
         uint32_t tile_buffer;

         /* The offset (in dwords) within the tile buffer. */
         uint32_t offset_dw;
      } mem;
   };
};

struct pvr_render_int_attachment {
   /* Points to the corresponding input attachment. */
   struct pvr_render_pass_attachment *attachment;

   /* True if this attachment is referenced in the currently open render. */
   bool is_used;

   /* Operation to use when this attachment is non-resident and referenced as a
    * color or depth attachment.
    */
   VkAttachmentLoadOp load_op;

   /* Operation to use for the stencil component when this attachment is
    * non-resident and referenced as a color or depth attachment.
    */
   VkAttachmentLoadOp stencil_load_op;

   /* Count of uses of this attachment in unscheduled subpasses. */
   uint32_t remaining_count;

   /* Count of uses of the stencil component of this attachment in unscheduled
    * subpasses.
    */
   uint32_t stencil_remaining_count;

   /* If this attachment currently has on-chip storage allocated, the details
    * of the allocated location.
    */
   struct usc_mrt_resource resource;

   /* Index of the subpass in the current render where the attachment is first
    * used. VK_ATTACHMENT_UNUSED if the attachment isn't used in the current
    * render.
    */
   int32_t first_use;

   /* Index of the subpass in the current render where the attachment is last
    * used.
    */
   int32_t last_use;

   /* Index of the subpass (global) where the attachment is last read. */
   int32_t last_read;

   /* If this attachment currently has on-chip storage allocated, the entry in
    * context.active_surf_list.
    */
   struct list_head link;

   /* During pvr_close_render: if this attachment has on-chip storage
    * allocated, the index into
    * pvr_renderpass_hwsetup_render.eot_setup.mrt_resources with details of the
    * storage location. Otherwise -1.
    */
   int32_t mrt_idx;

   /* Index of the last render where the attachment was the source of an MSAA
    * resolve.
    */
   int32_t last_resolve_src_render;

   /* Index of the last render where the attachment was the destination of an
    * MSAA resolve.
    */
   int32_t last_resolve_dst_render;

   /* true if the attachment is used with a z replicate in the current render.
    */
   bool z_replicate;

   /* true if this attachment can be resolved by the PBE. */
   bool is_pbe_downscalable;

   /* true if this attachment requires an EOT attachment. */
   bool eot_surf_required;
};

/* Which parts of the output registers/a tile buffer are currently allocated. */
struct pvr_renderpass_alloc_buffer {
   /* Bit array. A bit is set if the corresponding dword is allocated. */
   BITSET_DECLARE(allocs, 8U);
};
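
/* Editor's illustration (not part of the original driver source), assuming a
 * hypothetical buffer with no pbe2_in_xe alignment constraint: packing a
 * 1-dword render target followed by a 2-dword render target would evolve the
 * map as
 *
 *    initial state:            allocs = 00000000   (all 8 dwords free)
 *    1-dword RT at offset 0:   allocs = 10000000   (dword 0 in use)
 *    2-dword RT at offset 1:   allocs = 11100000   (dwords 0-2 in use)
 *
 * pvr_is_space_in_buffer() scans this map for a free, suitably aligned range
 * and pvr_mark_storage_allocated_in_buffer() sets the corresponding bits.
 */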

struct pvr_renderpass_alloc {
   /* Which pixel output registers are allocated. */
   struct pvr_renderpass_alloc_buffer output_reg;

   /* Range of allocated output registers. */
   uint32_t output_regs_count;

   /* Number of tile buffers allocated. */
   uint32_t tile_buffers_count;

   /* Which parts of each tile buffer are allocated. Length is
    * tile_buffers_count.
    */
   struct pvr_renderpass_alloc_buffer *tile_buffers;
};

struct pvr_renderpass_subpass {
   /* A pointer to the input subpass description. */
   struct pvr_render_subpass *input_subpass;

   /* true if the depth attachment for this subpass has z replication enabled.
    */
   bool z_replicate;

   /* Which pixel output registers/tile buffer locations are allocated during
    * this subpass.
    */
   struct pvr_renderpass_alloc alloc;
};

struct pvr_renderpass_context {
   /* Internal information about each input attachment. */
   struct pvr_render_int_attachment *int_attach;

   /* Internal information about each input subpass. */
   struct pvr_render_int_subpass *int_subpasses;

   /* Input structure. */
   struct pvr_render_pass *pass;

   /* Output structure. */
   struct pvr_renderpass_hwsetup *hw_setup;

   /* In-progress render. */
   struct pvr_renderpass_hwsetup_render *hw_render;

   /* Information about each subpass in the current render. */
   struct pvr_renderpass_subpass *subpasses;

   /* Which parts of color storage are currently allocated. */
   struct pvr_renderpass_alloc alloc;

   /* Attachment which is currently allocated the on-chip depth/stencil. */
   struct pvr_render_int_attachment *int_ds_attach;

   /* Attachment which is loaded into the on-chip depth/stencil at the start of
    * the render.
    */
   struct pvr_render_int_attachment *ds_load_surface;

   /* Attachment which the depth/stencil attachment should be resolved to at the
    * end of the render.
    */
   struct pvr_render_int_attachment *ds_resolve_surface;

   /* Count of surfaces which are allocated on-chip color storage. */
   uint32_t active_surfaces;

   /* List of attachment/ranges which are allocated on-chip color storage. */
   struct list_head active_surf_list;

   const VkAllocationCallbacks *allocator;
};

struct pvr_render_int_subpass_dsts {
   struct pvr_renderpass_resource *color;
   struct pvr_renderpass_resource incoming_zrep;
   struct pvr_renderpass_resource existing_zrep;
};

struct pvr_render_subpass_depth_params {
   bool existing_ds_is_input;
   bool incoming_ds_is_input;
   uint32_t existing_ds_attach;
};

struct pvr_renderpass_storage_firstuse_buffer {
   /* For each pixel output register/tile buffer location: true if the output
    * register has been allocated in the current render.
    */
   bool used[8U];
};

struct pvr_renderpass_storage_firstuse {
   /* First use information for pixel output registers. */
   struct pvr_renderpass_storage_firstuse_buffer output_reg;

   /* First use information for tile buffers. */
   struct pvr_renderpass_storage_firstuse_buffer *tile_buffers;
};

/** Copy information about allocated color storage. */
static VkResult pvr_copy_alloc(struct pvr_renderpass_context *ctx,
                               struct pvr_renderpass_alloc *dst,
                               struct pvr_renderpass_alloc *src)
{
   dst->output_reg = src->output_reg;
   dst->output_regs_count = src->output_regs_count;

   dst->tile_buffers_count = src->tile_buffers_count;
   if (dst->tile_buffers_count > 0U) {
      dst->tile_buffers =
         vk_alloc(ctx->allocator,
                  sizeof(dst->tile_buffers[0U]) * dst->tile_buffers_count,
                  8,
                  VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
      if (!dst->tile_buffers)
         return vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);

      memcpy(dst->tile_buffers,
             src->tile_buffers,
             sizeof(dst->tile_buffers[0U]) * dst->tile_buffers_count);
   } else {
      dst->tile_buffers = NULL;
   }

   return VK_SUCCESS;
}

/** Free information about allocated color storage. */
static void pvr_free_alloc(struct pvr_renderpass_context *ctx,
                           struct pvr_renderpass_alloc *alloc)
{
   if (alloc->tile_buffers)
      vk_free(ctx->allocator, alloc->tile_buffers);

   memset(alloc, 0U, sizeof(*alloc));
}

static void pvr_reset_render(struct pvr_renderpass_context *ctx)
{
   ctx->int_ds_attach = NULL;
   ctx->active_surfaces = 0U;
   list_inithead(&ctx->active_surf_list);

   memset(&ctx->alloc.output_reg, 0U, sizeof(ctx->alloc.output_reg));
   ctx->alloc.output_regs_count = 0U;
   ctx->alloc.tile_buffers_count = 0U;
   ctx->alloc.tile_buffers = NULL;

   ctx->hw_render = NULL;
   ctx->subpasses = NULL;
   ctx->ds_load_surface = NULL;
}

/** Gets the amount of memory to allocate per-core for a tile buffer. */
static uint32_t
pvr_get_tile_buffer_size_per_core(const struct pvr_device *device)
{
   uint32_t clusters =
      PVR_GET_FEATURE_VALUE(&device->pdevice->dev_info, num_clusters, 1U);

   /* Round the number of clusters up to the next power of two. */
   if (!PVR_HAS_FEATURE(&device->pdevice->dev_info, tile_per_usc))
      clusters = util_next_power_of_two(clusters);

   /* Tile buffer is (total number of partitions across all clusters) * 16 * 16
    * (quadrant size in pixels).
    */
   return device->pdevice->dev_runtime_info.total_reserved_partition_size *
          clusters * sizeof(uint32_t);
}
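
/* Worked example with hypothetical numbers (for illustration only): a device
 * reporting num_clusters = 6 without the tile_per_usc feature rounds up to
 * clusters = 8. If total_reserved_partition_size were 1024, the per-core
 * size would be 1024 * 8 * sizeof(uint32_t) = 32768 bytes (32 KiB).
 */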

/**
 * Gets the amount of memory to allocate for a tile buffer on the current BVNC.
 */
uint32_t pvr_get_tile_buffer_size(const struct pvr_device *device)
{
   /* On a multicore system duplicate the buffer for each core. */
   return pvr_get_tile_buffer_size_per_core(device) *
          rogue_get_max_num_cores(&device->pdevice->dev_info);
}

static void
pvr_finalise_mrt_setup(const struct pvr_device *device,
                       struct pvr_renderpass_hwsetup_render *hw_render,
                       struct usc_mrt_setup *mrt)
{
   mrt->num_output_regs = hw_render->output_regs_count;
   mrt->num_tile_buffers = hw_render->tile_buffers_count;
   mrt->tile_buffer_size = pvr_get_tile_buffer_size(device);
}

/**
 * Copy information about the number of pixel output registers and tile buffers
 * required for the current render to the output structure.
 */
static void pvr_finalise_po_alloc(const struct pvr_device *device,
                                  struct pvr_renderpass_context *ctx)
{
   struct pvr_renderpass_hwsetup_render *hw_render = ctx->hw_render;

   /* The number of output registers must be a power of two. */
   hw_render->output_regs_count =
      util_next_power_of_two(ctx->alloc.output_regs_count);

   assert(ctx->alloc.tile_buffers_count <= ctx->pass->max_tilebuffer_count);
   hw_render->tile_buffers_count = ctx->alloc.tile_buffers_count;

   /* Copy the number of output registers and tile buffers to each subpass. */
   for (uint32_t i = 0U; i < hw_render->subpass_count; i++) {
      struct pvr_renderpass_hwsetup_subpass *hw_subpass =
         &hw_render->subpasses[i];

      pvr_finalise_mrt_setup(device, hw_render, &hw_subpass->setup);
   }

   pvr_finalise_mrt_setup(device, hw_render, &hw_render->init_setup);
   pvr_finalise_mrt_setup(device, hw_render, &hw_render->eot_setup);
}
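
/* Illustration of the rounding above: if the render used 3 dwords of output
 * registers, output_regs_count becomes util_next_power_of_two(3) = 4, and
 * that value is propagated to every subpass's MRT setup as well as to the
 * init and EOT programs via pvr_finalise_mrt_setup().
 */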

/** Mark that device memory must be allocated for an attachment. */
static void pvr_mark_surface_alloc(struct pvr_renderpass_context *ctx,
                                   struct pvr_render_int_attachment *int_attach)
{
   const uint32_t attach_idx = int_attach - ctx->int_attach;

   assert(attach_idx < ctx->pass->attachment_count);
   ctx->hw_setup->surface_allocate[attach_idx] = true;
}

/**
 * Check if there is space in a buffer for storing a render target of a
 * specified size.
 */
static int32_t
pvr_is_space_in_buffer(const struct pvr_device_info *dev_info,
                       struct pvr_renderpass_alloc_buffer *buffer,
                       uint32_t pixel_size)
{
   const uint32_t max_out_regs = rogue_get_max_output_regs_per_pixel(dev_info);
   uint32_t alignment = 1U;

   if (PVR_HAS_FEATURE(dev_info, pbe2_in_xe)) {
      /* For a 64-bit/128-bit source format: the start offset must be even. */
      if (pixel_size == 2U || pixel_size == 4U)
         alignment = 2U;
   }

   assert(pixel_size <= max_out_regs);

   for (uint32_t i = 0U; i <= (max_out_regs - pixel_size); i += alignment) {
      if (!BITSET_TEST_RANGE(buffer->allocs, i, i + pixel_size - 1U))
         return i;
   }

   return -1;
}
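
/* Example search (illustrative, assuming a hypothetical allocation state):
 * for pixel_size = 2 on a core with pbe2_in_xe, alignment = 2 so only even
 * offsets are tried. With allocs = 11010000 (dwords 0, 1 and 3 in use) the
 * scan rejects offset 0 (dwords 0-1 busy) and offset 2 (dword 3 busy) and
 * returns 4, the first aligned offset with two consecutive free dwords.
 */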

static VkResult
pvr_surface_setup_render_init(struct pvr_renderpass_context *ctx,
                              struct pvr_renderpass_storage_firstuse *first_use,
                              struct usc_mrt_resource const *resource,
                              struct pvr_render_pass_attachment *attachment,
                              VkAttachmentLoadOp load_op,
                              bool *use_render_init)
{
   const uint32_t pixel_size =
      DIV_ROUND_UP(vk_format_get_blocksizebits(attachment->vk_format), 32U);
   struct pvr_renderpass_hwsetup_render *hw_render = ctx->hw_render;
   struct pvr_renderpass_storage_firstuse_buffer *buffer;
   uint32_t start;

   /* Check if this is the first use of all the allocated registers. */
   if (resource->type == USC_MRT_RESOURCE_TYPE_OUTPUT_REG) {
      buffer = &first_use->output_reg;
      start = resource->reg.output_reg;
   } else {
      assert(resource->mem.tile_buffer < ctx->alloc.tile_buffers_count);
      buffer = &first_use->tile_buffers[resource->mem.tile_buffer];
      start = resource->mem.offset_dw;
   }

   *use_render_init = true;
   for (uint32_t i = 0U; i < pixel_size; i++) {
      /* Don't initialize at the render level if the output registers were
       * previously allocated to a different attachment.
       */
      if (buffer->used[start + i])
         *use_render_init = false;

      /* Don't use render init for future attachments allocated to the same
       * registers.
       */
      buffer->used[start + i] = true;
   }

   if (load_op == VK_ATTACHMENT_LOAD_OP_DONT_CARE)
      *use_render_init = false;

   if (*use_render_init) {
      struct pvr_renderpass_colorinit *new_color_init;
      struct usc_mrt_resource *new_mrt;

      /* Initialize the storage at the start of the render. */
      new_color_init = vk_realloc(ctx->allocator,
                                  hw_render->color_init,
                                  sizeof(hw_render->color_init[0U]) *
                                     (hw_render->color_init_count + 1U),
                                  8U,
                                  VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
      if (!new_color_init)
         return vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);

      hw_render->color_init = new_color_init;
      hw_render->color_init[hw_render->color_init_count].index =
         attachment->index;
      hw_render->color_init[hw_render->color_init_count].op = load_op;

      /* Set the destination for the attachment load/clear. */
      assert(hw_render->init_setup.num_render_targets ==
             hw_render->color_init_count);

      new_mrt = vk_realloc(ctx->allocator,
                           hw_render->init_setup.mrt_resources,
                           sizeof(hw_render->init_setup.mrt_resources[0U]) *
                              (hw_render->init_setup.num_render_targets + 1U),
                           8U,
                           VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
      if (!new_mrt)
         return vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);

      hw_render->init_setup.mrt_resources = new_mrt;
      hw_render->init_setup
         .mrt_resources[hw_render->init_setup.num_render_targets] = *resource;
      hw_render->init_setup.num_render_targets++;

      hw_render->color_init_count++;
   }

   return VK_SUCCESS;
}

static VkResult
pvr_subpass_setup_render_init(struct pvr_renderpass_context *ctx)
{
   struct pvr_renderpass_hwsetup_render *hw_render = ctx->hw_render;
   struct pvr_renderpass_storage_firstuse first_use = { 0 };
   bool first_ds = true;
   VkResult result;

   if (ctx->alloc.tile_buffers_count > 0U) {
      first_use.tile_buffers = vk_zalloc(ctx->allocator,
                                         sizeof(first_use.tile_buffers[0U]) *
                                            ctx->alloc.tile_buffers_count,
                                         8,
                                         VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
      if (!first_use.tile_buffers)
         return vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);
   }

   for (uint32_t i = 0U; i < hw_render->subpass_count; i++) {
      struct pvr_renderpass_hwsetup_subpass *hw_subpass =
         &hw_render->subpasses[i];
      struct pvr_renderpass_subpass *subpass = &ctx->subpasses[i];
      struct pvr_render_subpass *input_subpass = subpass->input_subpass;

      /* If this is the first depth attachment in the render then clear at the
       * render level, not the subpass level.
       */
      if (first_ds &&
          (hw_subpass->depth_initop == VK_ATTACHMENT_LOAD_OP_CLEAR ||
           hw_subpass->stencil_clear)) {
         struct pvr_render_int_attachment *int_ds_attach;

         assert(input_subpass->depth_stencil_attachment !=
                VK_ATTACHMENT_UNUSED);
         assert(input_subpass->depth_stencil_attachment <
                ctx->pass->attachment_count);
         int_ds_attach =
            &ctx->int_attach[input_subpass->depth_stencil_attachment];

         assert(hw_render->ds_attach_idx == VK_ATTACHMENT_UNUSED ||
                hw_render->ds_attach_idx == int_ds_attach->attachment->index);
         hw_render->ds_attach_idx = int_ds_attach->attachment->index;

         if (hw_subpass->depth_initop == VK_ATTACHMENT_LOAD_OP_CLEAR)
            hw_render->depth_init = VK_ATTACHMENT_LOAD_OP_CLEAR;

         if (hw_subpass->stencil_clear) {
            hw_render->stencil_init = VK_ATTACHMENT_LOAD_OP_CLEAR;
            hw_subpass->stencil_clear = false;
         }
      }

      if (input_subpass->depth_stencil_attachment != VK_ATTACHMENT_UNUSED)
         first_ds = false;

      for (uint32_t j = 0U; j < input_subpass->color_count; j++) {
         struct usc_mrt_resource *mrt = &hw_subpass->setup.mrt_resources[j];
         const uint32_t attach_idx = input_subpass->color_attachments[j];
         struct pvr_render_int_attachment *int_attach;

         if (attach_idx == VK_ATTACHMENT_UNUSED)
            continue;

         int_attach = &ctx->int_attach[attach_idx];

         assert(vk_format_get_blocksizebits(int_attach->attachment->vk_format) >
                0U);

         /* Is this the first use of the attachment? */
         if (int_attach->first_use == (int32_t)i) {
            /* Set if we should initialize the attachment storage at the
             * render level.
             */
            bool use_render_init;
            result = pvr_surface_setup_render_init(ctx,
                                                   &first_use,
                                                   mrt,
                                                   int_attach->attachment,
                                                   hw_subpass->color_initops[j],
                                                   &use_render_init);
            if (result != VK_SUCCESS) {
               if (first_use.tile_buffers)
                  vk_free(ctx->allocator, first_use.tile_buffers);

               return result;
            }

            /* On success don't initialize the attachment at the subpass level.
             */
            if (use_render_init)
               hw_subpass->color_initops[j] = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
         } else {
            /* This attachment is already present in on-chip storage so don't
             * do anything.
             */
            assert(hw_subpass->color_initops[j] ==
                   VK_ATTACHMENT_LOAD_OP_DONT_CARE);
         }
      }
   }

   if (first_use.tile_buffers)
      vk_free(ctx->allocator, first_use.tile_buffers);

   return VK_SUCCESS;
}

static void
pvr_mark_storage_allocated_in_buffer(struct pvr_renderpass_alloc_buffer *buffer,
                                     uint32_t start,
                                     uint32_t pixel_size)
{
   assert(!BITSET_TEST_RANGE(buffer->allocs, start, start + pixel_size - 1U));
   BITSET_SET_RANGE(buffer->allocs, start, start + pixel_size - 1U);
}

static VkResult
pvr_mark_storage_allocated(struct pvr_renderpass_context *ctx,
                           struct pvr_renderpass_alloc *alloc,
                           struct pvr_render_pass_attachment *attachment,
                           struct pvr_renderpass_resource *resource)
{
   /* Number of dwords to allocate for the attachment. */
   const uint32_t pixel_size =
      DIV_ROUND_UP(vk_format_get_blocksizebits(attachment->vk_format), 32U);

   if (resource->type == USC_MRT_RESOURCE_TYPE_OUTPUT_REG) {
      /* Update the locations used in the pixel output registers. */
      pvr_mark_storage_allocated_in_buffer(&alloc->output_reg,
                                           resource->reg.output_reg,
                                           pixel_size);

      /* Update the range of pixel output registers used. */
      alloc->output_regs_count =
         MAX2(alloc->output_regs_count, resource->reg.output_reg + pixel_size);
   } else {
      assert(resource->type == USC_MRT_RESOURCE_TYPE_MEMORY);

      if (resource->mem.tile_buffer >= alloc->tile_buffers_count) {
         /* Grow the number of tile buffers. */
         struct pvr_renderpass_alloc_buffer *new_tile_buffers = vk_realloc(
            ctx->allocator,
            alloc->tile_buffers,
            sizeof(alloc->tile_buffers[0U]) * (resource->mem.tile_buffer + 1U),
            8U,
            VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
         if (!new_tile_buffers)
            return vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);

         alloc->tile_buffers = new_tile_buffers;
         memset(
            &alloc->tile_buffers[alloc->tile_buffers_count],
            0U,
            sizeof(alloc->tile_buffers[0U]) *
               (resource->mem.tile_buffer + 1U - alloc->tile_buffers_count));
         alloc->tile_buffers_count = resource->mem.tile_buffer + 1U;
         assert(alloc->tile_buffers_count <= ctx->pass->max_tilebuffer_count);
      }

      /* Update the locations used in the tile buffer. */
      pvr_mark_storage_allocated_in_buffer(
         &alloc->tile_buffers[resource->mem.tile_buffer],
         resource->mem.offset_dw,
         pixel_size);

      /* The hardware makes the bit depth of the on-chip storage and memory
       * storage the same so make sure the memory storage is large enough to
       * accommodate the largest render target.
       */
      alloc->output_regs_count =
         MAX2(alloc->output_regs_count, resource->mem.offset_dw + pixel_size);
   }

   return VK_SUCCESS;
}

static VkResult
pvr_surface_alloc_color_storage(const struct pvr_device_info *dev_info,
                                struct pvr_renderpass_context *ctx,
                                struct pvr_renderpass_alloc *alloc,
                                struct pvr_render_pass_attachment *attachment,
                                struct pvr_renderpass_resource *resource)
{
   /* Number of dwords to allocate for the attachment. */
   const uint32_t pixel_size =
      DIV_ROUND_UP(vk_format_get_blocksizebits(attachment->vk_format), 32U);

   /* Try allocating pixel output registers. */
   const int32_t output_reg =
      pvr_is_space_in_buffer(dev_info, &alloc->output_reg, pixel_size);
   if (output_reg != -1) {
      resource->type = USC_MRT_RESOURCE_TYPE_OUTPUT_REG;
      resource->reg.output_reg = (uint32_t)output_reg;
      resource->reg.offset = 0U;
   } else {
      uint32_t i;

      /* Mark the attachment as using a tile buffer. */
      resource->type = USC_MRT_RESOURCE_TYPE_MEMORY;

      /* Try allocating from an existing tile buffer. */
      for (i = 0U; i < alloc->tile_buffers_count; i++) {
         const int32_t tile_buffer_offset =
            pvr_is_space_in_buffer(dev_info,
                                   &alloc->tile_buffers[i],
                                   pixel_size);

         if (tile_buffer_offset != -1) {
            resource->mem.tile_buffer = i;
            resource->mem.offset_dw = (uint32_t)tile_buffer_offset;
            break;
         }
      }

      if (i == alloc->tile_buffers_count) {
         /* Check for reaching the maximum number of tile buffers. */
         if (alloc->tile_buffers_count == ctx->pass->max_tilebuffer_count)
            return vk_error(NULL, VK_ERROR_TOO_MANY_OBJECTS);

         /* Use a newly allocated tile buffer. */
         resource->mem.tile_buffer = i;
         resource->mem.offset_dw = 0U;
      }
   }

   /* Update which parts of the pixel outputs/tile buffers are used. */
   return pvr_mark_storage_allocated(ctx, alloc, attachment, resource);
}
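
/* Sketch of the resulting allocation cascade (illustrative): output
 * registers are preferred, existing tile buffers are tried next, and a new
 * tile buffer is allocated only as a last resort. For three hypothetical
 * attachments where only the first fits in output registers:
 *
 *    attachment A -> USC_MRT_RESOURCE_TYPE_OUTPUT_REG, output_reg 0
 *    attachment B -> USC_MRT_RESOURCE_TYPE_MEMORY, tile buffer 0, offset 0
 *    attachment C -> USC_MRT_RESOURCE_TYPE_MEMORY, tile buffer 0, offset N
 *
 * Once max_tilebuffer_count buffers are full the allocation fails with
 * VK_ERROR_TOO_MANY_OBJECTS.
 */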

/** Free the storage allocated to an attachment. */
static void
pvr_free_buffer_storage(struct pvr_renderpass_alloc_buffer *buffer,
                        struct pvr_render_int_attachment *int_attach,
                        uint32_t start)
{
   const uint32_t pixel_size = DIV_ROUND_UP(
      vk_format_get_blocksizebits(int_attach->attachment->vk_format),
      32U);

   BITSET_CLEAR_RANGE(buffer->allocs, start, start + pixel_size - 1U);
}

/** Free the storage allocated to an attachment. */
static void
pvr_free_surface_storage(struct pvr_renderpass_context *ctx,
                         struct pvr_render_int_attachment *int_attach)
{
   struct usc_mrt_resource *resource = &int_attach->resource;
   struct pvr_renderpass_alloc *alloc = &ctx->alloc;

   assert(resource->type != USC_MRT_RESOURCE_TYPE_INVALID);

   /* Mark the storage as free. */
   if (resource->type == USC_MRT_RESOURCE_TYPE_OUTPUT_REG) {
      pvr_free_buffer_storage(&alloc->output_reg,
                              int_attach,
                              resource->reg.output_reg);
   } else {
      struct pvr_renderpass_alloc_buffer *tile_buffer;

      assert(resource->type == USC_MRT_RESOURCE_TYPE_MEMORY);

      assert(resource->mem.tile_buffer < alloc->tile_buffers_count);
      tile_buffer = &alloc->tile_buffers[resource->mem.tile_buffer];
      pvr_free_buffer_storage(tile_buffer, int_attach, resource->mem.offset_dw);
   }

   /* Mark that the attachment doesn't have allocated storage. */
   resource->type = USC_MRT_RESOURCE_TYPE_INVALID;

   /* Remove from the list of surfaces with allocated on-chip storage. */
   assert(ctx->active_surfaces > 0U);
   ctx->active_surfaces--;
   list_del(&int_attach->link);
}

static void pvr_reset_surface(struct pvr_renderpass_context *ctx,
                              struct pvr_render_int_attachment *int_attach)
{
   /* Reset information about the range of uses. */
   int_attach->first_use = int_attach->last_use = -1;
   int_attach->z_replicate = false;

   pvr_free_surface_storage(ctx, int_attach);
}

static void
pvr_make_surface_active(struct pvr_renderpass_context *ctx,
                        struct pvr_render_int_attachment *int_attach,
                        uint32_t subpass_num)
{
   /* Add to the list of surfaces with on-chip storage. */
   assert(int_attach->first_use == -1);
   int_attach->first_use = subpass_num;
   ctx->active_surfaces++;
   list_addtail(&int_attach->link, &ctx->active_surf_list);
}

/**
 * For a subpass copy details of storage locations for the input/color to the
 * output structure.
 */
static VkResult
pvr_copy_storage_details(struct pvr_renderpass_context *ctx,
                         struct pvr_renderpass_hwsetup_subpass *hw_subpass,
                         struct pvr_renderpass_subpass *subpass)
{
   struct pvr_render_subpass *input_subpass = subpass->input_subpass;
   const uint32_t max_rts =
      input_subpass->color_count + input_subpass->input_count;
   VkResult result;

   if (max_rts == 0)
      return VK_SUCCESS;

   hw_subpass->setup.mrt_resources =
      vk_zalloc(ctx->allocator,
                sizeof(hw_subpass->setup.mrt_resources[0U]) * max_rts,
                8,
                VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (!hw_subpass->setup.mrt_resources) {
      result = vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);
      goto end_copy_storage_details;
   }

   for (uint32_t i = 0U; i < input_subpass->color_count; i++) {
      const uint32_t attach_idx = input_subpass->color_attachments[i];
      struct pvr_render_int_attachment *int_attach;

      if (attach_idx == VK_ATTACHMENT_UNUSED)
         continue;

      int_attach = &ctx->int_attach[attach_idx];

      /* Record for the subpass where the color attachment is stored. */
      assert(int_attach->resource.type != USC_MRT_RESOURCE_TYPE_INVALID);
      hw_subpass->setup.mrt_resources[i] = int_attach->resource;
   }

   hw_subpass->setup.num_render_targets = input_subpass->color_count;

   if (input_subpass->input_count == 0)
      return VK_SUCCESS;

   /* Record how each of this subpass's input attachments is accessed. */
   hw_subpass->input_access = vk_alloc(ctx->allocator,
                                       sizeof(hw_subpass->input_access[0U]) *
                                          input_subpass->input_count,
                                       8,
                                       VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (!hw_subpass->input_access) {
      result = vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);
      goto end_copy_storage_details;
   }

   for (uint32_t i = 0U; i < input_subpass->input_count; i++) {
      const uint32_t attach_idx = input_subpass->input_attachments[i];
      struct pvr_render_int_attachment *int_attach;

      if (attach_idx == VK_ATTACHMENT_UNUSED)
         continue;

      int_attach = &ctx->int_attach[attach_idx];

      if (int_attach->resource.type != USC_MRT_RESOURCE_TYPE_INVALID) {
         bool is_color = false;

         /* Access the input attachment from on-chip storage. */
         if (int_attach->z_replicate) {
            hw_subpass->input_access[i].type =
               PVR_RENDERPASS_HWSETUP_INPUT_ACCESS_ONCHIP_ZREPLICATE;
         } else {
            hw_subpass->input_access[i].type =
               PVR_RENDERPASS_HWSETUP_INPUT_ACCESS_ONCHIP;
         }

         /* If this attachment is also a color attachment then point to the
          * color attachment's resource.
          */
         for (uint32_t j = 0U; j < input_subpass->color_count; j++) {
            if (input_subpass->color_attachments[j] == (int32_t)attach_idx) {
               hw_subpass->input_access[i].on_chip_rt = j;
               is_color = true;
               break;
            }
         }

         if (!is_color) {
            const uint32_t num_rts = hw_subpass->setup.num_render_targets;

            hw_subpass->input_access[i].on_chip_rt = num_rts;
            hw_subpass->setup.num_render_targets++;

            /* Record the location of the storage for the attachment. */
            hw_subpass->setup.mrt_resources[num_rts] = int_attach->resource;
         }
      } else {
         /* Access the input attachment from memory. */
         hw_subpass->input_access[i].type =
            PVR_RENDERPASS_HWSETUP_INPUT_ACCESS_OFFCHIP;
         hw_subpass->input_access[i].on_chip_rt = -1;
      }
   }

   return VK_SUCCESS;

end_copy_storage_details:
   if (hw_subpass->input_access) {
      vk_free(ctx->allocator, hw_subpass->input_access);
      hw_subpass->input_access = NULL;
   }

   if (hw_subpass->setup.mrt_resources) {
      vk_free(ctx->allocator, hw_subpass->setup.mrt_resources);
      hw_subpass->setup.mrt_resources = NULL;
   }

   return result;
}

/**
 * For a subpass copy details of any storage location for a replicated version
 * of the depth attachment to the output structure.
 */
static VkResult
pvr_copy_z_replicate_details(struct pvr_renderpass_context *ctx,
                             struct pvr_renderpass_hwsetup_subpass *hw_subpass,
                             struct pvr_renderpass_subpass *subpass)
{
   struct pvr_render_subpass *input_subpass = subpass->input_subpass;
   struct pvr_render_int_attachment *int_ds_attach;
   uint32_t z_replicate;
   bool found = false;

   assert(input_subpass->depth_stencil_attachment >= 0U &&
          input_subpass->depth_stencil_attachment <
             (int32_t)ctx->pass->attachment_count);

   int_ds_attach = &ctx->int_attach[input_subpass->depth_stencil_attachment];

   assert(hw_subpass->z_replicate == -1);

   /* Is the replicated depth also an input attachment? */
   for (uint32_t i = 0U; i < input_subpass->input_count; i++) {
      const uint32_t attach_idx = input_subpass->input_attachments[i];
      struct pvr_render_int_attachment *int_attach;

      if (attach_idx == VK_ATTACHMENT_UNUSED)
         continue;

      int_attach = &ctx->int_attach[attach_idx];

      if (int_attach == int_ds_attach) {
         z_replicate = hw_subpass->input_access[i].on_chip_rt;
         found = true;
         break;
      }
   }

   if (!found)
      z_replicate = hw_subpass->setup.num_render_targets;

   /* If the Z replicate attachment isn't also an input attachment then grow the
    * array of locations.
    */
   assert(z_replicate <= hw_subpass->setup.num_render_targets);
   if (z_replicate == hw_subpass->setup.num_render_targets) {
      struct usc_mrt_resource *mrt =
         vk_realloc(ctx->allocator,
                    hw_subpass->setup.mrt_resources,
                    sizeof(hw_subpass->setup.mrt_resources[0U]) *
                       (hw_subpass->setup.num_render_targets + 1U),
                    8U,
                    VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
      if (!mrt)
         return vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);

      hw_subpass->setup.mrt_resources = mrt;
      hw_subpass->setup.num_render_targets++;
   }

   /* Copy the location of the Z replicate. */
   assert(int_ds_attach->resource.type != USC_MRT_RESOURCE_TYPE_INVALID);
   hw_subpass->setup.mrt_resources[z_replicate] = int_ds_attach->resource;
   hw_subpass->z_replicate = z_replicate;

   return VK_SUCCESS;
}

static void pvr_dereference_surface(struct pvr_renderpass_context *ctx,
                                    int32_t attach_idx,
                                    uint32_t subpass_num)
{
   struct pvr_render_int_attachment *int_attach = &ctx->int_attach[attach_idx];

   assert(int_attach->remaining_count > 0U);
   int_attach->remaining_count--;

   if (int_attach->remaining_count == 0U) {
      if (int_attach->first_use != -1)
         int_attach->last_use = subpass_num;

      if (int_attach->resource.type != USC_MRT_RESOURCE_TYPE_INVALID)
         pvr_free_surface_storage(ctx, int_attach);
   }

   if (int_attach->attachment->aspects & VK_IMAGE_ASPECT_STENCIL_BIT) {
      assert(int_attach->stencil_remaining_count > 0U);
      int_attach->stencil_remaining_count--;
   }
}

static void pvr_free_render(struct pvr_renderpass_context *ctx)
{
   pvr_free_alloc(ctx, &ctx->alloc);

   if (ctx->subpasses) {
      for (uint32_t i = 0U; i < ctx->hw_render->subpass_count; i++)
         pvr_free_alloc(ctx, &ctx->subpasses[i].alloc);

      vk_free(ctx->allocator, ctx->subpasses);
      ctx->subpasses = NULL;
   }
}

static bool pvr_render_has_side_effects(struct pvr_renderpass_context *ctx)
{
   struct pvr_renderpass_hwsetup_render *hw_render = ctx->hw_render;
   struct pvr_render_pass *pass = ctx->pass;

   if ((hw_render->depth_init == VK_ATTACHMENT_LOAD_OP_CLEAR &&
        hw_render->depth_store) ||
       (hw_render->stencil_init == VK_ATTACHMENT_LOAD_OP_CLEAR &&
        hw_render->stencil_store)) {
      return true;
   }

   for (uint32_t i = 0U; i < hw_render->eot_surface_count; i++) {
      const struct pvr_renderpass_hwsetup_eot_surface *eot_attach =
         &hw_render->eot_surfaces[i];
      const struct pvr_render_pass_attachment *attachment =
         &pass->attachments[eot_attach->attachment_idx];

      if (attachment->load_op == VK_ATTACHMENT_LOAD_OP_CLEAR &&
          attachment->store_op == VK_ATTACHMENT_STORE_OP_STORE) {
         return true;
      }

      if (eot_attach->need_resolve)
         return true;
   }

   return false;
}

static VkResult pvr_close_render(const struct pvr_device *device,
                                 struct pvr_renderpass_context *ctx)
{
   struct pvr_renderpass_hwsetup_render *hw_render = ctx->hw_render;
   struct pvr_renderpass_hwsetup_eot_surface *eot_attach;
   struct usc_mrt_setup *eot_setup;
   int32_t mrt_idx;
   VkResult result;

   /* Render already closed. */
   if (!hw_render)
      return VK_SUCCESS;

   /* Setup render and allocate resources for color/depth loads and clears. */
   result = pvr_subpass_setup_render_init(ctx);
   if (result != VK_SUCCESS)
      return result;

   /* Reset surfaces whose last use was in the current render. */
   list_for_each_entry_safe (struct pvr_render_int_attachment,
                             int_attach,
                             &ctx->active_surf_list,
                             link) {
      if (int_attach->last_use != -1) {
         assert(int_attach->resource.type == USC_MRT_RESOURCE_TYPE_INVALID);
         pvr_reset_surface(ctx, int_attach);
      }
   }

   /* Check if the depth attachment has uses in future subpasses. */
   if (ctx->int_ds_attach) {
      /* Store the depth to the attachment at the end of the render. */
      if (ctx->int_ds_attach->remaining_count > 0U)
         hw_render->depth_store = true;

      /* Store the stencil to the attachment at the end of the render. */
      if (ctx->int_ds_attach->stencil_remaining_count > 0U)
         hw_render->stencil_store = true;

      if (hw_render->depth_store || hw_render->stencil_store) {
         assert(hw_render->ds_attach_idx == VK_ATTACHMENT_UNUSED ||
                hw_render->ds_attach_idx ==
                   ctx->int_ds_attach->attachment->index);
         hw_render->ds_attach_idx = ctx->int_ds_attach->attachment->index;

         /* Allocate memory for the attachment. */
         pvr_mark_surface_alloc(ctx, ctx->int_ds_attach);
      }

      /* Load the depth and stencil before the next use. */
      ctx->int_ds_attach->load_op = VK_ATTACHMENT_LOAD_OP_LOAD;
      ctx->int_ds_attach->stencil_load_op = VK_ATTACHMENT_LOAD_OP_LOAD;
   }

   eot_setup = &hw_render->eot_setup;
   memset(eot_setup, 0U, sizeof(*eot_setup));

   /* Set the number of pixel output registers/tile buffers allocated for the
    * render and copy the information to all subpasses and the EOT program.
    */
   pvr_finalise_po_alloc(device, ctx);

   /* If any attachments are used with z replicate then they will be stored by
    * the ISP, so remove them from the list of surfaces to store using the PBE.
    */
1132    list_for_each_entry_safe (struct pvr_render_int_attachment,
1133                              int_attach,
1134                              &ctx->active_surf_list,
1135                              link) {
1136       if (int_attach->z_replicate)
1137          pvr_reset_surface(ctx, int_attach);
1138    }
1139 
1140    /* Number of surfaces with allocated on-chip storage. */
1141    eot_setup->num_render_targets = ctx->active_surfaces;
1142    eot_setup->mrt_resources = vk_alloc(ctx->allocator,
1143                                        sizeof(eot_setup->mrt_resources[0U]) *
1144                                           eot_setup->num_render_targets,
1145                                        8,
1146                                        VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
1147    if (!eot_setup->mrt_resources)
1148       return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
1149 
1150    /* Record the location of the on-chip storage. */
1151    mrt_idx = 0U;
1152    list_for_each_entry_safe (struct pvr_render_int_attachment,
1153                              int_attach,
1154                              &ctx->active_surf_list,
1155                              link) {
1156       assert(int_attach->resource.type != USC_MRT_RESOURCE_TYPE_INVALID);
1157       assert(int_attach->remaining_count > 0U);
1158       if (int_attach->attachment->aspects & VK_IMAGE_ASPECT_STENCIL_BIT)
1159          assert(int_attach->stencil_remaining_count > 0U);
1160 
1161       /* Copy the location of the source data for this attachment. */
1162       eot_setup->mrt_resources[mrt_idx] = int_attach->resource;
1163 
1164       assert(int_attach->mrt_idx == -1);
1165       int_attach->mrt_idx = mrt_idx;
1166 
1167       mrt_idx++;
1168    }
1169    assert(mrt_idx == (int32_t)eot_setup->num_render_targets);
1170 
1171    hw_render->eot_surface_count = 0U;
1172    hw_render->pbe_emits = 0U;
1173 
1174    /* Count the number of surfaces to store to at the end of the subpass. */
1175    for (uint32_t i = 0U; i < hw_render->subpass_count; i++) {
1176       struct pvr_renderpass_subpass *subpass = &ctx->subpasses[i];
1177       struct pvr_render_subpass *input_subpass = subpass->input_subpass;
1178 
1179       for (uint32_t j = 0U; j < input_subpass->color_count; j++) {
1180          const uint32_t resolve_output =
1181             input_subpass->resolve_attachments
1182                ? input_subpass->resolve_attachments[j]
1183                : VK_ATTACHMENT_UNUSED;
1184          struct pvr_render_int_attachment *color_attach;
1185 
1186          if (input_subpass->color_attachments[j] == VK_ATTACHMENT_UNUSED)
1187             continue;
1188 
1189          color_attach = &ctx->int_attach[input_subpass->color_attachments[j]];
1190 
1191          if (list_is_linked(&color_attach->link)) {
1192             uint32_t rem_count = resolve_output == VK_ATTACHMENT_UNUSED ? 0U
1193                                                                         : 1U;
1194 
1195             /* If a color attachment is resolved it will have an extra
1196              * remaining usage.
1197              */
1198             if (color_attach->remaining_count > rem_count &&
1199                 !color_attach->eot_surf_required) {
1200                color_attach->eot_surf_required = true;
1201                hw_render->eot_surface_count++;
1202             }
1203          }
1204 
1205          if (resolve_output != VK_ATTACHMENT_UNUSED) {
1206             struct pvr_render_int_attachment *int_resolve_attach =
1207                &ctx->int_attach[resolve_output];
1208 
1209             if (!int_resolve_attach->eot_surf_required) {
1210                int_resolve_attach->eot_surf_required = true;
1211                hw_render->eot_surface_count++;
1212             }
1213          }
1214       }
1215    }
1216 
1217    assert(hw_render->eot_surface_count <= 16U);
1218 
1219    hw_render->eot_surfaces = vk_alloc(ctx->allocator,
1220                                       sizeof(hw_render->eot_surfaces[0U]) *
1221                                          hw_render->eot_surface_count,
1222                                       8,
1223                                       VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
1224    if (!hw_render->eot_surfaces)
1225       return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
1226 
1227    eot_attach = hw_render->eot_surfaces;
1228 
1229    for (uint32_t i = 0U; i < hw_render->subpass_count; i++) {
1230       struct pvr_renderpass_subpass *subpass = &ctx->subpasses[i];
1231       struct pvr_render_subpass *input_subpass = subpass->input_subpass;
1232 
1233       for (uint32_t j = 0U; j < input_subpass->color_count; j++) {
1234          const uint32_t resolve_output =
1235             input_subpass->resolve_attachments
1236                ? input_subpass->resolve_attachments[j]
1237                : VK_ATTACHMENT_UNUSED;
1238          struct pvr_render_int_attachment *color_attach;
1239 
1240          if (input_subpass->color_attachments[j] == VK_ATTACHMENT_UNUSED)
1241             continue;
1242 
1243          color_attach = &ctx->int_attach[input_subpass->color_attachments[j]];
1244 
1245          if (resolve_output != VK_ATTACHMENT_UNUSED) {
1246             struct pvr_render_int_attachment *resolve_src =
1247                &ctx->int_attach[input_subpass->color_attachments[j]];
1248             struct pvr_render_int_attachment *resolve_dst =
1249                &ctx->int_attach[resolve_output];
1250 
1251             assert(resolve_dst->eot_surf_required);
1252             resolve_dst->eot_surf_required = false;
1253 
1254             /* Dereference the source to the resolve. */
1255             assert(resolve_src->remaining_count > 0U);
1256             resolve_src->remaining_count--;
1257 
1258             /* Allocate device memory for the resolve destination. */
1259             pvr_mark_surface_alloc(ctx, resolve_dst);
1260 
1261             /* The attachment has been written so load the attachment the
1262              * next time it is referenced.
1263              */
1264             resolve_dst->load_op = VK_ATTACHMENT_LOAD_OP_LOAD;
1265 
1266             eot_attach->mrt_idx = resolve_src->mrt_idx;
1267             eot_attach->attachment_idx = resolve_dst->attachment->index;
1268             eot_attach->src_attachment_idx = resolve_src->attachment->index;
1269 
1270             eot_attach->need_resolve = true;
1271 
1272             if (!resolve_src->is_pbe_downscalable) {
1273                /* Resolve src must be stored for transfer resolve. */
1274                assert(resolve_src->remaining_count > 0U);
1275 
1276                eot_attach->resolve_type = PVR_RESOLVE_TYPE_TRANSFER;
1277             } else if (resolve_src->remaining_count == 0U) {
1278                eot_attach->resolve_type = PVR_RESOLVE_TYPE_PBE;
1279                hw_render->pbe_emits++;
1280             } else {
1281                eot_attach->resolve_type = PVR_RESOLVE_TYPE_INVALID;
1282             }
1283 
1284             eot_attach++;
1285          }
1286 
1287          if (color_attach->eot_surf_required) {
1288             assert(color_attach->remaining_count > 0U);
1289 
1290             pvr_mark_surface_alloc(ctx, color_attach);
1291 
1292             assert(color_attach->mrt_idx >= 0);
1293             assert(color_attach->mrt_idx <
1294                    (int32_t)hw_render->eot_setup.num_render_targets);
1295 
1296             eot_attach->mrt_idx = color_attach->mrt_idx;
1297             eot_attach->attachment_idx = color_attach->attachment->index;
1298             eot_attach->need_resolve = false;
1299             eot_attach++;
1300 
1301             hw_render->pbe_emits++;
1302 
1303             color_attach->eot_surf_required = false;
1304          }
1305       }
1306    }
1307 
1308    assert(hw_render->pbe_emits <= PVR_NUM_PBE_EMIT_REGS);
1309 
1310    /* Count the number of extra resolves we can do through the PBE. */
1311    for (uint32_t i = 0U; i < hw_render->eot_surface_count; i++) {
1312       eot_attach = &hw_render->eot_surfaces[i];
1313 
1314       if (eot_attach->need_resolve &&
1315           eot_attach->resolve_type == PVR_RESOLVE_TYPE_INVALID) {
1316          if (hw_render->pbe_emits == PVR_NUM_PBE_EMIT_REGS) {
1317             eot_attach->resolve_type = PVR_RESOLVE_TYPE_TRANSFER;
1318          } else {
1319             eot_attach->resolve_type = PVR_RESOLVE_TYPE_PBE;
1320             hw_render->pbe_emits++;
1321          }
1322       }
1323    }
1324 
1325    assert(hw_render->pbe_emits <= PVR_NUM_PBE_EMIT_REGS);
1326 
1327    /* Check for side effects in the final render. */
1328    hw_render->has_side_effects = pvr_render_has_side_effects(ctx);
1329 
1330    /* Reset active surfaces. */
1331    list_for_each_entry_safe (struct pvr_render_int_attachment,
1332                              int_attach,
1333                              &ctx->active_surf_list,
1334                              link) {
1335       int_attach->mrt_idx = -1;
1336       pvr_reset_surface(ctx, int_attach);
1337    }
1338 
1339    assert(ctx->active_surfaces == 0U);
1340    assert(list_is_empty(&ctx->active_surf_list));
1341 
1342    pvr_free_render(ctx);
1343    pvr_reset_render(ctx);
1344 
1345    return VK_SUCCESS;
1346 }
1347 
pvr_is_input(struct pvr_render_subpass * subpass,uint32_t attach_idx)1348 static bool pvr_is_input(struct pvr_render_subpass *subpass,
1349                          uint32_t attach_idx)
1350 {
1351    if (attach_idx == VK_ATTACHMENT_UNUSED)
1352       return false;
1353 
1354    for (uint32_t i = 0U; i < subpass->input_count; i++) {
1355       if (subpass->input_attachments[i] == attach_idx)
1356          return true;
1357    }
1358 
1359    return false;
1360 }
1361 
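/* Returns true if the incoming subpass's depth/stencil attachment can't share
 * the current render with the existing one: either the existing attachment's
 * contents would have to be saved (beyond what Z replication can cover), or
 * the incoming attachment's contents would have to be loaded mid-render.
 */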
1362 static bool
1363 pvr_depth_zls_conflict(struct pvr_renderpass_context *ctx,
1364                        struct pvr_render_int_attachment *int_ds_attach,
1365                        bool existing_ds_is_input)
1366 {
1367    if (!ctx->int_ds_attach)
1368       return false;
1369 
1370    /* No conflict if the incoming subpass doesn't have a depth/stencil
1371     * attachment.
1372     */
1373    if (!int_ds_attach)
1374       return false;
1375 
1376    /* No conflict if the incoming depth/stencil attachment is the same as the
1377     * existing one.
1378     */
1379    if (ctx->int_ds_attach == int_ds_attach)
1380       return false;
1381 
1382    /* If the existing depth/stencil attachment is used later, then we can't
1383     * overwrite it.
1384     *
1385     * The exception is if the only use is as an input attachment in the
1386     * incoming subpass, in which case we can use the Z replicate feature to
1387     * save the value.
1388     */
1389    if (ctx->int_ds_attach->remaining_count > 0U &&
1390        !(existing_ds_is_input && ctx->int_ds_attach->remaining_count == 1U)) {
1391       return true;
1392    }
1393 
1394    if (ctx->int_ds_attach->attachment->aspects & VK_IMAGE_ASPECT_STENCIL_BIT &&
1395        ctx->int_ds_attach->stencil_remaining_count > 0U) {
1396       return true;
1397    }
1398 
1399    /* We can't load mid-render, so fail if the new depth/stencil attachment
1400     * is already initialized.
1401     */
1402    if (int_ds_attach->load_op == VK_ATTACHMENT_LOAD_OP_LOAD)
1403       return true;
1404 
1405    if (int_ds_attach->attachment->aspects & VK_IMAGE_ASPECT_STENCIL_BIT &&
1406        int_ds_attach->stencil_load_op == VK_ATTACHMENT_LOAD_OP_LOAD) {
1407       return true;
1408    }
1409 
1410    return false;
1411 }
1412 
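/* Copies an allocated on-chip location (output register or tile buffer slot)
 * into the attachment's resource description.
 */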
1413 static void
1414 pvr_set_surface_resource(struct pvr_render_int_attachment *int_attach,
1415                          struct pvr_renderpass_resource *resource)
1416 {
1417    int_attach->resource.type = resource->type;
1418 
1419    switch (resource->type) {
1420    case USC_MRT_RESOURCE_TYPE_OUTPUT_REG:
1421       int_attach->resource.reg.output_reg = resource->reg.output_reg;
1422       int_attach->resource.reg.offset = resource->reg.offset;
1423       break;
1424 
1425    case USC_MRT_RESOURCE_TYPE_MEMORY:
1426       int_attach->resource.mem.tile_buffer = resource->mem.tile_buffer;
1427       int_attach->resource.mem.offset_dw = resource->mem.offset_dw;
1428       break;
1429 
1430    default:
1431       break;
1432    }
1433 }
1434 
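/* Returns true if two resources refer to the same storage location. */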
1435 static bool pvr_equal_resources(struct pvr_renderpass_resource *resource1,
1436                                 struct pvr_renderpass_resource *resource2)
1437 {
1438    if (resource1->type != resource2->type)
1439       return false;
1440 
1441    switch (resource1->type) {
1442    case USC_MRT_RESOURCE_TYPE_OUTPUT_REG:
1443       return resource1->reg.output_reg == resource2->reg.output_reg &&
1444              resource1->reg.offset == resource2->reg.offset;
1445 
1446    case USC_MRT_RESOURCE_TYPE_MEMORY:
1447       return resource1->mem.tile_buffer == resource2->mem.tile_buffer &&
1448              resource1->mem.offset_dw == resource2->mem.offset_dw;
1449 
1450    default:
1451       return true;
1452    }
1453 }
1454 
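/* Enables Z replication for an attachment: from the first subpass which
 * writes it, every writing subpass replicates the depth value into the
 * supplied color storage so later subpasses can read it as an input
 * attachment.
 */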
1455 static VkResult
1456 pvr_enable_z_replicate(struct pvr_renderpass_context *ctx,
1457                        struct pvr_renderpass_hwsetup_render *hw_render,
1458                        int32_t replicate_attach_idx,
1459                        struct pvr_renderpass_resource *replicate_dst)
1460 {
1461    struct pvr_render_int_attachment *int_attach =
1462       &ctx->int_attach[replicate_attach_idx];
1463    int32_t first_use = -1;
1464 
1465    /* If Z replication was already enabled for the attachment then there is
1466     * nothing more to do.
1467     */
1468    if (!int_attach->z_replicate) {
1469       /* Copy details of the storage for the replicated value to the attachment.
1470        */
1471       assert(int_attach->resource.type == USC_MRT_RESOURCE_TYPE_INVALID);
1472       assert(replicate_dst->type != USC_MRT_RESOURCE_TYPE_INVALID);
1473       pvr_set_surface_resource(int_attach, replicate_dst);
1474    } else {
1475       assert(int_attach->resource.type != USC_MRT_RESOURCE_TYPE_INVALID);
1476       assert(replicate_dst->type == USC_MRT_RESOURCE_TYPE_INVALID);
1477    }
1478 
1479    /* Find the first subpass where the attachment is written. */
1480    for (uint32_t i = 0U; i < hw_render->subpass_count; i++) {
1481       struct pvr_renderpass_subpass *subpass = &ctx->subpasses[i];
1482       struct pvr_render_subpass *input_subpass = subpass->input_subpass;
1483 
1484       if (input_subpass->depth_stencil_attachment == replicate_attach_idx) {
1485          first_use = i;
1486          break;
1487       }
1488    }
1489    assert(first_use >= 0);
1490 
1491    /* For all subpasses from the first write. */
1492    for (uint32_t i = first_use; i < hw_render->subpass_count; i++) {
1493       struct pvr_renderpass_subpass *subpass = &ctx->subpasses[i];
1494       struct pvr_render_subpass *input_subpass = subpass->input_subpass;
1495 
1496       /* If the subpass writes to the attachment then enable z replication. */
1497       if (input_subpass->depth_stencil_attachment == replicate_attach_idx &&
1498           !subpass->z_replicate) {
1499          subpass->z_replicate = true;
1500 
1501          if (i != (hw_render->subpass_count - 1U)) {
1502             /* Copy the details of the storage for the replicated value. */
1503             const VkResult result =
1504                pvr_copy_z_replicate_details(ctx,
1505                                             &ctx->hw_render->subpasses[i],
1506                                             subpass);
1507             if (result != VK_SUCCESS)
1508                return result;
1509          }
1510       }
1511    }
1512 
1513    if (!int_attach->z_replicate) {
1514       /* Add the storage for the replicated value to locations in use at each
1515        * subpass.
1516        */
1517       for (uint32_t i = first_use; i < (hw_render->subpass_count - 1U); i++) {
1518          struct pvr_renderpass_subpass *subpass = &ctx->subpasses[i];
1519 
1520          pvr_mark_storage_allocated(ctx,
1521                                     &subpass->alloc,
1522                                     int_attach->attachment,
1523                                     replicate_dst);
1524       }
1525 
1526       /* Add the depth attachment to the list of surfaces with allocated
1527        * storage.
1528        */
1529       pvr_make_surface_active(ctx, int_attach, first_use);
1530 
1531       int_attach->z_replicate = true;
1532    }
1533 
1534    return VK_SUCCESS;
1535 }
1536 
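/* Returns true if the attachment is the destination of an MSAA resolve in the
 * current (most recently started) render.
 */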
1537 static bool pvr_is_pending_resolve_dest(struct pvr_renderpass_context *ctx,
1538                                         uint32_t attach_idx)
1539 {
1540    struct pvr_render_int_attachment *int_attach = &ctx->int_attach[attach_idx];
1541 
1542    return int_attach->last_resolve_dst_render != -1 &&
1543           int_attach->last_resolve_dst_render ==
1544              (int32_t)(ctx->hw_setup->render_count - 1U);
1545 }
1546 
1547 static bool pvr_is_pending_resolve_src(struct pvr_renderpass_context *ctx,
1548                                        uint32_t attach_idx)
1549 {
1550    struct pvr_render_int_attachment *int_attach = &ctx->int_attach[attach_idx];
1551 
1552    return int_attach->last_resolve_src_render != -1 &&
1553           int_attach->last_resolve_src_render ==
1554              (int32_t)(ctx->hw_setup->render_count - 1U);
1555 }
1556 
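/* Each color attachment which is still live after the incoming subpass needs
 * its own PBE emit register at the end of the render. Returns true if adding
 * the subpass's color outputs to the current render would need more than
 * PVR_NUM_PBE_EMIT_REGS unique attachments.
 */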
1557 static bool pvr_exceeds_pbe_registers(struct pvr_renderpass_context *ctx,
1558                                       struct pvr_render_subpass *subpass)
1559 {
1560    int32_t live_outputs[PVR_NUM_PBE_EMIT_REGS];
1561    uint32_t num_live_outputs = 0U;
1562 
1563    /* Count the color outputs so far which are still read after this subpass. */
1564    for (uint32_t i = 0U; i < ctx->hw_render->subpass_count; i++) {
1565       struct pvr_render_subpass *input_subpass =
1566          ctx->subpasses[i].input_subpass;
1567 
1568       for (uint32_t j = 0U; j < input_subpass->color_count; j++) {
1569          const uint32_t global_color_attach =
1570             input_subpass->color_attachments[j];
1571          struct pvr_render_int_attachment *int_attach;
1572          bool found = false;
1573 
1574          if (global_color_attach == VK_ATTACHMENT_UNUSED)
1575             continue;
1576 
1577          int_attach = &ctx->int_attach[global_color_attach];
1578 
1579          if (int_attach->last_read <= (int32_t)subpass->index)
1580             continue;
1581 
1582          for (uint32_t k = 0U; k < num_live_outputs; k++) {
1583             if (live_outputs[k] == global_color_attach) {
1584                found = true;
1585                break;
1586             }
1587          }
1588 
1589          if (!found)
1590             live_outputs[num_live_outputs++] = global_color_attach;
1591       }
1592    }
1593 
1594    assert(num_live_outputs <= PVR_NUM_PBE_EMIT_REGS);
1595 
1596    /* Check if adding all the color outputs of the new subpass to the render
1597     * would exceed the limit.
1598     */
1599    for (uint32_t i = 0U; i < subpass->color_count; i++) {
1600       const uint32_t global_color_attach = subpass->color_attachments[i];
1601       struct pvr_render_int_attachment *int_attach;
1602       bool found = false;
1603 
1604       if (global_color_attach == VK_ATTACHMENT_UNUSED)
1605          continue;
1606 
1607       int_attach = &ctx->int_attach[global_color_attach];
1608 
1609       if (int_attach->last_read <= (int32_t)subpass->index)
1610          continue;
1611 
1612       for (uint32_t j = 0U; j < num_live_outputs; j++) {
1613          if (live_outputs[j] == global_color_attach) {
1614             found = true;
1615             break;
1616          }
1617       }
1618 
1619       if (!found) {
1620          if (num_live_outputs >= PVR_NUM_PBE_EMIT_REGS)
1621             return true;
1622 
1623          live_outputs[num_live_outputs++] = global_color_attach;
1624       }
1625    }
1626 
1627    return false;
1628 }
1629 
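/* Merges two storage allocation bitmaps by ORing their usage bits. */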
1630 static void pvr_merge_alloc_buffer(struct pvr_renderpass_alloc_buffer *dst,
1631                                    struct pvr_renderpass_alloc_buffer *src)
1632 {
1633    for (uint32_t i = 0U; i < ARRAY_SIZE(dst->allocs); i++)
1634       dst->allocs[i] |= src->allocs[i];
1635 }
1636 
1637 static VkResult pvr_merge_alloc(struct pvr_renderpass_context *ctx,
1638                                 struct pvr_renderpass_alloc *dst,
1639                                 struct pvr_renderpass_alloc *src)
1640 {
1641    pvr_merge_alloc_buffer(&dst->output_reg, &src->output_reg);
1642 
1643    dst->output_regs_count =
1644       MAX2(dst->output_regs_count, src->output_regs_count);
1645 
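   /* Grow the destination's tile buffer array to match the source, zeroing
    * the new entries before they are merged below.
    */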
1646    if (dst->tile_buffers_count < src->tile_buffers_count) {
1647       struct pvr_renderpass_alloc_buffer *new_tile_buffers =
1648          vk_realloc(ctx->allocator,
1649                     dst->tile_buffers,
1650                     sizeof(dst->tile_buffers[0U]) * src->tile_buffers_count,
1651                     8U,
1652                     VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
1653       if (!new_tile_buffers)
1654          return vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);
1655 
1656       dst->tile_buffers = new_tile_buffers;
1657       memset(dst->tile_buffers + dst->tile_buffers_count,
1658              0U,
1659              sizeof(dst->tile_buffers[0U]) *
1660                 (src->tile_buffers_count - dst->tile_buffers_count));
1661       dst->tile_buffers_count = src->tile_buffers_count;
1662    }
1663 
1664    for (uint32_t i = 0U; i < src->tile_buffers_count; i++)
1665       pvr_merge_alloc_buffer(&dst->tile_buffers[i], &src->tile_buffers[i]);
1666 
1667    return VK_SUCCESS;
1668 }
1669 
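/* Checks whether storage for a replicated depth value can be allocated: the
 * location must be free in the incoming subpass and in every existing subpass
 * from the first depth write onwards. On success the location is returned in
 * resource and marked as allocated.
 */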
1670 static VkResult
1671 pvr_is_z_replicate_space_available(const struct pvr_device_info *dev_info,
1672                                    struct pvr_renderpass_context *ctx,
1673                                    struct pvr_renderpass_alloc *alloc,
1674                                    uint32_t attach_idx,
1675                                    struct pvr_renderpass_resource *resource)
1676 {
1677    struct pvr_renderpass_hwsetup_render *hw_render = ctx->hw_render;
1678    struct pvr_render_int_attachment *int_attach;
1679    struct pvr_renderpass_alloc combined_alloc;
1680    uint32_t first_use;
1681    VkResult result;
1682 
1683    /* If z replication was already enabled by a previous subpass then storage
1684     * will already be allocated.
1685     */
1686    assert(attach_idx < ctx->pass->attachment_count);
1687 
1688    int_attach = &ctx->int_attach[attach_idx];
1689    if (int_attach->z_replicate) {
1690       assert(int_attach->resource.type != USC_MRT_RESOURCE_TYPE_INVALID);
1691       return VK_SUCCESS;
1692    }
1693 
1694    /* Find the subpass where the depth is first written. */
1695    if (hw_render) {
1696       first_use = hw_render->subpass_count;
1697       for (uint32_t i = 0U; i < hw_render->subpass_count; i++) {
1698          struct pvr_renderpass_subpass *subpass = &ctx->subpasses[i];
1699          struct pvr_render_subpass *input_subpass = subpass->input_subpass;
1700 
1701          if (input_subpass->depth_stencil_attachment == (int32_t)attach_idx) {
1702             first_use = i;
1703             break;
1704          }
1705       }
1706    }
1707 
1708    /* Get the registers used in any subpass after the depth is first written.
1709     * Start with registers used in the incoming subpass.
1710     */
1711    result = pvr_copy_alloc(ctx, &combined_alloc, alloc);
1712    if (result != VK_SUCCESS)
1713       return result;
1714 
1715    if (hw_render) {
1716       /* Merge in registers used in previous subpasses. */
1717       for (uint32_t i = first_use; i < hw_render->subpass_count; i++) {
1718          struct pvr_renderpass_subpass *subpass = &ctx->subpasses[i];
1719 
1720          result = pvr_merge_alloc(ctx, &combined_alloc, &subpass->alloc);
1721          if (result != VK_SUCCESS) {
1722             pvr_free_alloc(ctx, &combined_alloc);
1723             return result;
1724          }
1725       }
1726    }
1727 
1728    result = pvr_surface_alloc_color_storage(dev_info,
1729                                             ctx,
1730                                             &combined_alloc,
1731                                             int_attach->attachment,
1732                                             resource);
1733 
1734    pvr_free_alloc(ctx, &combined_alloc);
1735    if (result != VK_SUCCESS)
1736       return result;
1737 
1738    return pvr_mark_storage_allocated(ctx,
1739                                      alloc,
1740                                      int_attach->attachment,
1741                                      resource);
1742 }
1743 
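/* Tries to allocate on-chip storage for the incoming subpass's color outputs
 * and any required Z replication. On success the chosen locations are
 * returned in sp_dsts and the updated allocation state in alloc.
 */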
1744 static VkResult
1745 pvr_is_subpass_space_available(const struct pvr_device_info *dev_info,
1746                                struct pvr_renderpass_context *ctx,
1747                                struct pvr_render_subpass *subpass,
1748                                struct pvr_render_subpass_depth_params *sp_depth,
1749                                struct pvr_renderpass_alloc *alloc,
1750                                struct pvr_render_int_subpass_dsts *sp_dsts)
1751 {
1752    VkResult result;
1753 
1754    /* Mark pointers in return structures as not allocated. */
1755    sp_dsts->color = NULL;
1756    alloc->tile_buffers = NULL;
1757 
1758    /* Allocate space to track which locations are in use after this subpass. */
1759    result = pvr_copy_alloc(ctx, alloc, &ctx->alloc);
1760    if (result != VK_SUCCESS)
1761       return result;
1762 
1763    /* Allocate space to store our results. */
1764    if (subpass->color_count > 0U) {
1765       sp_dsts->color =
1766          vk_alloc(ctx->allocator,
1767                   sizeof(sp_dsts->color[0U]) * subpass->color_count,
1768                   8,
1769                   VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
1770       if (!sp_dsts->color) {
1771          result = vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);
1772          goto err_free_alloc;
1773       }
1774    } else {
1775       sp_dsts->color = NULL;
1776    }
1777 
1778    sp_dsts->existing_zrep.type = USC_MRT_RESOURCE_TYPE_INVALID;
1779    sp_dsts->incoming_zrep.type = USC_MRT_RESOURCE_TYPE_INVALID;
1780 
1781    for (uint32_t i = 0U; i < subpass->color_count; i++) {
1782       const uint32_t attach_idx = subpass->color_attachments[i];
1783       struct pvr_render_int_attachment *int_attach;
1784 
1785       if (attach_idx == VK_ATTACHMENT_UNUSED)
1786          continue;
1787 
1788       int_attach = &ctx->int_attach[attach_idx];
1789 
1790       assert(vk_format_get_blocksizebits(int_attach->attachment->vk_format) >
1791              0U);
1792 
1793       /* Has the attachment not been allocated on-chip storage yet? */
1794       if (int_attach->resource.type == USC_MRT_RESOURCE_TYPE_INVALID) {
1795          result = pvr_surface_alloc_color_storage(dev_info,
1796                                                   ctx,
1797                                                   alloc,
1798                                                   int_attach->attachment,
1799                                                   &sp_dsts->color[i]);
1800          if (result != VK_SUCCESS)
1801             goto err_free_alloc;
1802 
1803          /* Avoid merging subpasses which result in tile buffers having to be
1804           * used. The benefit of merging must be weighed against the cost of
1805           * writing to and reading from tile buffers.
1806           */
1807          if (ctx->hw_render &&
1808              sp_dsts->color[i].type != USC_MRT_RESOURCE_TYPE_OUTPUT_REG) {
1809             result = vk_error(NULL, VK_ERROR_TOO_MANY_OBJECTS);
1810             goto err_free_alloc;
1811          }
1812       } else {
1813          sp_dsts->color[i].type = USC_MRT_RESOURCE_TYPE_INVALID;
1814       }
1815    }
1816 
1817    if (sp_depth->existing_ds_is_input) {
1818       result = pvr_is_z_replicate_space_available(dev_info,
1819                                                   ctx,
1820                                                   alloc,
1821                                                   sp_depth->existing_ds_attach,
1822                                                   &sp_dsts->existing_zrep);
1823       if (result != VK_SUCCESS)
1824          goto err_free_alloc;
1825    }
1826 
1827    if (sp_depth->incoming_ds_is_input) {
1828       if (sp_depth->existing_ds_attach != subpass->depth_stencil_attachment) {
1829          result = pvr_is_z_replicate_space_available(
1830             dev_info,
1831             ctx,
1832             alloc,
1833             subpass->depth_stencil_attachment,
1834             &sp_dsts->incoming_zrep);
1835          if (result != VK_SUCCESS)
1836             goto err_free_alloc;
1837       } else {
1838          sp_dsts->incoming_zrep = sp_dsts->existing_zrep;
1839       }
1840    }
1841 
1842    return VK_SUCCESS;
1843 
1844 err_free_alloc:
1845    pvr_free_alloc(ctx, alloc);
1846    if (sp_dsts->color)
1847       vk_free(ctx->allocator, sp_dsts->color);
1848 
1849    sp_dsts->color = NULL;
1850 
1851    return result;
1852 }
1853 
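/* Returns true if the incoming subpass can be merged into the current render:
 * compatible sample counts, no depth/stencil ZLS conflict, no dependency
 * which forces a flush, no pending MSAA resolve hazards, enough PBE emit
 * registers and enough on-chip storage for the subpass's outputs.
 */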
1854 static bool
1855 pvr_can_combine_with_render(const struct pvr_device_info *dev_info,
1856                             struct pvr_renderpass_context *ctx,
1857                             struct pvr_render_subpass *subpass,
1858                             struct pvr_render_subpass_depth_params *sp_depth,
1859                             struct pvr_render_int_attachment *int_ds_attach,
1860                             struct pvr_renderpass_alloc *new_alloc,
1861                             struct pvr_render_int_subpass_dsts *sp_dsts)
1862 {
1863    VkResult result;
1864    bool ret;
1865 
1866    /* Mark pointers in return structures as not allocated. */
1867    sp_dsts->color = NULL;
1868    new_alloc->tile_buffers = NULL;
1869 
1870    /* The hardware doesn't support replicating the stencil, so we need to store
1871     * the depth to memory if a stencil attachment is used as an input
1872     * attachment.
1873     */
1874    if (sp_depth->existing_ds_is_input &&
1875        ctx->int_ds_attach->attachment->aspects & VK_IMAGE_ASPECT_STENCIL_BIT) {
1876       return false;
1877    }
1878 
1879    if (sp_depth->incoming_ds_is_input && int_ds_attach &&
1880        int_ds_attach->attachment->aspects & VK_IMAGE_ASPECT_STENCIL_BIT &&
1881        ctx->hw_render) {
1882       return false;
1883    }
1884 
1885    /* Can't mix multiple sample counts into same render. */
1886    if (ctx->hw_render &&
1887        ctx->hw_render->sample_count != subpass->sample_count) {
1888       return false;
1889    }
1890 
1891    /* If the depth is used by both the render and the incoming subpass and
1892     * either the existing depth must be saved or the new depth must be loaded
1893     * then we can't merge.
1894     */
1895    ret = pvr_depth_zls_conflict(ctx,
1896                                 int_ds_attach,
1897                                 sp_depth->existing_ds_is_input);
1898    if (ret)
1899       return false;
1900 
1901    /* Check if any of the subpass's dependencies are marked that the two
1902     * subpasses can't be in the same render.
1903     */
1904    for (uint32_t i = 0U; i < subpass->dep_count; i++) {
1905       const uint32_t dep = subpass->dep_list[i];
1906       if (subpass->flush_on_dep[i] && ctx->hw_setup->subpass_map[dep].render ==
1907                                          (ctx->hw_setup->render_count - 1U)) {
1908          return false;
1909       }
1910    }
1911 
1912    /* Check if one of the input/color attachments is written by an MSAA resolve
1913     * in an existing subpass in the current render.
1914     */
1915    for (uint32_t i = 0U; i < subpass->input_count; i++) {
1916       const uint32_t attach_idx = subpass->input_attachments[i];
1917       if (attach_idx != VK_ATTACHMENT_UNUSED &&
1918           pvr_is_pending_resolve_dest(ctx, attach_idx)) {
1919          return false;
1920       }
1921    }
1922 
1923    for (uint32_t i = 0U; i < subpass->color_count; i++) {
1924       if (subpass->color_attachments[i] != VK_ATTACHMENT_UNUSED &&
1925           (pvr_is_pending_resolve_dest(ctx, subpass->color_attachments[i]) ||
1926            pvr_is_pending_resolve_src(ctx, subpass->color_attachments[i]))) {
1927          return false;
1928       }
1929 
1930       if (subpass->resolve_attachments &&
1931           subpass->resolve_attachments[i] != VK_ATTACHMENT_UNUSED &&
1932           pvr_is_pending_resolve_dest(ctx, subpass->resolve_attachments[i])) {
1933          return false;
1934       }
1935    }
1936 
1937    /* A single subpass on its own can't exceed the PBE register limit. */
1938    if (ctx->hw_render) {
1939       ret = pvr_exceeds_pbe_registers(ctx, subpass);
1940       if (ret)
1941          return false;
1942    }
1943 
1944    /* Check we can allocate storage for the new subpass's color attachments and
1945     * any z replications.
1946     */
1947    result = pvr_is_subpass_space_available(dev_info,
1948                                            ctx,
1949                                            subpass,
1950                                            sp_depth,
1951                                            new_alloc,
1952                                            sp_dsts);
1953    if (result != VK_SUCCESS)
1954       return false;
1955 
1956    return true;
1957 }
1958 
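/* Adds a subpass to the in-progress render, closing the current render and
 * opening a new one first if the two can't be combined.
 */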
1959 static VkResult
1960 pvr_merge_subpass(const struct pvr_device *device,
1961                   struct pvr_renderpass_context *ctx,
1962                   struct pvr_render_subpass *input_subpass,
1963                   struct pvr_renderpass_hwsetup_subpass **const hw_subpass_out)
1964 {
1965    struct pvr_renderpass_hwsetup_subpass *new_hw_subpasses;
1966    struct pvr_renderpass_hwsetup_subpass *hw_subpass;
1967    struct pvr_render_int_attachment *int_ds_attach;
1968    struct pvr_renderpass_hwsetup_render *hw_render;
1969    struct pvr_render_subpass_depth_params sp_depth;
1970    struct pvr_renderpass_subpass *new_subpasses;
1971    struct pvr_render_int_subpass_dsts sp_dsts;
1972    struct pvr_renderpass_subpass *subpass;
1973    struct pvr_renderpass_alloc alloc;
1974    VkResult result;
1975    bool ret;
1976 
1977    /* Depth attachment for the incoming subpass. */
1978    if (input_subpass->depth_stencil_attachment != VK_ATTACHMENT_UNUSED)
1979       int_ds_attach = &ctx->int_attach[input_subpass->depth_stencil_attachment];
1980    else
1981       int_ds_attach = NULL;
1982 
1983    /* Attachment ID for the existing depth attachment. */
1984    if (ctx->int_ds_attach)
1985       sp_depth.existing_ds_attach = ctx->int_ds_attach - ctx->int_attach;
1986    else
1987       sp_depth.existing_ds_attach = VK_ATTACHMENT_UNUSED;
1988 
1989    /* Is the incoming depth attachment used as an input to the incoming subpass?
1990     */
1991    sp_depth.incoming_ds_is_input =
1992       pvr_is_input(input_subpass, input_subpass->depth_stencil_attachment);
1993 
1994    /* Is the current depth attachment used as an input to the incoming subpass?
1995     */
1996    sp_depth.existing_ds_is_input =
1997       pvr_is_input(input_subpass, sp_depth.existing_ds_attach);
1998 
1999    /* Can the incoming subpass be combined with the existing render? Also checks
2000     * if space is available for the subpass results and returns the allocated
2001     * locations.
2002     */
2003    ret = pvr_can_combine_with_render(&device->pdevice->dev_info,
2004                                      ctx,
2005                                      input_subpass,
2006                                      &sp_depth,
2007                                      int_ds_attach,
2008                                      &alloc,
2009                                      &sp_dsts);
2010    if (!ret) {
2011       result = pvr_close_render(device, ctx);
2012       if (result != VK_SUCCESS)
2013          goto end_merge_subpass;
2014 
2015       sp_depth.existing_ds_is_input = false;
2016       sp_depth.existing_ds_attach = VK_ATTACHMENT_UNUSED;
2017 
2018       /* Allocate again in a new render. */
2019       result = pvr_is_subpass_space_available(&device->pdevice->dev_info,
2020                                               ctx,
2021                                               input_subpass,
2022                                               &sp_depth,
2023                                               &alloc,
2024                                               &sp_dsts);
2025       assert(result != VK_ERROR_TOO_MANY_OBJECTS);
2026       if (result != VK_SUCCESS)
2027          goto end_merge_subpass;
2028    }
2029 
2030    /* If there isn't an in-progress render then allocate one. */
2031    if (!ctx->hw_render) {
2032       struct pvr_renderpass_hwsetup *hw_setup = ctx->hw_setup;
2033       struct pvr_renderpass_hwsetup_render *new_hw_render = vk_realloc(
2034          ctx->allocator,
2035          hw_setup->renders,
2036          sizeof(hw_setup->renders[0U]) * (hw_setup->render_count + 1U),
2037          8U,
2038          VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2039       if (!new_hw_render) {
2040          result = vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);
2041          goto end_merge_subpass;
2042       }
2043 
2044       hw_setup->renders = new_hw_render;
2045 
2046       ctx->hw_render = &hw_setup->renders[hw_setup->render_count];
2047       memset(ctx->hw_render, 0U, sizeof(*hw_render));
2048       ctx->hw_render->ds_attach_idx = VK_ATTACHMENT_UNUSED;
2049       hw_setup->render_count++;
2050       ctx->hw_render->depth_init = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
2051       ctx->hw_render->stencil_init = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
2052       ctx->hw_render->sample_count = input_subpass->sample_count;
2053    }
2054 
2055    /* Allocate a new subpass in the in-progress render. */
2056    hw_render = ctx->hw_render;
2057 
2058    new_hw_subpasses = vk_realloc(ctx->allocator,
2059                                  hw_render->subpasses,
2060                                  sizeof(hw_render->subpasses[0U]) *
2061                                     (hw_render->subpass_count + 1U),
2062                                  8U,
2063                                  VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2064    if (!new_hw_subpasses) {
2065       result = vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);
2066       goto end_merge_subpass;
2067    }
2068 
2069    hw_render->subpasses = new_hw_subpasses;
2070    hw_subpass = &hw_render->subpasses[hw_render->subpass_count];
2071 
2072    new_subpasses =
2073       vk_realloc(ctx->allocator,
2074                  ctx->subpasses,
2075                  sizeof(ctx->subpasses[0U]) * (hw_render->subpass_count + 1U),
2076                  8U,
2077                  VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
2078    if (!new_subpasses) {
2079       result = vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);
2080       goto end_merge_subpass;
2081    }
2082 
2083    ctx->subpasses = new_subpasses;
2084 
2085    subpass = &ctx->subpasses[hw_render->subpass_count];
2086    subpass->input_subpass = input_subpass;
2087    subpass->z_replicate = false;
2088 
2089    /* Save the allocation state at the subpass. */
2090    result = pvr_copy_alloc(ctx, &subpass->alloc, &alloc);
2091    if (result != VK_SUCCESS)
2092       goto end_merge_subpass;
2093 
2094    hw_render->subpass_count++;
2095 
2096    memset(hw_subpass, 0U, sizeof(*hw_subpass));
2097    hw_subpass->index = input_subpass->index;
2098    hw_subpass->z_replicate = -1;
2099    hw_subpass->depth_initop = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
2100 
2101    if (int_ds_attach && ctx->int_ds_attach != int_ds_attach) {
2102       bool setup_render_ds = false;
2103       bool stencil_load = false;
2104       bool depth_load = false;
2105 
2106       if (int_ds_attach->load_op == VK_ATTACHMENT_LOAD_OP_LOAD) {
2107          depth_load = true;
2108          setup_render_ds = true;
2109          hw_render->depth_init = VK_ATTACHMENT_LOAD_OP_LOAD;
2110          hw_subpass->depth_initop = VK_ATTACHMENT_LOAD_OP_LOAD;
2111 
2112          assert(!ctx->ds_load_surface);
2113          ctx->ds_load_surface = int_ds_attach;
2114       } else if (int_ds_attach->load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) {
2115          hw_subpass->depth_initop = VK_ATTACHMENT_LOAD_OP_CLEAR;
2116       }
2117 
2118       if (int_ds_attach->attachment->aspects & VK_IMAGE_ASPECT_STENCIL_BIT) {
2119          if (int_ds_attach->stencil_load_op == VK_ATTACHMENT_LOAD_OP_LOAD) {
2120             stencil_load = true;
2121             setup_render_ds = true;
2122             hw_render->stencil_init = VK_ATTACHMENT_LOAD_OP_LOAD;
2123          } else if (int_ds_attach->stencil_load_op ==
2124                     VK_ATTACHMENT_LOAD_OP_CLEAR) {
2125             hw_subpass->stencil_clear = true;
2126          }
2127       }
2128 
2129       /* If the depth is loaded then allocate external memory for the depth
2130        * attachment.
2131        */
2132       if (depth_load || stencil_load)
2133          pvr_mark_surface_alloc(ctx, int_ds_attach);
2134 
2135       if (setup_render_ds) {
2136          assert(hw_render->ds_attach_idx == VK_ATTACHMENT_UNUSED);
2137          hw_render->ds_attach_idx = int_ds_attach->attachment->index;
2138       }
2139 
2140       ctx->int_ds_attach = int_ds_attach;
2141    }
2142 
2143    /* Set up the initialization operations for the subpass's color attachments. */
2144    hw_subpass->color_initops = vk_alloc(ctx->allocator,
2145                                         sizeof(hw_subpass->color_initops[0U]) *
2146                                            input_subpass->color_count,
2147                                         8,
2148                                         VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2149    if (!hw_subpass->color_initops) {
2150       result = vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);
2151       goto end_merge_subpass;
2152    }
2153 
2154    for (uint32_t i = 0U; i < input_subpass->color_count; i++) {
2155       const uint32_t attach_idx = input_subpass->color_attachments[i];
2156       struct pvr_render_int_attachment *int_attach;
2157 
2158       if (attach_idx == VK_ATTACHMENT_UNUSED)
2159          continue;
2160 
2161       int_attach = &ctx->int_attach[attach_idx];
2162 
2163       if (int_attach->first_use == -1) {
2164          hw_subpass->color_initops[i] = int_attach->load_op;
2165 
2166          /* If the attachment is loaded then off-chip memory must be
2167           * allocated for it.
2168           */
2169          if (int_attach->load_op == VK_ATTACHMENT_LOAD_OP_LOAD)
2170             pvr_mark_surface_alloc(ctx, int_attach);
2171 
2172          /* The attachment has been written so load the attachment the next
2173           * time it is referenced.
2174           */
2175          int_attach->load_op = VK_ATTACHMENT_LOAD_OP_LOAD;
2176       } else {
2177          hw_subpass->color_initops[i] = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
2178       }
2179    }
2180 
2181    /* Copy the destinations allocated for the color attachments. */
2182    for (uint32_t i = 0U; i < input_subpass->color_count; i++) {
2183       const uint32_t attach_idx = input_subpass->color_attachments[i];
2184       struct pvr_render_int_attachment *int_attach;
2185       struct pvr_renderpass_resource *attach_dst;
2186 
2187       if (attach_idx == VK_ATTACHMENT_UNUSED)
2188          continue;
2189 
2190       int_attach = &ctx->int_attach[attach_idx];
2191       attach_dst = &sp_dsts.color[i];
2192 
2193       if (int_attach->first_use == -1) {
2194          assert(int_attach->resource.type == USC_MRT_RESOURCE_TYPE_INVALID);
2195          assert(attach_dst->type != USC_MRT_RESOURCE_TYPE_INVALID);
2196          pvr_set_surface_resource(int_attach, attach_dst);
2197 
2198          /* If this attachment is being used for the first time then add it
2199           * to the active list.
2200           */
2201          pvr_make_surface_active(ctx,
2202                                  int_attach,
2203                                  hw_render->subpass_count - 1U);
2204       } else {
2205          assert(attach_dst->type == USC_MRT_RESOURCE_TYPE_INVALID);
2206       }
2207    }
2208 
2209    /* We can't directly read the on-chip depth, so mark subpasses where the depth
2210     * is written to replicate the value into part of the color storage.
2211     */
2212    if (sp_depth.existing_ds_is_input) {
2213       result = pvr_enable_z_replicate(ctx,
2214                                       hw_render,
2215                                       sp_depth.existing_ds_attach,
2216                                       &sp_dsts.existing_zrep);
2217       if (result != VK_SUCCESS)
2218          goto end_merge_subpass;
2219    }
2220 
2221    if (sp_depth.incoming_ds_is_input) {
2222       if (input_subpass->depth_stencil_attachment !=
2223           sp_depth.existing_ds_attach) {
2224          result =
2225             pvr_enable_z_replicate(ctx,
2226                                    hw_render,
2227                                    input_subpass->depth_stencil_attachment,
2228                                    &sp_dsts.incoming_zrep);
2229          if (result != VK_SUCCESS)
2230             goto end_merge_subpass;
2231       } else {
2232          assert(pvr_equal_resources(&sp_dsts.existing_zrep,
2233                                     &sp_dsts.incoming_zrep));
2234       }
2235    }
2236 
2237    /* Copy the locations of color/input attachments to the output structure.
2238     * N.B. Need to do this after Z replication in case the replicated depth is
2239     * an input attachment for the incoming subpass.
2240     */
2241    result = pvr_copy_storage_details(ctx, hw_subpass, subpass);
2242    if (result != VK_SUCCESS)
2243       goto end_merge_subpass;
2244 
2245    if (subpass->z_replicate) {
2246       result = pvr_copy_z_replicate_details(ctx, hw_subpass, subpass);
2247       if (result != VK_SUCCESS)
2248          goto end_merge_subpass;
2249    }
2250 
2251    /* Copy the allocation at the subpass. This will then be updated if this was
2252     * the last use of any attachment.
2253     */
2254    pvr_free_alloc(ctx, &ctx->alloc);
2255    ctx->alloc = alloc;
2256 
2257    /* Free information about subpass destinations. */
2258    if (sp_dsts.color)
2259       vk_free(ctx->allocator, sp_dsts.color);
2260 
2261    *hw_subpass_out = hw_subpass;
2262 
2263    return VK_SUCCESS;
2264 
2265 end_merge_subpass:
2266    if (sp_dsts.color)
2267       vk_free(ctx->allocator, sp_dsts.color);
2268 
2269    pvr_free_alloc(ctx, &alloc);
2270 
2271    return result;
2272 }
2273 
2274 static void
2275 pvr_dereference_color_output_list(struct pvr_renderpass_context *ctx,
2276                                   uint32_t subpass_num,
2277                                   struct pvr_render_subpass *subpass)
2278 {
2279    for (uint32_t i = 0U; i < subpass->color_count; i++) {
2280       const uint32_t attach_idx = subpass->color_attachments[i];
2281 
2282       if (attach_idx != VK_ATTACHMENT_UNUSED)
2283          pvr_dereference_surface(ctx, attach_idx, subpass_num);
2284    }
2285 }
2286 
2287 static void pvr_dereference_surface_list(struct pvr_renderpass_context *ctx,
2288                                          uint32_t subpass_num,
2289                                          uint32_t *attachments,
2290                                          uint32_t count)
2291 {
2292    for (uint32_t i = 0U; i < count; i++) {
2293       if (attachments[i] != VK_ATTACHMENT_UNUSED)
2294          pvr_dereference_surface(ctx, attachments[i], subpass_num);
2295    }
2296 }
2297 
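/* Schedules a subpass: merges it into a render, records the mapping from
 * input subpass to hardware render/subpass, updates the remaining reference
 * counts of its attachments and unblocks the subpasses which depend on it.
 */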
2298 static VkResult pvr_schedule_subpass(const struct pvr_device *device,
2299                                      struct pvr_renderpass_context *ctx,
2300                                      uint32_t subpass_idx)
2301 {
2302    struct pvr_renderpass_hwsetup_subpass *hw_subpass;
2303    struct pvr_renderpass_hwsetup_render *hw_render;
2304    struct pvr_render_int_subpass *int_subpass;
2305    struct pvr_render_subpass *subpass;
2306    uint32_t subpass_num;
2307    VkResult result;
2308 
2309    int_subpass = &ctx->int_subpasses[subpass_idx];
2310    subpass = int_subpass->subpass;
2311 
2312    result = pvr_merge_subpass(device, ctx, subpass, &hw_subpass);
2313    if (result != VK_SUCCESS)
2314       return result;
2315 
2316    hw_render = ctx->hw_render;
2317    subpass_num = hw_render->subpass_count - 1U;
2318 
2319    /* Record where the subpass was scheduled. */
2320    ctx->hw_setup->subpass_map[subpass_idx].render =
2321       ctx->hw_setup->render_count - 1U;
2322    ctx->hw_setup->subpass_map[subpass_idx].subpass = subpass_num;
2323 
2324    /* Check whether this subpass was the last use of any attachment. */
2325    pvr_dereference_color_output_list(ctx, subpass_num, subpass);
2326    pvr_dereference_surface_list(ctx,
2327                                 subpass_num,
2328                                 subpass->input_attachments,
2329                                 subpass->input_count);
2330    if (subpass->depth_stencil_attachment != VK_ATTACHMENT_UNUSED) {
2331       struct pvr_render_int_attachment *int_depth_attach =
2332          &ctx->int_attach[subpass->depth_stencil_attachment];
2333 
2334       assert(int_depth_attach->remaining_count > 0U);
2335       int_depth_attach->remaining_count--;
2336 
2337       if (int_depth_attach->remaining_count == 0U) {
2338          if (int_depth_attach->first_use != -1)
2339             int_depth_attach->last_use = subpass_num;
2340 
2341          if (int_depth_attach->z_replicate)
2342             pvr_free_surface_storage(ctx, int_depth_attach);
2343       }
2344 
2345       if (int_depth_attach->attachment->aspects & VK_IMAGE_ASPECT_STENCIL_BIT) {
2346          assert(int_depth_attach->stencil_remaining_count > 0U);
2347          int_depth_attach->stencil_remaining_count--;
2348       }
2349 
2350       /* The depth attachment has initialized data so load it from memory if it
2351        * is referenced again.
2352        */
2353       int_depth_attach->load_op = VK_ATTACHMENT_LOAD_OP_LOAD;
2354       int_depth_attach->stencil_load_op = VK_ATTACHMENT_LOAD_OP_LOAD;
2355    }
2356 
2357    /* Mark surfaces which have been the source or destination of an MSAA resolve
2358     * in the current render.
2359     */
2360    for (uint32_t i = 0U; i < subpass->color_count; i++) {
2361       struct pvr_render_int_attachment *resolve_src;
2362       struct pvr_render_int_attachment *resolve_dst;
2363 
2364       if (!subpass->resolve_attachments)
2365          break;
2366 
2367       if (subpass->resolve_attachments[i] == VK_ATTACHMENT_UNUSED)
2368          continue;
2369 
2370       assert(subpass->color_attachments[i] <
2371              (int32_t)ctx->pass->attachment_count);
2372       resolve_src = &ctx->int_attach[subpass->color_attachments[i]];
2373 
2374       assert(subpass->resolve_attachments[i] <
2375              (int32_t)ctx->pass->attachment_count);
2376       resolve_dst = &ctx->int_attach[subpass->resolve_attachments[i]];
2377 
2378       /* Mark the resolve source. */
2379       assert(resolve_src->last_resolve_src_render <
2380              (int32_t)(ctx->hw_setup->render_count - 1U));
2381       resolve_src->last_resolve_src_render = ctx->hw_setup->render_count - 1U;
2382 
2383       /* Mark the resolve destination. */
2384       assert(resolve_dst->last_resolve_dst_render <
2385              (int32_t)(ctx->hw_setup->render_count - 1U));
2386       resolve_dst->last_resolve_dst_render = ctx->hw_setup->render_count - 1U;
2387 
2388       /* If we can't downscale through the PBE then the src must be stored
2389        * for a transfer downscale.
2390        */
2391       if (!resolve_src->is_pbe_downscalable &&
2392           resolve_src->last_read < (int32_t)ctx->pass->subpass_count) {
2393          resolve_src->last_read = (int32_t)ctx->pass->subpass_count;
2394          resolve_src->remaining_count++;
2395       }
2396    }
2397 
2398    /* For subpasses dependent on this subpass, decrement the unscheduled
2399     * dependency count.
2400     */
2401    for (uint32_t i = 0U; i < int_subpass->out_subpass_count; i++) {
2402       struct pvr_render_int_subpass *int_dst_subpass =
2403          int_subpass->out_subpasses[i];
2404 
2405       assert(int_dst_subpass->in_subpass_count > 0U);
2406       int_dst_subpass->in_subpass_count--;
2407    }
2408 
2409    return VK_SUCCESS;
2410 }
2411 
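/* Counts how many times an attachment appears in an attachment list. */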
2412 static uint32_t pvr_count_uses_in_list(uint32_t *attachments,
2413                                        uint32_t size,
2414                                        uint32_t attach_idx)
2415 {
2416    uint32_t count = 0U;
2417 
2418    for (uint32_t i = 0U; i < size; i++) {
2419       if (attachments[i] == attach_idx)
2420          count++;
2421    }
2422 
2423    return count;
2424 }
2425 
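/* Counts uses of an attachment as a color output; a color output with an
 * associated resolve counts twice since the resolve also reads it.
 */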
2426 static uint32_t
2427 pvr_count_uses_in_color_output_list(struct pvr_render_subpass *subpass,
2428                                     uint32_t attach_idx)
2429 {
2430    uint32_t count = 0U;
2431 
2432    for (uint32_t i = 0U; i < subpass->color_count; i++) {
2433       if (subpass->color_attachments[i] == attach_idx) {
2434          count++;
2435 
2436          if (subpass->resolve_attachments &&
2437              subpass->resolve_attachments[i] != VK_ATTACHMENT_UNUSED)
2438             count++;
2439       }
2440    }
2441 
2442    return count;
2443 }
2444 
2445 void pvr_destroy_renderpass_hwsetup(const VkAllocationCallbacks *alloc,
2446                                     struct pvr_renderpass_hwsetup *hw_setup)
2447 {
2448    for (uint32_t i = 0U; i < hw_setup->render_count; i++) {
2449       struct pvr_renderpass_hwsetup_render *hw_render = &hw_setup->renders[i];
2450 
2451       vk_free(alloc, hw_render->eot_surfaces);
2452       vk_free(alloc, hw_render->eot_setup.mrt_resources);
2453       vk_free(alloc, hw_render->init_setup.mrt_resources);
2454       vk_free(alloc, hw_render->color_init);
2455 
2456       for (uint32_t j = 0U; j < hw_render->subpass_count; j++) {
2457          struct pvr_renderpass_hwsetup_subpass *subpass =
2458             &hw_render->subpasses[j];
2459 
2460          vk_free(alloc, subpass->color_initops);
2461          vk_free(alloc, subpass->input_access);
2462          vk_free(alloc, subpass->setup.mrt_resources);
2463       }
2464 
2465       vk_free(alloc, hw_render->subpasses);
2466    }
2467 
2468    vk_free(alloc, hw_setup->renders);
2469    vk_free(alloc, hw_setup);
2470 }
2471 
2472 VkResult pvr_create_renderpass_hwsetup(
2473    struct pvr_device *device,
2474    const VkAllocationCallbacks *alloc,
2475    struct pvr_render_pass *pass,
2476    bool disable_merge,
2477    struct pvr_renderpass_hwsetup **const hw_setup_out)
2478 {
2479    struct pvr_render_int_attachment *int_attachments;
2480    struct pvr_render_int_subpass *int_subpasses;
2481    struct pvr_renderpass_hw_map *subpass_map;
2482    struct pvr_renderpass_hwsetup *hw_setup;
2483    struct pvr_renderpass_context *ctx;
2484    bool *surface_allocate;
2485    VkResult result;
2486 
2487    VK_MULTIALLOC(ma);
2488    vk_multialloc_add(&ma, &hw_setup, __typeof__(*hw_setup), 1);
2489    vk_multialloc_add(&ma,
2490                      &surface_allocate,
2491                      __typeof__(*surface_allocate),
2492                      pass->attachment_count);
2493    vk_multialloc_add(&ma,
2494                      &subpass_map,
2495                      __typeof__(*subpass_map),
2496                      pass->subpass_count);
2497 
2498    if (!vk_multialloc_zalloc(&ma, alloc, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT))
2499       return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
2500 
2501    hw_setup->surface_allocate = surface_allocate;
2502    hw_setup->subpass_map = subpass_map;
2503 
2504    VK_MULTIALLOC(ma_ctx);
2505    vk_multialloc_add(&ma_ctx, &ctx, __typeof__(*ctx), 1);
2506    vk_multialloc_add(&ma_ctx,
2507                      &int_attachments,
2508                      __typeof__(*int_attachments),
2509                      pass->attachment_count);
2510    vk_multialloc_add(&ma_ctx,
2511                      &int_subpasses,
2512                      __typeof__(*int_subpasses),
2513                      pass->subpass_count);
2514 
2515    if (!vk_multialloc_zalloc(&ma_ctx,
2516                              alloc,
2517                              VK_SYSTEM_ALLOCATION_SCOPE_COMMAND)) {
2518       vk_free(alloc, hw_setup);
2519       return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
2520    }
2521 
2522    ctx->pass = pass;
2523    ctx->hw_setup = hw_setup;
2524    ctx->int_attach = int_attachments;
2525    ctx->int_subpasses = int_subpasses;
2526    ctx->allocator = alloc;
2527 
2528    for (uint32_t i = 0U; i < pass->attachment_count; i++) {
2529       struct pvr_render_pass_attachment *attachment = &pass->attachments[i];
2530       struct pvr_render_int_attachment *int_attach = &ctx->int_attach[i];
2531       const uint32_t pixel_size =
2532          vk_format_get_blocksizebits(attachment->vk_format) / 32U;
2533       const uint32_t part_bits =
2534          vk_format_get_blocksizebits(attachment->vk_format) % 32U;
2535 
2536       int_attach->resource.type = USC_MRT_RESOURCE_TYPE_INVALID;
2537       int_attach->resource.intermediate_size =
2538          DIV_ROUND_UP(vk_format_get_blocksizebits(attachment->vk_format),
2539                       CHAR_BIT);
2540       int_attach->resource.mrt_desc.intermediate_size =
2541          int_attach->resource.intermediate_size;
2542 
2543       for (uint32_t j = 0U; j < pixel_size; j++)
2544          int_attach->resource.mrt_desc.valid_mask[j] = ~0;
2545 
2546       if (part_bits > 0U) {
2547          int_attach->resource.mrt_desc.valid_mask[pixel_size] =
2548             BITFIELD_MASK(part_bits);
2549       }
2550 
2551       int_attach->load_op = pass->attachments[i].load_op;
2552       int_attach->stencil_load_op = pass->attachments[i].stencil_load_op;
2553       int_attach->attachment = attachment;
2554       int_attach->first_use = -1;
2555       int_attach->last_use = -1;
2556       int_attach->last_read = -1;
2557       int_attach->mrt_idx = -1;
2558       int_attach->last_resolve_dst_render = -1;
2559       int_attach->last_resolve_src_render = -1;
2560       int_attach->z_replicate = false;
2561       int_attach->is_pbe_downscalable = attachment->is_pbe_downscalable;
2562 
2563       /* Count the number of references to this attachment in subpasses. */
2564       for (uint32_t j = 0U; j < pass->subpass_count; j++) {
2565          struct pvr_render_subpass *subpass = &pass->subpasses[j];
2566          const uint32_t color_output_uses =
2567             pvr_count_uses_in_color_output_list(subpass, i);
2568          const uint32_t input_attachment_uses =
2569             pvr_count_uses_in_list(subpass->input_attachments,
2570                                    subpass->input_count,
2571                                    i);
2572 
2573          if (color_output_uses != 0U || input_attachment_uses != 0U)
2574             int_attach->last_read = j;
2575 
2576          int_attach->remaining_count +=
2577             color_output_uses + input_attachment_uses;
2578 
2579          if ((uint32_t)subpass->depth_stencil_attachment == i)
2580             int_attach->remaining_count++;
2581       }
2582 
2583       if (int_attach->attachment->aspects & VK_IMAGE_ASPECT_STENCIL_BIT) {
2584          int_attach->stencil_remaining_count = int_attach->remaining_count;
2585          if (pass->attachments[i].stencil_store_op ==
2586              VK_ATTACHMENT_STORE_OP_STORE) {
2587             int_attach->stencil_remaining_count++;
2588          }
2589       }
2590 
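      /* A store at the end of the renderpass counts as one further use, read
       * after all subpasses.
       */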
2591       if (pass->attachments[i].store_op == VK_ATTACHMENT_STORE_OP_STORE) {
2592          int_attach->remaining_count++;
2593          int_attach->last_read = pass->subpass_count;
2594       }
2595    }
2596 
2597    for (uint32_t i = 0U; i < pass->subpass_count; i++) {
2598       struct pvr_render_int_subpass *int_subpass = &ctx->int_subpasses[i];
2599 
2600       int_subpass->subpass = &pass->subpasses[i];
2601       int_subpass->out_subpass_count = 0U;
2602       int_subpass->out_subpasses = NULL;
2603       int_subpass->in_subpass_count = int_subpass->subpass->dep_count;
2604    }
2605 
2606    /* For each dependency of a subpass, create an edge in the opposite
2607     * direction.
2608     */
2609    for (uint32_t i = 0U; i < pass->subpass_count; i++) {
2610       struct pvr_render_int_subpass *int_subpass = &ctx->int_subpasses[i];
2611 
2612       for (uint32_t j = 0U; j < int_subpass->in_subpass_count; j++) {
2613          uint32_t src_idx = int_subpass->subpass->dep_list[j];
2614          struct pvr_render_int_subpass *int_src_subpass;
2615          struct pvr_render_int_subpass **out_subpasses;
2616 
2617          assert(src_idx < pass->subpass_count);
2618 
2619          int_src_subpass = &ctx->int_subpasses[src_idx];
2620 
2621          out_subpasses =
2622             vk_realloc(ctx->allocator,
2623                        int_src_subpass->out_subpasses,
2624                        sizeof(int_src_subpass->out_subpasses[0U]) *
2625                           (int_src_subpass->out_subpass_count + 1U),
2626                        8U,
2627                        VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
2628          if (!out_subpasses) {
2629             result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
2630             goto end_create_renderpass_hwsetup;
2631          }
2632 
2633          int_src_subpass->out_subpasses = out_subpasses;
2634          int_src_subpass->out_subpasses[int_src_subpass->out_subpass_count] =
2635             int_subpass;
2636          int_src_subpass->out_subpass_count++;
2637       }
2638    }
2639 
2640    pvr_reset_render(ctx);
2641 
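   /* Schedule the subpasses in a dependency-respecting (topological) order:
    * on each iteration pick a subpass whose dependencies have all been
    * scheduled.
    */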
2642    for (uint32_t i = 0U; i < pass->subpass_count; i++) {
2643       uint32_t j;
2644 
2645       /* Find a subpass with no unscheduled dependencies. */
2646       for (j = 0U; j < pass->subpass_count; j++) {
2647          struct pvr_render_int_subpass *int_subpass = &ctx->int_subpasses[j];
2648 
2649          if (int_subpass->subpass && int_subpass->in_subpass_count == 0U)
2650             break;
2651       }
2652       assert(j < pass->subpass_count);
2653 
2654       result = pvr_schedule_subpass(device, ctx, j);
2655       if (result != VK_SUCCESS)
2656          goto end_create_renderpass_hwsetup;
2657 
2658       if (disable_merge) {
2659          result = pvr_close_render(device, ctx);
2660          if (result != VK_SUCCESS)
2661             goto end_create_renderpass_hwsetup;
2662       }
2663 
2664       ctx->int_subpasses[j].subpass = NULL;
2665    }
2666 
2667    /* Finalise the last in-progress render. */
2668    result = pvr_close_render(device, ctx);
2669 
2670 end_create_renderpass_hwsetup:
2671    if (result != VK_SUCCESS) {
2672       pvr_free_render(ctx);
2673 
2674       if (hw_setup) {
2675          pvr_destroy_renderpass_hwsetup(alloc, hw_setup);
2676          hw_setup = NULL;
2677       }
2678    }
2679 
2680    for (uint32_t i = 0U; i < pass->subpass_count; i++) {
2681       struct pvr_render_int_subpass *int_subpass = &ctx->int_subpasses[i];
2682 
2683       if (int_subpass->out_subpass_count > 0U)
2684          vk_free(alloc, int_subpass->out_subpasses);
2685    }
2686 
2687    vk_free(alloc, ctx);
2688 
2689    *hw_setup_out = hw_setup;
2690 
2691    return result;
2692 }
2693