/*
 * Copyright © 2022 Imagination Technologies Ltd.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <assert.h>
#include <limits.h>
#include <stdbool.h>
#include <stddef.h>
#include <string.h>
#include <vulkan/vulkan.h>

#include "hwdef/rogue_hw_defs.h"
#include "hwdef/rogue_hw_utils.h"
#include "pvr_hw_pass.h"
#include "pvr_private.h"
#include "util/bitset.h"
#include "util/list.h"
#include "util/macros.h"
#include "util/u_math.h"
#include "vk_alloc.h"
#include "vk_format.h"
#include "vk_log.h"

struct pvr_render_int_subpass {
   /* Points to the input subpass. This is set to NULL when the subpass is
    * unscheduled.
    */
   struct pvr_render_subpass *subpass;

   /* Count of other subpasses which have this subpass as a dependency. */
   uint32_t out_subpass_count;

   /* Pointers to the other subpasses which have this subpass as a dependency.
    */
   struct pvr_render_int_subpass **out_subpasses;

   /* Count of subpasses on which this subpass is dependent and which haven't
    * been scheduled yet.
    */
   uint32_t in_subpass_count;
};

struct pvr_renderpass_resource {
   /* Resource type allocated for render target. */
   enum usc_mrt_resource_type type;

   union {
      /* If type == USC_MRT_RESOURCE_TYPE_OUTPUT_REG. */
      struct {
         /* The output register to use. */
         uint32_t output_reg;

         /* The offset in bytes within the output register. */
         uint32_t offset;
      } reg;

      /* If type == USC_MRT_RESOURCE_TYPE_MEMORY. */
      struct {
         /* The index of the tile buffer to use. */
         uint32_t tile_buffer;

         /* The offset (in dwords) within the tile buffer. */
         uint32_t offset_dw;
      } mem;
   };
};
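
/* Illustrative example (values are assumptions, not taken from the driver): a
 * two-dword render target placed at the start of pixel output register 2
 * would be described as
 *
 *    struct pvr_renderpass_resource res = {
 *       .type = USC_MRT_RESOURCE_TYPE_OUTPUT_REG,
 *       .reg = { .output_reg = 2U, .offset = 0U },
 *    };
 *
 * while the same target spilled to dword 4 of tile buffer 1 would use
 * .type = USC_MRT_RESOURCE_TYPE_MEMORY with .mem.tile_buffer = 1U and
 * .mem.offset_dw = 4U.
 */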

struct pvr_render_int_attachment {
   /* Points to the corresponding input attachment. */
   struct pvr_render_pass_attachment *attachment;

   /* True if this attachment is referenced in the currently open render. */
   bool is_used;

   /* Operation to use when this attachment is non-resident and referenced as a
    * color or depth attachment.
    */
   VkAttachmentLoadOp load_op;

   /* Operation to use for the stencil component when this attachment is
    * non-resident and referenced as a color or depth attachment.
    */
   VkAttachmentLoadOp stencil_load_op;

   /* Count of uses of this attachment in unscheduled subpasses. */
   uint32_t remaining_count;

   /* Count of uses of the stencil component of this attachment in unscheduled
    * subpasses.
    */
   uint32_t stencil_remaining_count;

   /* If this attachment currently has allocated on-chip storage then the
    * details of the allocated location.
    */
   struct usc_mrt_resource resource;

   /* Index of the subpass in the current render where the attachment is first
    * used. VK_ATTACHMENT_UNUSED if the attachment isn't used in the current
    * render.
    */
   int32_t first_use;

   /* Index of the subpass in the current render where the attachment is last
    * used.
    */
   int32_t last_use;

   /* Index of the subpass (global) where the attachment is last read. */
   int32_t last_read;

   /* If this attachment currently has allocated on-chip storage then the entry
    * in context.active_surf_list.
    */
   struct list_head link;

   /* During pvr_close_render: if this attachment has allocated on-chip storage
    * then the index in pvr_renderpass_hwsetup_render.eot_setup.mrt_resources
    * with details of the storage location. Otherwise -1.
    */
   int32_t mrt_idx;

   /* Index of the last render where the attachment was the source of an MSAA
    * resolve.
    */
   int32_t last_resolve_src_render;

   /* Index of the last render where the attachment was the destination of an
    * MSAA resolve.
    */
   int32_t last_resolve_dst_render;

   /* true if the attachment is used with a z replicate in the current render.
    */
   bool z_replicate;

   /* true if this attachment can be resolved by the PBE. */
   bool is_pbe_downscalable;

   /* true if this attachment requires an EOT attachment. */
   bool eot_surf_required;
};

/* Which parts of the output registers/a tile buffer are currently allocated. */
struct pvr_renderpass_alloc_buffer {
   /* Bit array. A bit is set if the corresponding dword is allocated. */
   BITSET_DECLARE(allocs, 8U);
};

struct pvr_renderpass_alloc {
   /* Which pixel output registers are allocated. */
   struct pvr_renderpass_alloc_buffer output_reg;

   /* Range of allocated output registers. */
   uint32_t output_regs_count;

   /* Number of tile buffers allocated. */
   uint32_t tile_buffers_count;

   /* Which parts of each tile buffer are allocated. Length is
    * tile_buffers_count.
    */
   struct pvr_renderpass_alloc_buffer *tile_buffers;
};

struct pvr_renderpass_subpass {
   /* A pointer to the input subpass description. */
   struct pvr_render_subpass *input_subpass;

   /* true if the depth attachment for this subpass has z replication enabled.
    */
   bool z_replicate;

   /* Which pixel output registers/tile buffer locations are allocated during
    * this subpass.
    */
   struct pvr_renderpass_alloc alloc;
};

struct pvr_renderpass_context {
   /* Internal information about each input attachment. */
   struct pvr_render_int_attachment *int_attach;

   /* Internal information about each input subpass. */
   struct pvr_render_int_subpass *int_subpasses;

   /* Input structure. */
   struct pvr_render_pass *pass;

   /* Output structure. */
   struct pvr_renderpass_hwsetup *hw_setup;

   /* In-progress render. */
   struct pvr_renderpass_hwsetup_render *hw_render;

   /* Information about each subpass in the current render. */
   struct pvr_renderpass_subpass *subpasses;

   /* Which parts of color storage are currently allocated. */
   struct pvr_renderpass_alloc alloc;

   /* Attachment which is currently allocated the on-chip depth/stencil. */
   struct pvr_render_int_attachment *int_ds_attach;

   /* Attachment which is loaded into the on-chip depth/stencil at the start of
    * the render.
    */
   struct pvr_render_int_attachment *ds_load_surface;

   /* Attachment which the depth/stencil attachment should be resolved to at the
    * end of the render.
    */
   struct pvr_render_int_attachment *ds_resolve_surface;

   /* Count of surfaces which are allocated on-chip color storage. */
   uint32_t active_surfaces;

   /* List of attachment/ranges which are allocated on-chip color storage. */
   struct list_head active_surf_list;

   const VkAllocationCallbacks *allocator;
};

struct pvr_render_int_subpass_dsts {
   struct pvr_renderpass_resource *color;
   struct pvr_renderpass_resource incoming_zrep;
   struct pvr_renderpass_resource existing_zrep;
};

struct pvr_render_subpass_depth_params {
   bool existing_ds_is_input;
   bool incoming_ds_is_input;
   uint32_t existing_ds_attach;
};

struct pvr_renderpass_storage_firstuse_buffer {
   /* For each pixel output register/tile buffer location: true if the output
    * register has been allocated in the current render.
    */
   bool used[8U];
};

struct pvr_renderpass_storage_firstuse {
   /* First use information for pixel output registers. */
   struct pvr_renderpass_storage_firstuse_buffer output_reg;

   /* First use information for tile buffers. */
   struct pvr_renderpass_storage_firstuse_buffer *tile_buffers;
};

/** Copy information about allocated color storage. */
static VkResult pvr_copy_alloc(struct pvr_renderpass_context *ctx,
                               struct pvr_renderpass_alloc *dst,
                               struct pvr_renderpass_alloc *src)
{
   dst->output_reg = src->output_reg;
   dst->output_regs_count = src->output_regs_count;

   dst->tile_buffers_count = src->tile_buffers_count;
   if (dst->tile_buffers_count > 0U) {
      dst->tile_buffers =
         vk_alloc(ctx->allocator,
                  sizeof(dst->tile_buffers[0U]) * dst->tile_buffers_count,
                  8,
                  VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
      if (!dst->tile_buffers)
         return vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);

      memcpy(dst->tile_buffers,
             src->tile_buffers,
             sizeof(dst->tile_buffers[0U]) * dst->tile_buffers_count);
   } else {
      dst->tile_buffers = NULL;
   }

   return VK_SUCCESS;
}
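
/* Note: pvr_copy_alloc deep-copies the tile buffer array, so the destination
 * can later be released independently with pvr_free_alloc. A plain struct
 * assignment would alias dst->tile_buffers and src->tile_buffers and lead to
 * a double free.
 */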

/** Free information about allocated color storage. */
static void pvr_free_alloc(struct pvr_renderpass_context *ctx,
                           struct pvr_renderpass_alloc *alloc)
{
   if (alloc->tile_buffers)
      vk_free(ctx->allocator, alloc->tile_buffers);

   memset(alloc, 0U, sizeof(*alloc));
}

static void pvr_reset_render(struct pvr_renderpass_context *ctx)
{
   ctx->int_ds_attach = NULL;
   ctx->active_surfaces = 0U;
   list_inithead(&ctx->active_surf_list);

   memset(&ctx->alloc.output_reg, 0U, sizeof(ctx->alloc.output_reg));
   ctx->alloc.output_regs_count = 0U;
   ctx->alloc.tile_buffers_count = 0U;
   ctx->alloc.tile_buffers = NULL;

   ctx->hw_render = NULL;
   ctx->subpasses = NULL;
   ctx->ds_load_surface = NULL;
}

/** Gets the amount of memory to allocate per-core for a tile buffer. */
static uint32_t
pvr_get_tile_buffer_size_per_core(const struct pvr_device *device)
{
   uint32_t clusters =
      PVR_GET_FEATURE_VALUE(&device->pdevice->dev_info, num_clusters, 1U);

   /* Round the number of clusters up to the next power of two. */
   if (!PVR_HAS_FEATURE(&device->pdevice->dev_info, tile_per_usc))
      clusters = util_next_power_of_two(clusters);

   /* Tile buffer is (total number of partitions across all clusters) * 16 * 16
    * (quadrant size in pixels).
    */
   return device->pdevice->dev_runtime_info.total_reserved_partition_size *
          clusters * sizeof(uint32_t);
}
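
/* Worked example with purely illustrative numbers: on a hypothetical
 * 6-cluster device without the tile_per_usc feature, clusters is rounded up
 * to 8; with a total_reserved_partition_size of 1024 the per-core size is
 * 1024 * 8 * sizeof(uint32_t) = 32KiB.
 */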

/**
 * Gets the amount of memory to allocate for a tile buffer on the current BVNC.
 */
uint32_t pvr_get_tile_buffer_size(const struct pvr_device *device)
{
   /* On a multicore system duplicate the buffer for each core. */
   return pvr_get_tile_buffer_size_per_core(device) *
          rogue_get_max_num_cores(&device->pdevice->dev_info);
}

static void
pvr_finalise_mrt_setup(const struct pvr_device *device,
                       struct pvr_renderpass_hwsetup_render *hw_render,
                       struct usc_mrt_setup *mrt)
{
   mrt->num_output_regs = hw_render->output_regs_count;
   mrt->num_tile_buffers = hw_render->tile_buffers_count;
   mrt->tile_buffer_size = pvr_get_tile_buffer_size(device);
}

/**
 * Copy information about the number of pixel output registers and tile buffers
 * required for the current render to the output structure.
 */
static void pvr_finalise_po_alloc(const struct pvr_device *device,
                                  struct pvr_renderpass_context *ctx)
{
   struct pvr_renderpass_hwsetup_render *hw_render = ctx->hw_render;

   /* The number of output registers must be a power of two. */
   hw_render->output_regs_count =
      util_next_power_of_two(ctx->alloc.output_regs_count);

   assert(ctx->alloc.tile_buffers_count <= ctx->pass->max_tilebuffer_count);
   hw_render->tile_buffers_count = ctx->alloc.tile_buffers_count;

   /* Copy the number of output registers and tile buffers to each subpass. */
   for (uint32_t i = 0U; i < hw_render->subpass_count; i++) {
      struct pvr_renderpass_hwsetup_subpass *hw_subpass =
         &hw_render->subpasses[i];

      pvr_finalise_mrt_setup(device, hw_render, &hw_subpass->setup);
   }

   pvr_finalise_mrt_setup(device, hw_render, &hw_render->init_setup);
   pvr_finalise_mrt_setup(device, hw_render, &hw_render->eot_setup);
}

/** Mark that device memory must be allocated for an attachment. */
static void pvr_mark_surface_alloc(struct pvr_renderpass_context *ctx,
                                   struct pvr_render_int_attachment *int_attach)
{
   const uint32_t attach_idx = int_attach - ctx->int_attach;

   assert(attach_idx < ctx->pass->attachment_count);
   ctx->hw_setup->surface_allocate[attach_idx] = true;
}

/**
 * Check if there is space in a buffer for storing a render target of a
 * specified size.
 */
static int32_t
pvr_is_space_in_buffer(const struct pvr_device_info *dev_info,
                       struct pvr_renderpass_alloc_buffer *buffer,
                       uint32_t pixel_size)
{
   const uint32_t max_out_regs = rogue_get_max_output_regs_per_pixel(dev_info);
   uint32_t alignment = 1U;

   if (PVR_HAS_FEATURE(dev_info, pbe2_in_xe)) {
      /* For a 64-bit/128-bit source format: the start offset must be even. */
      if (pixel_size == 2U || pixel_size == 4U)
         alignment = 2U;
   }

   assert(pixel_size <= max_out_regs);

   for (uint32_t i = 0U; i <= (max_out_regs - pixel_size); i += alignment) {
      if (!BITSET_TEST_RANGE(buffer->allocs, i, i + pixel_size - 1U))
         return i;
   }

   return -1;
}
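
/* Example of the first-fit search above (illustrative): with pbe2_in_xe set
 * and pixel_size == 2U the loop only visits even offsets, so a buffer whose
 * dword 0 is allocated but dwords 1..3 are free returns 2 rather than 1,
 * keeping 64-bit sources even-aligned.
 */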

static VkResult
pvr_surface_setup_render_init(struct pvr_renderpass_context *ctx,
                              struct pvr_renderpass_storage_firstuse *first_use,
                              struct usc_mrt_resource const *resource,
                              struct pvr_render_pass_attachment *attachment,
                              VkAttachmentLoadOp load_op,
                              bool *use_render_init)
{
   const uint32_t pixel_size =
      DIV_ROUND_UP(vk_format_get_blocksizebits(attachment->vk_format), 32U);
   struct pvr_renderpass_hwsetup_render *hw_render = ctx->hw_render;
   struct pvr_renderpass_storage_firstuse_buffer *buffer;
   uint32_t start;

   /* Check if this is the first use of all the allocated registers. */
   if (resource->type == USC_MRT_RESOURCE_TYPE_OUTPUT_REG) {
      buffer = &first_use->output_reg;
      start = resource->reg.output_reg;
   } else {
      assert(resource->mem.tile_buffer < ctx->alloc.tile_buffers_count);
      buffer = &first_use->tile_buffers[resource->mem.tile_buffer];
      start = resource->mem.offset_dw;
   }

   *use_render_init = true;
   for (uint32_t i = 0U; i < pixel_size; i++) {
      /* Don't initialize at the render level if the output registers were
       * previously allocated to a different attachment.
       */
      if (buffer->used[start + i])
         *use_render_init = false;

      /* Don't use render init for future attachments allocated to the same
       * registers.
       */
      buffer->used[start + i] = true;
   }

   if (load_op == VK_ATTACHMENT_LOAD_OP_DONT_CARE)
      *use_render_init = false;

   if (*use_render_init) {
      struct pvr_renderpass_colorinit *new_color_init;
      struct usc_mrt_resource *new_mrt;

      /* Initialize the storage at the start of the render. */
      new_color_init = vk_realloc(ctx->allocator,
                                  hw_render->color_init,
                                  sizeof(hw_render->color_init[0U]) *
                                     (hw_render->color_init_count + 1U),
                                  8U,
                                  VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
      if (!new_color_init)
         return vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);

      hw_render->color_init = new_color_init;
      hw_render->color_init[hw_render->color_init_count].index =
         attachment->index;
      hw_render->color_init[hw_render->color_init_count].op = load_op;

      /* Set the destination for the attachment load/clear. */
      assert(hw_render->init_setup.num_render_targets ==
             hw_render->color_init_count);

      new_mrt = vk_realloc(ctx->allocator,
                           hw_render->init_setup.mrt_resources,
                           sizeof(hw_render->init_setup.mrt_resources[0U]) *
                              (hw_render->init_setup.num_render_targets + 1U),
                           8U,
                           VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
      if (!new_mrt)
         return vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);

      hw_render->init_setup.mrt_resources = new_mrt;
      hw_render->init_setup
         .mrt_resources[hw_render->init_setup.num_render_targets] = *resource;
      hw_render->init_setup.num_render_targets++;

      hw_render->color_init_count++;
   }

   return VK_SUCCESS;
}

static VkResult
pvr_subpass_setup_render_init(struct pvr_renderpass_context *ctx)
{
   struct pvr_renderpass_hwsetup_render *hw_render = ctx->hw_render;
   struct pvr_renderpass_storage_firstuse first_use = { 0 };
   bool first_ds = true;
   VkResult result;

   if (ctx->alloc.tile_buffers_count > 0U) {
      first_use.tile_buffers = vk_zalloc(ctx->allocator,
                                         sizeof(first_use.tile_buffers[0U]) *
                                            ctx->alloc.tile_buffers_count,
                                         8,
                                         VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
      if (!first_use.tile_buffers)
         return vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);
   }

   for (uint32_t i = 0U; i < hw_render->subpass_count; i++) {
      struct pvr_renderpass_hwsetup_subpass *hw_subpass =
         &hw_render->subpasses[i];
      struct pvr_renderpass_subpass *subpass = &ctx->subpasses[i];
      struct pvr_render_subpass *input_subpass = subpass->input_subpass;

      /* If this is the first depth attachment in the render then clear at the
       * render level, not the subpass level.
       */
      if (first_ds &&
          (hw_subpass->depth_initop == VK_ATTACHMENT_LOAD_OP_CLEAR ||
           hw_subpass->stencil_clear)) {
         struct pvr_render_int_attachment *int_ds_attach;

         assert(input_subpass->depth_stencil_attachment !=
                VK_ATTACHMENT_UNUSED);
         assert(input_subpass->depth_stencil_attachment <
                ctx->pass->attachment_count);
         int_ds_attach =
            &ctx->int_attach[input_subpass->depth_stencil_attachment];

         assert(hw_render->ds_attach_idx == VK_ATTACHMENT_UNUSED ||
                hw_render->ds_attach_idx == int_ds_attach->attachment->index);
         hw_render->ds_attach_idx = int_ds_attach->attachment->index;

         if (hw_subpass->depth_initop == VK_ATTACHMENT_LOAD_OP_CLEAR)
            hw_render->depth_init = VK_ATTACHMENT_LOAD_OP_CLEAR;

         if (hw_subpass->stencil_clear) {
            hw_render->stencil_init = VK_ATTACHMENT_LOAD_OP_CLEAR;
            hw_subpass->stencil_clear = false;
         }
      }

      if (input_subpass->depth_stencil_attachment != VK_ATTACHMENT_UNUSED)
         first_ds = false;

      for (uint32_t j = 0U; j < input_subpass->color_count; j++) {
         struct usc_mrt_resource *mrt = &hw_subpass->setup.mrt_resources[j];
         const uint32_t attach_idx = input_subpass->color_attachments[j];
         struct pvr_render_int_attachment *int_attach;

         if (attach_idx == VK_ATTACHMENT_UNUSED)
            continue;

         int_attach = &ctx->int_attach[attach_idx];

         assert(vk_format_get_blocksizebits(int_attach->attachment->vk_format) >
                0U);

         /* Is this the first use of the attachment? */
         if (int_attach->first_use == (int32_t)i) {
            /* Set if we should initialize the attachment storage at the
             * render level.
             */
            bool use_render_init;
            result = pvr_surface_setup_render_init(ctx,
                                                   &first_use,
                                                   mrt,
                                                   int_attach->attachment,
                                                   hw_subpass->color_initops[j],
                                                   &use_render_init);
            if (result != VK_SUCCESS) {
               if (first_use.tile_buffers)
                  vk_free(ctx->allocator, first_use.tile_buffers);

               return result;
            }

            /* On success don't initialize the attachment at the subpass level.
             */
            if (use_render_init)
               hw_subpass->color_initops[j] = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
         } else {
            /* This attachment is already present in on-chip storage so don't
             * do anything.
             */
            assert(hw_subpass->color_initops[j] ==
                   VK_ATTACHMENT_LOAD_OP_DONT_CARE);
         }
      }
   }

   if (first_use.tile_buffers)
      vk_free(ctx->allocator, first_use.tile_buffers);

   return VK_SUCCESS;
}

static void
pvr_mark_storage_allocated_in_buffer(struct pvr_renderpass_alloc_buffer *buffer,
                                     uint32_t start,
                                     uint32_t pixel_size)
{
   assert(!BITSET_TEST_RANGE(buffer->allocs, start, start + pixel_size - 1U));
   BITSET_SET_RANGE(buffer->allocs, start, start + pixel_size - 1U);
}
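
/* For example (illustrative call): marking start 2U with pixel_size 2U sets
 * bits 2 and 3 of buffer->allocs; the assert above fires if either dword was
 * already booked, documenting that callers never double-allocate a range.
 */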

static VkResult
pvr_mark_storage_allocated(struct pvr_renderpass_context *ctx,
                           struct pvr_renderpass_alloc *alloc,
                           struct pvr_render_pass_attachment *attachment,
                           struct pvr_renderpass_resource *resource)
{
   /* Number of dwords to allocate for the attachment. */
   const uint32_t pixel_size =
      DIV_ROUND_UP(vk_format_get_blocksizebits(attachment->vk_format), 32U);

   if (resource->type == USC_MRT_RESOURCE_TYPE_OUTPUT_REG) {
      /* Update the locations used in the pixel output registers. */
      pvr_mark_storage_allocated_in_buffer(&alloc->output_reg,
                                           resource->reg.output_reg,
                                           pixel_size);

      /* Update the range of pixel output registers used. */
      alloc->output_regs_count =
         MAX2(alloc->output_regs_count, resource->reg.output_reg + pixel_size);
   } else {
      assert(resource->type == USC_MRT_RESOURCE_TYPE_MEMORY);

      if (resource->mem.tile_buffer >= alloc->tile_buffers_count) {
         /* Grow the number of tile buffers. */
         struct pvr_renderpass_alloc_buffer *new_tile_buffers = vk_realloc(
            ctx->allocator,
            alloc->tile_buffers,
            sizeof(alloc->tile_buffers[0U]) * (resource->mem.tile_buffer + 1U),
            8U,
            VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
         if (!new_tile_buffers)
            return vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);

         alloc->tile_buffers = new_tile_buffers;
         memset(
            &alloc->tile_buffers[alloc->tile_buffers_count],
            0U,
            sizeof(alloc->tile_buffers[0U]) *
               (resource->mem.tile_buffer + 1U - alloc->tile_buffers_count));
         alloc->tile_buffers_count = resource->mem.tile_buffer + 1U;
         assert(alloc->tile_buffers_count <= ctx->pass->max_tilebuffer_count);
      }

      /* Update the locations used in the tile buffer. */
      pvr_mark_storage_allocated_in_buffer(
         &alloc->tile_buffers[resource->mem.tile_buffer],
         resource->mem.offset_dw,
         pixel_size);

      /* The hardware makes the bit depth of the on-chip storage and memory
       * storage the same so make sure the memory storage is large enough to
       * accommodate the largest render target.
       */
      alloc->output_regs_count =
         MAX2(alloc->output_regs_count, resource->mem.offset_dw + pixel_size);
   }

   return VK_SUCCESS;
}
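
/* Note on the final MAX2 above: because on-chip and memory storage share a
 * bit depth, allocating (for example, with illustrative numbers) a 2-dword
 * target at offset_dw 4 of a tile buffer raises output_regs_count to at least
 * 6 even though no pixel output register is consumed by that target.
 */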

static VkResult
pvr_surface_alloc_color_storage(const struct pvr_device_info *dev_info,
                                struct pvr_renderpass_context *ctx,
                                struct pvr_renderpass_alloc *alloc,
                                struct pvr_render_pass_attachment *attachment,
                                struct pvr_renderpass_resource *resource)
{
   /* Number of dwords to allocate for the attachment. */
   const uint32_t pixel_size =
      DIV_ROUND_UP(vk_format_get_blocksizebits(attachment->vk_format), 32U);

   /* Try allocating pixel output registers. */
   const int32_t output_reg =
      pvr_is_space_in_buffer(dev_info, &alloc->output_reg, pixel_size);
   if (output_reg != -1) {
      resource->type = USC_MRT_RESOURCE_TYPE_OUTPUT_REG;
      resource->reg.output_reg = (uint32_t)output_reg;
      resource->reg.offset = 0U;
   } else {
      uint32_t i;

      /* Mark the attachment as using a tile buffer. */
      resource->type = USC_MRT_RESOURCE_TYPE_MEMORY;

      /* Try allocating from an existing tile buffer. */
      for (i = 0U; i < alloc->tile_buffers_count; i++) {
         const int32_t tile_buffer_offset =
            pvr_is_space_in_buffer(dev_info,
                                   &alloc->tile_buffers[i],
                                   pixel_size);

         if (tile_buffer_offset != -1) {
            resource->mem.tile_buffer = i;
            resource->mem.offset_dw = (uint32_t)tile_buffer_offset;
            break;
         }
      }

      if (i == alloc->tile_buffers_count) {
         /* Check for reaching the maximum number of tile buffers. */
         if (alloc->tile_buffers_count == ctx->pass->max_tilebuffer_count)
            return vk_error(NULL, VK_ERROR_TOO_MANY_OBJECTS);

         /* Use a newly allocated tile buffer. */
         resource->mem.tile_buffer = i;
         resource->mem.offset_dw = 0U;
      }
   }

   /* Update which parts of the pixel outputs/tile buffers are used. */
   return pvr_mark_storage_allocated(ctx, alloc, attachment, resource);
}
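
/* The allocation strategy above is strictly ordered: pixel output registers
 * first, then space in an existing tile buffer, then a brand new tile buffer.
 * A minimal sketch of the intended fallback behaviour (assumed, illustrative
 * inputs):
 *
 *    struct pvr_renderpass_resource res;
 *    VkResult result =
 *       pvr_surface_alloc_color_storage(dev_info, ctx, &ctx->alloc,
 *                                       attachment, &res);
 *    // res.type is USC_MRT_RESOURCE_TYPE_OUTPUT_REG while registers last,
 *    // USC_MRT_RESOURCE_TYPE_MEMORY once they are exhausted, and result is
 *    // VK_ERROR_TOO_MANY_OBJECTS past pass->max_tilebuffer_count.
 */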

/** Free the storage allocated to an attachment. */
static void
pvr_free_buffer_storage(struct pvr_renderpass_alloc_buffer *buffer,
                        struct pvr_render_int_attachment *int_attach,
                        uint32_t start)
{
   const uint32_t pixel_size = DIV_ROUND_UP(
      vk_format_get_blocksizebits(int_attach->attachment->vk_format),
      32U);

   BITSET_CLEAR_RANGE(buffer->allocs, start, start + pixel_size - 1U);
}

/** Free the storage allocated to an attachment. */
static void
pvr_free_surface_storage(struct pvr_renderpass_context *ctx,
                         struct pvr_render_int_attachment *int_attach)
{
   struct usc_mrt_resource *resource = &int_attach->resource;
   struct pvr_renderpass_alloc *alloc = &ctx->alloc;

   assert(resource->type != USC_MRT_RESOURCE_TYPE_INVALID);

   /* Mark the storage as free. */
   if (resource->type == USC_MRT_RESOURCE_TYPE_OUTPUT_REG) {
      pvr_free_buffer_storage(&alloc->output_reg,
                              int_attach,
                              resource->reg.output_reg);
   } else {
      struct pvr_renderpass_alloc_buffer *tile_buffer;

      assert(resource->type == USC_MRT_RESOURCE_TYPE_MEMORY);

      assert(resource->mem.tile_buffer < alloc->tile_buffers_count);
      tile_buffer = &alloc->tile_buffers[resource->mem.tile_buffer];
      pvr_free_buffer_storage(tile_buffer, int_attach, resource->mem.offset_dw);
   }

   /* Mark that the attachment doesn't have allocated storage. */
   resource->type = USC_MRT_RESOURCE_TYPE_INVALID;

   /* Remove from the list of surfaces with allocated on-chip storage. */
   assert(ctx->active_surfaces > 0U);
   ctx->active_surfaces--;
   list_del(&int_attach->link);
}

static void pvr_reset_surface(struct pvr_renderpass_context *ctx,
                              struct pvr_render_int_attachment *int_attach)
{
   /* Reset information about the range of uses. */
   int_attach->first_use = int_attach->last_use = -1;
   int_attach->z_replicate = false;

   pvr_free_surface_storage(ctx, int_attach);
}

static void
pvr_make_surface_active(struct pvr_renderpass_context *ctx,
                        struct pvr_render_int_attachment *int_attach,
                        uint32_t subpass_num)
{
   /* Add to the list of surfaces with on-chip storage. */
   assert(int_attach->first_use == -1);
   int_attach->first_use = subpass_num;
   ctx->active_surfaces++;
   list_addtail(&int_attach->link, &ctx->active_surf_list);
}

/**
 * For a subpass copy details of storage locations for the input/color to the
 * output structure.
 */
static VkResult
pvr_copy_storage_details(struct pvr_renderpass_context *ctx,
                         struct pvr_renderpass_hwsetup_subpass *hw_subpass,
                         struct pvr_renderpass_subpass *subpass)
{
   struct pvr_render_subpass *input_subpass = subpass->input_subpass;
   const uint32_t max_rts =
      input_subpass->color_count + input_subpass->input_count;
   VkResult result;

   if (max_rts == 0)
      return VK_SUCCESS;

   hw_subpass->setup.mrt_resources =
      vk_zalloc(ctx->allocator,
                sizeof(hw_subpass->setup.mrt_resources[0U]) * max_rts,
                8,
                VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (!hw_subpass->setup.mrt_resources) {
      result = vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);
      goto end_copy_storage_details;
   }

   for (uint32_t i = 0U; i < input_subpass->color_count; i++) {
      const uint32_t attach_idx = input_subpass->color_attachments[i];
      struct pvr_render_int_attachment *int_attach;

      if (attach_idx == VK_ATTACHMENT_UNUSED)
         continue;

      int_attach = &ctx->int_attach[attach_idx];

      /* Record for the subpass where the color attachment is stored. */
      assert(int_attach->resource.type != USC_MRT_RESOURCE_TYPE_INVALID);
      hw_subpass->setup.mrt_resources[i] = int_attach->resource;
   }

   hw_subpass->setup.num_render_targets = input_subpass->color_count;

   if (input_subpass->input_count == 0)
      return VK_SUCCESS;

   /* For this subpass's input attachments. */
   hw_subpass->input_access = vk_alloc(ctx->allocator,
                                       sizeof(hw_subpass->input_access[0U]) *
                                          input_subpass->input_count,
                                       8,
                                       VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (!hw_subpass->input_access) {
      result = vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);
      goto end_copy_storage_details;
   }

   for (uint32_t i = 0U; i < input_subpass->input_count; i++) {
      const uint32_t attach_idx = input_subpass->input_attachments[i];
      struct pvr_render_int_attachment *int_attach;

      if (attach_idx == VK_ATTACHMENT_UNUSED)
         continue;

      int_attach = &ctx->int_attach[attach_idx];

      if (int_attach->resource.type != USC_MRT_RESOURCE_TYPE_INVALID) {
         bool is_color = false;

         /* Access the input attachment from on-chip storage. */
         if (int_attach->z_replicate) {
            hw_subpass->input_access[i].type =
               PVR_RENDERPASS_HWSETUP_INPUT_ACCESS_ONCHIP_ZREPLICATE;
         } else {
            hw_subpass->input_access[i].type =
               PVR_RENDERPASS_HWSETUP_INPUT_ACCESS_ONCHIP;
         }

         /* If this attachment is also a color attachment then point to the
          * color attachment's resource.
          */
         for (uint32_t j = 0U; j < input_subpass->color_count; j++) {
            if (input_subpass->color_attachments[j] == (int32_t)attach_idx) {
               hw_subpass->input_access[i].on_chip_rt = j;
               is_color = true;
               break;
            }
         }

         if (!is_color) {
            const uint32_t num_rts = hw_subpass->setup.num_render_targets;

            hw_subpass->input_access[i].on_chip_rt = num_rts;
            hw_subpass->setup.num_render_targets++;

            /* Record the location of the storage for the attachment. */
            hw_subpass->setup.mrt_resources[num_rts] = int_attach->resource;
         }
      } else {
         /* Access the input attachment from memory. */
         hw_subpass->input_access[i].type =
            PVR_RENDERPASS_HWSETUP_INPUT_ACCESS_OFFCHIP;
         hw_subpass->input_access[i].on_chip_rt = -1;
      }
   }

   return VK_SUCCESS;

end_copy_storage_details:
   if (hw_subpass->input_access) {
      vk_free(ctx->allocator, hw_subpass->input_access);
      hw_subpass->input_access = NULL;
   }

   if (hw_subpass->setup.mrt_resources) {
      vk_free(ctx->allocator, hw_subpass->setup.mrt_resources);
      hw_subpass->setup.mrt_resources = NULL;
   }

   return result;
}

/**
 * For a subpass copy details of any storage location for a replicated version
 * of the depth attachment to the output structure.
 */
static VkResult
pvr_copy_z_replicate_details(struct pvr_renderpass_context *ctx,
                             struct pvr_renderpass_hwsetup_subpass *hw_subpass,
                             struct pvr_renderpass_subpass *subpass)
{
   struct pvr_render_subpass *input_subpass = subpass->input_subpass;
   struct pvr_render_int_attachment *int_ds_attach;
   uint32_t z_replicate;
   bool found = false;

   assert(input_subpass->depth_stencil_attachment >= 0U &&
          input_subpass->depth_stencil_attachment <
             (int32_t)ctx->pass->attachment_count);

   int_ds_attach = &ctx->int_attach[input_subpass->depth_stencil_attachment];

   assert(hw_subpass->z_replicate == -1);

   /* Is the replicated depth also an input attachment? */
   for (uint32_t i = 0U; i < input_subpass->input_count; i++) {
      const uint32_t attach_idx = input_subpass->input_attachments[i];
      struct pvr_render_int_attachment *int_attach;

      if (attach_idx == VK_ATTACHMENT_UNUSED)
         continue;

      int_attach = &ctx->int_attach[attach_idx];

      if (int_attach == int_ds_attach) {
         z_replicate = hw_subpass->input_access[i].on_chip_rt;
         found = true;
         break;
      }
   }

   if (!found)
      z_replicate = hw_subpass->setup.num_render_targets;

   /* If the Z replicate attachment isn't also an input attachment then grow the
    * array of locations.
    */
   assert(z_replicate <= hw_subpass->setup.num_render_targets);
   if (z_replicate == hw_subpass->setup.num_render_targets) {
      struct usc_mrt_resource *mrt =
         vk_realloc(ctx->allocator,
                    hw_subpass->setup.mrt_resources,
                    sizeof(hw_subpass->setup.mrt_resources[0U]) *
                       (hw_subpass->setup.num_render_targets + 1U),
                    8U,
                    VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
      if (!mrt)
         return vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);

      hw_subpass->setup.mrt_resources = mrt;
      hw_subpass->setup.num_render_targets++;
   }

   /* Copy the location of the Z replicate. */
   assert(int_ds_attach->resource.type != USC_MRT_RESOURCE_TYPE_INVALID);
   hw_subpass->setup.mrt_resources[z_replicate] = int_ds_attach->resource;
   hw_subpass->z_replicate = z_replicate;

   return VK_SUCCESS;
}

static void pvr_dereference_surface(struct pvr_renderpass_context *ctx,
                                    int32_t attach_idx,
                                    uint32_t subpass_num)
{
   struct pvr_render_int_attachment *int_attach = &ctx->int_attach[attach_idx];

   assert(int_attach->remaining_count > 0U);
   int_attach->remaining_count--;

   if (int_attach->remaining_count == 0U) {
      if (int_attach->first_use != -1)
         int_attach->last_use = subpass_num;

      if (int_attach->resource.type != USC_MRT_RESOURCE_TYPE_INVALID)
         pvr_free_surface_storage(ctx, int_attach);
   }

   if (int_attach->attachment->aspects & VK_IMAGE_ASPECT_STENCIL_BIT) {
      assert(int_attach->stencil_remaining_count > 0U);
      int_attach->stencil_remaining_count--;
   }
}

static void pvr_free_render(struct pvr_renderpass_context *ctx)
{
   pvr_free_alloc(ctx, &ctx->alloc);

   if (ctx->subpasses) {
      for (uint32_t i = 0U; i < ctx->hw_render->subpass_count; i++)
         pvr_free_alloc(ctx, &ctx->subpasses[i].alloc);

      vk_free(ctx->allocator, ctx->subpasses);
      ctx->subpasses = NULL;
   }
}

static bool pvr_render_has_side_effects(struct pvr_renderpass_context *ctx)
{
   struct pvr_renderpass_hwsetup_render *hw_render = ctx->hw_render;
   struct pvr_render_pass *pass = ctx->pass;

   if ((hw_render->depth_init == VK_ATTACHMENT_LOAD_OP_CLEAR &&
        hw_render->depth_store) ||
       (hw_render->stencil_init == VK_ATTACHMENT_LOAD_OP_CLEAR &&
        hw_render->stencil_store)) {
      return true;
   }

   for (uint32_t i = 0U; i < hw_render->eot_surface_count; i++) {
      const struct pvr_renderpass_hwsetup_eot_surface *eot_attach =
         &hw_render->eot_surfaces[i];
      const struct pvr_render_pass_attachment *attachment =
         &pass->attachments[eot_attach->attachment_idx];

      if (attachment->load_op == VK_ATTACHMENT_LOAD_OP_CLEAR &&
          attachment->store_op == VK_ATTACHMENT_STORE_OP_STORE) {
         return true;
      }

      if (eot_attach->need_resolve)
         return true;
   }

   return false;
}
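
/* For example (illustrative): a render whose only work is clearing a depth
 * attachment that is stored at the end still counts as having side effects
 * under the first check above, so it cannot be discarded even if no subpass
 * draws anything.
 */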

static VkResult pvr_close_render(const struct pvr_device *device,
                                 struct pvr_renderpass_context *ctx)
{
   struct pvr_renderpass_hwsetup_render *hw_render = ctx->hw_render;
   struct pvr_renderpass_hwsetup_eot_surface *eot_attach;
   struct usc_mrt_setup *eot_setup;
   int32_t mrt_idx;
   VkResult result;

   /* Render already closed. */
   if (!hw_render)
      return VK_SUCCESS;

   /* Setup render and allocate resources for color/depth loads and clears. */
   result = pvr_subpass_setup_render_init(ctx);
   if (result != VK_SUCCESS)
      return result;

   /* Reset surfaces whose last use was in the current render. */
   list_for_each_entry_safe (struct pvr_render_int_attachment,
                             int_attach,
                             &ctx->active_surf_list,
                             link) {
      if (int_attach->last_use != -1) {
         assert(int_attach->resource.type == USC_MRT_RESOURCE_TYPE_INVALID);
         pvr_reset_surface(ctx, int_attach);
      }
   }

   /* Check if the depth attachment has uses in future subpasses. */
   if (ctx->int_ds_attach) {
      /* Store the depth to the attachment at the end of the render. */
      if (ctx->int_ds_attach->remaining_count > 0U)
         hw_render->depth_store = true;

      /* Store the stencil to the attachment at the end of the render. */
      if (ctx->int_ds_attach->stencil_remaining_count > 0U)
         hw_render->stencil_store = true;

      if (hw_render->depth_store || hw_render->stencil_store) {
         assert(hw_render->ds_attach_idx == VK_ATTACHMENT_UNUSED ||
                hw_render->ds_attach_idx ==
                   ctx->int_ds_attach->attachment->index);
         hw_render->ds_attach_idx = ctx->int_ds_attach->attachment->index;

         /* Allocate memory for the attachment. */
         pvr_mark_surface_alloc(ctx, ctx->int_ds_attach);
      }

      /* Load the depth and stencil before the next use. */
      ctx->int_ds_attach->load_op = VK_ATTACHMENT_LOAD_OP_LOAD;
      ctx->int_ds_attach->stencil_load_op = VK_ATTACHMENT_LOAD_OP_LOAD;
   }

   eot_setup = &hw_render->eot_setup;
   memset(eot_setup, 0U, sizeof(*eot_setup));

   /* Set the number of pixel output registers/tile buffers allocated for the
    * render and copy the information to all subpasses and the EOT program.
    */
   pvr_finalise_po_alloc(device, ctx);

   /* If any attachments are used with z replicate then they will be stored by
    * the ISP, so remove them from the list of attachments to store using the
    * PBE.
    */
   list_for_each_entry_safe (struct pvr_render_int_attachment,
                             int_attach,
                             &ctx->active_surf_list,
                             link) {
      if (int_attach->z_replicate)
         pvr_reset_surface(ctx, int_attach);
   }

   /* Number of surfaces with allocated on-chip storage. */
   eot_setup->num_render_targets = ctx->active_surfaces;
   eot_setup->mrt_resources = vk_alloc(ctx->allocator,
                                       sizeof(eot_setup->mrt_resources[0U]) *
                                          eot_setup->num_render_targets,
                                       8,
                                       VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (!eot_setup->mrt_resources)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   /* Record the location of the on-chip storage. */
   mrt_idx = 0U;
   list_for_each_entry_safe (struct pvr_render_int_attachment,
                             int_attach,
                             &ctx->active_surf_list,
                             link) {
      assert(int_attach->resource.type != USC_MRT_RESOURCE_TYPE_INVALID);
      assert(int_attach->remaining_count > 0U);
      if (int_attach->attachment->aspects & VK_IMAGE_ASPECT_STENCIL_BIT)
         assert(int_attach->stencil_remaining_count > 0U);

      /* Copy the location of the source data for this attachment. */
      eot_setup->mrt_resources[mrt_idx] = int_attach->resource;

      assert(int_attach->mrt_idx == -1);
      int_attach->mrt_idx = mrt_idx;

      mrt_idx++;
   }
   assert(mrt_idx == (int32_t)eot_setup->num_render_targets);

   hw_render->eot_surface_count = 0U;
   hw_render->pbe_emits = 0U;

   /* Count the number of surfaces to store to at the end of the subpass. */
   for (uint32_t i = 0U; i < hw_render->subpass_count; i++) {
      struct pvr_renderpass_subpass *subpass = &ctx->subpasses[i];
      struct pvr_render_subpass *input_subpass = subpass->input_subpass;

      for (uint32_t j = 0U; j < input_subpass->color_count; j++) {
         const uint32_t resolve_output =
            input_subpass->resolve_attachments
               ? input_subpass->resolve_attachments[j]
               : VK_ATTACHMENT_UNUSED;
         struct pvr_render_int_attachment *color_attach;

         if (input_subpass->color_attachments[j] == VK_ATTACHMENT_UNUSED)
            continue;

         color_attach = &ctx->int_attach[input_subpass->color_attachments[j]];

         if (list_is_linked(&color_attach->link)) {
            uint32_t rem_count = resolve_output == VK_ATTACHMENT_UNUSED ? 0U
                                                                        : 1U;

            /* If a color attachment is resolved it will have an extra
             * remaining usage.
             */
            if (color_attach->remaining_count > rem_count &&
                !color_attach->eot_surf_required) {
               color_attach->eot_surf_required = true;
               hw_render->eot_surface_count++;
            }
         }

         if (resolve_output != VK_ATTACHMENT_UNUSED) {
            struct pvr_render_int_attachment *int_resolve_attach =
               &ctx->int_attach[resolve_output];

            if (!int_resolve_attach->eot_surf_required) {
               int_resolve_attach->eot_surf_required = true;
               hw_render->eot_surface_count++;
            }
         }
      }
   }

   assert(hw_render->eot_surface_count <= 16U);

   hw_render->eot_surfaces = vk_alloc(ctx->allocator,
                                      sizeof(hw_render->eot_surfaces[0U]) *
                                         hw_render->eot_surface_count,
                                      8,
                                      VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (!hw_render->eot_surfaces)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   eot_attach = hw_render->eot_surfaces;

   for (uint32_t i = 0U; i < hw_render->subpass_count; i++) {
      struct pvr_renderpass_subpass *subpass = &ctx->subpasses[i];
      struct pvr_render_subpass *input_subpass = subpass->input_subpass;

      for (uint32_t j = 0U; j < input_subpass->color_count; j++) {
         const uint32_t resolve_output =
            input_subpass->resolve_attachments
               ? input_subpass->resolve_attachments[j]
               : VK_ATTACHMENT_UNUSED;
         struct pvr_render_int_attachment *color_attach;

         if (input_subpass->color_attachments[j] == VK_ATTACHMENT_UNUSED)
            continue;

         color_attach = &ctx->int_attach[input_subpass->color_attachments[j]];

         if (resolve_output != VK_ATTACHMENT_UNUSED) {
            struct pvr_render_int_attachment *resolve_src =
               &ctx->int_attach[input_subpass->color_attachments[j]];
            struct pvr_render_int_attachment *resolve_dst =
               &ctx->int_attach[resolve_output];

            assert(resolve_dst->eot_surf_required);
            resolve_dst->eot_surf_required = false;

            /* Dereference the source to the resolve. */
            assert(resolve_src->remaining_count > 0U);
            resolve_src->remaining_count--;

            /* Allocate device memory for the resolve destination. */
            pvr_mark_surface_alloc(ctx, resolve_dst);

            /* The attachment has been written so load the attachment the
             * next time it is referenced.
             */
            resolve_dst->load_op = VK_ATTACHMENT_LOAD_OP_LOAD;

            eot_attach->mrt_idx = resolve_src->mrt_idx;
            eot_attach->attachment_idx = resolve_dst->attachment->index;
            eot_attach->src_attachment_idx = resolve_src->attachment->index;

            eot_attach->need_resolve = true;

            if (!resolve_src->is_pbe_downscalable) {
               /* Resolve src must be stored for transfer resolve. */
               assert(resolve_src->remaining_count > 0U);

               eot_attach->resolve_type = PVR_RESOLVE_TYPE_TRANSFER;
            } else if (resolve_src->remaining_count == 0U) {
               eot_attach->resolve_type = PVR_RESOLVE_TYPE_PBE;
               hw_render->pbe_emits++;
            } else {
               eot_attach->resolve_type = PVR_RESOLVE_TYPE_INVALID;
            }

            eot_attach++;
         }

         if (color_attach->eot_surf_required) {
            assert(color_attach->remaining_count > 0U);

            pvr_mark_surface_alloc(ctx, color_attach);

            assert(color_attach->mrt_idx >= 0);
            assert(color_attach->mrt_idx <
                   (int32_t)hw_render->eot_setup.num_render_targets);

            eot_attach->mrt_idx = color_attach->mrt_idx;
            eot_attach->attachment_idx = color_attach->attachment->index;
            eot_attach->need_resolve = false;
            eot_attach++;

            hw_render->pbe_emits++;

            color_attach->eot_surf_required = false;
         }
      }
   }

   assert(hw_render->pbe_emits <= PVR_NUM_PBE_EMIT_REGS);

   /* Count the number of extra resolves we can do through the PBE. */
   for (uint32_t i = 0U; i < hw_render->eot_surface_count; i++) {
      eot_attach = &hw_render->eot_surfaces[i];

      if (eot_attach->need_resolve &&
          eot_attach->resolve_type == PVR_RESOLVE_TYPE_INVALID) {
         if (hw_render->pbe_emits == PVR_NUM_PBE_EMIT_REGS) {
            eot_attach->resolve_type = PVR_RESOLVE_TYPE_TRANSFER;
         } else {
            eot_attach->resolve_type = PVR_RESOLVE_TYPE_PBE;
            hw_render->pbe_emits++;
         }
      }
   }

   assert(hw_render->pbe_emits <= PVR_NUM_PBE_EMIT_REGS);

   /* Check for side effects in the final render. */
   hw_render->has_side_effects = pvr_render_has_side_effects(ctx);

   /* Reset active surfaces. */
   list_for_each_entry_safe (struct pvr_render_int_attachment,
                             int_attach,
                             &ctx->active_surf_list,
                             link) {
      int_attach->mrt_idx = -1;
      pvr_reset_surface(ctx, int_attach);
   }

   assert(ctx->active_surfaces == 0U);
   assert(list_is_empty(&ctx->active_surf_list));

   pvr_free_render(ctx);
   pvr_reset_render(ctx);

   return VK_SUCCESS;
}

static bool pvr_is_input(struct pvr_render_subpass *subpass,
                         uint32_t attach_idx)
{
   if (attach_idx == VK_ATTACHMENT_UNUSED)
      return false;

   for (uint32_t i = 0U; i < subpass->input_count; i++) {
      if (subpass->input_attachments[i] == attach_idx)
         return true;
   }

   return false;
}

static bool
pvr_depth_zls_conflict(struct pvr_renderpass_context *ctx,
                       struct pvr_render_int_attachment *int_ds_attach,
                       bool existing_ds_is_input)
{
   if (!ctx->int_ds_attach)
      return false;

   /* No conflict if the incoming subpass doesn't have a depth/stencil
    * attachment.
    */
   if (!int_ds_attach)
      return false;

   /* No conflict if the incoming depth/stencil attachment is the same as the
    * existing one.
    */
   if (ctx->int_ds_attach == int_ds_attach)
      return false;

   /* If the existing depth/stencil attachment is used later, then we can't
    * overwrite it.
    *
    * The exception is if the only use is as an input attachment in the incoming
    * subpass in which case we can use the Z replicate feature to save the
    * value.
    */
   if (ctx->int_ds_attach->remaining_count > 0U &&
       !(existing_ds_is_input && ctx->int_ds_attach->remaining_count == 1U)) {
      return true;
   }

   if (ctx->int_ds_attach->attachment->aspects & VK_IMAGE_ASPECT_STENCIL_BIT &&
       ctx->int_ds_attach->stencil_remaining_count > 0U) {
      return true;
   }

   /* We can't load midrender so fail if the new depth/stencil attachment is
    * already initialized.
    */
   if (int_ds_attach->load_op == VK_ATTACHMENT_LOAD_OP_LOAD)
      return true;

   if (int_ds_attach->attachment->aspects & VK_IMAGE_ASPECT_STENCIL_BIT &&
       int_ds_attach->stencil_load_op == VK_ATTACHMENT_LOAD_OP_LOAD) {
      return true;
   }

   return false;
}
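
/* In short: switching the on-chip depth/stencil is only conflict-free when
 * the outgoing attachment's sole remaining use is as an input attachment of
 * the incoming subpass (recoverable via Z replicate), its stencil has no
 * remaining uses, and the incoming attachment has no existing contents that
 * would need a mid-render load.
 */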

static void
pvr_set_surface_resource(struct pvr_render_int_attachment *int_attach,
                         struct pvr_renderpass_resource *resource)
{
   int_attach->resource.type = resource->type;

   switch (resource->type) {
   case USC_MRT_RESOURCE_TYPE_OUTPUT_REG:
      int_attach->resource.reg.output_reg = resource->reg.output_reg;
      int_attach->resource.reg.offset = resource->reg.offset;
      break;

   case USC_MRT_RESOURCE_TYPE_MEMORY:
      int_attach->resource.mem.tile_buffer = resource->mem.tile_buffer;
      int_attach->resource.mem.offset_dw = resource->mem.offset_dw;
      break;

   default:
      break;
   }
}

static bool pvr_equal_resources(struct pvr_renderpass_resource *resource1,
                                struct pvr_renderpass_resource *resource2)
{
   if (resource1->type != resource2->type)
      return false;

   switch (resource1->type) {
   case USC_MRT_RESOURCE_TYPE_OUTPUT_REG:
      return resource1->reg.output_reg == resource2->reg.output_reg &&
             resource1->reg.offset == resource2->reg.offset;

   case USC_MRT_RESOURCE_TYPE_MEMORY:
      return resource1->mem.tile_buffer == resource2->mem.tile_buffer &&
             resource1->mem.offset_dw == resource2->mem.offset_dw;

   default:
      return true;
   }
}

static VkResult
pvr_enable_z_replicate(struct pvr_renderpass_context *ctx,
                       struct pvr_renderpass_hwsetup_render *hw_render,
                       int32_t replicate_attach_idx,
                       struct pvr_renderpass_resource *replicate_dst)
{
   struct pvr_render_int_attachment *int_attach =
      &ctx->int_attach[replicate_attach_idx];
   int32_t first_use = -1;

   /* If Z replication was already enabled for the attachment then nothing more
    * to do.
    */
   if (!int_attach->z_replicate) {
      /* Copy details of the storage for the replicated value to the attachment.
       */
      assert(int_attach->resource.type == USC_MRT_RESOURCE_TYPE_INVALID);
      assert(replicate_dst->type != USC_MRT_RESOURCE_TYPE_INVALID);
      pvr_set_surface_resource(int_attach, replicate_dst);
   } else {
      assert(int_attach->resource.type != USC_MRT_RESOURCE_TYPE_INVALID);
      assert(replicate_dst->type == USC_MRT_RESOURCE_TYPE_INVALID);
   }

   /* Find the first subpass where the attachment is written. */
   for (uint32_t i = 0U; i < hw_render->subpass_count; i++) {
      struct pvr_renderpass_subpass *subpass = &ctx->subpasses[i];
      struct pvr_render_subpass *input_subpass = subpass->input_subpass;

      if (input_subpass->depth_stencil_attachment == replicate_attach_idx) {
         first_use = i;
         break;
      }
   }
   assert(first_use >= 0);

   /* For all subpasses from the first write. */
   for (uint32_t i = first_use; i < hw_render->subpass_count; i++) {
      struct pvr_renderpass_subpass *subpass = &ctx->subpasses[i];
      struct pvr_render_subpass *input_subpass = subpass->input_subpass;

      /* If the subpass writes to the attachment then enable z replication. */
      if (input_subpass->depth_stencil_attachment == replicate_attach_idx &&
          !subpass->z_replicate) {
         subpass->z_replicate = true;

         if (i != (hw_render->subpass_count - 1U)) {
            /* Copy the details of the storage for replicated value. */
            const VkResult result =
               pvr_copy_z_replicate_details(ctx,
                                            &ctx->hw_render->subpasses[i],
                                            subpass);
            if (result != VK_SUCCESS)
               return result;
         }
      }
   }

   if (!int_attach->z_replicate) {
      /* Add the storage for the replicated value to locations in use at each
       * subpass.
       */
      for (uint32_t i = first_use; i < (hw_render->subpass_count - 1U); i++) {
         struct pvr_renderpass_subpass *subpass = &ctx->subpasses[i];

         /* Propagate allocation failure: growing the tile buffer array in
          * pvr_mark_storage_allocated can fail with out-of-host-memory.
          */
         const VkResult result =
            pvr_mark_storage_allocated(ctx,
                                       &subpass->alloc,
                                       int_attach->attachment,
                                       replicate_dst);
         if (result != VK_SUCCESS)
            return result;
      }

      /* Add the depth attachment to the list of surfaces with allocated
       * storage.
       */
      pvr_make_surface_active(ctx, int_attach, first_use);

      int_attach->z_replicate = true;
   }

   return VK_SUCCESS;
}

static bool pvr_is_pending_resolve_dest(struct pvr_renderpass_context *ctx,
                                        uint32_t attach_idx)
{
   struct pvr_render_int_attachment *int_attach = &ctx->int_attach[attach_idx];

   return int_attach->last_resolve_dst_render != -1 &&
          int_attach->last_resolve_dst_render ==
             (int32_t)(ctx->hw_setup->render_count - 1U);
}

static bool pvr_is_pending_resolve_src(struct pvr_renderpass_context *ctx,
                                       uint32_t attach_idx)
{
   struct pvr_render_int_attachment *int_attach = &ctx->int_attach[attach_idx];

   return int_attach->last_resolve_src_render != -1 &&
          int_attach->last_resolve_src_render ==
             (int32_t)(ctx->hw_setup->render_count - 1U);
}

static bool pvr_exceeds_pbe_registers(struct pvr_renderpass_context *ctx,
                                      struct pvr_render_subpass *subpass)
{
   int32_t live_outputs[PVR_NUM_PBE_EMIT_REGS];
   uint32_t num_live_outputs = 0U;

   /* Count all color outputs so far. */
   for (uint32_t i = 0U; i < ctx->hw_render->subpass_count; i++) {
      struct pvr_render_subpass *input_subpass =
         ctx->subpasses[i].input_subpass;

      for (uint32_t j = 0U; j < input_subpass->color_count; j++) {
         const uint32_t global_color_attach =
            input_subpass->color_attachments[j];
         struct pvr_render_int_attachment *int_attach;
         bool found = false;

         if (global_color_attach == VK_ATTACHMENT_UNUSED)
            continue;

         int_attach = &ctx->int_attach[global_color_attach];

         if (int_attach->last_read <= (int32_t)subpass->index)
            continue;

         for (uint32_t k = 0U; k < num_live_outputs; k++) {
            if (live_outputs[k] == global_color_attach) {
               found = true;
               break;
            }
         }

         if (!found)
            live_outputs[num_live_outputs++] = global_color_attach;
      }
   }

   assert(num_live_outputs <= PVR_NUM_PBE_EMIT_REGS);

   /* Check if adding all the color outputs of the new subpass to the render
    * would exceed the limit.
    */
   for (uint32_t i = 0U; i < subpass->color_count; i++) {
      const uint32_t global_color_attach = subpass->color_attachments[i];
      struct pvr_render_int_attachment *int_attach;
      bool found = false;

      if (global_color_attach == VK_ATTACHMENT_UNUSED)
         continue;

      int_attach = &ctx->int_attach[global_color_attach];

      if (int_attach->last_read <= (int32_t)subpass->index)
         continue;

      for (uint32_t j = 0U; j < num_live_outputs; j++) {
         if (live_outputs[j] == global_color_attach) {
            found = true;
            break;
         }
      }

      if (!found) {
         if (num_live_outputs >= PVR_NUM_PBE_EMIT_REGS)
            return true;

         live_outputs[num_live_outputs++] = global_color_attach;
      }
   }

   return false;
}
1629
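/* Merge the usage bitset of one allocation buffer into another. */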
static void pvr_merge_alloc_buffer(struct pvr_renderpass_alloc_buffer *dst,
                                   struct pvr_renderpass_alloc_buffer *src)
{
   for (uint32_t i = 0U; i < ARRAY_SIZE(dst->allocs); i++)
      dst->allocs[i] |= src->allocs[i];
}

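/* Merge the allocation state in src into dst, growing dst's array of tile
 * buffers if src uses more of them.
 */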
static VkResult pvr_merge_alloc(struct pvr_renderpass_context *ctx,
                                struct pvr_renderpass_alloc *dst,
                                struct pvr_renderpass_alloc *src)
{
   pvr_merge_alloc_buffer(&dst->output_reg, &src->output_reg);

   dst->output_regs_count =
      MAX2(dst->output_regs_count, src->output_regs_count);

   if (dst->tile_buffers_count < src->tile_buffers_count) {
      struct pvr_renderpass_alloc_buffer *new_tile_buffers =
         vk_realloc(ctx->allocator,
                    dst->tile_buffers,
                    sizeof(dst->tile_buffers[0U]) * src->tile_buffers_count,
                    8U,
                    VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
      if (!new_tile_buffers)
         return vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);

      dst->tile_buffers = new_tile_buffers;
      memset(dst->tile_buffers + dst->tile_buffers_count,
             0U,
             sizeof(dst->tile_buffers[0U]) *
                (src->tile_buffers_count - dst->tile_buffers_count));
      dst->tile_buffers_count = src->tile_buffers_count;
   }

   for (uint32_t i = 0U; i < src->tile_buffers_count; i++)
      pvr_merge_alloc_buffer(&dst->tile_buffers[i], &src->tile_buffers[i]);

   return VK_SUCCESS;
}

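/* Check whether storage can be found for a replicated depth value: merge the
 * incoming allocation state with that of every subpass since the depth was
 * first written, try to allocate from what is left over and, on success,
 * mark the chosen storage as allocated in the incoming state.
 */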
static VkResult
pvr_is_z_replicate_space_available(const struct pvr_device_info *dev_info,
                                   struct pvr_renderpass_context *ctx,
                                   struct pvr_renderpass_alloc *alloc,
                                   uint32_t attach_idx,
                                   struct pvr_renderpass_resource *resource)
{
   struct pvr_renderpass_hwsetup_render *hw_render = ctx->hw_render;
   struct pvr_render_int_attachment *int_attach;
   struct pvr_renderpass_alloc combined_alloc;
   uint32_t first_use;
   VkResult result;

   /* If z replication was already enabled by a previous subpass then storage
    * will already be allocated.
    */
   assert(attach_idx < ctx->pass->attachment_count);

   int_attach = &ctx->int_attach[attach_idx];
   if (int_attach->z_replicate) {
      assert(int_attach->resource.type != USC_MRT_RESOURCE_TYPE_INVALID);
      return VK_SUCCESS;
   }

   /* Find the subpass where the depth is first written. */
   if (hw_render) {
      first_use = hw_render->subpass_count;
      for (uint32_t i = 0U; i < hw_render->subpass_count; i++) {
         struct pvr_renderpass_subpass *subpass = &ctx->subpasses[i];
         struct pvr_render_subpass *input_subpass = subpass->input_subpass;

         if (input_subpass->depth_stencil_attachment == (int32_t)attach_idx) {
            first_use = i;
            break;
         }
      }
   }

   /* Get the registers used in any subpass after the depth is first written.
    * Start with registers used in the incoming subpass.
    */
   result = pvr_copy_alloc(ctx, &combined_alloc, alloc);
   if (result != VK_SUCCESS)
      return result;

   if (hw_render) {
      /* Merge in registers used in previous subpasses. */
      for (uint32_t i = first_use; i < hw_render->subpass_count; i++) {
         struct pvr_renderpass_subpass *subpass = &ctx->subpasses[i];

         result = pvr_merge_alloc(ctx, &combined_alloc, &subpass->alloc);
         if (result != VK_SUCCESS) {
            pvr_free_alloc(ctx, &combined_alloc);
            return result;
         }
      }
   }

   result = pvr_surface_alloc_color_storage(dev_info,
                                            ctx,
                                            &combined_alloc,
                                            int_attach->attachment,
                                            resource);

   pvr_free_alloc(ctx, &combined_alloc);
   if (result != VK_SUCCESS)
      return result;

   return pvr_mark_storage_allocated(ctx,
                                     alloc,
                                     int_attach->attachment,
                                     resource);
}

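/* Check whether storage can be allocated for the results of the incoming
 * subpass: locations for its color attachments and, where a depth attachment
 * is also used as an input attachment, space for the replicated depth value.
 * On success the updated allocation state is returned in alloc and the
 * chosen locations in sp_dsts.
 */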
static VkResult
pvr_is_subpass_space_available(const struct pvr_device_info *dev_info,
                               struct pvr_renderpass_context *ctx,
                               struct pvr_render_subpass *subpass,
                               struct pvr_render_subpass_depth_params *sp_depth,
                               struct pvr_renderpass_alloc *alloc,
                               struct pvr_render_int_subpass_dsts *sp_dsts)
{
   VkResult result;

   /* Mark the pointers in the return structures as not allocated. */
   sp_dsts->color = NULL;
   alloc->tile_buffers = NULL;

   /* Allocate space for which locations are in use after this subpass. */
   result = pvr_copy_alloc(ctx, alloc, &ctx->alloc);
   if (result != VK_SUCCESS)
      return result;

   /* Allocate space to store our results. */
   if (subpass->color_count > 0U) {
      sp_dsts->color =
         vk_alloc(ctx->allocator,
                  sizeof(sp_dsts->color[0U]) * subpass->color_count,
                  8,
                  VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
      if (!sp_dsts->color) {
         result = vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);
         goto err_free_alloc;
      }
   } else {
      sp_dsts->color = NULL;
   }

   sp_dsts->existing_zrep.type = USC_MRT_RESOURCE_TYPE_INVALID;
   sp_dsts->incoming_zrep.type = USC_MRT_RESOURCE_TYPE_INVALID;

   for (uint32_t i = 0U; i < subpass->color_count; i++) {
      const uint32_t attach_idx = subpass->color_attachments[i];
      struct pvr_render_int_attachment *int_attach;

      if (attach_idx == VK_ATTACHMENT_UNUSED)
         continue;

      int_attach = &ctx->int_attach[attach_idx];

      assert(vk_format_get_blocksizebits(int_attach->attachment->vk_format) >
             0U);

      /* Does the attachment still need on-chip storage allocated? */
      if (int_attach->resource.type == USC_MRT_RESOURCE_TYPE_INVALID) {
         result = pvr_surface_alloc_color_storage(dev_info,
                                                  ctx,
                                                  alloc,
                                                  int_attach->attachment,
                                                  &sp_dsts->color[i]);
         if (result != VK_SUCCESS)
            goto err_free_alloc;

         /* Avoid merging subpasses which result in tile buffers having to be
          * used. The benefit of merging must be weighed against the cost of
          * writing to/reading from tile buffers.
          */
         if (ctx->hw_render &&
             sp_dsts->color[i].type != USC_MRT_RESOURCE_TYPE_OUTPUT_REG) {
            result = vk_error(NULL, VK_ERROR_TOO_MANY_OBJECTS);
            goto err_free_alloc;
         }
      } else {
         sp_dsts->color[i].type = USC_MRT_RESOURCE_TYPE_INVALID;
      }
   }

   if (sp_depth->existing_ds_is_input) {
      result = pvr_is_z_replicate_space_available(dev_info,
                                                  ctx,
                                                  alloc,
                                                  sp_depth->existing_ds_attach,
                                                  &sp_dsts->existing_zrep);
      if (result != VK_SUCCESS)
         goto err_free_alloc;
   }

   if (sp_depth->incoming_ds_is_input) {
      if (sp_depth->existing_ds_attach != subpass->depth_stencil_attachment) {
         result = pvr_is_z_replicate_space_available(
            dev_info,
            ctx,
            alloc,
            subpass->depth_stencil_attachment,
            &sp_dsts->incoming_zrep);
         if (result != VK_SUCCESS)
            goto err_free_alloc;
      } else {
         sp_dsts->incoming_zrep = sp_dsts->existing_zrep;
      }
   }

   return VK_SUCCESS;

err_free_alloc:
   pvr_free_alloc(ctx, alloc);
   if (sp_dsts->color)
      vk_free(ctx->allocator, sp_dsts->color);

   sp_dsts->color = NULL;

   return result;
}

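/* Check whether the incoming subpass can be merged into the render currently
 * being built. Returns false if a hardware or scheduling constraint forces a
 * new render; on success the storage allocated for the subpass's results is
 * returned in new_alloc and sp_dsts.
 */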
static bool
pvr_can_combine_with_render(const struct pvr_device_info *dev_info,
                            struct pvr_renderpass_context *ctx,
                            struct pvr_render_subpass *subpass,
                            struct pvr_render_subpass_depth_params *sp_depth,
                            struct pvr_render_int_attachment *int_ds_attach,
                            struct pvr_renderpass_alloc *new_alloc,
                            struct pvr_render_int_subpass_dsts *sp_dsts)
{
   VkResult result;
   bool ret;

   /* Mark the pointers in the return structures as not allocated. */
   sp_dsts->color = NULL;
   new_alloc->tile_buffers = NULL;

   /* The hardware doesn't support replicating the stencil, so we need to
    * store the depth to memory if a stencil attachment is used as an input
    * attachment.
    */
   if (sp_depth->existing_ds_is_input &&
       ctx->int_ds_attach->attachment->aspects & VK_IMAGE_ASPECT_STENCIL_BIT) {
      return false;
   }

   if (sp_depth->incoming_ds_is_input && int_ds_attach &&
       int_ds_attach->attachment->aspects & VK_IMAGE_ASPECT_STENCIL_BIT &&
       ctx->hw_render) {
      return false;
   }

   /* Can't mix multiple sample counts in the same render. */
   if (ctx->hw_render &&
       ctx->hw_render->sample_count != subpass->sample_count) {
      return false;
   }

   /* If the depth is used by both the render and the incoming subpass and
    * either the existing depth must be saved or the new depth must be loaded
    * then we can't merge.
    */
   ret = pvr_depth_zls_conflict(ctx,
                                int_ds_attach,
                                sp_depth->existing_ds_is_input);
   if (ret)
      return false;

   /* Check if any of the subpass's dependencies are marked such that the two
    * subpasses can't be in the same render.
    */
   for (uint32_t i = 0U; i < subpass->dep_count; i++) {
      const uint32_t dep = subpass->dep_list[i];
      if (subpass->flush_on_dep[i] && ctx->hw_setup->subpass_map[dep].render ==
                                         (ctx->hw_setup->render_count - 1U)) {
         return false;
      }
   }

   /* Check if one of the input/color attachments is written by an MSAA
    * resolve in an existing subpass in the current render.
    */
   for (uint32_t i = 0U; i < subpass->input_count; i++) {
      const uint32_t attach_idx = subpass->input_attachments[i];
      if (attach_idx != VK_ATTACHMENT_UNUSED &&
          pvr_is_pending_resolve_dest(ctx, attach_idx)) {
         return false;
      }
   }

   for (uint32_t i = 0U; i < subpass->color_count; i++) {
      if (subpass->color_attachments[i] != VK_ATTACHMENT_UNUSED &&
          (pvr_is_pending_resolve_dest(ctx, subpass->color_attachments[i]) ||
           pvr_is_pending_resolve_src(ctx, subpass->color_attachments[i]))) {
         return false;
      }

      if (subpass->resolve_attachments &&
          subpass->resolve_attachments[i] != VK_ATTACHMENT_UNUSED &&
          pvr_is_pending_resolve_dest(ctx, subpass->resolve_attachments[i])) {
         return false;
      }
   }

   /* A single subpass on its own can't exceed the PBE register limit, so only
    * check when merging into an existing render.
    */
   if (ctx->hw_render) {
      ret = pvr_exceeds_pbe_registers(ctx, subpass);
      if (ret)
         return false;
   }

   /* Check we can allocate storage for the new subpass's color attachments
    * and any z replications.
    */
   result = pvr_is_subpass_space_available(dev_info,
                                           ctx,
                                           subpass,
                                           sp_depth,
                                           new_alloc,
                                           sp_dsts);
   if (result != VK_SUCCESS)
      return false;

   return true;
}

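/* Add the incoming subpass to the render currently being built, first
 * closing the render and starting a new one if the subpass can't be merged.
 * The new hardware subpass is returned in hw_subpass_out.
 */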
static VkResult
pvr_merge_subpass(const struct pvr_device *device,
                  struct pvr_renderpass_context *ctx,
                  struct pvr_render_subpass *input_subpass,
                  struct pvr_renderpass_hwsetup_subpass **const hw_subpass_out)
{
   struct pvr_renderpass_hwsetup_subpass *new_hw_subpasses;
   struct pvr_renderpass_hwsetup_subpass *hw_subpass;
   struct pvr_render_int_attachment *int_ds_attach;
   struct pvr_renderpass_hwsetup_render *hw_render;
   struct pvr_render_subpass_depth_params sp_depth;
   struct pvr_renderpass_subpass *new_subpasses;
   struct pvr_render_int_subpass_dsts sp_dsts;
   struct pvr_renderpass_subpass *subpass;
   struct pvr_renderpass_alloc alloc;
   VkResult result;
   bool ret;

   /* Depth attachment for the incoming subpass. */
   if (input_subpass->depth_stencil_attachment != VK_ATTACHMENT_UNUSED)
      int_ds_attach = &ctx->int_attach[input_subpass->depth_stencil_attachment];
   else
      int_ds_attach = NULL;

   /* Attachment ID for the existing depth attachment. */
   if (ctx->int_ds_attach)
      sp_depth.existing_ds_attach = ctx->int_ds_attach - ctx->int_attach;
   else
      sp_depth.existing_ds_attach = VK_ATTACHMENT_UNUSED;

   /* Is the depth attachment of the incoming subpass also used as one of its
    * input attachments?
    */
   sp_depth.incoming_ds_is_input =
      pvr_is_input(input_subpass, input_subpass->depth_stencil_attachment);

   /* Is the current depth attachment used as an input to the incoming
    * subpass?
    */
   sp_depth.existing_ds_is_input =
      pvr_is_input(input_subpass, sp_depth.existing_ds_attach);

   /* Can the incoming subpass be combined with the existing render? This also
    * checks whether space is available for the subpass results and returns
    * the allocated locations.
    */
   ret = pvr_can_combine_with_render(&device->pdevice->dev_info,
                                     ctx,
                                     input_subpass,
                                     &sp_depth,
                                     int_ds_attach,
                                     &alloc,
                                     &sp_dsts);
   if (!ret) {
      result = pvr_close_render(device, ctx);
      if (result != VK_SUCCESS)
         goto end_merge_subpass;

      sp_depth.existing_ds_is_input = false;
      sp_depth.existing_ds_attach = VK_ATTACHMENT_UNUSED;

      /* Allocate again in a new render. */
      result = pvr_is_subpass_space_available(&device->pdevice->dev_info,
                                              ctx,
                                              input_subpass,
                                              &sp_depth,
                                              &alloc,
                                              &sp_dsts);
      assert(result != VK_ERROR_TOO_MANY_OBJECTS);
      if (result != VK_SUCCESS)
         goto end_merge_subpass;
   }

   /* If there isn't an in-progress render then allocate one. */
   if (!ctx->hw_render) {
      struct pvr_renderpass_hwsetup *hw_setup = ctx->hw_setup;
      struct pvr_renderpass_hwsetup_render *new_hw_render = vk_realloc(
         ctx->allocator,
         hw_setup->renders,
         sizeof(hw_setup->renders[0U]) * (hw_setup->render_count + 1U),
         8U,
         VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
      if (!new_hw_render) {
         result = vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);
         goto end_merge_subpass;
      }

      hw_setup->renders = new_hw_render;

      ctx->hw_render = &hw_setup->renders[hw_setup->render_count];
      memset(ctx->hw_render, 0U, sizeof(*hw_render));
      ctx->hw_render->ds_attach_idx = VK_ATTACHMENT_UNUSED;
      hw_setup->render_count++;
      ctx->hw_render->depth_init = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
      ctx->hw_render->stencil_init = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
      ctx->hw_render->sample_count = input_subpass->sample_count;
   }

   /* Allocate a new subpass in the in-progress render. */
   hw_render = ctx->hw_render;

   new_hw_subpasses = vk_realloc(ctx->allocator,
                                 hw_render->subpasses,
                                 sizeof(hw_render->subpasses[0U]) *
                                    (hw_render->subpass_count + 1U),
                                 8U,
                                 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (!new_hw_subpasses) {
      result = vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);
      goto end_merge_subpass;
   }

   hw_render->subpasses = new_hw_subpasses;
   hw_subpass = &hw_render->subpasses[hw_render->subpass_count];

   new_subpasses =
      vk_realloc(ctx->allocator,
                 ctx->subpasses,
                 sizeof(ctx->subpasses[0U]) * (hw_render->subpass_count + 1U),
                 8U,
                 VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
   if (!new_subpasses) {
      result = vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);
      goto end_merge_subpass;
   }

   ctx->subpasses = new_subpasses;

   subpass = &ctx->subpasses[hw_render->subpass_count];
   subpass->input_subpass = input_subpass;
   subpass->z_replicate = false;

   /* Save the allocation state at the subpass. */
   result = pvr_copy_alloc(ctx, &subpass->alloc, &alloc);
   if (result != VK_SUCCESS)
      goto end_merge_subpass;

   hw_render->subpass_count++;

   memset(hw_subpass, 0U, sizeof(*hw_subpass));
   hw_subpass->index = input_subpass->index;
   hw_subpass->z_replicate = -1;
   hw_subpass->depth_initop = VK_ATTACHMENT_LOAD_OP_DONT_CARE;

   if (int_ds_attach && ctx->int_ds_attach != int_ds_attach) {
      bool setup_render_ds = false;
      bool stencil_load = false;
      bool depth_load = false;

      if (int_ds_attach->load_op == VK_ATTACHMENT_LOAD_OP_LOAD) {
         depth_load = true;
         setup_render_ds = true;
         hw_render->depth_init = VK_ATTACHMENT_LOAD_OP_LOAD;
         hw_subpass->depth_initop = VK_ATTACHMENT_LOAD_OP_LOAD;

         assert(!ctx->ds_load_surface);
         ctx->ds_load_surface = int_ds_attach;
      } else if (int_ds_attach->load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) {
         hw_subpass->depth_initop = VK_ATTACHMENT_LOAD_OP_CLEAR;
      }

      if (int_ds_attach->attachment->aspects & VK_IMAGE_ASPECT_STENCIL_BIT) {
         if (int_ds_attach->stencil_load_op == VK_ATTACHMENT_LOAD_OP_LOAD) {
            stencil_load = true;
            setup_render_ds = true;
            hw_render->stencil_init = VK_ATTACHMENT_LOAD_OP_LOAD;
         } else if (int_ds_attach->stencil_load_op ==
                    VK_ATTACHMENT_LOAD_OP_CLEAR) {
            hw_subpass->stencil_clear = true;
         }
      }

      /* If the depth is loaded then allocate external memory for the depth
       * attachment.
       */
      if (depth_load || stencil_load)
         pvr_mark_surface_alloc(ctx, int_ds_attach);

      if (setup_render_ds) {
         assert(hw_render->ds_attach_idx == VK_ATTACHMENT_UNUSED);
         hw_render->ds_attach_idx = int_ds_attach->attachment->index;
      }

      ctx->int_ds_attach = int_ds_attach;
   }

   /* Set up the initialization operations for subpasses. */
   hw_subpass->color_initops = vk_alloc(ctx->allocator,
                                        sizeof(hw_subpass->color_initops[0U]) *
                                           input_subpass->color_count,
                                        8,
                                        VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (!hw_subpass->color_initops) {
      result = vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);
      goto end_merge_subpass;
   }

   for (uint32_t i = 0U; i < input_subpass->color_count; i++) {
      const uint32_t attach_idx = input_subpass->color_attachments[i];
      struct pvr_render_int_attachment *int_attach;

      if (attach_idx == VK_ATTACHMENT_UNUSED)
         continue;

      int_attach = &ctx->int_attach[attach_idx];

      if (int_attach->first_use == -1) {
         hw_subpass->color_initops[i] = int_attach->load_op;

         /* If the attachment is loaded then off-chip memory must be
          * allocated for it.
          */
         if (int_attach->load_op == VK_ATTACHMENT_LOAD_OP_LOAD)
            pvr_mark_surface_alloc(ctx, int_attach);

         /* The attachment has been written so load the attachment the next
          * time it is referenced.
          */
         int_attach->load_op = VK_ATTACHMENT_LOAD_OP_LOAD;
      } else {
         hw_subpass->color_initops[i] = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
      }
   }

   /* Copy the destinations allocated for the color attachments. */
   for (uint32_t i = 0U; i < input_subpass->color_count; i++) {
      const uint32_t attach_idx = input_subpass->color_attachments[i];
      struct pvr_render_int_attachment *int_attach;
      struct pvr_renderpass_resource *attach_dst;

      if (attach_idx == VK_ATTACHMENT_UNUSED)
         continue;

      int_attach = &ctx->int_attach[attach_idx];
      attach_dst = &sp_dsts.color[i];

      if (int_attach->first_use == -1) {
         assert(int_attach->resource.type == USC_MRT_RESOURCE_TYPE_INVALID);
         assert(attach_dst->type != USC_MRT_RESOURCE_TYPE_INVALID);
         pvr_set_surface_resource(int_attach, attach_dst);

         /* If this attachment is being used for the first time then add it
          * to the active list.
          */
         pvr_make_surface_active(ctx,
                                 int_attach,
                                 hw_render->subpass_count - 1U);
      } else {
         assert(attach_dst->type == USC_MRT_RESOURCE_TYPE_INVALID);
      }
   }

   /* We can't directly read the on-chip depth so mark subpasses where the
    * depth is written to replicate the value into part of the color storage.
    */
   if (sp_depth.existing_ds_is_input) {
      result = pvr_enable_z_replicate(ctx,
                                      hw_render,
                                      sp_depth.existing_ds_attach,
                                      &sp_dsts.existing_zrep);
      if (result != VK_SUCCESS)
         goto end_merge_subpass;
   }

   if (sp_depth.incoming_ds_is_input) {
      if (input_subpass->depth_stencil_attachment !=
          sp_depth.existing_ds_attach) {
         result =
            pvr_enable_z_replicate(ctx,
                                   hw_render,
                                   input_subpass->depth_stencil_attachment,
                                   &sp_dsts.incoming_zrep);
         if (result != VK_SUCCESS)
            goto end_merge_subpass;
      } else {
         assert(pvr_equal_resources(&sp_dsts.existing_zrep,
                                    &sp_dsts.incoming_zrep));
      }
   }

   /* Copy the locations of color/input attachments to the output structure.
    * N.B. Need to do this after Z replication in case the replicated depth is
    * an input attachment for the incoming subpass.
    */
   result = pvr_copy_storage_details(ctx, hw_subpass, subpass);
   if (result != VK_SUCCESS)
      goto end_merge_subpass;

   if (subpass->z_replicate) {
      result = pvr_copy_z_replicate_details(ctx, hw_subpass, subpass);
      if (result != VK_SUCCESS)
         goto end_merge_subpass;
   }

   /* Copy the allocation at the subpass. This will be updated later if this
    * was the last use of any attachment.
    */
   pvr_free_alloc(ctx, &ctx->alloc);
   ctx->alloc = alloc;

   /* Free the information about subpass destinations. */
   if (sp_dsts.color)
      vk_free(ctx->allocator, sp_dsts.color);

   *hw_subpass_out = hw_subpass;

   return VK_SUCCESS;

end_merge_subpass:
   if (sp_dsts.color)
      vk_free(ctx->allocator, sp_dsts.color);

   pvr_free_alloc(ctx, &alloc);

   return result;
}

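/* Release the references held by a subpass's color output list on their
 * attachments once the subpass has been scheduled.
 */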
static void
pvr_dereference_color_output_list(struct pvr_renderpass_context *ctx,
                                  uint32_t subpass_num,
                                  struct pvr_render_subpass *subpass)
{
   for (uint32_t i = 0U; i < subpass->color_count; i++) {
      const uint32_t attach_idx = subpass->color_attachments[i];

      if (attach_idx != VK_ATTACHMENT_UNUSED)
         pvr_dereference_surface(ctx, attach_idx, subpass_num);
   }
}

static void pvr_dereference_surface_list(struct pvr_renderpass_context *ctx,
                                         uint32_t subpass_num,
                                         uint32_t *attachments,
                                         uint32_t count)
{
   for (uint32_t i = 0U; i < count; i++) {
      if (attachments[i] != VK_ATTACHMENT_UNUSED)
         pvr_dereference_surface(ctx, attachments[i], subpass_num);
   }
}

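/* Schedule a subpass: merge it into a render, record where it was placed,
 * release the references it held on its attachments and mark any MSAA
 * resolves it performs.
 */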
static VkResult pvr_schedule_subpass(const struct pvr_device *device,
                                     struct pvr_renderpass_context *ctx,
                                     uint32_t subpass_idx)
{
   struct pvr_renderpass_hwsetup_subpass *hw_subpass;
   struct pvr_renderpass_hwsetup_render *hw_render;
   struct pvr_render_int_subpass *int_subpass;
   struct pvr_render_subpass *subpass;
   uint32_t subpass_num;
   VkResult result;

   int_subpass = &ctx->int_subpasses[subpass_idx];
   subpass = int_subpass->subpass;

   result = pvr_merge_subpass(device, ctx, subpass, &hw_subpass);
   if (result != VK_SUCCESS)
      return result;

   hw_render = ctx->hw_render;
   subpass_num = hw_render->subpass_count - 1U;

   /* Record where the subpass was scheduled. */
   ctx->hw_setup->subpass_map[subpass_idx].render =
      ctx->hw_setup->render_count - 1U;
   ctx->hw_setup->subpass_map[subpass_idx].subpass = subpass_num;

   /* Check whether this subpass was the last use of any attachments. */
   pvr_dereference_color_output_list(ctx, subpass_num, subpass);
   pvr_dereference_surface_list(ctx,
                                subpass_num,
                                subpass->input_attachments,
                                subpass->input_count);
   if (subpass->depth_stencil_attachment != VK_ATTACHMENT_UNUSED) {
      struct pvr_render_int_attachment *int_depth_attach =
         &ctx->int_attach[subpass->depth_stencil_attachment];

      assert(int_depth_attach->remaining_count > 0U);
      int_depth_attach->remaining_count--;

      if (int_depth_attach->remaining_count == 0U) {
         if (int_depth_attach->first_use != -1)
            int_depth_attach->last_use = subpass_num;

         if (int_depth_attach->z_replicate)
            pvr_free_surface_storage(ctx, int_depth_attach);
      }

      if (int_depth_attach->attachment->aspects & VK_IMAGE_ASPECT_STENCIL_BIT) {
         assert(int_depth_attach->stencil_remaining_count > 0U);
         int_depth_attach->stencil_remaining_count--;
      }

      /* The depth attachment has initialized data so load it from memory if
       * it is referenced again.
       */
      int_depth_attach->load_op = VK_ATTACHMENT_LOAD_OP_LOAD;
      int_depth_attach->stencil_load_op = VK_ATTACHMENT_LOAD_OP_LOAD;
   }

   /* Mark surfaces which have been the source or destination of an MSAA
    * resolve in the current render.
    */
   for (uint32_t i = 0U; i < subpass->color_count; i++) {
      struct pvr_render_int_attachment *resolve_src;
      struct pvr_render_int_attachment *resolve_dst;

      if (!subpass->resolve_attachments)
         break;

      if (subpass->resolve_attachments[i] == VK_ATTACHMENT_UNUSED)
         continue;

      assert(subpass->color_attachments[i] <
             (int32_t)ctx->pass->attachment_count);
      resolve_src = &ctx->int_attach[subpass->color_attachments[i]];

      assert(subpass->resolve_attachments[i] <
             (int32_t)ctx->pass->attachment_count);
      resolve_dst = &ctx->int_attach[subpass->resolve_attachments[i]];

      /* Mark the resolve source. */
      assert(resolve_src->last_resolve_src_render <
             (int32_t)(ctx->hw_setup->render_count - 1U));
      resolve_src->last_resolve_src_render = ctx->hw_setup->render_count - 1U;

      /* Mark the resolve destination. */
      assert(resolve_dst->last_resolve_dst_render <
             (int32_t)(ctx->hw_setup->render_count - 1U));
      resolve_dst->last_resolve_dst_render = ctx->hw_setup->render_count - 1U;

      /* If we can't downscale through the PBE then the source must be stored
       * so the downscale can be done by a transfer.
       */
      if (!resolve_src->is_pbe_downscalable &&
          resolve_src->last_read < (int32_t)ctx->pass->subpass_count) {
         resolve_src->last_read = (int32_t)ctx->pass->subpass_count;
         resolve_src->remaining_count++;
      }
   }

   /* For each subpass dependent on this subpass, decrement the count of
    * unscheduled dependencies.
    */
   for (uint32_t i = 0U; i < int_subpass->out_subpass_count; i++) {
      struct pvr_render_int_subpass *int_dst_subpass =
         int_subpass->out_subpasses[i];

      assert(int_dst_subpass->in_subpass_count > 0U);
      int_dst_subpass->in_subpass_count--;
   }

   return VK_SUCCESS;
}

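/* Count how many times attach_idx appears in a list of attachments. */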
static uint32_t pvr_count_uses_in_list(uint32_t *attachments,
                                       uint32_t size,
                                       uint32_t attach_idx)
{
   uint32_t count = 0U;

   for (uint32_t i = 0U; i < size; i++) {
      if (attachments[i] == attach_idx)
         count++;
   }

   return count;
}

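/* Count how many times attach_idx appears in a subpass's color output list,
 * counting an extra use for each matching output which is also resolved.
 */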
static uint32_t
pvr_count_uses_in_color_output_list(struct pvr_render_subpass *subpass,
                                    uint32_t attach_idx)
{
   uint32_t count = 0U;

   for (uint32_t i = 0U; i < subpass->color_count; i++) {
      if (subpass->color_attachments[i] == attach_idx) {
         count++;

         if (subpass->resolve_attachments &&
             subpass->resolve_attachments[i] != VK_ATTACHMENT_UNUSED)
            count++;
      }
   }

   return count;
}

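/* Free a hardware render pass setup and everything hanging off it. */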
void pvr_destroy_renderpass_hwsetup(const VkAllocationCallbacks *alloc,
                                    struct pvr_renderpass_hwsetup *hw_setup)
{
   for (uint32_t i = 0U; i < hw_setup->render_count; i++) {
      struct pvr_renderpass_hwsetup_render *hw_render = &hw_setup->renders[i];

      vk_free(alloc, hw_render->eot_surfaces);
      vk_free(alloc, hw_render->eot_setup.mrt_resources);
      vk_free(alloc, hw_render->init_setup.mrt_resources);
      vk_free(alloc, hw_render->color_init);

      for (uint32_t j = 0U; j < hw_render->subpass_count; j++) {
         struct pvr_renderpass_hwsetup_subpass *subpass =
            &hw_render->subpasses[j];

         vk_free(alloc, subpass->color_initops);
         vk_free(alloc, subpass->input_access);
         vk_free(alloc, subpass->setup.mrt_resources);
      }

      vk_free(alloc, hw_render->subpasses);
   }

   vk_free(alloc, hw_setup->renders);
   vk_free(alloc, hw_setup);
}

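/* Build the hardware render pass setup for a render pass: schedule subpasses
 * in dependency order, merging each into the current render where possible
 * and closing the render otherwise (or after every subpass if disable_merge
 * is set).
 */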
VkResult pvr_create_renderpass_hwsetup(
   struct pvr_device *device,
   const VkAllocationCallbacks *alloc,
   struct pvr_render_pass *pass,
   bool disable_merge,
   struct pvr_renderpass_hwsetup **const hw_setup_out)
{
   struct pvr_render_int_attachment *int_attachments;
   struct pvr_render_int_subpass *int_subpasses;
   struct pvr_renderpass_hw_map *subpass_map;
   struct pvr_renderpass_hwsetup *hw_setup;
   struct pvr_renderpass_context *ctx;
   bool *surface_allocate;
   VkResult result;

   VK_MULTIALLOC(ma);
   vk_multialloc_add(&ma, &hw_setup, __typeof__(*hw_setup), 1);
   vk_multialloc_add(&ma,
                     &surface_allocate,
                     __typeof__(*surface_allocate),
                     pass->attachment_count);
   vk_multialloc_add(&ma,
                     &subpass_map,
                     __typeof__(*subpass_map),
                     pass->subpass_count);

   if (!vk_multialloc_zalloc(&ma, alloc, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT))
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   hw_setup->surface_allocate = surface_allocate;
   hw_setup->subpass_map = subpass_map;

   VK_MULTIALLOC(ma_ctx);
   vk_multialloc_add(&ma_ctx, &ctx, __typeof__(*ctx), 1);
   vk_multialloc_add(&ma_ctx,
                     &int_attachments,
                     __typeof__(*int_attachments),
                     pass->attachment_count);
   vk_multialloc_add(&ma_ctx,
                     &int_subpasses,
                     __typeof__(*int_subpasses),
                     pass->subpass_count);

   if (!vk_multialloc_zalloc(&ma_ctx,
                             alloc,
                             VK_SYSTEM_ALLOCATION_SCOPE_COMMAND)) {
      vk_free(alloc, hw_setup);
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
   }

   ctx->pass = pass;
   ctx->hw_setup = hw_setup;
   ctx->int_attach = int_attachments;
   ctx->int_subpasses = int_subpasses;
   ctx->allocator = alloc;

   for (uint32_t i = 0U; i < pass->attachment_count; i++) {
      struct pvr_render_pass_attachment *attachment = &pass->attachments[i];
      struct pvr_render_int_attachment *int_attach = &ctx->int_attach[i];
      const uint32_t pixel_size =
         vk_format_get_blocksizebits(attachment->vk_format) / 32U;
      const uint32_t part_bits =
         vk_format_get_blocksizebits(attachment->vk_format) % 32U;

      int_attach->resource.type = USC_MRT_RESOURCE_TYPE_INVALID;
      int_attach->resource.intermediate_size =
         DIV_ROUND_UP(vk_format_get_blocksizebits(attachment->vk_format),
                      CHAR_BIT);
      int_attach->resource.mrt_desc.intermediate_size =
         int_attach->resource.intermediate_size;

      for (uint32_t j = 0U; j < pixel_size; j++)
         int_attach->resource.mrt_desc.valid_mask[j] = ~0;

      if (part_bits > 0U) {
         int_attach->resource.mrt_desc.valid_mask[pixel_size] =
            BITFIELD_MASK(part_bits);
      }

      int_attach->load_op = pass->attachments[i].load_op;
      int_attach->stencil_load_op = pass->attachments[i].stencil_load_op;
      int_attach->attachment = attachment;
      int_attach->first_use = -1;
      int_attach->last_use = -1;
      int_attach->last_read = -1;
      int_attach->mrt_idx = -1;
      int_attach->last_resolve_dst_render = -1;
      int_attach->last_resolve_src_render = -1;
      int_attach->z_replicate = false;
      int_attach->is_pbe_downscalable = attachment->is_pbe_downscalable;

      /* Count the number of references to this attachment in subpasses. */
      for (uint32_t j = 0U; j < pass->subpass_count; j++) {
         struct pvr_render_subpass *subpass = &pass->subpasses[j];
         const uint32_t color_output_uses =
            pvr_count_uses_in_color_output_list(subpass, i);
         const uint32_t input_attachment_uses =
            pvr_count_uses_in_list(subpass->input_attachments,
                                   subpass->input_count,
                                   i);

         if (color_output_uses != 0U || input_attachment_uses != 0U)
            int_attach->last_read = j;

         int_attach->remaining_count +=
            color_output_uses + input_attachment_uses;

         if ((uint32_t)subpass->depth_stencil_attachment == i)
            int_attach->remaining_count++;
      }

      if (int_attach->attachment->aspects & VK_IMAGE_ASPECT_STENCIL_BIT) {
         int_attach->stencil_remaining_count = int_attach->remaining_count;
         if (pass->attachments[i].stencil_store_op ==
             VK_ATTACHMENT_STORE_OP_STORE) {
            int_attach->stencil_remaining_count++;
         }
      }

      if (pass->attachments[i].store_op == VK_ATTACHMENT_STORE_OP_STORE) {
         int_attach->remaining_count++;
         int_attach->last_read = pass->subpass_count;
      }
   }

   for (uint32_t i = 0U; i < pass->subpass_count; i++) {
      struct pvr_render_int_subpass *int_subpass = &ctx->int_subpasses[i];

      int_subpass->subpass = &pass->subpasses[i];
      int_subpass->out_subpass_count = 0U;
      int_subpass->out_subpasses = NULL;
      int_subpass->in_subpass_count = int_subpass->subpass->dep_count;
   }

   /* For each dependency of a subpass create an edge in the opposite
    * direction.
    */
   for (uint32_t i = 0U; i < pass->subpass_count; i++) {
      struct pvr_render_int_subpass *int_subpass = &ctx->int_subpasses[i];

      for (uint32_t j = 0U; j < int_subpass->in_subpass_count; j++) {
         uint32_t src_idx = int_subpass->subpass->dep_list[j];
         struct pvr_render_int_subpass *int_src_subpass;
         struct pvr_render_int_subpass **out_subpasses;

         assert(src_idx < pass->subpass_count);

         int_src_subpass = &ctx->int_subpasses[src_idx];

         out_subpasses =
            vk_realloc(ctx->allocator,
                       int_src_subpass->out_subpasses,
                       sizeof(int_src_subpass->out_subpasses[0U]) *
                          (int_src_subpass->out_subpass_count + 1U),
                       8U,
                       VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
         if (!out_subpasses) {
            result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
            goto end_create_renderpass_hwsetup;
         }

         int_src_subpass->out_subpasses = out_subpasses;
         int_src_subpass->out_subpasses[int_src_subpass->out_subpass_count] =
            int_subpass;
         int_src_subpass->out_subpass_count++;
      }
   }

   pvr_reset_render(ctx);

   for (uint32_t i = 0U; i < pass->subpass_count; i++) {
      uint32_t j;

      /* Find a subpass with no unscheduled dependencies. */
      for (j = 0U; j < pass->subpass_count; j++) {
         struct pvr_render_int_subpass *int_subpass = &ctx->int_subpasses[j];

         if (int_subpass->subpass && int_subpass->in_subpass_count == 0U)
            break;
      }
      assert(j < pass->subpass_count);

      result = pvr_schedule_subpass(device, ctx, j);
      if (result != VK_SUCCESS)
         goto end_create_renderpass_hwsetup;

      if (disable_merge) {
         result = pvr_close_render(device, ctx);
         if (result != VK_SUCCESS)
            goto end_create_renderpass_hwsetup;
      }

      ctx->int_subpasses[j].subpass = NULL;
   }

   /* Finalise the last in-progress render. */
   result = pvr_close_render(device, ctx);

end_create_renderpass_hwsetup:
   if (result != VK_SUCCESS) {
      pvr_free_render(ctx);

      if (hw_setup) {
         pvr_destroy_renderpass_hwsetup(alloc, hw_setup);
         hw_setup = NULL;
      }
   }

   for (uint32_t i = 0U; i < pass->subpass_count; i++) {
      struct pvr_render_int_subpass *int_subpass = &ctx->int_subpasses[i];

      if (int_subpass->out_subpass_count > 0U)
         vk_free(alloc, int_subpass->out_subpasses);
   }

   vk_free(alloc, ctx);

   *hw_setup_out = hw_setup;

   return result;
}