/*
 * Copyright © 2022 Imagination Technologies Ltd.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <stdbool.h>
#include <stdint.h>

#include "hwdef/rogue_hw_utils.h"
#include "pvr_bo.h"
#include "pvr_device_info.h"
#include "pvr_formats.h"
#include "pvr_hw_pass.h"
#include "pvr_pds.h"
#include "pvr_private.h"
#include "pvr_types.h"
#include "pvr_usc_fragment_shader.h"
#include "util/macros.h"
#include "rogue/rogue.h"
#include "vk_alloc.h"
#include "vk_format.h"
#include "vk_log.h"
#include "vk_render_pass.h"

/*****************************************************************************
   PDS pre-baked program generation parameters and variables.
*****************************************************************************/
/* These would normally be produced by the compiler or other code. We're using
 * them for now just to speed things up. All of these should eventually be
 * removed.
 */

static const struct {
   /* Number of temporaries used by the shader. */
   uint32_t temp_count;
   enum rogue_msaa_mode msaa_mode;
   /* Indicates the presence of a PHAS instruction. */
   bool has_phase_rate_change;
} pvr_pds_fragment_program_params = {
   .temp_count = 0,
   .msaa_mode = ROGUE_MSAA_MODE_PIXEL,
   .has_phase_rate_change = false,
};

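/* Returns true if any of the subpass's input attachments is multisampled.
 * Used below when building the per-subpass dependency lists to decide whether
 * a dependency must be flushed.
 */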
static inline bool pvr_subpass_has_msaa_input_attachment(
   struct pvr_render_subpass *subpass,
   const VkRenderPassCreateInfo2 *pCreateInfo)
{
   for (uint32_t i = 0; i < subpass->input_count; i++) {
      const uint32_t attachment = subpass->input_attachments[i];

      if (pCreateInfo->pAttachments[attachment].samples > 1)
         return true;
   }

   return false;
}

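/* Decides whether the load ops of the first subpass must be flushed before the
 * rest of the render runs, so that HSR cannot obscure and eliminate their
 * results.
 */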
static bool pvr_is_subpass_initops_flush_needed(
   const struct pvr_render_pass *pass,
   const struct pvr_renderpass_hwsetup_render *hw_render)
{
   struct pvr_render_subpass *subpass = &pass->subpasses[0];
   uint32_t render_loadop_mask = 0;
   uint32_t color_attachment_mask;

   for (uint32_t i = 0; i < hw_render->color_init_count; i++) {
      if (hw_render->color_init[i].op != VK_ATTACHMENT_LOAD_OP_DONT_CARE)
         render_loadop_mask |= (1 << hw_render->color_init[i].index);
   }

   /* If there are no load ops then there's nothing to flush. */
   if (render_loadop_mask == 0)
      return false;

   /* If the first subpass has any input attachments, they need to be
    * initialized with the result of the load op. Since the input attachment
    * may be read from fragments with an opaque pass type, the load ops must be
    * flushed or else they would be obscured and eliminated by HSR.
    */
   if (subpass->input_count != 0)
      return true;

   color_attachment_mask = 0;

   for (uint32_t i = 0; i < subpass->color_count; i++) {
      const uint32_t color_idx = subpass->color_attachments[i];

      if (color_idx != VK_ATTACHMENT_UNUSED)
         color_attachment_mask |= (1 << pass->attachments[color_idx].index);
   }

   /* If the first subpass does not write to all attachments which have a load
    * op then the load ops need to be flushed to ensure they don't get obscured
    * and removed by HSR.
    */
   return (render_loadop_mask & color_attachment_mask) != render_loadop_mask;
}

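/* Assigns each subpass an ISP userpass value. The value restarts at 0 (or 1
 * when the initial load ops need flushing) for every HW render and is masked
 * to the range of the CR_ISP_CTL UPASS_START field.
 */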
static void
pvr_init_subpass_isp_userpass(struct pvr_renderpass_hwsetup *hw_setup,
                              struct pvr_render_pass *pass,
                              struct pvr_render_subpass *subpasses)
{
   uint32_t subpass_idx = 0;

   for (uint32_t i = 0; i < hw_setup->render_count; i++) {
      struct pvr_renderpass_hwsetup_render *hw_render = &hw_setup->renders[i];
      const uint32_t initial_isp_userpass =
         (uint32_t)pvr_is_subpass_initops_flush_needed(pass, hw_render);

      for (uint32_t j = 0; j < hw_render->subpass_count; j++) {
         subpasses[subpass_idx].isp_userpass =
            (j + initial_isp_userpass) & PVRX(CR_ISP_CTL_UPASS_START_SIZE_MAX);
         subpass_idx++;
      }
   }

   assert(subpass_idx == pass->subpass_count);
}

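/* Returns true if any render target in the initial setup is stored in USC
 * output registers rather than in tile buffers.
 */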
static inline bool pvr_has_output_register_writes(
   const struct pvr_renderpass_hwsetup_render *hw_render)
{
   for (uint32_t i = 0; i < hw_render->init_setup.num_render_targets; i++) {
      struct usc_mrt_resource *mrt_resource =
         &hw_render->init_setup.mrt_resources[i];

      if (mrt_resource->type == USC_MRT_RESOURCE_TYPE_OUTPUT_REG)
         return true;
   }

   return false;
}

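/* Generates and uploads a PDS uniform/texture state program with the given
 * number of texture and uniform DMA kicks. The PDS code is first sized, then
 * generated into a temporary staging buffer and uploaded to device memory.
 */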
VkResult pvr_pds_unitex_state_program_create_and_upload(
   struct pvr_device *device,
   const VkAllocationCallbacks *allocator,
   uint32_t texture_kicks,
   uint32_t uniform_kicks,
   struct pvr_pds_upload *const pds_upload_out)
{
   struct pvr_pds_pixel_shader_sa_program program = {
      .num_texture_dma_kicks = texture_kicks,
      .num_uniform_dma_kicks = uniform_kicks,
   };
   uint32_t staging_buffer_size;
   uint32_t *staging_buffer;
   VkResult result;

   pvr_pds_set_sizes_pixel_shader_uniform_texture_code(&program);

   staging_buffer_size = PVR_DW_TO_BYTES(program.code_size);

   staging_buffer = vk_alloc2(&device->vk.alloc,
                              allocator,
                              staging_buffer_size,
                              8U,
                              VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
   if (!staging_buffer)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   pvr_pds_generate_pixel_shader_sa_code_segment(&program, staging_buffer);

   /* FIXME: Figure out the define for alignment of 16. */
   result = pvr_gpu_upload_pds(device,
                               NULL,
                               0U,
                               0U,
                               staging_buffer,
                               program.code_size,
                               16U,
                               16U,
                               pds_upload_out);
   if (result != VK_SUCCESS) {
      vk_free2(&device->vk.alloc, allocator, staging_buffer);
      return result;
   }

   vk_free2(&device->vk.alloc, allocator, staging_buffer);

   return VK_SUCCESS;
}

/* TODO: pvr_create_subpass_load_op() and pvr_create_render_load_op() are quite
 * similar. See if we can deduplicate them.
 */
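/* Builds a pvr_load_op describing the per-render-target clears and loads (and
 * any depth replication) required at the start of a HW subpass.
 */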
static VkResult
pvr_create_subpass_load_op(struct pvr_device *device,
                           const VkAllocationCallbacks *allocator,
                           const struct pvr_render_pass *pass,
                           struct pvr_renderpass_hwsetup_render *hw_render,
                           uint32_t hw_subpass_idx,
                           struct pvr_load_op **const load_op_out)
{
   const struct pvr_renderpass_hwsetup_subpass *hw_subpass =
      &hw_render->subpasses[hw_subpass_idx];
   const struct pvr_render_subpass *subpass =
      &pass->subpasses[hw_subpass->index];

   struct pvr_load_op *load_op = vk_zalloc2(&device->vk.alloc,
                                            allocator,
                                            sizeof(*load_op),
                                            8,
                                            VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
   if (!load_op)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   load_op->clears_loads_state.depth_clear_to_reg = PVR_NO_DEPTH_CLEAR_TO_REG;

   if (hw_subpass->z_replicate != -1) {
      const int32_t z_replicate = hw_subpass->z_replicate;

      switch (hw_subpass->depth_initop) {
      case VK_ATTACHMENT_LOAD_OP_LOAD:
         assert(z_replicate < PVR_LOAD_OP_CLEARS_LOADS_MAX_RTS);
         load_op->clears_loads_state.rt_load_mask = BITFIELD_BIT(z_replicate);
         load_op->clears_loads_state.dest_vk_format[z_replicate] =
            VK_FORMAT_D32_SFLOAT;
         break;

      case VK_ATTACHMENT_LOAD_OP_CLEAR:
         load_op->clears_loads_state.depth_clear_to_reg = z_replicate;
         break;

      default:
         break;
      }
   }

   assert(subpass->color_count <= PVR_LOAD_OP_CLEARS_LOADS_MAX_RTS);
   for (uint32_t i = 0; i < subpass->color_count; i++) {
      const uint32_t attachment_idx = subpass->color_attachments[i];

      assert(attachment_idx < pass->attachment_count);
      load_op->clears_loads_state.dest_vk_format[i] =
         pass->attachments[attachment_idx].vk_format;

      if (pass->attachments[attachment_idx].sample_count > 1)
         load_op->clears_loads_state.unresolved_msaa_mask |= BITFIELD_BIT(i);

      if (hw_subpass->color_initops[i] == VK_ATTACHMENT_LOAD_OP_LOAD)
         load_op->clears_loads_state.rt_load_mask |= BITFIELD_BIT(i);
      else if (hw_subpass->color_initops[i] == VK_ATTACHMENT_LOAD_OP_CLEAR)
         load_op->clears_loads_state.rt_clear_mask |= BITFIELD_BIT(i);
   }

   load_op->is_hw_object = false;
   load_op->subpass = subpass;

   *load_op_out = load_op;

   return VK_SUCCESS;
}

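/* Builds a pvr_load_op for a whole HW render, covering the color attachment
 * load ops that run before the first subpass.
 */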
static VkResult
pvr_create_render_load_op(struct pvr_device *device,
                          const VkAllocationCallbacks *allocator,
                          const struct pvr_render_pass *pass,
                          const struct pvr_renderpass_hwsetup_render *hw_render,
                          struct pvr_load_op **const load_op_out)
{
   struct pvr_load_op *load_op = vk_zalloc2(&device->vk.alloc,
                                            allocator,
                                            sizeof(*load_op),
                                            8,
                                            VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
   if (!load_op)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   load_op->clears_loads_state.depth_clear_to_reg = PVR_NO_DEPTH_CLEAR_TO_REG;

   assert(hw_render->color_init_count <= PVR_LOAD_OP_CLEARS_LOADS_MAX_RTS);
   for (uint32_t i = 0; i < hw_render->color_init_count; i++) {
      struct pvr_renderpass_colorinit *color_init = &hw_render->color_init[i];

      assert(color_init->index < pass->attachment_count);
      load_op->clears_loads_state.dest_vk_format[i] =
         pass->attachments[color_init->index].vk_format;

      if (pass->attachments[color_init->index].sample_count > 1)
         load_op->clears_loads_state.unresolved_msaa_mask |= BITFIELD_BIT(i);

      if (color_init->op == VK_ATTACHMENT_LOAD_OP_LOAD)
         load_op->clears_loads_state.rt_load_mask |= BITFIELD_BIT(i);
      else if (color_init->op == VK_ATTACHMENT_LOAD_OP_CLEAR)
         load_op->clears_loads_state.rt_clear_mask |= BITFIELD_BIT(i);
   }

   load_op->is_hw_object = true;
   load_op->hw_render = hw_render;

   *load_op_out = load_op;

   return VK_SUCCESS;
}

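/* Uploads the pre-baked USC fragment shader used for load ops and creates the
 * PDS fragment and uniform/texture state programs that drive it. On failure,
 * any buffers uploaded so far are freed.
 */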
static VkResult
pvr_generate_load_op_shader(struct pvr_device *device,
                            const VkAllocationCallbacks *allocator,
                            struct pvr_renderpass_hwsetup_render *hw_render,
                            struct pvr_load_op *load_op)
{
   const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
   const uint32_t cache_line_size = rogue_get_slc_cache_line_size(dev_info);

   VkResult result = pvr_gpu_upload_usc(device,
                                        pvr_usc_fragment_shader,
                                        sizeof(pvr_usc_fragment_shader),
                                        cache_line_size,
                                        &load_op->usc_frag_prog_bo);
   if (result != VK_SUCCESS)
      return result;

   result = pvr_pds_fragment_program_create_and_upload(
      device,
      allocator,
      load_op->usc_frag_prog_bo,
      pvr_pds_fragment_program_params.temp_count,
      pvr_pds_fragment_program_params.msaa_mode,
      pvr_pds_fragment_program_params.has_phase_rate_change,
      &load_op->pds_frag_prog);
   if (result != VK_SUCCESS)
      goto err_free_usc_frag_prog_bo;

   result = pvr_pds_unitex_state_program_create_and_upload(
      device,
      allocator,
      1U,
      0U,
      &load_op->pds_tex_state_prog);
   if (result != VK_SUCCESS)
      goto err_free_pds_frag_prog;

   /* FIXME: These should be based on the USC and PDS programs, but are hard
    * coded for now.
    */
   load_op->const_shareds_count = 1;
   load_op->shareds_dest_offset = 0;
   load_op->shareds_count = 1;
   load_op->temps_count = 1;

   return VK_SUCCESS;

err_free_pds_frag_prog:
   pvr_bo_suballoc_free(load_op->pds_frag_prog.pvr_bo);

err_free_usc_frag_prog_bo:
   pvr_bo_suballoc_free(load_op->usc_frag_prog_bo);

   return result;
}

static void pvr_load_op_destroy(struct pvr_device *device,
                                const VkAllocationCallbacks *allocator,
                                struct pvr_load_op *load_op)
{
   pvr_bo_suballoc_free(load_op->pds_tex_state_prog.pvr_bo);
   pvr_bo_suballoc_free(load_op->pds_frag_prog.pvr_bo);
   pvr_bo_suballoc_free(load_op->usc_frag_prog_bo);
   vk_free2(&device->vk.alloc, allocator, load_op);
}

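/* Number of tile buffers available for SPM loads. Cores with the
 * eight_output_registers feature are assumed to need four fewer tile buffers,
 * since more data can be kept in output registers.
 */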
#define PVR_SPM_LOAD_IN_BUFFERS_COUNT(dev_info)              \
   ({                                                        \
      int __ret = PVR_MAX_TILE_BUFFER_COUNT;                 \
      if (PVR_HAS_FEATURE(dev_info, eight_output_registers)) \
         __ret -= 4U;                                        \
      __ret;                                                 \
   })

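/* Returns true if the HW subpass has any color or depth load/clear op that
 * requires a load op object (and hence a load op shader) to be created.
 */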
static bool
pvr_is_load_op_needed(const struct pvr_render_pass *pass,
                      struct pvr_renderpass_hwsetup_render *hw_render,
                      const uint32_t subpass_idx)
{
   struct pvr_renderpass_hwsetup_subpass *hw_subpass =
      &hw_render->subpasses[subpass_idx];
   const struct pvr_render_subpass *subpass =
      &pass->subpasses[hw_subpass->index];

   if (hw_subpass->z_replicate != -1 &&
       (hw_subpass->depth_initop == VK_ATTACHMENT_LOAD_OP_LOAD ||
        hw_subpass->depth_initop == VK_ATTACHMENT_LOAD_OP_CLEAR)) {
      return true;
   }

   for (uint32_t i = 0; i < subpass->color_count; i++) {
      if (subpass->color_attachments[i] == VK_ATTACHMENT_UNUSED)
         continue;

      if (hw_subpass->color_initops[i] == VK_ATTACHMENT_LOAD_OP_LOAD ||
          hw_subpass->color_initops[i] == VK_ATTACHMENT_LOAD_OP_CLEAR) {
         return true;
      }
   }

   return false;
}

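/* Creates the render pass object: copies the attachment and subpass
 * descriptions, records inter-subpass dependencies, builds the HW render
 * setup, and creates the load op objects and shaders for each HW render and
 * subpass that needs them.
 */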
VkResult pvr_CreateRenderPass2(VkDevice _device,
                               const VkRenderPassCreateInfo2 *pCreateInfo,
                               const VkAllocationCallbacks *pAllocator,
                               VkRenderPass *pRenderPass)
{
   struct pvr_render_pass_attachment *attachments;
   PVR_FROM_HANDLE(pvr_device, device, _device);
   struct pvr_render_subpass *subpasses;
   const VkAllocationCallbacks *alloc;
   size_t subpass_attachment_count;
   uint32_t *subpass_attachments;
   struct pvr_render_pass *pass;
   uint32_t *dep_list;
   bool *flush_on_dep;
   VkResult result;

   alloc = pAllocator ? pAllocator : &device->vk.alloc;

   VK_MULTIALLOC(ma);
   vk_multialloc_add(&ma, &pass, __typeof__(*pass), 1);
   vk_multialloc_add(&ma,
                     &attachments,
                     __typeof__(*attachments),
                     pCreateInfo->attachmentCount);
   vk_multialloc_add(&ma,
                     &subpasses,
                     __typeof__(*subpasses),
                     pCreateInfo->subpassCount);

   subpass_attachment_count = 0;
   for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) {
      const VkSubpassDescription2 *desc = &pCreateInfo->pSubpasses[i];
      subpass_attachment_count +=
         desc->inputAttachmentCount + desc->colorAttachmentCount +
         (desc->pResolveAttachments ? desc->colorAttachmentCount : 0);
   }

   vk_multialloc_add(&ma,
                     &subpass_attachments,
                     __typeof__(*subpass_attachments),
                     subpass_attachment_count);
   vk_multialloc_add(&ma,
                     &dep_list,
                     __typeof__(*dep_list),
                     pCreateInfo->dependencyCount);
   vk_multialloc_add(&ma,
                     &flush_on_dep,
                     __typeof__(*flush_on_dep),
                     pCreateInfo->dependencyCount);

   if (!vk_multialloc_zalloc(&ma, alloc, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT))
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   vk_object_base_init(&device->vk, &pass->base, VK_OBJECT_TYPE_RENDER_PASS);
   pass->attachment_count = pCreateInfo->attachmentCount;
   pass->attachments = attachments;
   pass->subpass_count = pCreateInfo->subpassCount;
   pass->subpasses = subpasses;
   pass->max_sample_count = 1;

   /* Copy attachment descriptions. */
   for (uint32_t i = 0; i < pass->attachment_count; i++) {
      const VkAttachmentDescription2 *desc = &pCreateInfo->pAttachments[i];
      struct pvr_render_pass_attachment *attachment = &pass->attachments[i];

      pvr_assert(!(desc->flags & ~VK_ATTACHMENT_DESCRIPTION_MAY_ALIAS_BIT));

      attachment->load_op = desc->loadOp;
      attachment->store_op = desc->storeOp;

      attachment->aspects = vk_format_aspects(desc->format);
      if (attachment->aspects & VK_IMAGE_ASPECT_STENCIL_BIT) {
         attachment->stencil_load_op = desc->stencilLoadOp;
         attachment->stencil_store_op = desc->stencilStoreOp;
      }

      attachment->vk_format = desc->format;
      attachment->sample_count = desc->samples;
      attachment->initial_layout = desc->initialLayout;
      attachment->is_pbe_downscalable =
         pvr_format_is_pbe_downscalable(attachment->vk_format);
      attachment->index = i;

      if (attachment->sample_count > pass->max_sample_count)
         pass->max_sample_count = attachment->sample_count;
   }

   /* Count how many dependencies each subpass has. */
   for (uint32_t i = 0; i < pCreateInfo->dependencyCount; i++) {
      const VkSubpassDependency2 *dep = &pCreateInfo->pDependencies[i];

      if (dep->srcSubpass != VK_SUBPASS_EXTERNAL &&
          dep->dstSubpass != VK_SUBPASS_EXTERNAL &&
          dep->srcSubpass != dep->dstSubpass) {
         pass->subpasses[dep->dstSubpass].dep_count++;
      }
   }

   /* Assign reference pointers to lists and fill in the attachment lists. We
    * need to re-walk the dependencies array later to fill in the per-subpass
    * dependency lists.
    */
   for (uint32_t i = 0; i < pass->subpass_count; i++) {
      const VkSubpassDescription2 *desc = &pCreateInfo->pSubpasses[i];
      struct pvr_render_subpass *subpass = &pass->subpasses[i];

      subpass->pipeline_bind_point = desc->pipelineBindPoint;

      /* From the Vulkan spec. 1.3.265
       * VUID-VkSubpassDescription2-multisampledRenderToSingleSampled-06872:
       *
       *   "If none of the VK_AMD_mixed_attachment_samples extension, the
       *   VK_NV_framebuffer_mixed_samples extension, or the
       *   multisampledRenderToSingleSampled feature are enabled, all
       *   attachments in pDepthStencilAttachment or pColorAttachments that are
       *   not VK_ATTACHMENT_UNUSED must have the same sample count"
       */
      subpass->sample_count = VK_SAMPLE_COUNT_FLAG_BITS_MAX_ENUM;

      if (desc->pDepthStencilAttachment) {
         uint32_t index = desc->pDepthStencilAttachment->attachment;

         if (index != VK_ATTACHMENT_UNUSED)
            subpass->sample_count = pass->attachments[index].sample_count;

         subpass->depth_stencil_attachment = index;
      } else {
         subpass->depth_stencil_attachment = VK_ATTACHMENT_UNUSED;
      }

      subpass->color_count = desc->colorAttachmentCount;
      if (subpass->color_count > 0) {
         subpass->color_attachments = subpass_attachments;
         subpass_attachments += subpass->color_count;

         for (uint32_t j = 0; j < subpass->color_count; j++) {
            subpass->color_attachments[j] =
               desc->pColorAttachments[j].attachment;

            if (subpass->color_attachments[j] == VK_ATTACHMENT_UNUSED)
               continue;

            if (subpass->sample_count == VK_SAMPLE_COUNT_FLAG_BITS_MAX_ENUM) {
               uint32_t index;
               index = subpass->color_attachments[j];
               subpass->sample_count = pass->attachments[index].sample_count;
            }
         }
      }

      if (subpass->sample_count == VK_SAMPLE_COUNT_FLAG_BITS_MAX_ENUM)
         subpass->sample_count = VK_SAMPLE_COUNT_1_BIT;

      if (desc->pResolveAttachments) {
         subpass->resolve_attachments = subpass_attachments;
         subpass_attachments += subpass->color_count;

         for (uint32_t j = 0; j < subpass->color_count; j++) {
            subpass->resolve_attachments[j] =
               desc->pResolveAttachments[j].attachment;
         }
      }

      subpass->input_count = desc->inputAttachmentCount;
      if (subpass->input_count > 0) {
         subpass->input_attachments = subpass_attachments;
         subpass_attachments += subpass->input_count;

         for (uint32_t j = 0; j < subpass->input_count; j++) {
            subpass->input_attachments[j] =
               desc->pInputAttachments[j].attachment;
         }
      }

      /* Give the subpass a slice of the dep_list and flush_on_dep arrays. */
      subpass->dep_list = dep_list;
      dep_list += subpass->dep_count;
      subpass->flush_on_dep = flush_on_dep;
      flush_on_dep += subpass->dep_count;

      /* Reset the dependencies count so we can start from 0 and index into
       * the dependencies array.
       */
      subpass->dep_count = 0;
      subpass->index = i;
   }

   /* Compute dependencies and populate dep_list and flush_on_dep. */
   for (uint32_t i = 0; i < pCreateInfo->dependencyCount; i++) {
      const VkSubpassDependency2 *dep = &pCreateInfo->pDependencies[i];

      if (dep->srcSubpass != VK_SUBPASS_EXTERNAL &&
          dep->dstSubpass != VK_SUBPASS_EXTERNAL &&
          dep->srcSubpass != dep->dstSubpass) {
         struct pvr_render_subpass *subpass = &pass->subpasses[dep->dstSubpass];
         bool is_dep_fb_local =
            vk_subpass_dependency_is_fb_local(dep,
                                              dep->srcStageMask,
                                              dep->dstStageMask);

         subpass->dep_list[subpass->dep_count] = dep->srcSubpass;
         if (pvr_subpass_has_msaa_input_attachment(subpass, pCreateInfo) ||
             !is_dep_fb_local) {
            subpass->flush_on_dep[subpass->dep_count] = true;
         }

         subpass->dep_count++;
      }
   }

   pass->max_tilebuffer_count =
      PVR_SPM_LOAD_IN_BUFFERS_COUNT(&device->pdevice->dev_info);

   result =
      pvr_create_renderpass_hwsetup(device, alloc, pass, false, &pass->hw_setup);
   if (result != VK_SUCCESS)
      goto err_free_pass;

   pvr_init_subpass_isp_userpass(pass->hw_setup, pass, pass->subpasses);

   for (uint32_t i = 0; i < pass->hw_setup->render_count; i++) {
      struct pvr_renderpass_hwsetup_render *hw_render =
         &pass->hw_setup->renders[i];
      struct pvr_load_op *load_op = NULL;

      if (hw_render->tile_buffers_count) {
         result = pvr_device_tile_buffer_ensure_cap(
            device,
            hw_render->tile_buffers_count,
            hw_render->eot_setup.tile_buffer_size);
         if (result != VK_SUCCESS)
            goto err_free_pass;
      }

      assert(!hw_render->load_op);

      if (hw_render->color_init_count != 0U) {
         if (!pvr_has_output_register_writes(hw_render)) {
            const uint32_t last = hw_render->init_setup.num_render_targets;
            struct usc_mrt_resource *mrt_resources;

            hw_render->init_setup.num_render_targets++;

            mrt_resources =
               vk_realloc(alloc,
                          hw_render->init_setup.mrt_resources,
                          hw_render->init_setup.num_render_targets *
                             sizeof(*mrt_resources),
                          8U,
                          VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
            if (!mrt_resources) {
               result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
               goto err_load_op_destroy;
            }

            hw_render->init_setup.mrt_resources = mrt_resources;

            mrt_resources[last].type = USC_MRT_RESOURCE_TYPE_OUTPUT_REG;
            mrt_resources[last].reg.output_reg = 0U;
            mrt_resources[last].reg.offset = 0U;
            mrt_resources[last].intermediate_size = 4U;
            mrt_resources[last].mrt_desc.intermediate_size = 4U;
            mrt_resources[last].mrt_desc.priority = 0U;
            mrt_resources[last].mrt_desc.valid_mask[0U] = ~0;
            mrt_resources[last].mrt_desc.valid_mask[1U] = ~0;
            mrt_resources[last].mrt_desc.valid_mask[2U] = ~0;
            mrt_resources[last].mrt_desc.valid_mask[3U] = ~0;
         }

         result = pvr_create_render_load_op(device,
                                            pAllocator,
                                            pass,
                                            hw_render,
                                            &load_op);
         if (result != VK_SUCCESS)
            goto err_load_op_destroy;

         result =
            pvr_generate_load_op_shader(device, pAllocator, hw_render, load_op);
         if (result != VK_SUCCESS) {
            vk_free2(&device->vk.alloc, pAllocator, load_op);
            goto err_load_op_destroy;
         }

         hw_render->load_op = load_op;
      }

      for (uint32_t j = 0; j < hw_render->subpass_count; j++) {
         if (!pvr_is_load_op_needed(pass, hw_render, j))
            continue;

         result = pvr_create_subpass_load_op(device,
                                             pAllocator,
                                             pass,
                                             hw_render,
                                             j,
                                             &load_op);
         if (result != VK_SUCCESS) {
            vk_free2(&device->vk.alloc, pAllocator, load_op);
            goto err_load_op_destroy;
         }

         result =
            pvr_generate_load_op_shader(device, pAllocator, hw_render, load_op);
         if (result != VK_SUCCESS)
            goto err_load_op_destroy;

         hw_render->subpasses[j].load_op = load_op;
      }
   }

   *pRenderPass = pvr_render_pass_to_handle(pass);

   return VK_SUCCESS;

err_load_op_destroy:
   for (uint32_t i = 0; i < pass->hw_setup->render_count; i++) {
      struct pvr_renderpass_hwsetup_render *hw_render =
         &pass->hw_setup->renders[i];

      for (uint32_t j = 0; j < hw_render->subpass_count; j++) {
         if (hw_render->subpasses[j].load_op) {
            pvr_load_op_destroy(device,
                                pAllocator,
                                hw_render->subpasses[j].load_op);
         }
      }

      if (hw_render->load_op)
         pvr_load_op_destroy(device, pAllocator, hw_render->load_op);
   }

   pvr_destroy_renderpass_hwsetup(alloc, pass->hw_setup);

err_free_pass:
   vk_object_base_finish(&pass->base);
   vk_free2(&device->vk.alloc, pAllocator, pass);

   return result;
}

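/* Destroys the load ops and HW render setup created in pvr_CreateRenderPass2()
 * before freeing the render pass object itself.
 */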
void pvr_DestroyRenderPass(VkDevice _device,
                           VkRenderPass _pass,
                           const VkAllocationCallbacks *pAllocator)
{
   PVR_FROM_HANDLE(pvr_device, device, _device);
   PVR_FROM_HANDLE(pvr_render_pass, pass, _pass);

   if (!pass)
      return;

   for (uint32_t i = 0; i < pass->hw_setup->render_count; i++) {
      struct pvr_renderpass_hwsetup_render *hw_render =
         &pass->hw_setup->renders[i];

      for (uint32_t j = 0; j < hw_render->subpass_count; j++) {
         if (hw_render->subpasses[j].load_op) {
            pvr_load_op_destroy(device,
                                pAllocator,
                                hw_render->subpasses[j].load_op);
         }
      }

      if (hw_render->load_op)
         pvr_load_op_destroy(device, pAllocator, hw_render->load_op);
   }

   pvr_destroy_renderpass_hwsetup(pAllocator ? pAllocator : &device->vk.alloc,
                                  pass->hw_setup);
   vk_object_base_finish(&pass->base);
   vk_free2(&device->vk.alloc, pAllocator, pass);
}

void pvr_GetRenderAreaGranularity(VkDevice _device,
                                  VkRenderPass renderPass,
                                  VkExtent2D *pGranularity)
{
   PVR_FROM_HANDLE(pvr_device, device, _device);
   const struct pvr_device_info *dev_info = &device->pdevice->dev_info;

   /* Granularity does not depend on any settings in the render pass, so return
    * the tile granularity.
    *
    * The default value is based on the minimum value found in all existing
    * cores.
    */
   pGranularity->width = PVR_GET_FEATURE_VALUE(dev_info, tile_size_x, 16);
   pGranularity->height = PVR_GET_FEATURE_VALUE(dev_info, tile_size_y, 16);
}
809