/*
 * Copyright © 2022 Imagination Technologies Ltd.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <stdbool.h>
#include <stdint.h>

#include "hwdef/rogue_hw_utils.h"
#include "pvr_bo.h"
#include "pvr_device_info.h"
#include "pvr_formats.h"
#include "pvr_hw_pass.h"
#include "pvr_pds.h"
#include "pvr_private.h"
#include "pvr_types.h"
#include "pvr_usc_fragment_shader.h"
#include "util/macros.h"
#include "rogue/rogue.h"
#include "vk_alloc.h"
#include "vk_format.h"
#include "vk_log.h"
#include "vk_render_pass.h"

/*****************************************************************************
  PDS pre-baked program generation parameters and variables.
*****************************************************************************/
/* These would normally be produced by the compiler or other code. We're using
 * them for now just to speed things up. All of these should eventually be
 * removed.
 */

static const struct {
   /* Indicates the amount of temporaries for the shader. */
   uint32_t temp_count;
   enum rogue_msaa_mode msaa_mode;
   /* Indicates the presence of PHAS instruction. */
   bool has_phase_rate_change;
} pvr_pds_fragment_program_params = {
   .temp_count = 0,
   .msaa_mode = ROGUE_MSAA_MODE_PIXEL,
   .has_phase_rate_change = false,
};

static inline bool pvr_subpass_has_msaa_input_attachment(
   struct pvr_render_subpass *subpass,
   const VkRenderPassCreateInfo2 *pCreateInfo)
{
   for (uint32_t i = 0; i < subpass->input_count; i++) {
      const uint32_t attachment = subpass->input_attachments[i];

      if (pCreateInfo->pAttachments[attachment].samples > 1)
         return true;
   }

   return false;
}

static bool pvr_is_subpass_initops_flush_needed(
   const struct pvr_render_pass *pass,
   const struct pvr_renderpass_hwsetup_render *hw_render)
{
   struct pvr_render_subpass *subpass = &pass->subpasses[0];
   uint32_t render_loadop_mask = 0;
   uint32_t color_attachment_mask;

   for (uint32_t i = 0; i < hw_render->color_init_count; i++) {
      if (hw_render->color_init[i].op != VK_ATTACHMENT_LOAD_OP_DONT_CARE)
         render_loadop_mask |= (1 << hw_render->color_init[i].index);
   }

   /* If there are no load ops then there's nothing to flush. */
   if (render_loadop_mask == 0)
      return false;

   /* If the first subpass has any input attachments, they need to be
    * initialized with the result of the load op. Since the input attachment
    * may be read from fragments with an opaque pass type, the load ops must be
    * flushed or else they would be obscured and eliminated by HSR.
    */
   if (subpass->input_count != 0)
      return true;

   color_attachment_mask = 0;

   for (uint32_t i = 0; i < subpass->color_count; i++) {
      const uint32_t color_idx = subpass->color_attachments[i];

      if (color_idx != VK_ATTACHMENT_UNUSED)
         color_attachment_mask |= (1 << pass->attachments[color_idx].index);
   }

   /* If the first subpass does not write to all attachments which have a load
    * op then the load ops need to be flushed to ensure they don't get obscured
    * and removed by HSR.
    */
   return (render_loadop_mask & color_attachment_mask) != render_loadop_mask;
}

static void
pvr_init_subpass_isp_userpass(struct pvr_renderpass_hwsetup *hw_setup,
                              struct pvr_render_pass *pass,
                              struct pvr_render_subpass *subpasses)
{
   uint32_t subpass_idx = 0;

   for (uint32_t i = 0; i < hw_setup->render_count; i++) {
      struct pvr_renderpass_hwsetup_render *hw_render = &hw_setup->renders[i];
      const uint32_t initial_isp_userpass =
         (uint32_t)pvr_is_subpass_initops_flush_needed(pass, hw_render);

      for (uint32_t j = 0; j < hw_render->subpass_count; j++) {
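         /* The ISP user pass number advances by one per subpass, starting at
          * 1 when the render's load ops need to be flushed, and wraps at the
          * maximum value the ISP control register can hold.
          */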
         subpasses[subpass_idx].isp_userpass =
            (j + initial_isp_userpass) & PVRX(CR_ISP_CTL_UPASS_START_SIZE_MAX);
         subpass_idx++;
      }
   }

   assert(subpass_idx == pass->subpass_count);
}

static inline bool pvr_has_output_register_writes(
   const struct pvr_renderpass_hwsetup_render *hw_render)
{
   for (uint32_t i = 0; i < hw_render->init_setup.num_render_targets; i++) {
      struct usc_mrt_resource *mrt_resource =
         &hw_render->init_setup.mrt_resources[i];

      if (mrt_resource->type == USC_MRT_RESOURCE_TYPE_OUTPUT_REG)
         return true;
   }

   return false;
}

VkResult pvr_pds_unitex_state_program_create_and_upload(
   struct pvr_device *device,
   const VkAllocationCallbacks *allocator,
   uint32_t texture_kicks,
   uint32_t uniform_kicks,
   struct pvr_pds_upload *const pds_upload_out)
{
   struct pvr_pds_pixel_shader_sa_program program = {
      .num_texture_dma_kicks = texture_kicks,
      .num_uniform_dma_kicks = uniform_kicks,
   };
   uint32_t staging_buffer_size;
   uint32_t *staging_buffer;
   VkResult result;

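   /* Work out how big the generated PDS code segment will be so the host
    * staging buffer can be sized for it.
    */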
   pvr_pds_set_sizes_pixel_shader_uniform_texture_code(&program);

   staging_buffer_size = PVR_DW_TO_BYTES(program.code_size);

   staging_buffer = vk_alloc2(&device->vk.alloc,
                              allocator,
                              staging_buffer_size,
                              8U,
                              VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
   if (!staging_buffer)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   pvr_pds_generate_pixel_shader_sa_code_segment(&program, staging_buffer);

   /* FIXME: Figure out the define for alignment of 16. */
   result = pvr_gpu_upload_pds(device,
                               NULL,
                               0U,
                               0U,
                               staging_buffer,
                               program.code_size,
                               16U,
                               16U,
                               pds_upload_out);
   if (result != VK_SUCCESS) {
      vk_free2(&device->vk.alloc, allocator, staging_buffer);
      return result;
   }

   vk_free2(&device->vk.alloc, allocator, staging_buffer);

   return VK_SUCCESS;
}

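/* A load op describes the color/depth loads and clears that must be performed
 * at the start of a hardware render or subpass. The functions below only
 * gather the per-render-target formats and load/clear masks; the programs
 * that perform the work are attached by pvr_generate_load_op_shader().
 */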
/* TODO: pvr_create_subpass_load_op() and pvr_create_render_load_op() are quite
 * similar. See if we can dedup them?
 */
static VkResult
pvr_create_subpass_load_op(struct pvr_device *device,
                           const VkAllocationCallbacks *allocator,
                           const struct pvr_render_pass *pass,
                           struct pvr_renderpass_hwsetup_render *hw_render,
                           uint32_t hw_subpass_idx,
                           struct pvr_load_op **const load_op_out)
{
   const struct pvr_renderpass_hwsetup_subpass *hw_subpass =
      &hw_render->subpasses[hw_subpass_idx];
   const struct pvr_render_subpass *subpass =
      &pass->subpasses[hw_subpass->index];

   struct pvr_load_op *load_op = vk_zalloc2(&device->vk.alloc,
                                            allocator,
                                            sizeof(*load_op),
                                            8,
                                            VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
   if (!load_op)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   load_op->clears_loads_state.depth_clear_to_reg = PVR_NO_DEPTH_CLEAR_TO_REG;

   if (hw_subpass->z_replicate != -1) {
      const int32_t z_replicate = hw_subpass->z_replicate;

      switch (hw_subpass->depth_initop) {
      case VK_ATTACHMENT_LOAD_OP_LOAD:
         assert(z_replicate < PVR_LOAD_OP_CLEARS_LOADS_MAX_RTS);
         load_op->clears_loads_state.rt_load_mask = BITFIELD_BIT(z_replicate);
         load_op->clears_loads_state.dest_vk_format[z_replicate] =
            VK_FORMAT_D32_SFLOAT;
         break;

      case VK_ATTACHMENT_LOAD_OP_CLEAR:
         load_op->clears_loads_state.depth_clear_to_reg = z_replicate;
         break;

      default:
         break;
      }
   }

   assert(subpass->color_count <= PVR_LOAD_OP_CLEARS_LOADS_MAX_RTS);
   for (uint32_t i = 0; i < subpass->color_count; i++) {
      const uint32_t attachment_idx = subpass->color_attachments[i];

      assert(attachment_idx < pass->attachment_count);
      load_op->clears_loads_state.dest_vk_format[i] =
         pass->attachments[attachment_idx].vk_format;

      if (pass->attachments[attachment_idx].sample_count > 1)
         load_op->clears_loads_state.unresolved_msaa_mask |= BITFIELD_BIT(i);

      if (hw_subpass->color_initops[i] == VK_ATTACHMENT_LOAD_OP_LOAD)
         load_op->clears_loads_state.rt_load_mask |= BITFIELD_BIT(i);
      else if (hw_subpass->color_initops[i] == VK_ATTACHMENT_LOAD_OP_CLEAR)
         load_op->clears_loads_state.rt_clear_mask |= BITFIELD_BIT(i);
   }

   load_op->is_hw_object = false;
   load_op->subpass = subpass;

   *load_op_out = load_op;

   return VK_SUCCESS;
}

static VkResult
pvr_create_render_load_op(struct pvr_device *device,
                          const VkAllocationCallbacks *allocator,
                          const struct pvr_render_pass *pass,
                          const struct pvr_renderpass_hwsetup_render *hw_render,
                          struct pvr_load_op **const load_op_out)
{
   struct pvr_load_op *load_op = vk_zalloc2(&device->vk.alloc,
                                            allocator,
                                            sizeof(*load_op),
                                            8,
                                            VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
   if (!load_op)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   load_op->clears_loads_state.depth_clear_to_reg = PVR_NO_DEPTH_CLEAR_TO_REG;

   assert(hw_render->color_init_count <= PVR_LOAD_OP_CLEARS_LOADS_MAX_RTS);
   for (uint32_t i = 0; i < hw_render->color_init_count; i++) {
      struct pvr_renderpass_colorinit *color_init = &hw_render->color_init[i];

      assert(color_init->index < pass->attachment_count);
      load_op->clears_loads_state.dest_vk_format[i] =
         pass->attachments[color_init->index].vk_format;

      if (pass->attachments[color_init->index].sample_count > 1)
         load_op->clears_loads_state.unresolved_msaa_mask |= BITFIELD_BIT(i);

      if (color_init->op == VK_ATTACHMENT_LOAD_OP_LOAD)
         load_op->clears_loads_state.rt_load_mask |= BITFIELD_BIT(i);
      else if (color_init->op == VK_ATTACHMENT_LOAD_OP_CLEAR)
         load_op->clears_loads_state.rt_clear_mask |= BITFIELD_BIT(i);
   }

   load_op->is_hw_object = true;
   load_op->hw_render = hw_render;

   *load_op_out = load_op;

   return VK_SUCCESS;
}

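/* Upload the pre-built load op USC fragment shader and create the PDS
 * fragment and texture state programs used to launch it.
 */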
static VkResult
pvr_generate_load_op_shader(struct pvr_device *device,
                            const VkAllocationCallbacks *allocator,
                            struct pvr_renderpass_hwsetup_render *hw_render,
                            struct pvr_load_op *load_op)
{
   const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
   const uint32_t cache_line_size = rogue_get_slc_cache_line_size(dev_info);

   VkResult result = pvr_gpu_upload_usc(device,
                                        pvr_usc_fragment_shader,
                                        sizeof(pvr_usc_fragment_shader),
                                        cache_line_size,
                                        &load_op->usc_frag_prog_bo);
   if (result != VK_SUCCESS)
      return result;

   result = pvr_pds_fragment_program_create_and_upload(
      device,
      allocator,
      load_op->usc_frag_prog_bo,
      pvr_pds_fragment_program_params.temp_count,
      pvr_pds_fragment_program_params.msaa_mode,
      pvr_pds_fragment_program_params.has_phase_rate_change,
      &load_op->pds_frag_prog);
   if (result != VK_SUCCESS)
      goto err_free_usc_frag_prog_bo;

   result = pvr_pds_unitex_state_program_create_and_upload(
      device,
      allocator,
      1U,
      0U,
      &load_op->pds_tex_state_prog);
   if (result != VK_SUCCESS)
      goto err_free_pds_frag_prog;

   /* FIXME: These should be based on the USC and PDS programs, but are hard
    * coded for now.
    */
   load_op->const_shareds_count = 1;
   load_op->shareds_dest_offset = 0;
   load_op->shareds_count = 1;
   load_op->temps_count = 1;

   return VK_SUCCESS;

err_free_pds_frag_prog:
   pvr_bo_suballoc_free(load_op->pds_frag_prog.pvr_bo);

err_free_usc_frag_prog_bo:
   pvr_bo_suballoc_free(load_op->usc_frag_prog_bo);

   return result;
}

static void pvr_load_op_destroy(struct pvr_device *device,
                                const VkAllocationCallbacks *allocator,
                                struct pvr_load_op *load_op)
{
   pvr_bo_suballoc_free(load_op->pds_tex_state_prog.pvr_bo);
   pvr_bo_suballoc_free(load_op->pds_frag_prog.pvr_bo);
   pvr_bo_suballoc_free(load_op->usc_frag_prog_bo);
   vk_free2(&device->vk.alloc, allocator, load_op);
}

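/* Number of tile buffers available as inputs to the SPM load programs. Cores
 * with the eight_output_registers feature have four fewer buffers available
 * for this.
 */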
#define PVR_SPM_LOAD_IN_BUFFERS_COUNT(dev_info)              \
   ({                                                        \
      int __ret = PVR_MAX_TILE_BUFFER_COUNT;                 \
      if (PVR_HAS_FEATURE(dev_info, eight_output_registers)) \
         __ret -= 4U;                                        \
      __ret;                                                 \
   })

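/* A subpass needs a load op if it loads or clears replicated depth, or if it
 * loads or clears any of its used color attachments.
 */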
static bool
pvr_is_load_op_needed(const struct pvr_render_pass *pass,
                      struct pvr_renderpass_hwsetup_render *hw_render,
                      const uint32_t subpass_idx)
{
   struct pvr_renderpass_hwsetup_subpass *hw_subpass =
      &hw_render->subpasses[subpass_idx];
   const struct pvr_render_subpass *subpass =
      &pass->subpasses[hw_subpass->index];

   if (hw_subpass->z_replicate != -1 &&
       (hw_subpass->depth_initop == VK_ATTACHMENT_LOAD_OP_LOAD ||
        hw_subpass->depth_initop == VK_ATTACHMENT_LOAD_OP_CLEAR)) {
      return true;
   }

   for (uint32_t i = 0; i < subpass->color_count; i++) {
      if (subpass->color_attachments[i] == VK_ATTACHMENT_UNUSED)
         continue;

      if (hw_subpass->color_initops[i] == VK_ATTACHMENT_LOAD_OP_LOAD ||
          hw_subpass->color_initops[i] == VK_ATTACHMENT_LOAD_OP_CLEAR) {
         return true;
      }
   }

   return false;
}

VkResult pvr_CreateRenderPass2(VkDevice _device,
                               const VkRenderPassCreateInfo2 *pCreateInfo,
                               const VkAllocationCallbacks *pAllocator,
                               VkRenderPass *pRenderPass)
{
   struct pvr_render_pass_attachment *attachments;
   PVR_FROM_HANDLE(pvr_device, device, _device);
   struct pvr_render_subpass *subpasses;
   const VkAllocationCallbacks *alloc;
   size_t subpass_attachment_count;
   uint32_t *subpass_attachments;
   struct pvr_render_pass *pass;
   uint32_t *dep_list;
   bool *flush_on_dep;
   VkResult result;

   alloc = pAllocator ? pAllocator : &device->vk.alloc;

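   /* Allocate the pass along with its attachment, subpass, subpass attachment
    * and dependency arrays in one zeroed block.
    */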
   VK_MULTIALLOC(ma);
   vk_multialloc_add(&ma, &pass, __typeof__(*pass), 1);
   vk_multialloc_add(&ma,
                     &attachments,
                     __typeof__(*attachments),
                     pCreateInfo->attachmentCount);
   vk_multialloc_add(&ma,
                     &subpasses,
                     __typeof__(*subpasses),
                     pCreateInfo->subpassCount);

   subpass_attachment_count = 0;
   for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) {
      const VkSubpassDescription2 *desc = &pCreateInfo->pSubpasses[i];
      subpass_attachment_count +=
         desc->inputAttachmentCount + desc->colorAttachmentCount +
         (desc->pResolveAttachments ? desc->colorAttachmentCount : 0);
   }

   vk_multialloc_add(&ma,
                     &subpass_attachments,
                     __typeof__(*subpass_attachments),
                     subpass_attachment_count);
   vk_multialloc_add(&ma,
                     &dep_list,
                     __typeof__(*dep_list),
                     pCreateInfo->dependencyCount);
   vk_multialloc_add(&ma,
                     &flush_on_dep,
                     __typeof__(*flush_on_dep),
                     pCreateInfo->dependencyCount);

   if (!vk_multialloc_zalloc(&ma, alloc, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT))
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   vk_object_base_init(&device->vk, &pass->base, VK_OBJECT_TYPE_RENDER_PASS);
   pass->attachment_count = pCreateInfo->attachmentCount;
   pass->attachments = attachments;
   pass->subpass_count = pCreateInfo->subpassCount;
   pass->subpasses = subpasses;
   pass->max_sample_count = 1;

   /* Copy attachment descriptions. */
   for (uint32_t i = 0; i < pass->attachment_count; i++) {
      const VkAttachmentDescription2 *desc = &pCreateInfo->pAttachments[i];
      struct pvr_render_pass_attachment *attachment = &pass->attachments[i];

      pvr_assert(!(desc->flags & ~VK_ATTACHMENT_DESCRIPTION_MAY_ALIAS_BIT));

      attachment->load_op = desc->loadOp;
      attachment->store_op = desc->storeOp;

      attachment->aspects = vk_format_aspects(desc->format);
      if (attachment->aspects & VK_IMAGE_ASPECT_STENCIL_BIT) {
         attachment->stencil_load_op = desc->stencilLoadOp;
         attachment->stencil_store_op = desc->stencilStoreOp;
      }

      attachment->vk_format = desc->format;
      attachment->sample_count = desc->samples;
      attachment->initial_layout = desc->initialLayout;
      attachment->is_pbe_downscalable =
         pvr_format_is_pbe_downscalable(attachment->vk_format);
      attachment->index = i;

      if (attachment->sample_count > pass->max_sample_count)
         pass->max_sample_count = attachment->sample_count;
   }

   /* Count how many dependencies each subpass has. */
   for (uint32_t i = 0; i < pCreateInfo->dependencyCount; i++) {
      const VkSubpassDependency2 *dep = &pCreateInfo->pDependencies[i];

      if (dep->srcSubpass != VK_SUBPASS_EXTERNAL &&
          dep->dstSubpass != VK_SUBPASS_EXTERNAL &&
          dep->srcSubpass != dep->dstSubpass) {
         pass->subpasses[dep->dstSubpass].dep_count++;
      }
   }

   /* Assign reference pointers to lists and fill in the attachment lists. We
    * need to re-walk the dependencies array later to fill in the per-subpass
    * dependency lists.
    */
   for (uint32_t i = 0; i < pass->subpass_count; i++) {
      const VkSubpassDescription2 *desc = &pCreateInfo->pSubpasses[i];
      struct pvr_render_subpass *subpass = &pass->subpasses[i];

      subpass->pipeline_bind_point = desc->pipelineBindPoint;

      /* From the Vulkan spec. 1.3.265
       * VUID-VkSubpassDescription2-multisampledRenderToSingleSampled-06872:
       *
       *   "If none of the VK_AMD_mixed_attachment_samples extension, the
       *   VK_NV_framebuffer_mixed_samples extension, or the
       *   multisampledRenderToSingleSampled feature are enabled, all
       *   attachments in pDepthStencilAttachment or pColorAttachments that are
       *   not VK_ATTACHMENT_UNUSED must have the same sample count"
       *
       */
      subpass->sample_count = VK_SAMPLE_COUNT_FLAG_BITS_MAX_ENUM;

      if (desc->pDepthStencilAttachment) {
         uint32_t index = desc->pDepthStencilAttachment->attachment;

         if (index != VK_ATTACHMENT_UNUSED)
            subpass->sample_count = pass->attachments[index].sample_count;

         subpass->depth_stencil_attachment = index;
      } else {
         subpass->depth_stencil_attachment = VK_ATTACHMENT_UNUSED;
      }

      subpass->color_count = desc->colorAttachmentCount;
      if (subpass->color_count > 0) {
         subpass->color_attachments = subpass_attachments;
         subpass_attachments += subpass->color_count;

         for (uint32_t j = 0; j < subpass->color_count; j++) {
            subpass->color_attachments[j] =
               desc->pColorAttachments[j].attachment;

            if (subpass->color_attachments[j] == VK_ATTACHMENT_UNUSED)
               continue;

            if (subpass->sample_count == VK_SAMPLE_COUNT_FLAG_BITS_MAX_ENUM) {
               uint32_t index;
               index = subpass->color_attachments[j];
               subpass->sample_count = pass->attachments[index].sample_count;
            }
         }
      }

      if (subpass->sample_count == VK_SAMPLE_COUNT_FLAG_BITS_MAX_ENUM)
         subpass->sample_count = VK_SAMPLE_COUNT_1_BIT;

      if (desc->pResolveAttachments) {
         subpass->resolve_attachments = subpass_attachments;
         subpass_attachments += subpass->color_count;

         for (uint32_t j = 0; j < subpass->color_count; j++) {
            subpass->resolve_attachments[j] =
               desc->pResolveAttachments[j].attachment;
         }
      }

      subpass->input_count = desc->inputAttachmentCount;
      if (subpass->input_count > 0) {
         subpass->input_attachments = subpass_attachments;
         subpass_attachments += subpass->input_count;

         for (uint32_t j = 0; j < subpass->input_count; j++) {
            subpass->input_attachments[j] =
               desc->pInputAttachments[j].attachment;
         }
      }

      /* Give the subpass a slice of the dep_list and flush_on_dep arrays. */
      subpass->dep_list = dep_list;
      dep_list += subpass->dep_count;
      subpass->flush_on_dep = flush_on_dep;
      flush_on_dep += subpass->dep_count;

      /* Reset the dependencies count so we can start from 0 and index into
       * the dependencies array.
       */
      subpass->dep_count = 0;
      subpass->index = i;
   }

   /* Compute dependencies and populate dep_list and flush_on_dep. */
   for (uint32_t i = 0; i < pCreateInfo->dependencyCount; i++) {
      const VkSubpassDependency2 *dep = &pCreateInfo->pDependencies[i];

      if (dep->srcSubpass != VK_SUBPASS_EXTERNAL &&
          dep->dstSubpass != VK_SUBPASS_EXTERNAL &&
          dep->srcSubpass != dep->dstSubpass) {
         struct pvr_render_subpass *subpass = &pass->subpasses[dep->dstSubpass];
         bool is_dep_fb_local =
            vk_subpass_dependency_is_fb_local(dep,
                                              dep->srcStageMask,
                                              dep->dstStageMask);

         subpass->dep_list[subpass->dep_count] = dep->srcSubpass;
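         /* Mark the dependency as requiring a flush when it is not
          * framebuffer-local or when the destination subpass reads a
          * multisampled input attachment.
          */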
         if (pvr_subpass_has_msaa_input_attachment(subpass, pCreateInfo) ||
             !is_dep_fb_local) {
            subpass->flush_on_dep[subpass->dep_count] = true;
         }

         subpass->dep_count++;
      }
   }

   pass->max_tilebuffer_count =
      PVR_SPM_LOAD_IN_BUFFERS_COUNT(&device->pdevice->dev_info);

   result =
      pvr_create_renderpass_hwsetup(device, alloc, pass, false, &pass->hw_setup);
   if (result != VK_SUCCESS)
      goto err_free_pass;

   pvr_init_subpass_isp_userpass(pass->hw_setup, pass, pass->subpasses);

   for (uint32_t i = 0; i < pass->hw_setup->render_count; i++) {
      struct pvr_renderpass_hwsetup_render *hw_render =
         &pass->hw_setup->renders[i];
      struct pvr_load_op *load_op = NULL;

      if (hw_render->tile_buffers_count) {
         result = pvr_device_tile_buffer_ensure_cap(
            device,
            hw_render->tile_buffers_count,
            hw_render->eot_setup.tile_buffer_size);
         if (result != VK_SUCCESS)
            goto err_free_pass;
      }

      assert(!hw_render->load_op);

      if (hw_render->color_init_count != 0U) {
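         /* Make sure the init setup has at least one output register MRT
          * resource before attaching the render load op; if the existing
          * resources only use tile buffers, append one targeting output
          * register 0.
          */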
         if (!pvr_has_output_register_writes(hw_render)) {
            const uint32_t last = hw_render->init_setup.num_render_targets;
            struct usc_mrt_resource *mrt_resources;

            hw_render->init_setup.num_render_targets++;

            mrt_resources =
               vk_realloc(alloc,
                          hw_render->init_setup.mrt_resources,
                          hw_render->init_setup.num_render_targets *
                             sizeof(*mrt_resources),
                          8U,
                          VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
            if (!mrt_resources) {
               result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
               goto err_load_op_destroy;
            }

            hw_render->init_setup.mrt_resources = mrt_resources;

            mrt_resources[last].type = USC_MRT_RESOURCE_TYPE_OUTPUT_REG;
            mrt_resources[last].reg.output_reg = 0U;
            mrt_resources[last].reg.offset = 0U;
            mrt_resources[last].intermediate_size = 4U;
            mrt_resources[last].mrt_desc.intermediate_size = 4U;
            mrt_resources[last].mrt_desc.priority = 0U;
            mrt_resources[last].mrt_desc.valid_mask[0U] = ~0;
            mrt_resources[last].mrt_desc.valid_mask[1U] = ~0;
            mrt_resources[last].mrt_desc.valid_mask[2U] = ~0;
            mrt_resources[last].mrt_desc.valid_mask[3U] = ~0;
         }

         result = pvr_create_render_load_op(device,
                                            pAllocator,
                                            pass,
                                            hw_render,
                                            &load_op);
         if (result != VK_SUCCESS)
            goto err_load_op_destroy;

         result =
            pvr_generate_load_op_shader(device, pAllocator, hw_render, load_op);
         if (result != VK_SUCCESS) {
            vk_free2(&device->vk.alloc, pAllocator, load_op);
            goto err_load_op_destroy;
         }

         hw_render->load_op = load_op;
      }

      for (uint32_t j = 0; j < hw_render->subpass_count; j++) {
         if (!pvr_is_load_op_needed(pass, hw_render, j))
            continue;

         result = pvr_create_subpass_load_op(device,
                                             pAllocator,
                                             pass,
                                             hw_render,
                                             j,
                                             &load_op);
         if (result != VK_SUCCESS) {
            vk_free2(&device->vk.alloc, pAllocator, load_op);
            goto err_load_op_destroy;
         }

         result =
            pvr_generate_load_op_shader(device, pAllocator, hw_render, load_op);
         if (result != VK_SUCCESS)
            goto err_load_op_destroy;

         hw_render->subpasses[j].load_op = load_op;
      }
   }

   *pRenderPass = pvr_render_pass_to_handle(pass);

   return VK_SUCCESS;

err_load_op_destroy:
   for (uint32_t i = 0; i < pass->hw_setup->render_count; i++) {
      struct pvr_renderpass_hwsetup_render *hw_render =
         &pass->hw_setup->renders[i];

      for (uint32_t j = 0; j < hw_render->subpass_count; j++) {
         if (hw_render->subpasses[j].load_op) {
            pvr_load_op_destroy(device,
                                pAllocator,
                                hw_render->subpasses[j].load_op);
         }
      }

      if (hw_render->load_op)
         pvr_load_op_destroy(device, pAllocator, hw_render->load_op);
   }

   pvr_destroy_renderpass_hwsetup(alloc, pass->hw_setup);

err_free_pass:
   vk_object_base_finish(&pass->base);
   vk_free2(&device->vk.alloc, pAllocator, pass);

   return result;
}

void pvr_DestroyRenderPass(VkDevice _device,
                           VkRenderPass _pass,
                           const VkAllocationCallbacks *pAllocator)
{
   PVR_FROM_HANDLE(pvr_device, device, _device);
   PVR_FROM_HANDLE(pvr_render_pass, pass, _pass);

   if (!pass)
      return;

   for (uint32_t i = 0; i < pass->hw_setup->render_count; i++) {
      struct pvr_renderpass_hwsetup_render *hw_render =
         &pass->hw_setup->renders[i];

      for (uint32_t j = 0; j < hw_render->subpass_count; j++) {
         if (hw_render->subpasses[j].load_op) {
            pvr_load_op_destroy(device,
                                pAllocator,
                                hw_render->subpasses[j].load_op);
         }
      }

      if (hw_render->load_op)
         pvr_load_op_destroy(device, pAllocator, hw_render->load_op);
   }

   pvr_destroy_renderpass_hwsetup(pAllocator ? pAllocator : &device->vk.alloc,
                                  pass->hw_setup);
   vk_object_base_finish(&pass->base);
   vk_free2(&device->vk.alloc, pAllocator, pass);
}

void pvr_GetRenderAreaGranularity(VkDevice _device,
                                  VkRenderPass renderPass,
                                  VkExtent2D *pGranularity)
{
   PVR_FROM_HANDLE(pvr_device, device, _device);
   const struct pvr_device_info *dev_info = &device->pdevice->dev_info;

   /* Granularity does not depend on any settings in the render pass, so return
    * the tile granularity.
    *
    * The default value is based on the minimum value found in all existing
    * cores.
    */
   pGranularity->width = PVR_GET_FEATURE_VALUE(dev_info, tile_size_x, 16);
   pGranularity->height = PVR_GET_FEATURE_VALUE(dev_info, tile_size_y, 16);
}
809