xref: /aosp_15_r20/external/mesa3d/src/freedreno/vulkan/tu_pass.cc (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright © 2016 Red Hat.
3  * Copyright © 2016 Bas Nieuwenhuizen
4  * SPDX-License-Identifier: MIT
5  *
6  * based in part on anv driver which is:
7  * Copyright © 2015 Intel Corporation
8  */
9 
10 #include "tu_pass.h"
11 
12 #include "vk_util.h"
13 #include "vk_render_pass.h"
14 
15 #include "tu_cmd_buffer.h"
16 #include "tu_device.h"
17 #include "tu_image.h"
18 
19 static void
tu_render_pass_add_subpass_dep(struct tu_render_pass * pass,const VkSubpassDependency2 * dep)20 tu_render_pass_add_subpass_dep(struct tu_render_pass *pass,
21                                const VkSubpassDependency2 *dep)
22 {
23    uint32_t src = dep->srcSubpass;
24    uint32_t dst = dep->dstSubpass;
25 
26    /* Ignore subpass self-dependencies as they allow the app to call
27     * vkCmdPipelineBarrier() inside the render pass and the driver should only
28     * do the barrier when called, not when starting the render pass.
29     *
30     * We cannot decide whether to allow gmem rendering before a barrier
31     * is actually emitted, so we delay the decision until then.
32     */
33    if (src == dst)
34       return;
35 
36    /* From the Vulkan 1.2.195 spec:
37     *
38     * "If an instance of VkMemoryBarrier2 is included in the pNext chain, srcStageMask,
39     *  dstStageMask, srcAccessMask, and dstAccessMask parameters are ignored. The synchronization
40     *  and access scopes instead are defined by the parameters of VkMemoryBarrier2."
41     */
42    const VkMemoryBarrier2 *barrier =
43       vk_find_struct_const(dep->pNext, MEMORY_BARRIER_2);
44    VkPipelineStageFlags2 src_stage_mask = barrier ? barrier->srcStageMask : dep->srcStageMask;
45    VkAccessFlags2 src_access_mask = barrier ? barrier->srcAccessMask : dep->srcAccessMask;
46    VkPipelineStageFlags2 dst_stage_mask = barrier ? barrier->dstStageMask : dep->dstStageMask;
47    VkAccessFlags2 dst_access_mask = barrier ? barrier->dstAccessMask : dep->dstAccessMask;
48 
49    /* We can conceptually break down the process of rewriting a sysmem
50     * renderpass into a gmem one into two parts:
51     *
52     * 1. Split each draw and multisample resolve into N copies, one for each
53     * bin. (If hardware binning, add one more copy where the FS is disabled
54     * for the binning pass). This is always allowed because the vertex stage
55     * is allowed to run an arbitrary number of times and there are no extra
56     * ordering constraints within a draw.
57     * 2. Take the last copy of the second-to-last draw and slide it down to
58     * before the last copy of the last draw. Repeat for each earlier draw
59     * until the draw pass for the last bin is complete, then repeat for each
60     * earlier bin until we finish with the first bin.
61     *
62     * During this rearranging process, we can't slide draws past each other in
63     * a way that breaks the subpass dependencies. For each draw, we must slide
64     * it past (copies of) the rest of the draws in the renderpass. We can
65     * slide a draw past another if there isn't a dependency between them, or
66     * if the dependenc(ies) are dependencies between framebuffer-space stages
67     * only with the BY_REGION bit set. Note that this includes
68     * self-dependencies, since these may result in pipeline barriers that also
69     * break the rearranging process.
70     */
71 
72    if (!vk_subpass_dependency_is_fb_local(dep, src_stage_mask, dst_stage_mask)) {
73       perf_debug((struct tu_device *)pass->base.device, "Disabling gmem rendering due to invalid subpass dependency");
74       for (int i = 0; i < ARRAY_SIZE(pass->gmem_pixels); i++)
75          pass->gmem_pixels[i] = 0;
76    }
77 
78    struct tu_subpass_barrier *dst_barrier;
79    if (dst == VK_SUBPASS_EXTERNAL) {
80       dst_barrier = &pass->end_barrier;
81    } else {
82       dst_barrier = &pass->subpasses[dst].start_barrier;
83    }
84 
85    dst_barrier->src_stage_mask |= src_stage_mask;
86    dst_barrier->dst_stage_mask |= dst_stage_mask;
87    dst_barrier->src_access_mask |= src_access_mask;
88    dst_barrier->dst_access_mask |= dst_access_mask;
89 }
90 
91 /* We currently only care about undefined layouts, because we have to
92  * flush/invalidate CCU for those. PREINITIALIZED is the same thing as
93  * UNDEFINED for anything not linear tiled, but we don't know yet whether the
94  * images used are tiled, so just assume they are.
95  */
96 
97 static bool
layout_undefined(VkImageLayout layout)98 layout_undefined(VkImageLayout layout)
99 {
100    return layout == VK_IMAGE_LAYOUT_UNDEFINED ||
101           layout == VK_IMAGE_LAYOUT_PREINITIALIZED;
102 }
103 
104 /* This implements the following bit of spec text:
105  *
106  *    If there is no subpass dependency from VK_SUBPASS_EXTERNAL to the
107  *    first subpass that uses an attachment, then an implicit subpass
108  *    dependency exists from VK_SUBPASS_EXTERNAL to the first subpass it is
109  *    used in. The implicit subpass dependency only exists if there
110  *    exists an automatic layout transition away from initialLayout.
111  *    The subpass dependency operates as if defined with the
112  *    following parameters:
113  *
114  *    VkSubpassDependency implicitDependency = {
115  *        .srcSubpass = VK_SUBPASS_EXTERNAL;
116  *        .dstSubpass = firstSubpass; // First subpass attachment is used in
117  *        .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
118  *        .dstStageMask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT;
119  *        .srcAccessMask = 0;
120  *        .dstAccessMask = VK_ACCESS_INPUT_ATTACHMENT_READ_BIT |
121  *                         VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
122  *                         VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
123  *                         VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
124  *                         VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
125  *        .dependencyFlags = 0;
126  *    };
127  *
128  *    Similarly, if there is no subpass dependency from the last subpass
129  *    that uses an attachment to VK_SUBPASS_EXTERNAL, then an implicit
130  *    subpass dependency exists from the last subpass it is used in to
131  *    VK_SUBPASS_EXTERNAL. The implicit subpass dependency only exists
132  *    if there exists an automatic layout transition into finalLayout.
133  *    The subpass dependency operates as if defined with the following
134  *    parameters:
135  *
136  *    VkSubpassDependency implicitDependency = {
137  *        .srcSubpass = lastSubpass; // Last subpass attachment is used in
138  *        .dstSubpass = VK_SUBPASS_EXTERNAL;
139  *        .srcStageMask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT;
140  *        .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT;
141  *        .srcAccessMask = VK_ACCESS_INPUT_ATTACHMENT_READ_BIT |
142  *                         VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
143  *                         VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
144  *                         VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
145  *                         VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
146  *        .dstAccessMask = 0;
147  *        .dependencyFlags = 0;
148  *    };
149  *
150  * Note: currently this is the only use we have for layout transitions,
151  * besides needing to invalidate CCU at the beginning, so we also flag
152  * transitions from UNDEFINED here.
153  */
154 static void
tu_render_pass_add_implicit_deps(struct tu_render_pass * pass,const VkRenderPassCreateInfo2 * info)155 tu_render_pass_add_implicit_deps(struct tu_render_pass *pass,
156                                  const VkRenderPassCreateInfo2 *info)
157 {
158    const VkAttachmentDescription2* att = info->pAttachments;
159    bool has_external_src[info->subpassCount];
160    bool has_external_dst[info->subpassCount];
161    bool att_used[pass->attachment_count];
162 
163    memset(has_external_src, 0, sizeof(has_external_src));
164    memset(has_external_dst, 0, sizeof(has_external_dst));
165 
166    for (uint32_t i = 0; i < info->dependencyCount; i++) {
167       uint32_t src = info->pDependencies[i].srcSubpass;
168       uint32_t dst = info->pDependencies[i].dstSubpass;
169 
170       if (src == dst)
171          continue;
172 
173       if (src == VK_SUBPASS_EXTERNAL)
174          has_external_src[dst] = true;
175       if (dst == VK_SUBPASS_EXTERNAL)
176          has_external_dst[src] = true;
177    }
178 
179    memset(att_used, 0, sizeof(att_used));
180 
181    for (unsigned i = 0; i < info->subpassCount; i++) {
182       const VkSubpassDescription2 *subpass = &info->pSubpasses[i];
183       bool src_implicit_dep = false;
184 
185       for (unsigned j = 0; j < subpass->inputAttachmentCount; j++) {
186          uint32_t a = subpass->pInputAttachments[j].attachment;
187 
188          if (a == VK_ATTACHMENT_UNUSED)
189             continue;
190 
191          uint32_t stencil_layout = vk_format_has_stencil(att[a].format) ?
192                vk_att_ref_stencil_layout(&subpass->pInputAttachments[j], att) :
193                VK_IMAGE_LAYOUT_UNDEFINED;
194          uint32_t stencil_initial_layout = vk_att_desc_stencil_layout(&att[a], false);
195 
196          if ((att[a].initialLayout != subpass->pInputAttachments[j].layout ||
197              stencil_initial_layout != stencil_layout) &&
198              !att_used[a] && !has_external_src[i])
199             src_implicit_dep = true;
200          att_used[a] = true;
201       }
202 
203       for (unsigned j = 0; j < subpass->colorAttachmentCount; j++) {
204          uint32_t a = subpass->pColorAttachments[j].attachment;
205          if (a == VK_ATTACHMENT_UNUSED)
206             continue;
207          if (att[a].initialLayout != subpass->pColorAttachments[j].layout &&
208              !att_used[a] && !has_external_src[i])
209             src_implicit_dep = true;
210          att_used[a] = true;
211       }
212 
213       if (subpass->pDepthStencilAttachment &&
214           subpass->pDepthStencilAttachment->attachment != VK_ATTACHMENT_UNUSED) {
215          uint32_t a = subpass->pDepthStencilAttachment->attachment;
216          uint32_t stencil_layout = vk_att_ref_stencil_layout(subpass->pDepthStencilAttachment, att);
217          uint32_t stencil_initial_layout = vk_att_desc_stencil_layout(&att[a], false);
218 
219          if ((att[a].initialLayout != subpass->pDepthStencilAttachment->layout ||
220              stencil_initial_layout != stencil_layout) &&
221              !att_used[a] && !has_external_src[i]) {
222             src_implicit_dep = true;
223          }
224          att_used[a] = true;
225       }
226 
227       if (subpass->pResolveAttachments) {
228          for (unsigned j = 0; j < subpass->colorAttachmentCount; j++) {
229             uint32_t a = subpass->pResolveAttachments[j].attachment;
230             if (a == VK_ATTACHMENT_UNUSED)
231                continue;
232             if (att[a].initialLayout != subpass->pResolveAttachments[j].layout &&
233                !att_used[a] && !has_external_src[i])
234                src_implicit_dep = true;
235             att_used[a] = true;
236          }
237       }
238 
239       const VkSubpassDescriptionDepthStencilResolve *ds_resolve =
240          vk_find_struct_const(subpass->pNext, SUBPASS_DESCRIPTION_DEPTH_STENCIL_RESOLVE);
241 
242       if (ds_resolve && ds_resolve->pDepthStencilResolveAttachment &&
243           ds_resolve->pDepthStencilResolveAttachment->attachment != VK_ATTACHMENT_UNUSED) {
244             uint32_t a = ds_resolve->pDepthStencilResolveAttachment->attachment;
245             uint32_t stencil_layout = vk_att_ref_stencil_layout(ds_resolve->pDepthStencilResolveAttachment, att);
246             uint32_t stencil_initial_layout = vk_att_desc_stencil_layout(&att[a], false);
247 
248             if ((att[a].initialLayout != subpass->pDepthStencilAttachment->layout ||
249                 stencil_initial_layout != stencil_layout) &&
250                 !att_used[a] && !has_external_src[i])
251                src_implicit_dep = true;
252             att_used[a] = true;
253       }
254 
255       if (src_implicit_dep) {
256          const VkSubpassDependency2 dep = {
257             .srcSubpass = VK_SUBPASS_EXTERNAL,
258             .dstSubpass = i,
259             .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
260             .dstStageMask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
261             .srcAccessMask = 0,
262             .dstAccessMask = VK_ACCESS_INPUT_ATTACHMENT_READ_BIT |
263                              VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
264                              VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
265                              VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
266                              VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT,
267             .dependencyFlags = 0,
268          };
269 
270          tu_render_pass_add_subpass_dep(pass, &dep);
271       }
272    }
273 
274    memset(att_used, 0, sizeof(att_used));
275 
276    for (int i = info->subpassCount - 1; i >= 0; i--) {
277       const VkSubpassDescription2 *subpass = &info->pSubpasses[i];
278       bool dst_implicit_dep = false;
279 
280       for (unsigned j = 0; j < subpass->inputAttachmentCount; j++) {
281          uint32_t a = subpass->pInputAttachments[j].attachment;
282          if (a == VK_ATTACHMENT_UNUSED)
283             continue;
284 
285          uint32_t stencil_layout = vk_format_has_stencil(att[a].format) ?
286                vk_att_ref_stencil_layout(&subpass->pInputAttachments[j], att) :
287                VK_IMAGE_LAYOUT_UNDEFINED;
288          uint32_t stencil_final_layout = vk_att_desc_stencil_layout(&att[a], true);
289 
290          if ((att[a].finalLayout != subpass->pInputAttachments[j].layout ||
291              stencil_final_layout != stencil_layout) &&
292              !att_used[a] && !has_external_dst[i])
293             dst_implicit_dep = true;
294          att_used[a] = true;
295       }
296 
297       for (unsigned j = 0; j < subpass->colorAttachmentCount; j++) {
298          uint32_t a = subpass->pColorAttachments[j].attachment;
299          if (a == VK_ATTACHMENT_UNUSED)
300             continue;
301          if (att[a].finalLayout != subpass->pColorAttachments[j].layout &&
302              !att_used[a] && !has_external_dst[i])
303             dst_implicit_dep = true;
304          att_used[a] = true;
305       }
306 
307       if (subpass->pDepthStencilAttachment &&
308           subpass->pDepthStencilAttachment->attachment != VK_ATTACHMENT_UNUSED) {
309          uint32_t a = subpass->pDepthStencilAttachment->attachment;
310          uint32_t stencil_layout = vk_att_ref_stencil_layout(subpass->pDepthStencilAttachment, att);
311          uint32_t stencil_final_layout = vk_att_desc_stencil_layout(&att[a], true);
312 
313          if ((att[a].finalLayout != subpass->pDepthStencilAttachment->layout ||
314              stencil_final_layout != stencil_layout) &&
315              !att_used[a] && !has_external_dst[i]) {
316             dst_implicit_dep = true;
317          }
318          att_used[a] = true;
319       }
320 
321       if (subpass->pResolveAttachments) {
322          for (unsigned j = 0; j < subpass->colorAttachmentCount; j++) {
323             uint32_t a = subpass->pResolveAttachments[j].attachment;
324             if (a == VK_ATTACHMENT_UNUSED)
325                continue;
326             if (att[a].finalLayout != subpass->pResolveAttachments[j].layout &&
327                 !att_used[a] && !has_external_dst[i])
328                dst_implicit_dep = true;
329             att_used[a] = true;
330          }
331       }
332 
333       const VkSubpassDescriptionDepthStencilResolve *ds_resolve =
334          vk_find_struct_const(subpass->pNext, SUBPASS_DESCRIPTION_DEPTH_STENCIL_RESOLVE);
335 
336       if (ds_resolve && ds_resolve->pDepthStencilResolveAttachment &&
337           ds_resolve->pDepthStencilResolveAttachment->attachment != VK_ATTACHMENT_UNUSED) {
338             uint32_t a = ds_resolve->pDepthStencilResolveAttachment->attachment;
339             uint32_t stencil_layout = vk_att_ref_stencil_layout(ds_resolve->pDepthStencilResolveAttachment, att);
340             uint32_t stencil_final_layout = vk_att_desc_stencil_layout(&att[a], true);
341 
342             if ((att[a].finalLayout != subpass->pDepthStencilAttachment->layout ||
343                 stencil_final_layout != stencil_layout) &&
344                 !att_used[a] && !has_external_src[i])
345                dst_implicit_dep = true;
346             att_used[a] = true;
347       }
348 
349       if (dst_implicit_dep) {
350          VkSubpassDependency2 dep = {
351             .srcSubpass = i,
352             .dstSubpass = VK_SUBPASS_EXTERNAL,
353             .srcStageMask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
354             .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
355             .srcAccessMask = VK_ACCESS_INPUT_ATTACHMENT_READ_BIT |
356                              VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
357                              VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
358                              VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
359                              VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT,
360             .dstAccessMask = 0,
361             .dependencyFlags = 0,
362          };
363          tu_render_pass_add_subpass_dep(pass, &dep);
364       }
365    }
366 
367    /* Handle UNDEFINED transitions, similar to the handling in tu_barrier().
368     * Assume that if an attachment has an initial layout of UNDEFINED, it gets
369     * transitioned eventually.
370     */
371    for (unsigned i = 0; i < info->attachmentCount; i++) {
372       if (layout_undefined(att[i].initialLayout)) {
373          if (vk_format_is_depth_or_stencil(att[i].format)) {
374             pass->subpasses[0].start_barrier.incoherent_ccu_depth = true;
375          } else {
376             pass->subpasses[0].start_barrier.incoherent_ccu_color = true;
377          }
378       }
379    }
380 }
381 
382 /* If an input attachment is used without an intervening write to the same
383  * attachment, then we can just use the original image, even in GMEM mode.
384  * This is an optimization, but it's also important because it allows us to
385  * avoid having to invalidate UCHE at the beginning of each tile due to it
386  * becoming invalid. The only reads of GMEM via UCHE should be after an
387  * earlier subpass modified it, which only works if there's already an
388  * appropriate dependency that will add the CACHE_INVALIDATE anyway. We
389  * don't consider this in the dependency code, so this is also required for
390  * correctness.
391  */
392 static void
tu_render_pass_patch_input_gmem(struct tu_render_pass * pass)393 tu_render_pass_patch_input_gmem(struct tu_render_pass *pass)
394 {
395    bool written[pass->attachment_count];
396 
397    memset(written, 0, sizeof(written));
398 
399    for (unsigned i = 0; i < pass->subpass_count; i++) {
400       struct tu_subpass *subpass = &pass->subpasses[i];
401 
402       for (unsigned j = 0; j < subpass->input_count; j++) {
403          uint32_t a = subpass->input_attachments[j].attachment;
404          if (a == VK_ATTACHMENT_UNUSED)
405             continue;
406          subpass->input_attachments[j].patch_input_gmem = written[a];
407       }
408 
409       for (unsigned j = 0; j < subpass->color_count; j++) {
410          uint32_t a = subpass->color_attachments[j].attachment;
411          if (a == VK_ATTACHMENT_UNUSED)
412             continue;
413          written[a] = true;
414 
415          for (unsigned k = 0; k < subpass->input_count; k++) {
416             if (subpass->input_attachments[k].attachment == a &&
417                 !subpass->input_attachments[k].patch_input_gmem) {
418                /* For render feedback loops, we have no idea whether the use
419                 * as a color attachment or input attachment will come first,
420                 * so we have to always use GMEM in case the color attachment
421                 * comes first and defensively invalidate UCHE in case the
422                 * input attachment comes first.
423                 */
424                subpass->feedback_invalidate = true;
425                subpass->input_attachments[k].patch_input_gmem = true;
426             }
427          }
428       }
429 
430       for (unsigned j = 0; j < subpass->resolve_count; j++) {
431          uint32_t a = subpass->resolve_attachments[j].attachment;
432          if (a == VK_ATTACHMENT_UNUSED)
433             continue;
434          written[a] = true;
435       }
436 
437       if (subpass->depth_stencil_attachment.attachment != VK_ATTACHMENT_UNUSED) {
438          written[subpass->depth_stencil_attachment.attachment] = true;
439          for (unsigned k = 0; k < subpass->input_count; k++) {
440             if (subpass->input_attachments[k].attachment ==
441                 subpass->depth_stencil_attachment.attachment &&
442                 !subpass->input_attachments[k].patch_input_gmem) {
443                subpass->feedback_invalidate = true;
444                subpass->input_attachments[k].patch_input_gmem = true;
445             }
446          }
447       }
448    }
449 }
450 
451 static void
tu_render_pass_check_feedback_loop(struct tu_render_pass * pass)452 tu_render_pass_check_feedback_loop(struct tu_render_pass *pass)
453 {
454    for (unsigned i = 0; i < pass->subpass_count; i++) {
455       struct tu_subpass *subpass = &pass->subpasses[i];
456 
457       for (unsigned j = 0; j < subpass->color_count; j++) {
458          uint32_t a = subpass->color_attachments[j].attachment;
459          if (a == VK_ATTACHMENT_UNUSED)
460             continue;
461          for (unsigned k = 0; k < subpass->input_count; k++) {
462             if (subpass->input_attachments[k].attachment == a) {
463                subpass->feedback_loop_color = true;
464                break;
465             }
466          }
467       }
468 
469       if (subpass->depth_stencil_attachment.attachment != VK_ATTACHMENT_UNUSED) {
470          for (unsigned k = 0; k < subpass->input_count; k++) {
471             if (subpass->input_attachments[k].attachment ==
472                 subpass->depth_stencil_attachment.attachment) {
473                subpass->feedback_loop_ds = true;
474                break;
475             }
476          }
477       }
478    }
479 }
480 
update_samples(struct tu_subpass * subpass,VkSampleCountFlagBits samples)481 static void update_samples(struct tu_subpass *subpass,
482                            VkSampleCountFlagBits samples)
483 {
484    assert(subpass->samples == 0 || subpass->samples == samples);
485    subpass->samples = samples;
486 }
487 
488 static void
tu_render_pass_calc_views(struct tu_render_pass * pass)489 tu_render_pass_calc_views(struct tu_render_pass *pass)
490 {
491    uint32_t view_mask = 0;
492    for (unsigned i = 0; i < pass->subpass_count; i++)
493       view_mask |= pass->subpasses[i].multiview_mask;
494    pass->num_views = util_last_bit(view_mask);
495 }
496 
497 /* If there are any multisample attachments with a load op other than
498  * clear/don't-care/none and store op other than don't-care/none, then we'd
499  * have to load/store a scaled multisample image which doesn't make much
500  * sense. Just disable fragment_density_map in this case.
501  */
502 static bool
tu_render_pass_disable_fdm(struct tu_render_pass * pass)503 tu_render_pass_disable_fdm(struct tu_render_pass *pass)
504 {
505    for (uint32_t i = 0; i < pass->attachment_count; i++) {
506       struct tu_render_pass_attachment *att = &pass->attachments[i];
507 
508       if (att->samples > 1 &&
509           (att->load || att->load_stencil ||
510            att->store || att->store_stencil)) {
511          return true;
512       }
513    }
514 
515    return false;
516 }
517 
518 static void
tu_render_pass_calc_hash(struct tu_render_pass * pass)519 tu_render_pass_calc_hash(struct tu_render_pass *pass)
520 {
521    #define HASH(hash, data) XXH64(&(data), sizeof(data), hash)
522 
523    uint64_t hash = HASH(0, pass->attachment_count);
524    hash = XXH64(pass->attachments,
525          pass->attachment_count * sizeof(pass->attachments[0]), hash);
526    hash = HASH(hash, pass->subpass_count);
527    for (unsigned i = 0; i < pass->subpass_count; i++) {
528       hash = HASH(hash, pass->subpasses[i].samples);
529       hash = HASH(hash, pass->subpasses[i].input_count);
530       hash = HASH(hash, pass->subpasses[i].color_count);
531       hash = HASH(hash, pass->subpasses[i].resolve_count);
532    }
533 
534    pass->autotune_hash = hash;
535 
536    #undef HASH
537 }
538 
539 static void
tu_render_pass_cond_config(struct tu_device * device,struct tu_render_pass * pass)540 tu_render_pass_cond_config(struct tu_device *device,
541                            struct tu_render_pass *pass)
542 {
543    /* With generic clears CmdClearAttachments isn't a draw and doesn't
544     * contribute to bin's geometry.
545     */
546    if (device->physical_device->info->a7xx.has_generic_clear)
547       return;
548 
549    for (uint32_t i = 0; i < pass->attachment_count; i++) {
550       struct tu_render_pass_attachment *att = &pass->attachments[i];
551 
552       /* When there is no geometry in a tile, and there is no other operations to
553        * read/write the tile, we can skip load/store.
554        *
555        * The only other operations are clear and resolve, which disable
556        * conditional load/store.
557        */
558       att->cond_load_allowed =
559          (att->load || att->load_stencil) && !att->clear_mask && !att->will_be_resolved;
560       att->cond_store_allowed =
561          (att->store || att->store_stencil) && !att->clear_mask;
562 
563       pass->has_cond_load_store |=
564          att->cond_load_allowed | att->cond_store_allowed;
565    }
566 }
567 
/* Compute, for each GMEM layout, how the on-chip GMEM is partitioned among
 * the attachments of this pass: each gmem attachment gets an offset and the
 * pass gets gmem_pixels[layout], the number of pixels a tile can hold.
 * gmem_pixels[layout] is left untouched (i.e. stays 0 / pre-zeroed) when no
 * valid partition exists for that layout.
 */
static void
tu_render_pass_gmem_config(struct tu_render_pass *pass,
                           const struct tu_physical_device *phys_dev)
{
   for (enum tu_gmem_layout layout = (enum tu_gmem_layout) 0;
        layout < TU_GMEM_LAYOUT_COUNT;
        layout = (enum tu_gmem_layout)(layout + 1)) {
      /* log2(gmem_align/(tile_align_w*tile_align_h)) */
      uint32_t block_align_shift = 3;
      uint32_t tile_align_w = phys_dev->info->tile_align_w;
      uint32_t gmem_align = (1 << block_align_shift) * tile_align_w *
                            phys_dev->info->tile_align_h;

      /* calculate total bytes per pixel */
      uint32_t cpp_total = 0;
      uint32_t min_cpp = UINT32_MAX;
      for (uint32_t i = 0; i < pass->attachment_count; i++) {
         struct tu_render_pass_attachment *att = &pass->attachments[i];
         bool cpp1 = (att->cpp == 1);
         if (att->gmem) {
            cpp_total += att->cpp;
            min_cpp = MIN2(min_cpp, att->cpp);

            /* take into account the separate stencil: */
            if (att->format == VK_FORMAT_D32_SFLOAT_S8_UINT) {
               /* the S8 plane is 1 byte per sample, so its cpp is the
                * sample count */
               min_cpp = MIN2(min_cpp, att->samples);
               cpp1 = (att->samples == 1);
               cpp_total += att->samples;
            }

            /* texture pitch must be aligned to 64, use a tile_align_w that is
             * a multiple of 64 for cpp==1 attachment to work as input
             * attachment
             */
            if (cpp1 && tile_align_w % 64 != 0) {
               /* doubling tile_align_w doubles the block size, so shrink
                * the shift to keep gmem_align consistent */
               tile_align_w *= 2;
               block_align_shift -= 1;
            }
         }
      }

      pass->tile_align_w = tile_align_w;
      pass->min_cpp = min_cpp;

      /* no gmem attachments */
      if (cpp_total == 0) {
         /* any value non-zero value so tiling config works with no
          * attachments
          */
         pass->gmem_pixels[layout] = 1024 * 1024;
         continue;
      }

      /* TODO: this algorithm isn't optimal
       * for example, two attachments with cpp = {1, 4}
       * result:  nblocks = {12, 52}, pixels = 196608
       * optimal: nblocks = {13, 51}, pixels = 208896
       */
      /* The FULL layout gets all usable GMEM; otherwise stop at the CCU
       * region so color cache and attachments can coexist. */
      uint32_t gmem_size = layout == TU_GMEM_LAYOUT_FULL
                              ? phys_dev->usable_gmem_size_gmem
                              : phys_dev->ccu_offset_gmem;
      uint32_t gmem_blocks = gmem_size / gmem_align;
      uint32_t offset = 0, pixels = ~0u, i;
      /* Greedily hand each gmem attachment a share of the blocks
       * proportional to its cpp; `pixels` tracks the tightest (smallest)
       * per-attachment pixel capacity. */
      for (i = 0; i < pass->attachment_count; i++) {
         struct tu_render_pass_attachment *att = &pass->attachments[i];
         if (!att->gmem)
            continue;

         att->gmem_offset[layout] = offset;

         uint32_t align = MAX2(1, att->cpp >> block_align_shift);
         uint32_t nblocks =
            MAX2((gmem_blocks * att->cpp / cpp_total) & ~(align - 1), align);

         /* ran out of blocks: this layout is infeasible */
         if (nblocks > gmem_blocks)
            break;

         gmem_blocks -= nblocks;
         cpp_total -= att->cpp;
         offset += nblocks * gmem_align;
         pixels = MIN2(pixels, nblocks * gmem_align / att->cpp);

         /* repeat the same for separate stencil */
         if (att->format == VK_FORMAT_D32_SFLOAT_S8_UINT) {
            att->gmem_offset_stencil[layout] = offset;

            /* note: for s8_uint, block align is always 1 */
            uint32_t nblocks = gmem_blocks * att->samples / cpp_total;
            if (nblocks > gmem_blocks)
               break;

            gmem_blocks -= nblocks;
            cpp_total -= att->samples;
            offset += nblocks * gmem_align;
            pixels = MIN2(pixels, nblocks * gmem_align / att->samples);
         }
      }

      /* if the loop didn't complete then the gmem config is impossible */
      if (i == pass->attachment_count)
         pass->gmem_pixels[layout] = pixels;
   }
}
671 
672 static void
tu_render_pass_bandwidth_config(struct tu_render_pass * pass)673 tu_render_pass_bandwidth_config(struct tu_render_pass *pass)
674 {
675    pass->gmem_bandwidth_per_pixel = 0;
676    pass->sysmem_bandwidth_per_pixel = 0;
677 
678    for (uint32_t i = 0; i < pass->attachment_count; i++) {
679       const struct tu_render_pass_attachment *att = &pass->attachments[i];
680 
681       /* approximate tu_load_gmem_attachment */
682       if (att->load)
683          pass->gmem_bandwidth_per_pixel += att->cpp;
684 
685       /* approximate tu_store_gmem_attachment */
686       if (att->store)
687          pass->gmem_bandwidth_per_pixel += att->cpp;
688 
689       /* approximate tu_clear_sysmem_attachment */
690       if (att->clear_mask)
691          pass->sysmem_bandwidth_per_pixel += att->cpp;
692 
693       /* approximate tu6_emit_sysmem_resolves */
694       if (att->will_be_resolved) {
695          pass->sysmem_bandwidth_per_pixel +=
696             att->cpp + att->cpp / att->samples;
697       }
698    }
699 }
700 
701 static void
attachment_set_ops(struct tu_device * device,struct tu_render_pass_attachment * att,VkAttachmentLoadOp load_op,VkAttachmentLoadOp stencil_load_op,VkAttachmentStoreOp store_op,VkAttachmentStoreOp stencil_store_op)702 attachment_set_ops(struct tu_device *device,
703                    struct tu_render_pass_attachment *att,
704                    VkAttachmentLoadOp load_op,
705                    VkAttachmentLoadOp stencil_load_op,
706                    VkAttachmentStoreOp store_op,
707                    VkAttachmentStoreOp stencil_store_op)
708 {
709    if (unlikely(device->instance->dont_care_as_load)) {
710       if (load_op == VK_ATTACHMENT_LOAD_OP_DONT_CARE)
711          load_op = VK_ATTACHMENT_LOAD_OP_LOAD;
712       if (stencil_load_op == VK_ATTACHMENT_LOAD_OP_DONT_CARE)
713          stencil_load_op = VK_ATTACHMENT_LOAD_OP_LOAD;
714    }
715 
716    /* load/store ops */
717    att->clear_mask =
718       (load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) ? VK_IMAGE_ASPECT_COLOR_BIT : 0;
719    att->load = (load_op == VK_ATTACHMENT_LOAD_OP_LOAD);
720    att->store = (store_op == VK_ATTACHMENT_STORE_OP_STORE);
721 
722    bool stencil_clear = (stencil_load_op == VK_ATTACHMENT_LOAD_OP_CLEAR);
723    bool stencil_load = (stencil_load_op == VK_ATTACHMENT_LOAD_OP_LOAD);
724    bool stencil_store = (stencil_store_op == VK_ATTACHMENT_STORE_OP_STORE);
725 
726    switch (att->format) {
727    case VK_FORMAT_D24_UNORM_S8_UINT: /* || stencil load/store */
728       if (att->clear_mask)
729          att->clear_mask = VK_IMAGE_ASPECT_DEPTH_BIT;
730       if (stencil_clear)
731          att->clear_mask |= VK_IMAGE_ASPECT_STENCIL_BIT;
732       if (stencil_load)
733          att->load = true;
734       if (stencil_store)
735          att->store = true;
736       /* If depth or stencil is passthrough (STORE_OP_NONE), then we need to
737        * preserve the contents when storing by loading even if neither
738        * component needs to be loaded.
739        */
740       if ((store_op == VK_ATTACHMENT_STORE_OP_NONE_EXT ||
741            stencil_store_op == VK_ATTACHMENT_STORE_OP_NONE_EXT) &&
742           att->store) {
743          att->load = true;
744       }
745       break;
746    case VK_FORMAT_S8_UINT: /* replace load/store with stencil load/store */
747       att->clear_mask = stencil_clear ? VK_IMAGE_ASPECT_COLOR_BIT : 0;
748       att->load = stencil_load;
749       att->store = stencil_store;
750       break;
751    case VK_FORMAT_D32_SFLOAT_S8_UINT: /* separate stencil */
752       if (att->clear_mask)
753          att->clear_mask = VK_IMAGE_ASPECT_DEPTH_BIT;
754       if (stencil_clear)
755          att->clear_mask |= VK_IMAGE_ASPECT_STENCIL_BIT;
756       if (stencil_load)
757          att->load_stencil = true;
758       if (stencil_store)
759          att->store_stencil = true;
760       break;
761    default:
762       break;
763    }
764 }
765 
766 static bool
is_depth_stencil_resolve_enabled(const VkSubpassDescriptionDepthStencilResolve * depth_stencil_resolve)767 is_depth_stencil_resolve_enabled(const VkSubpassDescriptionDepthStencilResolve *depth_stencil_resolve)
768 {
769    if (depth_stencil_resolve &&
770        depth_stencil_resolve->pDepthStencilResolveAttachment &&
771        depth_stencil_resolve->pDepthStencilResolveAttachment->attachment != VK_ATTACHMENT_UNUSED) {
772       return true;
773    }
774    return false;
775 }
776 
777 static void
tu_subpass_use_attachment(struct tu_render_pass * pass,int i,uint32_t a,const VkRenderPassCreateInfo2 * pCreateInfo)778 tu_subpass_use_attachment(struct tu_render_pass *pass, int i, uint32_t a, const VkRenderPassCreateInfo2 *pCreateInfo)
779 {
780    struct tu_subpass *subpass = &pass->subpasses[i];
781    struct tu_render_pass_attachment *att = &pass->attachments[a];
782 
783    att->gmem = true;
784    update_samples(subpass, pCreateInfo->pAttachments[a].samples);
785    att->clear_views |= subpass->multiview_mask;
786 
787    /* Loads and clears are emitted at the start of the subpass that needs them. */
788    att->first_subpass_idx = MIN2(i, att->first_subpass_idx);
789 
790    /* Stores are emitted at vkEndRenderPass() time. */
791    if (att->store || att->store_stencil)
792       att->last_subpass_idx = pass->subpass_count - 1;
793    else
794       att->last_subpass_idx = MAX2(i, att->last_subpass_idx);
795 }
796 
797 static void
tu_subpass_resolve_attachment(struct tu_render_pass * pass,int i,uint32_t dst_a,uint32_t src_a)798 tu_subpass_resolve_attachment(struct tu_render_pass *pass, int i, uint32_t dst_a, uint32_t src_a)
799 {
800    if (src_a != VK_ATTACHMENT_UNUSED && dst_a != VK_ATTACHMENT_UNUSED) {
801       struct tu_render_pass_attachment *src_att = &pass->attachments[src_a];
802       struct tu_render_pass_attachment *dst_att = &pass->attachments[dst_a];
803       src_att->will_be_resolved = true;
804 
805       src_att->first_subpass_idx = MIN2(i, src_att->first_subpass_idx);
806       src_att->last_subpass_idx = MAX2(i, src_att->last_subpass_idx);
807       dst_att->first_subpass_idx = MIN2(i, dst_att->first_subpass_idx);
808       dst_att->last_subpass_idx = MAX2(i, dst_att->last_subpass_idx);
809    }
810 }
811 
812 VKAPI_ATTR VkResult VKAPI_CALL
tu_CreateRenderPass2(VkDevice _device,const VkRenderPassCreateInfo2 * pCreateInfo,const VkAllocationCallbacks * pAllocator,VkRenderPass * pRenderPass)813 tu_CreateRenderPass2(VkDevice _device,
814                      const VkRenderPassCreateInfo2 *pCreateInfo,
815                      const VkAllocationCallbacks *pAllocator,
816                      VkRenderPass *pRenderPass)
817 {
818    VK_FROM_HANDLE(tu_device, device, _device);
819 
820    if (TU_DEBUG(DYNAMIC))
821       return vk_common_CreateRenderPass2(_device, pCreateInfo, pAllocator,
822                                          pRenderPass);
823 
824    struct tu_render_pass *pass;
825    size_t size;
826    size_t attachments_offset;
827 
828    assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2);
829 
830    size = sizeof(*pass);
831    size += pCreateInfo->subpassCount * sizeof(pass->subpasses[0]);
832    attachments_offset = size;
833    size += pCreateInfo->attachmentCount * sizeof(pass->attachments[0]);
834 
835    pass = (struct tu_render_pass *) vk_object_zalloc(
836       &device->vk, pAllocator, size, VK_OBJECT_TYPE_RENDER_PASS);
837    if (pass == NULL)
838       return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
839 
840    pass->attachment_count = pCreateInfo->attachmentCount;
841    pass->subpass_count = pCreateInfo->subpassCount;
842    pass->attachments =
843       (struct tu_render_pass_attachment *) ((char *) pass +
844                                             attachments_offset);
845 
846    for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) {
847       struct tu_render_pass_attachment *att = &pass->attachments[i];
848 
849       att->format = pCreateInfo->pAttachments[i].format;
850       att->samples = pCreateInfo->pAttachments[i].samples;
851       /* for d32s8, cpp is for the depth image, and
852        * att->samples will be used as the cpp for the stencil image
853        */
854       if (att->format == VK_FORMAT_D32_SFLOAT_S8_UINT)
855          att->cpp = 4 * att->samples;
856       else
857          att->cpp = vk_format_get_blocksize(att->format) * att->samples;
858       /* Initially not allocated into gmem, tu_subpass_use_attachment() will move it there. */
859       att->gmem = false;
860 
861       VkAttachmentLoadOp loadOp = pCreateInfo->pAttachments[i].loadOp;
862       VkAttachmentLoadOp stencilLoadOp = pCreateInfo->pAttachments[i].stencilLoadOp;
863 
864       attachment_set_ops(device, att, loadOp, stencilLoadOp,
865                          pCreateInfo->pAttachments[i].storeOp,
866                          pCreateInfo->pAttachments[i].stencilStoreOp);
867 
868       att->first_subpass_idx = VK_SUBPASS_EXTERNAL;
869       att->last_subpass_idx = 0;
870    }
871    uint32_t subpass_attachment_count = 0;
872    struct tu_subpass_attachment *p;
873    for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) {
874       const VkSubpassDescription2 *desc = &pCreateInfo->pSubpasses[i];
875       const VkSubpassDescriptionDepthStencilResolve *ds_resolve =
876          vk_find_struct_const(desc->pNext, SUBPASS_DESCRIPTION_DEPTH_STENCIL_RESOLVE);
877 
878       subpass_attachment_count +=
879          desc->inputAttachmentCount + desc->colorAttachmentCount +
880          (desc->pResolveAttachments ? desc->colorAttachmentCount : 0) +
881          (is_depth_stencil_resolve_enabled(ds_resolve) ? 1 : 0);
882    }
883 
884    if (subpass_attachment_count) {
885       pass->subpass_attachments = (struct tu_subpass_attachment *) vk_alloc2(
886          &device->vk.alloc, pAllocator,
887          subpass_attachment_count * sizeof(struct tu_subpass_attachment), 8,
888          VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
889       if (pass->subpass_attachments == NULL) {
890          vk_object_free(&device->vk, pAllocator, pass);
891          return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
892       }
893    } else
894       pass->subpass_attachments = NULL;
895 
896    const VkRenderPassFragmentDensityMapCreateInfoEXT *fdm_info =
897       vk_find_struct_const(pCreateInfo->pNext,
898                            RENDER_PASS_FRAGMENT_DENSITY_MAP_CREATE_INFO_EXT);
899    if (fdm_info && !tu_render_pass_disable_fdm(pass)) {
900       pass->fragment_density_map.attachment =
901          fdm_info->fragmentDensityMapAttachment.attachment;
902       pass->has_fdm = true;
903    } else {
904       pass->fragment_density_map.attachment = VK_ATTACHMENT_UNUSED;
905    }
906 
907    if (TU_DEBUG(FDM) && !tu_render_pass_disable_fdm(pass))
908       pass->has_fdm = true;
909 
910    p = pass->subpass_attachments;
911    for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) {
912       const VkSubpassDescription2 *desc = &pCreateInfo->pSubpasses[i];
913       const VkSubpassDescriptionDepthStencilResolve *ds_resolve =
914          vk_find_struct_const(desc->pNext, SUBPASS_DESCRIPTION_DEPTH_STENCIL_RESOLVE);
915       struct tu_subpass *subpass = &pass->subpasses[i];
916 
917       subpass->input_count = desc->inputAttachmentCount;
918       subpass->color_count = desc->colorAttachmentCount;
919       subpass->resolve_count = 0;
920       subpass->resolve_depth_stencil = is_depth_stencil_resolve_enabled(ds_resolve);
921       subpass->samples = (VkSampleCountFlagBits) 0;
922       subpass->srgb_cntl = 0;
923       subpass->legacy_dithering_enabled = desc->flags &
924          VK_SUBPASS_DESCRIPTION_ENABLE_LEGACY_DITHERING_BIT_EXT;
925 
926       const BITMASK_ENUM(VkSubpassDescriptionFlagBits) raster_order_access_bits =
927          VK_SUBPASS_DESCRIPTION_RASTERIZATION_ORDER_ATTACHMENT_COLOR_ACCESS_BIT_EXT |
928          VK_SUBPASS_DESCRIPTION_RASTERIZATION_ORDER_ATTACHMENT_DEPTH_ACCESS_BIT_EXT |
929          VK_SUBPASS_DESCRIPTION_RASTERIZATION_ORDER_ATTACHMENT_STENCIL_ACCESS_BIT_EXT;
930 
931       subpass->raster_order_attachment_access = raster_order_access_bits & desc->flags;
932 
933       subpass->multiview_mask = desc->viewMask;
934 
935       if (desc->inputAttachmentCount > 0) {
936          subpass->input_attachments = p;
937          p += desc->inputAttachmentCount;
938 
939          for (uint32_t j = 0; j < desc->inputAttachmentCount; j++) {
940             uint32_t a = desc->pInputAttachments[j].attachment;
941             subpass->input_attachments[j].attachment = a;
942             if (a != VK_ATTACHMENT_UNUSED) {
943                struct tu_render_pass_attachment *att = &pass->attachments[a];
944                /* Note: attachments only used as input attachments will be read
945                 * directly instead of through gmem, so we don't mark input
946                 * attachments as needing gmem.
947                 */
948                att->first_subpass_idx = MIN2(i, att->first_subpass_idx);
949                att->last_subpass_idx = MAX2(i, att->last_subpass_idx);
950             }
951          }
952       }
953 
954       if (desc->colorAttachmentCount > 0) {
955          subpass->color_attachments = p;
956          p += desc->colorAttachmentCount;
957 
958          for (uint32_t j = 0; j < desc->colorAttachmentCount; j++) {
959             uint32_t a = desc->pColorAttachments[j].attachment;
960             subpass->color_attachments[j].attachment = a;
961 
962             if (a != VK_ATTACHMENT_UNUSED) {
963                tu_subpass_use_attachment(pass, i, a, pCreateInfo);
964 
965                if (vk_format_is_srgb(pass->attachments[a].format))
966                   subpass->srgb_cntl |= 1 << j;
967             }
968          }
969       }
970 
971       subpass->resolve_attachments = (desc->pResolveAttachments || subpass->resolve_depth_stencil) ? p : NULL;
972       if (desc->pResolveAttachments) {
973          p += desc->colorAttachmentCount;
974          subpass->resolve_count += desc->colorAttachmentCount;
975          for (uint32_t j = 0; j < desc->colorAttachmentCount; j++) {
976             uint32_t a = desc->pResolveAttachments[j].attachment;
977             uint32_t src_a = desc->pColorAttachments[j].attachment;
978             subpass->resolve_attachments[j].attachment = a;
979 
980             tu_subpass_resolve_attachment(pass, i, a, src_a);
981          }
982       }
983 
984       if (subpass->resolve_depth_stencil) {
985          p++;
986          subpass->resolve_count++;
987          uint32_t a = ds_resolve->pDepthStencilResolveAttachment->attachment;
988          uint32_t src_a = desc->pDepthStencilAttachment->attachment;
989          subpass->resolve_attachments[subpass->resolve_count - 1].attachment = a;
990 
991          tu_subpass_resolve_attachment(pass, i, a, src_a);
992       }
993 
994       uint32_t a = desc->pDepthStencilAttachment ?
995          desc->pDepthStencilAttachment->attachment : VK_ATTACHMENT_UNUSED;
996       subpass->depth_stencil_attachment.attachment = a;
997       subpass->depth_used = a != VK_ATTACHMENT_UNUSED;
998       subpass->stencil_used = a != VK_ATTACHMENT_UNUSED;
999       if (a != VK_ATTACHMENT_UNUSED) {
1000          tu_subpass_use_attachment(pass, i, a, pCreateInfo);
1001       }
1002    }
1003 
1004    tu_render_pass_patch_input_gmem(pass);
1005 
1006    tu_render_pass_check_feedback_loop(pass);
1007 
1008    /* disable unused attachments */
1009    for (uint32_t i = 0; i < pass->attachment_count; i++) {
1010       struct tu_render_pass_attachment *att = &pass->attachments[i];
1011       if (!att->gmem) {
1012          att->clear_mask = 0;
1013          att->load = false;
1014       }
1015    }
1016 
1017    tu_render_pass_cond_config(device, pass);
1018    tu_render_pass_gmem_config(pass, device->physical_device);
1019    tu_render_pass_bandwidth_config(pass);
1020    tu_render_pass_calc_views(pass);
1021    tu_render_pass_calc_hash(pass);
1022 
1023    for (unsigned i = 0; i < pCreateInfo->dependencyCount; ++i) {
1024       tu_render_pass_add_subpass_dep(pass, &pCreateInfo->pDependencies[i]);
1025    }
1026 
1027    tu_render_pass_add_implicit_deps(pass, pCreateInfo);
1028 
1029    *pRenderPass = tu_render_pass_to_handle(pass);
1030 
1031    return VK_SUCCESS;
1032 }
1033 
1034 VKAPI_ATTR void VKAPI_CALL
tu_DestroyRenderPass(VkDevice _device,VkRenderPass _pass,const VkAllocationCallbacks * pAllocator)1035 tu_DestroyRenderPass(VkDevice _device,
1036                      VkRenderPass _pass,
1037                      const VkAllocationCallbacks *pAllocator)
1038 {
1039    VK_FROM_HANDLE(tu_device, device, _device);
1040 
1041    if (TU_DEBUG(DYNAMIC)) {
1042       vk_common_DestroyRenderPass(_device, _pass, pAllocator);
1043       return;
1044    }
1045 
1046    VK_FROM_HANDLE(tu_render_pass, pass, _pass);
1047 
1048    if (!_pass)
1049       return;
1050 
1051    vk_free2(&device->vk.alloc, pAllocator, pass->subpass_attachments);
1052    vk_object_free(&device->vk, pAllocator, pass);
1053 }
1054 
1055 static void
tu_setup_dynamic_attachment(struct tu_render_pass_attachment * att,struct tu_image_view * view)1056 tu_setup_dynamic_attachment(struct tu_render_pass_attachment *att,
1057                             struct tu_image_view *view)
1058 {
1059    *att = {};
1060    att->format = view->vk.format;
1061    att->samples = (VkSampleCountFlagBits) view->image->layout->nr_samples;
1062 
1063    /* for d32s8, cpp is for the depth image, and
1064     * att->samples will be used as the cpp for the stencil image
1065     */
1066    if (att->format == VK_FORMAT_D32_SFLOAT_S8_UINT)
1067       att->cpp = 4 * att->samples;
1068    else
1069       att->cpp = vk_format_get_blocksize(att->format) * att->samples;
1070 }
1071 
1072 void
tu_setup_dynamic_render_pass(struct tu_cmd_buffer * cmd_buffer,const VkRenderingInfo * info)1073 tu_setup_dynamic_render_pass(struct tu_cmd_buffer *cmd_buffer,
1074                              const VkRenderingInfo *info)
1075 {
1076    struct tu_device *device = cmd_buffer->device;
1077    struct tu_render_pass *pass = &cmd_buffer->dynamic_pass;
1078    struct tu_subpass *subpass = &cmd_buffer->dynamic_subpass;
1079 
1080    *pass = {};
1081    *subpass = {};
1082 
1083    pass->subpass_count = 1;
1084    pass->attachments = cmd_buffer->dynamic_rp_attachments;
1085 
1086    subpass->color_count = subpass->resolve_count = info->colorAttachmentCount;
1087    subpass->color_attachments = cmd_buffer->dynamic_color_attachments;
1088    subpass->resolve_attachments = cmd_buffer->dynamic_resolve_attachments;
1089    subpass->multiview_mask = info->viewMask;
1090    subpass->legacy_dithering_enabled = info->flags &
1091       VK_RENDERING_ENABLE_LEGACY_DITHERING_BIT_EXT;
1092 
1093    uint32_t a = 0;
1094    for (uint32_t i = 0; i < info->colorAttachmentCount; i++) {
1095       struct tu_render_pass_attachment *att = &pass->attachments[a];
1096       const VkRenderingAttachmentInfo *att_info = &info->pColorAttachments[i];
1097 
1098       if (att_info->imageView == VK_NULL_HANDLE) {
1099          subpass->color_attachments[i].attachment = VK_ATTACHMENT_UNUSED;
1100          subpass->resolve_attachments[i].attachment = VK_ATTACHMENT_UNUSED;
1101          continue;
1102       }
1103 
1104       VK_FROM_HANDLE(tu_image_view, view, att_info->imageView);
1105       tu_setup_dynamic_attachment(att, view);
1106       att->gmem = true;
1107       att->clear_views = info->viewMask;
1108       attachment_set_ops(device, att, att_info->loadOp,
1109                          VK_ATTACHMENT_LOAD_OP_DONT_CARE, att_info->storeOp,
1110                          VK_ATTACHMENT_STORE_OP_DONT_CARE);
1111       subpass->color_attachments[i].attachment = a++;
1112 
1113       subpass->samples = (VkSampleCountFlagBits) view->image->layout->nr_samples;
1114 
1115       if (vk_format_is_srgb(view->vk.format))
1116          subpass->srgb_cntl |= 1 << i;
1117 
1118       if (att_info->resolveMode != VK_RESOLVE_MODE_NONE) {
1119          struct tu_render_pass_attachment *resolve_att = &pass->attachments[a];
1120          VK_FROM_HANDLE(tu_image_view, resolve_view, att_info->resolveImageView);
1121          tu_setup_dynamic_attachment(resolve_att, resolve_view);
1122          resolve_att->gmem = false;
1123          attachment_set_ops(
1124             device, resolve_att, VK_ATTACHMENT_LOAD_OP_DONT_CARE,
1125             VK_ATTACHMENT_LOAD_OP_DONT_CARE, VK_ATTACHMENT_STORE_OP_STORE,
1126             VK_ATTACHMENT_STORE_OP_DONT_CARE);
1127          subpass->resolve_attachments[i].attachment = a++;
1128          att->will_be_resolved = true;
1129       } else {
1130          subpass->resolve_attachments[i].attachment = VK_ATTACHMENT_UNUSED;
1131          att->will_be_resolved = false;
1132       }
1133    }
1134 
1135    if (info->pDepthAttachment || info->pStencilAttachment) {
1136       const struct VkRenderingAttachmentInfo *common_info =
1137          (info->pDepthAttachment &&
1138           info->pDepthAttachment->imageView != VK_NULL_HANDLE) ?
1139          info->pDepthAttachment :
1140          info->pStencilAttachment;
1141 
1142       if (common_info && common_info->imageView != VK_NULL_HANDLE) {
1143          VK_FROM_HANDLE(tu_image_view, view, common_info->imageView);
1144 
1145          struct tu_render_pass_attachment *att = &pass->attachments[a];
1146          tu_setup_dynamic_attachment(att, view);
1147          att->gmem = true;
1148          att->clear_views = info->viewMask;
1149          subpass->depth_stencil_attachment.attachment = a++;
1150 
1151          subpass->depth_used = (bool) info->pDepthAttachment;
1152          subpass->stencil_used = (bool) info->pStencilAttachment;
1153 
1154          attachment_set_ops(
1155             device, att,
1156             (info->pDepthAttachment && info->pDepthAttachment->imageView) ?
1157                info->pDepthAttachment->loadOp : VK_ATTACHMENT_LOAD_OP_NONE_EXT,
1158             (info->pStencilAttachment && info->pStencilAttachment->imageView) ?
1159                info->pStencilAttachment->loadOp : VK_ATTACHMENT_LOAD_OP_NONE_EXT,
1160             (info->pDepthAttachment && info->pDepthAttachment->imageView) ?
1161                info->pDepthAttachment->storeOp : VK_ATTACHMENT_STORE_OP_NONE_EXT,
1162             (info->pStencilAttachment && info->pStencilAttachment->imageView) ?
1163                info->pStencilAttachment->storeOp : VK_ATTACHMENT_STORE_OP_NONE_EXT);
1164 
1165          subpass->samples = (VkSampleCountFlagBits) view->image->layout->nr_samples;
1166 
1167          if (common_info->resolveMode != VK_RESOLVE_MODE_NONE) {
1168             unsigned i = subpass->resolve_count++;
1169             struct tu_render_pass_attachment *resolve_att = &pass->attachments[a];
1170             VK_FROM_HANDLE(tu_image_view, resolve_view,
1171                            common_info->resolveImageView);
1172             tu_setup_dynamic_attachment(resolve_att, resolve_view);
1173             resolve_att->gmem = false;
1174             attachment_set_ops(device, resolve_att,
1175                                VK_ATTACHMENT_LOAD_OP_DONT_CARE,
1176                                VK_ATTACHMENT_LOAD_OP_DONT_CARE,
1177                                VK_ATTACHMENT_STORE_OP_STORE,
1178                                VK_ATTACHMENT_STORE_OP_STORE);
1179             subpass->resolve_attachments[i].attachment = a++;
1180             att->will_be_resolved = true;
1181             subpass->resolve_depth_stencil = true;
1182          } else {
1183             att->will_be_resolved = false;
1184          }
1185       } else {
1186          subpass->depth_stencil_attachment.attachment = VK_ATTACHMENT_UNUSED;
1187       }
1188    } else {
1189       subpass->depth_stencil_attachment.attachment = VK_ATTACHMENT_UNUSED;
1190    }
1191 
1192    pass->attachment_count = a;
1193 
1194    const VkRenderingFragmentDensityMapAttachmentInfoEXT *fdm_info =
1195       vk_find_struct_const(info->pNext,
1196                            RENDERING_FRAGMENT_DENSITY_MAP_ATTACHMENT_INFO_EXT);
1197    if (fdm_info && fdm_info->imageView != VK_NULL_HANDLE &&
1198        !tu_render_pass_disable_fdm(pass)) {
1199       VK_FROM_HANDLE(tu_image_view, view, fdm_info->imageView);
1200 
1201       struct tu_render_pass_attachment *att = &pass->attachments[a];
1202       tu_setup_dynamic_attachment(att, view);
1203       pass->fragment_density_map.attachment = a++;
1204       attachment_set_ops(device, att,
1205                          VK_ATTACHMENT_LOAD_OP_DONT_CARE,
1206                          VK_ATTACHMENT_LOAD_OP_DONT_CARE,
1207                          VK_ATTACHMENT_STORE_OP_DONT_CARE,
1208                          VK_ATTACHMENT_STORE_OP_DONT_CARE);
1209       pass->has_fdm = true;
1210    } else {
1211       pass->fragment_density_map.attachment = VK_ATTACHMENT_UNUSED;
1212       pass->has_fdm = false;
1213    }
1214 
1215    if (TU_DEBUG(FDM) && !tu_render_pass_disable_fdm(pass))
1216       pass->has_fdm = true;
1217 
1218    pass->attachment_count = a;
1219 
1220    tu_render_pass_cond_config(device, pass);
1221    tu_render_pass_gmem_config(pass, device->physical_device);
1222    tu_render_pass_bandwidth_config(pass);
1223    tu_render_pass_calc_views(pass);
1224    tu_render_pass_calc_hash(pass);
1225 }
1226 
1227 void
tu_setup_dynamic_inheritance(struct tu_cmd_buffer * cmd_buffer,const VkCommandBufferInheritanceRenderingInfo * info)1228 tu_setup_dynamic_inheritance(struct tu_cmd_buffer *cmd_buffer,
1229                              const VkCommandBufferInheritanceRenderingInfo *info)
1230 {
1231    struct tu_render_pass *pass = &cmd_buffer->dynamic_pass;
1232    struct tu_subpass *subpass = &cmd_buffer->dynamic_subpass;
1233 
1234    pass->subpass_count = 1;
1235    pass->attachments = cmd_buffer->dynamic_rp_attachments;
1236    pass->fragment_density_map.attachment = VK_ATTACHMENT_UNUSED;
1237 
1238    subpass->color_count = info->colorAttachmentCount;
1239    subpass->resolve_count = 0;
1240    subpass->resolve_depth_stencil = false;
1241    subpass->color_attachments = cmd_buffer->dynamic_color_attachments;
1242    subpass->resolve_attachments = NULL;
1243    subpass->feedback_invalidate = false;
1244    subpass->feedback_loop_ds = subpass->feedback_loop_color = false;
1245    subpass->input_count = 0;
1246    subpass->samples = (VkSampleCountFlagBits) 0;
1247    subpass->srgb_cntl = 0;
1248    subpass->raster_order_attachment_access = false;
1249    subpass->multiview_mask = info->viewMask;
1250    subpass->samples = info->rasterizationSamples;
1251 
1252    unsigned a = 0;
1253    for (unsigned i = 0; i < info->colorAttachmentCount; i++) {
1254       struct tu_render_pass_attachment *att = &pass->attachments[a];
1255       VkFormat format = info->pColorAttachmentFormats[i];
1256 
1257       if (format == VK_FORMAT_UNDEFINED) {
1258          subpass->color_attachments[i].attachment = VK_ATTACHMENT_UNUSED;
1259          continue;
1260       }
1261 
1262       att->format = format;
1263       att->samples = info->rasterizationSamples;
1264       subpass->samples = info->rasterizationSamples;
1265       subpass->color_attachments[i].attachment = a++;
1266 
1267       /* conservatively assume that the attachment may be conditionally
1268        * loaded/stored.
1269        */
1270       att->cond_load_allowed = att->cond_store_allowed = true;
1271    }
1272 
1273    if (info->depthAttachmentFormat != VK_FORMAT_UNDEFINED ||
1274        info->stencilAttachmentFormat != VK_FORMAT_UNDEFINED) {
1275       struct tu_render_pass_attachment *att = &pass->attachments[a];
1276       att->format = info->depthAttachmentFormat != VK_FORMAT_UNDEFINED ?
1277          info->depthAttachmentFormat : info->stencilAttachmentFormat;
1278       att->samples = info->rasterizationSamples;
1279       subpass->depth_stencil_attachment.attachment = a++;
1280       subpass->depth_used =
1281          info->depthAttachmentFormat != VK_FORMAT_UNDEFINED;
1282       subpass->stencil_used =
1283          info->stencilAttachmentFormat != VK_FORMAT_UNDEFINED;
1284       att->cond_load_allowed = att->cond_store_allowed = true;
1285    } else {
1286       subpass->depth_stencil_attachment.attachment = VK_ATTACHMENT_UNUSED;
1287       subpass->depth_used = false;
1288       subpass->stencil_used = false;
1289    }
1290 
1291    tu_render_pass_calc_views(pass);
1292 }
1293 
1294 VKAPI_ATTR void VKAPI_CALL
tu_GetRenderAreaGranularity(VkDevice _device,VkRenderPass renderPass,VkExtent2D * pGranularity)1295 tu_GetRenderAreaGranularity(VkDevice _device,
1296                             VkRenderPass renderPass,
1297                             VkExtent2D *pGranularity)
1298 {
1299    VK_FROM_HANDLE(tu_device, device, _device);
1300    pGranularity->width = device->physical_device->info->gmem_align_w;
1301    pGranularity->height = device->physical_device->info->gmem_align_h;
1302 }
1303 
1304 VKAPI_ATTR void VKAPI_CALL
tu_GetRenderingAreaGranularityKHR(VkDevice _device,const VkRenderingAreaInfoKHR * pRenderingAreaInfo,VkExtent2D * pGranularity)1305 tu_GetRenderingAreaGranularityKHR(VkDevice _device,
1306                                   const VkRenderingAreaInfoKHR *pRenderingAreaInfo,
1307                                   VkExtent2D *pGranularity)
1308 {
1309    VK_FROM_HANDLE(tu_device, device, _device);
1310    pGranularity->width = device->physical_device->info->gmem_align_w;
1311    pGranularity->height = device->physical_device->info->gmem_align_h;
1312 }
1313 
1314 uint32_t
tu_subpass_get_attachment_to_resolve(const struct tu_subpass * subpass,uint32_t index)1315 tu_subpass_get_attachment_to_resolve(const struct tu_subpass *subpass, uint32_t index)
1316 {
1317    if (subpass->resolve_depth_stencil &&
1318        index == (subpass->resolve_count - 1))
1319       return subpass->depth_stencil_attachment.attachment;
1320 
1321    return subpass->color_attachments[index].attachment;
1322 }
1323