1 /*
2 * Copyright © 2016 Red Hat.
3 * Copyright © 2016 Bas Nieuwenhuizen
4 * SPDX-License-Identifier: MIT
5 *
6 * based in part on anv driver which is:
7 * Copyright © 2015 Intel Corporation
8 */
9
10 #include "tu_pass.h"
11
12 #include "vk_util.h"
13 #include "vk_render_pass.h"
14
15 #include "tu_cmd_buffer.h"
16 #include "tu_device.h"
17 #include "tu_image.h"
18
19 static void
tu_render_pass_add_subpass_dep(struct tu_render_pass * pass,const VkSubpassDependency2 * dep)20 tu_render_pass_add_subpass_dep(struct tu_render_pass *pass,
21 const VkSubpassDependency2 *dep)
22 {
23 uint32_t src = dep->srcSubpass;
24 uint32_t dst = dep->dstSubpass;
25
26 /* Ignore subpass self-dependencies as they allow the app to call
27 * vkCmdPipelineBarrier() inside the render pass and the driver should only
28 * do the barrier when called, not when starting the render pass.
29 *
30 * We cannot decide whether to allow gmem rendering before a barrier
31 * is actually emitted, so we delay the decision until then.
32 */
33 if (src == dst)
34 return;
35
36 /* From the Vulkan 1.2.195 spec:
37 *
38 * "If an instance of VkMemoryBarrier2 is included in the pNext chain, srcStageMask,
39 * dstStageMask, srcAccessMask, and dstAccessMask parameters are ignored. The synchronization
40 * and access scopes instead are defined by the parameters of VkMemoryBarrier2."
41 */
42 const VkMemoryBarrier2 *barrier =
43 vk_find_struct_const(dep->pNext, MEMORY_BARRIER_2);
44 VkPipelineStageFlags2 src_stage_mask = barrier ? barrier->srcStageMask : dep->srcStageMask;
45 VkAccessFlags2 src_access_mask = barrier ? barrier->srcAccessMask : dep->srcAccessMask;
46 VkPipelineStageFlags2 dst_stage_mask = barrier ? barrier->dstStageMask : dep->dstStageMask;
47 VkAccessFlags2 dst_access_mask = barrier ? barrier->dstAccessMask : dep->dstAccessMask;
48
49 /* We can conceptually break down the process of rewriting a sysmem
50 * renderpass into a gmem one into two parts:
51 *
52 * 1. Split each draw and multisample resolve into N copies, one for each
53 * bin. (If hardware binning, add one more copy where the FS is disabled
54 * for the binning pass). This is always allowed because the vertex stage
55 * is allowed to run an arbitrary number of times and there are no extra
56 * ordering constraints within a draw.
57 * 2. Take the last copy of the second-to-last draw and slide it down to
58 * before the last copy of the last draw. Repeat for each earlier draw
59 * until the draw pass for the last bin is complete, then repeat for each
60 * earlier bin until we finish with the first bin.
61 *
62 * During this rearranging process, we can't slide draws past each other in
63 * a way that breaks the subpass dependencies. For each draw, we must slide
64 * it past (copies of) the rest of the draws in the renderpass. We can
65 * slide a draw past another if there isn't a dependency between them, or
66 * if the dependenc(ies) are dependencies between framebuffer-space stages
67 * only with the BY_REGION bit set. Note that this includes
68 * self-dependencies, since these may result in pipeline barriers that also
69 * break the rearranging process.
70 */
71
72 if (!vk_subpass_dependency_is_fb_local(dep, src_stage_mask, dst_stage_mask)) {
73 perf_debug((struct tu_device *)pass->base.device, "Disabling gmem rendering due to invalid subpass dependency");
74 for (int i = 0; i < ARRAY_SIZE(pass->gmem_pixels); i++)
75 pass->gmem_pixels[i] = 0;
76 }
77
78 struct tu_subpass_barrier *dst_barrier;
79 if (dst == VK_SUBPASS_EXTERNAL) {
80 dst_barrier = &pass->end_barrier;
81 } else {
82 dst_barrier = &pass->subpasses[dst].start_barrier;
83 }
84
85 dst_barrier->src_stage_mask |= src_stage_mask;
86 dst_barrier->dst_stage_mask |= dst_stage_mask;
87 dst_barrier->src_access_mask |= src_access_mask;
88 dst_barrier->dst_access_mask |= dst_access_mask;
89 }
90
91 /* We currently only care about undefined layouts, because we have to
92 * flush/invalidate CCU for those. PREINITIALIZED is the same thing as
93 * UNDEFINED for anything not linear tiled, but we don't know yet whether the
94 * images used are tiled, so just assume they are.
95 */
96
97 static bool
layout_undefined(VkImageLayout layout)98 layout_undefined(VkImageLayout layout)
99 {
100 return layout == VK_IMAGE_LAYOUT_UNDEFINED ||
101 layout == VK_IMAGE_LAYOUT_PREINITIALIZED;
102 }
103
104 /* This implements the following bit of spec text:
105 *
106 * If there is no subpass dependency from VK_SUBPASS_EXTERNAL to the
107 * first subpass that uses an attachment, then an implicit subpass
108 * dependency exists from VK_SUBPASS_EXTERNAL to the first subpass it is
109 * used in. The implicit subpass dependency only exists if there
110 * exists an automatic layout transition away from initialLayout.
111 * The subpass dependency operates as if defined with the
112 * following parameters:
113 *
114 * VkSubpassDependency implicitDependency = {
115 * .srcSubpass = VK_SUBPASS_EXTERNAL;
116 * .dstSubpass = firstSubpass; // First subpass attachment is used in
117 * .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
118 * .dstStageMask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT;
119 * .srcAccessMask = 0;
120 * .dstAccessMask = VK_ACCESS_INPUT_ATTACHMENT_READ_BIT |
121 * VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
122 * VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
123 * VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
124 * VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
125 * .dependencyFlags = 0;
126 * };
127 *
128 * Similarly, if there is no subpass dependency from the last subpass
129 * that uses an attachment to VK_SUBPASS_EXTERNAL, then an implicit
130 * subpass dependency exists from the last subpass it is used in to
131 * VK_SUBPASS_EXTERNAL. The implicit subpass dependency only exists
132 * if there exists an automatic layout transition into finalLayout.
133 * The subpass dependency operates as if defined with the following
134 * parameters:
135 *
136 * VkSubpassDependency implicitDependency = {
137 * .srcSubpass = lastSubpass; // Last subpass attachment is used in
138 * .dstSubpass = VK_SUBPASS_EXTERNAL;
139 * .srcStageMask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT;
140 * .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT;
141 * .srcAccessMask = VK_ACCESS_INPUT_ATTACHMENT_READ_BIT |
142 * VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
143 * VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
144 * VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
145 * VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
146 * .dstAccessMask = 0;
147 * .dependencyFlags = 0;
148 * };
149 *
150 * Note: currently this is the only use we have for layout transitions,
151 * besides needing to invalidate CCU at the beginning, so we also flag
152 * transitions from UNDEFINED here.
153 */
154 static void
tu_render_pass_add_implicit_deps(struct tu_render_pass * pass,const VkRenderPassCreateInfo2 * info)155 tu_render_pass_add_implicit_deps(struct tu_render_pass *pass,
156 const VkRenderPassCreateInfo2 *info)
157 {
158 const VkAttachmentDescription2* att = info->pAttachments;
159 bool has_external_src[info->subpassCount];
160 bool has_external_dst[info->subpassCount];
161 bool att_used[pass->attachment_count];
162
163 memset(has_external_src, 0, sizeof(has_external_src));
164 memset(has_external_dst, 0, sizeof(has_external_dst));
165
166 for (uint32_t i = 0; i < info->dependencyCount; i++) {
167 uint32_t src = info->pDependencies[i].srcSubpass;
168 uint32_t dst = info->pDependencies[i].dstSubpass;
169
170 if (src == dst)
171 continue;
172
173 if (src == VK_SUBPASS_EXTERNAL)
174 has_external_src[dst] = true;
175 if (dst == VK_SUBPASS_EXTERNAL)
176 has_external_dst[src] = true;
177 }
178
179 memset(att_used, 0, sizeof(att_used));
180
181 for (unsigned i = 0; i < info->subpassCount; i++) {
182 const VkSubpassDescription2 *subpass = &info->pSubpasses[i];
183 bool src_implicit_dep = false;
184
185 for (unsigned j = 0; j < subpass->inputAttachmentCount; j++) {
186 uint32_t a = subpass->pInputAttachments[j].attachment;
187
188 if (a == VK_ATTACHMENT_UNUSED)
189 continue;
190
191 uint32_t stencil_layout = vk_format_has_stencil(att[a].format) ?
192 vk_att_ref_stencil_layout(&subpass->pInputAttachments[j], att) :
193 VK_IMAGE_LAYOUT_UNDEFINED;
194 uint32_t stencil_initial_layout = vk_att_desc_stencil_layout(&att[a], false);
195
196 if ((att[a].initialLayout != subpass->pInputAttachments[j].layout ||
197 stencil_initial_layout != stencil_layout) &&
198 !att_used[a] && !has_external_src[i])
199 src_implicit_dep = true;
200 att_used[a] = true;
201 }
202
203 for (unsigned j = 0; j < subpass->colorAttachmentCount; j++) {
204 uint32_t a = subpass->pColorAttachments[j].attachment;
205 if (a == VK_ATTACHMENT_UNUSED)
206 continue;
207 if (att[a].initialLayout != subpass->pColorAttachments[j].layout &&
208 !att_used[a] && !has_external_src[i])
209 src_implicit_dep = true;
210 att_used[a] = true;
211 }
212
213 if (subpass->pDepthStencilAttachment &&
214 subpass->pDepthStencilAttachment->attachment != VK_ATTACHMENT_UNUSED) {
215 uint32_t a = subpass->pDepthStencilAttachment->attachment;
216 uint32_t stencil_layout = vk_att_ref_stencil_layout(subpass->pDepthStencilAttachment, att);
217 uint32_t stencil_initial_layout = vk_att_desc_stencil_layout(&att[a], false);
218
219 if ((att[a].initialLayout != subpass->pDepthStencilAttachment->layout ||
220 stencil_initial_layout != stencil_layout) &&
221 !att_used[a] && !has_external_src[i]) {
222 src_implicit_dep = true;
223 }
224 att_used[a] = true;
225 }
226
227 if (subpass->pResolveAttachments) {
228 for (unsigned j = 0; j < subpass->colorAttachmentCount; j++) {
229 uint32_t a = subpass->pResolveAttachments[j].attachment;
230 if (a == VK_ATTACHMENT_UNUSED)
231 continue;
232 if (att[a].initialLayout != subpass->pResolveAttachments[j].layout &&
233 !att_used[a] && !has_external_src[i])
234 src_implicit_dep = true;
235 att_used[a] = true;
236 }
237 }
238
239 const VkSubpassDescriptionDepthStencilResolve *ds_resolve =
240 vk_find_struct_const(subpass->pNext, SUBPASS_DESCRIPTION_DEPTH_STENCIL_RESOLVE);
241
242 if (ds_resolve && ds_resolve->pDepthStencilResolveAttachment &&
243 ds_resolve->pDepthStencilResolveAttachment->attachment != VK_ATTACHMENT_UNUSED) {
244 uint32_t a = ds_resolve->pDepthStencilResolveAttachment->attachment;
245 uint32_t stencil_layout = vk_att_ref_stencil_layout(ds_resolve->pDepthStencilResolveAttachment, att);
246 uint32_t stencil_initial_layout = vk_att_desc_stencil_layout(&att[a], false);
247
248 if ((att[a].initialLayout != subpass->pDepthStencilAttachment->layout ||
249 stencil_initial_layout != stencil_layout) &&
250 !att_used[a] && !has_external_src[i])
251 src_implicit_dep = true;
252 att_used[a] = true;
253 }
254
255 if (src_implicit_dep) {
256 const VkSubpassDependency2 dep = {
257 .srcSubpass = VK_SUBPASS_EXTERNAL,
258 .dstSubpass = i,
259 .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
260 .dstStageMask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
261 .srcAccessMask = 0,
262 .dstAccessMask = VK_ACCESS_INPUT_ATTACHMENT_READ_BIT |
263 VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
264 VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
265 VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
266 VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT,
267 .dependencyFlags = 0,
268 };
269
270 tu_render_pass_add_subpass_dep(pass, &dep);
271 }
272 }
273
274 memset(att_used, 0, sizeof(att_used));
275
276 for (int i = info->subpassCount - 1; i >= 0; i--) {
277 const VkSubpassDescription2 *subpass = &info->pSubpasses[i];
278 bool dst_implicit_dep = false;
279
280 for (unsigned j = 0; j < subpass->inputAttachmentCount; j++) {
281 uint32_t a = subpass->pInputAttachments[j].attachment;
282 if (a == VK_ATTACHMENT_UNUSED)
283 continue;
284
285 uint32_t stencil_layout = vk_format_has_stencil(att[a].format) ?
286 vk_att_ref_stencil_layout(&subpass->pInputAttachments[j], att) :
287 VK_IMAGE_LAYOUT_UNDEFINED;
288 uint32_t stencil_final_layout = vk_att_desc_stencil_layout(&att[a], true);
289
290 if ((att[a].finalLayout != subpass->pInputAttachments[j].layout ||
291 stencil_final_layout != stencil_layout) &&
292 !att_used[a] && !has_external_dst[i])
293 dst_implicit_dep = true;
294 att_used[a] = true;
295 }
296
297 for (unsigned j = 0; j < subpass->colorAttachmentCount; j++) {
298 uint32_t a = subpass->pColorAttachments[j].attachment;
299 if (a == VK_ATTACHMENT_UNUSED)
300 continue;
301 if (att[a].finalLayout != subpass->pColorAttachments[j].layout &&
302 !att_used[a] && !has_external_dst[i])
303 dst_implicit_dep = true;
304 att_used[a] = true;
305 }
306
307 if (subpass->pDepthStencilAttachment &&
308 subpass->pDepthStencilAttachment->attachment != VK_ATTACHMENT_UNUSED) {
309 uint32_t a = subpass->pDepthStencilAttachment->attachment;
310 uint32_t stencil_layout = vk_att_ref_stencil_layout(subpass->pDepthStencilAttachment, att);
311 uint32_t stencil_final_layout = vk_att_desc_stencil_layout(&att[a], true);
312
313 if ((att[a].finalLayout != subpass->pDepthStencilAttachment->layout ||
314 stencil_final_layout != stencil_layout) &&
315 !att_used[a] && !has_external_dst[i]) {
316 dst_implicit_dep = true;
317 }
318 att_used[a] = true;
319 }
320
321 if (subpass->pResolveAttachments) {
322 for (unsigned j = 0; j < subpass->colorAttachmentCount; j++) {
323 uint32_t a = subpass->pResolveAttachments[j].attachment;
324 if (a == VK_ATTACHMENT_UNUSED)
325 continue;
326 if (att[a].finalLayout != subpass->pResolveAttachments[j].layout &&
327 !att_used[a] && !has_external_dst[i])
328 dst_implicit_dep = true;
329 att_used[a] = true;
330 }
331 }
332
333 const VkSubpassDescriptionDepthStencilResolve *ds_resolve =
334 vk_find_struct_const(subpass->pNext, SUBPASS_DESCRIPTION_DEPTH_STENCIL_RESOLVE);
335
336 if (ds_resolve && ds_resolve->pDepthStencilResolveAttachment &&
337 ds_resolve->pDepthStencilResolveAttachment->attachment != VK_ATTACHMENT_UNUSED) {
338 uint32_t a = ds_resolve->pDepthStencilResolveAttachment->attachment;
339 uint32_t stencil_layout = vk_att_ref_stencil_layout(ds_resolve->pDepthStencilResolveAttachment, att);
340 uint32_t stencil_final_layout = vk_att_desc_stencil_layout(&att[a], true);
341
342 if ((att[a].finalLayout != subpass->pDepthStencilAttachment->layout ||
343 stencil_final_layout != stencil_layout) &&
344 !att_used[a] && !has_external_src[i])
345 dst_implicit_dep = true;
346 att_used[a] = true;
347 }
348
349 if (dst_implicit_dep) {
350 VkSubpassDependency2 dep = {
351 .srcSubpass = i,
352 .dstSubpass = VK_SUBPASS_EXTERNAL,
353 .srcStageMask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
354 .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
355 .srcAccessMask = VK_ACCESS_INPUT_ATTACHMENT_READ_BIT |
356 VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
357 VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
358 VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
359 VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT,
360 .dstAccessMask = 0,
361 .dependencyFlags = 0,
362 };
363 tu_render_pass_add_subpass_dep(pass, &dep);
364 }
365 }
366
367 /* Handle UNDEFINED transitions, similar to the handling in tu_barrier().
368 * Assume that if an attachment has an initial layout of UNDEFINED, it gets
369 * transitioned eventually.
370 */
371 for (unsigned i = 0; i < info->attachmentCount; i++) {
372 if (layout_undefined(att[i].initialLayout)) {
373 if (vk_format_is_depth_or_stencil(att[i].format)) {
374 pass->subpasses[0].start_barrier.incoherent_ccu_depth = true;
375 } else {
376 pass->subpasses[0].start_barrier.incoherent_ccu_color = true;
377 }
378 }
379 }
380 }
381
382 /* If an input attachment is used without an intervening write to the same
383 * attachment, then we can just use the original image, even in GMEM mode.
384 * This is an optimization, but it's also important because it allows us to
385 * avoid having to invalidate UCHE at the beginning of each tile due to it
386 * becoming invalid. The only reads of GMEM via UCHE should be after an
387 * earlier subpass modified it, which only works if there's already an
388 * appropriate dependency that will add the CACHE_INVALIDATE anyway. We
389 * don't consider this in the dependency code, so this is also required for
390 * correctness.
391 */
static void
tu_render_pass_patch_input_gmem(struct tu_render_pass *pass)
{
   /* Tracks, per attachment, whether any earlier (or current) subpass has
    * written it via color, resolve, or depth/stencil use.
    */
   bool written[pass->attachment_count];

   memset(written, 0, sizeof(written));

   for (unsigned i = 0; i < pass->subpass_count; i++) {
      struct tu_subpass *subpass = &pass->subpasses[i];

      /* An input attachment only needs to read from GMEM if something wrote
       * it earlier in the pass; otherwise the original image can be sampled
       * directly (see the comment above this function).
       */
      for (unsigned j = 0; j < subpass->input_count; j++) {
         uint32_t a = subpass->input_attachments[j].attachment;
         if (a == VK_ATTACHMENT_UNUSED)
            continue;
         subpass->input_attachments[j].patch_input_gmem = written[a];
      }

      /* Color writes in this same subpass: if the attachment is also an
       * input attachment here, this is a feedback loop.
       */
      for (unsigned j = 0; j < subpass->color_count; j++) {
         uint32_t a = subpass->color_attachments[j].attachment;
         if (a == VK_ATTACHMENT_UNUSED)
            continue;
         written[a] = true;

         for (unsigned k = 0; k < subpass->input_count; k++) {
            if (subpass->input_attachments[k].attachment == a &&
                !subpass->input_attachments[k].patch_input_gmem) {
               /* For render feedback loops, we have no idea whether the use
                * as a color attachment or input attachment will come first,
                * so we have to always use GMEM in case the color attachment
                * comes first and defensively invalidate UCHE in case the
                * input attachment comes first.
                */
               subpass->feedback_invalidate = true;
               subpass->input_attachments[k].patch_input_gmem = true;
            }
         }
      }

      /* Resolve destinations count as writes for later subpasses. */
      for (unsigned j = 0; j < subpass->resolve_count; j++) {
         uint32_t a = subpass->resolve_attachments[j].attachment;
         if (a == VK_ATTACHMENT_UNUSED)
            continue;
         written[a] = true;
      }

      /* Same feedback-loop handling for the depth/stencil attachment. */
      if (subpass->depth_stencil_attachment.attachment != VK_ATTACHMENT_UNUSED) {
         written[subpass->depth_stencil_attachment.attachment] = true;
         for (unsigned k = 0; k < subpass->input_count; k++) {
            if (subpass->input_attachments[k].attachment ==
                subpass->depth_stencil_attachment.attachment &&
                !subpass->input_attachments[k].patch_input_gmem) {
               subpass->feedback_invalidate = true;
               subpass->input_attachments[k].patch_input_gmem = true;
            }
         }
      }
   }
}
450
451 static void
tu_render_pass_check_feedback_loop(struct tu_render_pass * pass)452 tu_render_pass_check_feedback_loop(struct tu_render_pass *pass)
453 {
454 for (unsigned i = 0; i < pass->subpass_count; i++) {
455 struct tu_subpass *subpass = &pass->subpasses[i];
456
457 for (unsigned j = 0; j < subpass->color_count; j++) {
458 uint32_t a = subpass->color_attachments[j].attachment;
459 if (a == VK_ATTACHMENT_UNUSED)
460 continue;
461 for (unsigned k = 0; k < subpass->input_count; k++) {
462 if (subpass->input_attachments[k].attachment == a) {
463 subpass->feedback_loop_color = true;
464 break;
465 }
466 }
467 }
468
469 if (subpass->depth_stencil_attachment.attachment != VK_ATTACHMENT_UNUSED) {
470 for (unsigned k = 0; k < subpass->input_count; k++) {
471 if (subpass->input_attachments[k].attachment ==
472 subpass->depth_stencil_attachment.attachment) {
473 subpass->feedback_loop_ds = true;
474 break;
475 }
476 }
477 }
478 }
479 }
480
/* Record the sample count used by a subpass's attachments. All attachments
 * within one subpass must agree on the sample count (or it is still unset).
 */
static void
update_samples(struct tu_subpass *subpass, VkSampleCountFlagBits samples)
{
   assert(!subpass->samples || subpass->samples == samples);
   subpass->samples = samples;
}
487
488 static void
tu_render_pass_calc_views(struct tu_render_pass * pass)489 tu_render_pass_calc_views(struct tu_render_pass *pass)
490 {
491 uint32_t view_mask = 0;
492 for (unsigned i = 0; i < pass->subpass_count; i++)
493 view_mask |= pass->subpasses[i].multiview_mask;
494 pass->num_views = util_last_bit(view_mask);
495 }
496
497 /* If there are any multisample attachments with a load op other than
498 * clear/don't-care/none and store op other than don't-care/none, then we'd
499 * have to load/store a scaled multisample image which doesn't make much
500 * sense. Just disable fragment_density_map in this case.
501 */
502 static bool
tu_render_pass_disable_fdm(struct tu_render_pass * pass)503 tu_render_pass_disable_fdm(struct tu_render_pass *pass)
504 {
505 for (uint32_t i = 0; i < pass->attachment_count; i++) {
506 struct tu_render_pass_attachment *att = &pass->attachments[i];
507
508 if (att->samples > 1 &&
509 (att->load || att->load_stencil ||
510 att->store || att->store_stencil)) {
511 return true;
512 }
513 }
514
515 return false;
516 }
517
518 static void
tu_render_pass_calc_hash(struct tu_render_pass * pass)519 tu_render_pass_calc_hash(struct tu_render_pass *pass)
520 {
521 #define HASH(hash, data) XXH64(&(data), sizeof(data), hash)
522
523 uint64_t hash = HASH(0, pass->attachment_count);
524 hash = XXH64(pass->attachments,
525 pass->attachment_count * sizeof(pass->attachments[0]), hash);
526 hash = HASH(hash, pass->subpass_count);
527 for (unsigned i = 0; i < pass->subpass_count; i++) {
528 hash = HASH(hash, pass->subpasses[i].samples);
529 hash = HASH(hash, pass->subpasses[i].input_count);
530 hash = HASH(hash, pass->subpasses[i].color_count);
531 hash = HASH(hash, pass->subpasses[i].resolve_count);
532 }
533
534 pass->autotune_hash = hash;
535
536 #undef HASH
537 }
538
539 static void
tu_render_pass_cond_config(struct tu_device * device,struct tu_render_pass * pass)540 tu_render_pass_cond_config(struct tu_device *device,
541 struct tu_render_pass *pass)
542 {
543 /* With generic clears CmdClearAttachments isn't a draw and doesn't
544 * contribute to bin's geometry.
545 */
546 if (device->physical_device->info->a7xx.has_generic_clear)
547 return;
548
549 for (uint32_t i = 0; i < pass->attachment_count; i++) {
550 struct tu_render_pass_attachment *att = &pass->attachments[i];
551
552 /* When there is no geometry in a tile, and there is no other operations to
553 * read/write the tile, we can skip load/store.
554 *
555 * The only other operations are clear and resolve, which disable
556 * conditional load/store.
557 */
558 att->cond_load_allowed =
559 (att->load || att->load_stencil) && !att->clear_mask && !att->will_be_resolved;
560 att->cond_store_allowed =
561 (att->store || att->store_stencil) && !att->clear_mask;
562
563 pass->has_cond_load_store |=
564 att->cond_load_allowed | att->cond_store_allowed;
565 }
566 }
567
/* Allocate GMEM space for every gmem-resident attachment, once per gmem
 * layout. On success, gmem_pixels[layout] holds the number of pixels per bin
 * that fit; it is left unchanged (impossible) if the attachments don't fit.
 */
static void
tu_render_pass_gmem_config(struct tu_render_pass *pass,
                           const struct tu_physical_device *phys_dev)
{
   for (enum tu_gmem_layout layout = (enum tu_gmem_layout) 0;
        layout < TU_GMEM_LAYOUT_COUNT;
        layout = (enum tu_gmem_layout)(layout + 1)) {
      /* log2(gmem_align/(tile_align_w*tile_align_h)) */
      uint32_t block_align_shift = 3;
      uint32_t tile_align_w = phys_dev->info->tile_align_w;
      uint32_t gmem_align = (1 << block_align_shift) * tile_align_w *
                            phys_dev->info->tile_align_h;

      /* calculate total bytes per pixel */
      uint32_t cpp_total = 0;
      uint32_t min_cpp = UINT32_MAX;
      for (uint32_t i = 0; i < pass->attachment_count; i++) {
         struct tu_render_pass_attachment *att = &pass->attachments[i];
         bool cpp1 = (att->cpp == 1);
         if (att->gmem) {
            cpp_total += att->cpp;
            min_cpp = MIN2(min_cpp, att->cpp);

            /* take into account the separate stencil: */
            if (att->format == VK_FORMAT_D32_SFLOAT_S8_UINT) {
               /* for d32s8, att->samples serves as the stencil plane's cpp
                * (set up at attachment-creation time)
                */
               min_cpp = MIN2(min_cpp, att->samples);
               cpp1 = (att->samples == 1);
               cpp_total += att->samples;
            }

            /* texture pitch must be aligned to 64, use a tile_align_w that is
             * a multiple of 64 for cpp==1 attachment to work as input
             * attachment
             */
            if (cpp1 && tile_align_w % 64 != 0) {
               /* doubling tile_align_w doubles gmem_align's block size, so
                * the shift shrinks by one to keep gmem_align consistent
                */
               tile_align_w *= 2;
               block_align_shift -= 1;
            }
         }
      }

      pass->tile_align_w = tile_align_w;
      pass->min_cpp = min_cpp;

      /* no gmem attachments */
      if (cpp_total == 0) {
         /* any value non-zero value so tiling config works with no
          * attachments
          */
         pass->gmem_pixels[layout] = 1024 * 1024;
         continue;
      }

      /* TODO: this algorithm isn't optimal
       * for example, two attachments with cpp = {1, 4}
       * result:  nblocks = {12, 52}, pixels = 196608
       * optimal: nblocks = {13, 51}, pixels = 208896
       */
      uint32_t gmem_size = layout == TU_GMEM_LAYOUT_FULL
                              ? phys_dev->usable_gmem_size_gmem
                              : phys_dev->ccu_offset_gmem;
      uint32_t gmem_blocks = gmem_size / gmem_align;
      uint32_t offset = 0, pixels = ~0u, i;
      for (i = 0; i < pass->attachment_count; i++) {
         struct tu_render_pass_attachment *att = &pass->attachments[i];
         if (!att->gmem)
            continue;

         att->gmem_offset[layout] = offset;

         /* each attachment gets blocks proportional to its share of
          * cpp_total, rounded down to its alignment, but at least `align`
          */
         uint32_t align = MAX2(1, att->cpp >> block_align_shift);
         uint32_t nblocks =
            MAX2((gmem_blocks * att->cpp / cpp_total) & ~(align - 1), align);

         /* ran out of gmem: the loop exits early and i stops short of
          * attachment_count, flagging the layout as impossible below
          */
         if (nblocks > gmem_blocks)
            break;

         gmem_blocks -= nblocks;
         cpp_total -= att->cpp;
         offset += nblocks * gmem_align;
         pixels = MIN2(pixels, nblocks * gmem_align / att->cpp);

         /* repeat the same for separate stencil */
         if (att->format == VK_FORMAT_D32_SFLOAT_S8_UINT) {
            att->gmem_offset_stencil[layout] = offset;

            /* note: for s8_uint, block align is always 1 */
            uint32_t nblocks = gmem_blocks * att->samples / cpp_total;
            if (nblocks > gmem_blocks)
               break;

            gmem_blocks -= nblocks;
            cpp_total -= att->samples;
            offset += nblocks * gmem_align;
            pixels = MIN2(pixels, nblocks * gmem_align / att->samples);
         }
      }

      /* if the loop didn't complete then the gmem config is impossible */
      if (i == pass->attachment_count)
         pass->gmem_pixels[layout] = pixels;
   }
}
671
672 static void
tu_render_pass_bandwidth_config(struct tu_render_pass * pass)673 tu_render_pass_bandwidth_config(struct tu_render_pass *pass)
674 {
675 pass->gmem_bandwidth_per_pixel = 0;
676 pass->sysmem_bandwidth_per_pixel = 0;
677
678 for (uint32_t i = 0; i < pass->attachment_count; i++) {
679 const struct tu_render_pass_attachment *att = &pass->attachments[i];
680
681 /* approximate tu_load_gmem_attachment */
682 if (att->load)
683 pass->gmem_bandwidth_per_pixel += att->cpp;
684
685 /* approximate tu_store_gmem_attachment */
686 if (att->store)
687 pass->gmem_bandwidth_per_pixel += att->cpp;
688
689 /* approximate tu_clear_sysmem_attachment */
690 if (att->clear_mask)
691 pass->sysmem_bandwidth_per_pixel += att->cpp;
692
693 /* approximate tu6_emit_sysmem_resolves */
694 if (att->will_be_resolved) {
695 pass->sysmem_bandwidth_per_pixel +=
696 att->cpp + att->cpp / att->samples;
697 }
698 }
699 }
700
/* Translate Vulkan load/store ops into the attachment's clear_mask and
 * load/store flags, accounting for format-specific depth/stencil packing.
 */
static void
attachment_set_ops(struct tu_device *device,
                   struct tu_render_pass_attachment *att,
                   VkAttachmentLoadOp load_op,
                   VkAttachmentLoadOp stencil_load_op,
                   VkAttachmentStoreOp store_op,
                   VkAttachmentStoreOp stencil_store_op)
{
   /* Debug option: treat DONT_CARE loads as real loads. */
   if (unlikely(device->instance->dont_care_as_load)) {
      if (load_op == VK_ATTACHMENT_LOAD_OP_DONT_CARE)
         load_op = VK_ATTACHMENT_LOAD_OP_LOAD;
      if (stencil_load_op == VK_ATTACHMENT_LOAD_OP_DONT_CARE)
         stencil_load_op = VK_ATTACHMENT_LOAD_OP_LOAD;
   }

   /* load/store ops */
   att->clear_mask =
      (load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) ? VK_IMAGE_ASPECT_COLOR_BIT : 0;
   att->load = (load_op == VK_ATTACHMENT_LOAD_OP_LOAD);
   att->store = (store_op == VK_ATTACHMENT_STORE_OP_STORE);

   bool stencil_clear = (stencil_load_op == VK_ATTACHMENT_LOAD_OP_CLEAR);
   bool stencil_load = (stencil_load_op == VK_ATTACHMENT_LOAD_OP_LOAD);
   bool stencil_store = (stencil_store_op == VK_ATTACHMENT_STORE_OP_STORE);

   switch (att->format) {
   case VK_FORMAT_D24_UNORM_S8_UINT: /* || stencil load/store */
      /* depth and stencil share one plane: merge the stencil ops into the
       * combined load/store flags
       */
      if (att->clear_mask)
         att->clear_mask = VK_IMAGE_ASPECT_DEPTH_BIT;
      if (stencil_clear)
         att->clear_mask |= VK_IMAGE_ASPECT_STENCIL_BIT;
      if (stencil_load)
         att->load = true;
      if (stencil_store)
         att->store = true;
      /* If depth or stencil is passthrough (STORE_OP_NONE), then we need to
       * preserve the contents when storing by loading even if neither
       * component needs to be loaded.
       */
      if ((store_op == VK_ATTACHMENT_STORE_OP_NONE_EXT ||
           stencil_store_op == VK_ATTACHMENT_STORE_OP_NONE_EXT) &&
          att->store) {
         att->load = true;
      }
      break;
   case VK_FORMAT_S8_UINT: /* replace load/store with stencil load/store */
      att->clear_mask = stencil_clear ? VK_IMAGE_ASPECT_COLOR_BIT : 0;
      att->load = stencil_load;
      att->store = stencil_store;
      break;
   case VK_FORMAT_D32_SFLOAT_S8_UINT: /* separate stencil */
      /* stencil lives in its own plane, tracked by load_stencil /
       * store_stencil
       */
      if (att->clear_mask)
         att->clear_mask = VK_IMAGE_ASPECT_DEPTH_BIT;
      if (stencil_clear)
         att->clear_mask |= VK_IMAGE_ASPECT_STENCIL_BIT;
      if (stencil_load)
         att->load_stencil = true;
      if (stencil_store)
         att->store_stencil = true;
      break;
   default:
      break;
   }
}
765
766 static bool
is_depth_stencil_resolve_enabled(const VkSubpassDescriptionDepthStencilResolve * depth_stencil_resolve)767 is_depth_stencil_resolve_enabled(const VkSubpassDescriptionDepthStencilResolve *depth_stencil_resolve)
768 {
769 if (depth_stencil_resolve &&
770 depth_stencil_resolve->pDepthStencilResolveAttachment &&
771 depth_stencil_resolve->pDepthStencilResolveAttachment->attachment != VK_ATTACHMENT_UNUSED) {
772 return true;
773 }
774 return false;
775 }
776
777 static void
tu_subpass_use_attachment(struct tu_render_pass * pass,int i,uint32_t a,const VkRenderPassCreateInfo2 * pCreateInfo)778 tu_subpass_use_attachment(struct tu_render_pass *pass, int i, uint32_t a, const VkRenderPassCreateInfo2 *pCreateInfo)
779 {
780 struct tu_subpass *subpass = &pass->subpasses[i];
781 struct tu_render_pass_attachment *att = &pass->attachments[a];
782
783 att->gmem = true;
784 update_samples(subpass, pCreateInfo->pAttachments[a].samples);
785 att->clear_views |= subpass->multiview_mask;
786
787 /* Loads and clears are emitted at the start of the subpass that needs them. */
788 att->first_subpass_idx = MIN2(i, att->first_subpass_idx);
789
790 /* Stores are emitted at vkEndRenderPass() time. */
791 if (att->store || att->store_stencil)
792 att->last_subpass_idx = pass->subpass_count - 1;
793 else
794 att->last_subpass_idx = MAX2(i, att->last_subpass_idx);
795 }
796
797 static void
tu_subpass_resolve_attachment(struct tu_render_pass * pass,int i,uint32_t dst_a,uint32_t src_a)798 tu_subpass_resolve_attachment(struct tu_render_pass *pass, int i, uint32_t dst_a, uint32_t src_a)
799 {
800 if (src_a != VK_ATTACHMENT_UNUSED && dst_a != VK_ATTACHMENT_UNUSED) {
801 struct tu_render_pass_attachment *src_att = &pass->attachments[src_a];
802 struct tu_render_pass_attachment *dst_att = &pass->attachments[dst_a];
803 src_att->will_be_resolved = true;
804
805 src_att->first_subpass_idx = MIN2(i, src_att->first_subpass_idx);
806 src_att->last_subpass_idx = MAX2(i, src_att->last_subpass_idx);
807 dst_att->first_subpass_idx = MIN2(i, dst_att->first_subpass_idx);
808 dst_att->last_subpass_idx = MAX2(i, dst_att->last_subpass_idx);
809 }
810 }
811
/* Implementation of vkCreateRenderPass2: allocates the pass object,
 * translates the Vulkan attachment/subpass descriptions into the
 * tu_render_pass representation, and precomputes gmem allocation,
 * bandwidth, multiview and dependency state used at submit time.
 */
VKAPI_ATTR VkResult VKAPI_CALL
tu_CreateRenderPass2(VkDevice _device,
                     const VkRenderPassCreateInfo2 *pCreateInfo,
                     const VkAllocationCallbacks *pAllocator,
                     VkRenderPass *pRenderPass)
{
   VK_FROM_HANDLE(tu_device, device, _device);

   /* TU_DEBUG=dynamic: defer to the common runtime, which emulates render
    * passes on top of dynamic rendering.
    */
   if (TU_DEBUG(DYNAMIC))
      return vk_common_CreateRenderPass2(_device, pCreateInfo, pAllocator,
                                         pRenderPass);

   struct tu_render_pass *pass;
   size_t size;
   size_t attachments_offset;

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2);

   /* The pass header, subpass array and attachment array share a single
    * allocation; remember where the attachments start.
    */
   size = sizeof(*pass);
   size += pCreateInfo->subpassCount * sizeof(pass->subpasses[0]);
   attachments_offset = size;
   size += pCreateInfo->attachmentCount * sizeof(pass->attachments[0]);

   pass = (struct tu_render_pass *) vk_object_zalloc(
      &device->vk, pAllocator, size, VK_OBJECT_TYPE_RENDER_PASS);
   if (pass == NULL)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   pass->attachment_count = pCreateInfo->attachmentCount;
   pass->subpass_count = pCreateInfo->subpassCount;
   pass->attachments =
      (struct tu_render_pass_attachment *) ((char *) pass +
                                            attachments_offset);

   /* First pass over the attachments: record format/sample/size and
    * translate load/store ops.
    */
   for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) {
      struct tu_render_pass_attachment *att = &pass->attachments[i];

      att->format = pCreateInfo->pAttachments[i].format;
      att->samples = pCreateInfo->pAttachments[i].samples;
      /* for d32s8, cpp is for the depth image, and
       * att->samples will be used as the cpp for the stencil image
       */
      if (att->format == VK_FORMAT_D32_SFLOAT_S8_UINT)
         att->cpp = 4 * att->samples;
      else
         att->cpp = vk_format_get_blocksize(att->format) * att->samples;
      /* Initially not allocated into gmem, tu_subpass_use_attachment() will move it there. */
      att->gmem = false;

      VkAttachmentLoadOp loadOp = pCreateInfo->pAttachments[i].loadOp;
      VkAttachmentLoadOp stencilLoadOp = pCreateInfo->pAttachments[i].stencilLoadOp;

      attachment_set_ops(device, att, loadOp, stencilLoadOp,
                         pCreateInfo->pAttachments[i].storeOp,
                         pCreateInfo->pAttachments[i].stencilStoreOp);

      /* Start with an inverted (empty) subpass range; the subpass loop
       * below widens it with MIN2/MAX2 as usage is discovered.
       */
      att->first_subpass_idx = VK_SUBPASS_EXTERNAL;
      att->last_subpass_idx = 0;
   }
   /* Count every subpass-attachment reference up front so the per-subpass
    * arrays can be carved out of one allocation (via the cursor 'p').
    */
   uint32_t subpass_attachment_count = 0;
   struct tu_subpass_attachment *p;
   for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) {
      const VkSubpassDescription2 *desc = &pCreateInfo->pSubpasses[i];
      const VkSubpassDescriptionDepthStencilResolve *ds_resolve =
         vk_find_struct_const(desc->pNext, SUBPASS_DESCRIPTION_DEPTH_STENCIL_RESOLVE);

      subpass_attachment_count +=
         desc->inputAttachmentCount + desc->colorAttachmentCount +
         (desc->pResolveAttachments ? desc->colorAttachmentCount : 0) +
         (is_depth_stencil_resolve_enabled(ds_resolve) ? 1 : 0);
   }

   if (subpass_attachment_count) {
      pass->subpass_attachments = (struct tu_subpass_attachment *) vk_alloc2(
         &device->vk.alloc, pAllocator,
         subpass_attachment_count * sizeof(struct tu_subpass_attachment), 8,
         VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
      if (pass->subpass_attachments == NULL) {
         vk_object_free(&device->vk, pAllocator, pass);
         return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
      }
   } else
      pass->subpass_attachments = NULL;

   const VkRenderPassFragmentDensityMapCreateInfoEXT *fdm_info =
      vk_find_struct_const(pCreateInfo->pNext,
                           RENDER_PASS_FRAGMENT_DENSITY_MAP_CREATE_INFO_EXT);
   if (fdm_info && !tu_render_pass_disable_fdm(pass)) {
      pass->fragment_density_map.attachment =
         fdm_info->fragmentDensityMapAttachment.attachment;
      pass->has_fdm = true;
   } else {
      pass->fragment_density_map.attachment = VK_ATTACHMENT_UNUSED;
   }

   /* TU_DEBUG=fdm forces the FDM codepaths even without an FDM attachment. */
   if (TU_DEBUG(FDM) && !tu_render_pass_disable_fdm(pass))
      pass->has_fdm = true;

   /* Second pass over the subpasses: hand out slices of the shared
    * subpass_attachments allocation and fill in each subpass.
    */
   p = pass->subpass_attachments;
   for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) {
      const VkSubpassDescription2 *desc = &pCreateInfo->pSubpasses[i];
      const VkSubpassDescriptionDepthStencilResolve *ds_resolve =
         vk_find_struct_const(desc->pNext, SUBPASS_DESCRIPTION_DEPTH_STENCIL_RESOLVE);
      struct tu_subpass *subpass = &pass->subpasses[i];

      subpass->input_count = desc->inputAttachmentCount;
      subpass->color_count = desc->colorAttachmentCount;
      subpass->resolve_count = 0;
      subpass->resolve_depth_stencil = is_depth_stencil_resolve_enabled(ds_resolve);
      subpass->samples = (VkSampleCountFlagBits) 0;
      subpass->srgb_cntl = 0;
      subpass->legacy_dithering_enabled = desc->flags &
         VK_SUBPASS_DESCRIPTION_ENABLE_LEGACY_DITHERING_BIT_EXT;

      const BITMASK_ENUM(VkSubpassDescriptionFlagBits) raster_order_access_bits =
         VK_SUBPASS_DESCRIPTION_RASTERIZATION_ORDER_ATTACHMENT_COLOR_ACCESS_BIT_EXT |
         VK_SUBPASS_DESCRIPTION_RASTERIZATION_ORDER_ATTACHMENT_DEPTH_ACCESS_BIT_EXT |
         VK_SUBPASS_DESCRIPTION_RASTERIZATION_ORDER_ATTACHMENT_STENCIL_ACCESS_BIT_EXT;

      subpass->raster_order_attachment_access = raster_order_access_bits & desc->flags;

      subpass->multiview_mask = desc->viewMask;

      if (desc->inputAttachmentCount > 0) {
         subpass->input_attachments = p;
         p += desc->inputAttachmentCount;

         for (uint32_t j = 0; j < desc->inputAttachmentCount; j++) {
            uint32_t a = desc->pInputAttachments[j].attachment;
            subpass->input_attachments[j].attachment = a;
            if (a != VK_ATTACHMENT_UNUSED) {
               struct tu_render_pass_attachment *att = &pass->attachments[a];
               /* Note: attachments only used as input attachments will be read
                * directly instead of through gmem, so we don't mark input
                * attachments as needing gmem.
                */
               att->first_subpass_idx = MIN2(i, att->first_subpass_idx);
               att->last_subpass_idx = MAX2(i, att->last_subpass_idx);
            }
         }
      }

      if (desc->colorAttachmentCount > 0) {
         subpass->color_attachments = p;
         p += desc->colorAttachmentCount;

         for (uint32_t j = 0; j < desc->colorAttachmentCount; j++) {
            uint32_t a = desc->pColorAttachments[j].attachment;
            subpass->color_attachments[j].attachment = a;

            if (a != VK_ATTACHMENT_UNUSED) {
               /* Color use pulls the attachment into gmem. */
               tu_subpass_use_attachment(pass, i, a, pCreateInfo);

               if (vk_format_is_srgb(pass->attachments[a].format))
                  subpass->srgb_cntl |= 1 << j;
            }
         }
      }

      /* The resolve array holds the color resolves followed (optionally)
       * by one depth/stencil resolve in the last slot.
       */
      subpass->resolve_attachments = (desc->pResolveAttachments || subpass->resolve_depth_stencil) ? p : NULL;
      if (desc->pResolveAttachments) {
         p += desc->colorAttachmentCount;
         subpass->resolve_count += desc->colorAttachmentCount;
         for (uint32_t j = 0; j < desc->colorAttachmentCount; j++) {
            uint32_t a = desc->pResolveAttachments[j].attachment;
            uint32_t src_a = desc->pColorAttachments[j].attachment;
            subpass->resolve_attachments[j].attachment = a;

            tu_subpass_resolve_attachment(pass, i, a, src_a);
         }
      }

      if (subpass->resolve_depth_stencil) {
         p++;
         subpass->resolve_count++;
         uint32_t a = ds_resolve->pDepthStencilResolveAttachment->attachment;
         uint32_t src_a = desc->pDepthStencilAttachment->attachment;
         subpass->resolve_attachments[subpass->resolve_count - 1].attachment = a;

         tu_subpass_resolve_attachment(pass, i, a, src_a);
      }

      uint32_t a = desc->pDepthStencilAttachment ?
         desc->pDepthStencilAttachment->attachment : VK_ATTACHMENT_UNUSED;
      subpass->depth_stencil_attachment.attachment = a;
      subpass->depth_used = a != VK_ATTACHMENT_UNUSED;
      subpass->stencil_used = a != VK_ATTACHMENT_UNUSED;
      if (a != VK_ATTACHMENT_UNUSED) {
         tu_subpass_use_attachment(pass, i, a, pCreateInfo);
      }
   }

   tu_render_pass_patch_input_gmem(pass);

   tu_render_pass_check_feedback_loop(pass);

   /* disable unused attachments */
   for (uint32_t i = 0; i < pass->attachment_count; i++) {
      struct tu_render_pass_attachment *att = &pass->attachments[i];
      if (!att->gmem) {
         att->clear_mask = 0;
         att->load = false;
      }
   }

   /* Derived configuration; ordering matters: cond/gmem/bandwidth configs
    * feed the hash computed last.
    */
   tu_render_pass_cond_config(device, pass);
   tu_render_pass_gmem_config(pass, device->physical_device);
   tu_render_pass_bandwidth_config(pass);
   tu_render_pass_calc_views(pass);
   tu_render_pass_calc_hash(pass);

   for (unsigned i = 0; i < pCreateInfo->dependencyCount; ++i) {
      tu_render_pass_add_subpass_dep(pass, &pCreateInfo->pDependencies[i]);
   }

   tu_render_pass_add_implicit_deps(pass, pCreateInfo);

   *pRenderPass = tu_render_pass_to_handle(pass);

   return VK_SUCCESS;
}
1033
1034 VKAPI_ATTR void VKAPI_CALL
tu_DestroyRenderPass(VkDevice _device,VkRenderPass _pass,const VkAllocationCallbacks * pAllocator)1035 tu_DestroyRenderPass(VkDevice _device,
1036 VkRenderPass _pass,
1037 const VkAllocationCallbacks *pAllocator)
1038 {
1039 VK_FROM_HANDLE(tu_device, device, _device);
1040
1041 if (TU_DEBUG(DYNAMIC)) {
1042 vk_common_DestroyRenderPass(_device, _pass, pAllocator);
1043 return;
1044 }
1045
1046 VK_FROM_HANDLE(tu_render_pass, pass, _pass);
1047
1048 if (!_pass)
1049 return;
1050
1051 vk_free2(&device->vk.alloc, pAllocator, pass->subpass_attachments);
1052 vk_object_free(&device->vk, pAllocator, pass);
1053 }
1054
1055 static void
tu_setup_dynamic_attachment(struct tu_render_pass_attachment * att,struct tu_image_view * view)1056 tu_setup_dynamic_attachment(struct tu_render_pass_attachment *att,
1057 struct tu_image_view *view)
1058 {
1059 *att = {};
1060 att->format = view->vk.format;
1061 att->samples = (VkSampleCountFlagBits) view->image->layout->nr_samples;
1062
1063 /* for d32s8, cpp is for the depth image, and
1064 * att->samples will be used as the cpp for the stencil image
1065 */
1066 if (att->format == VK_FORMAT_D32_SFLOAT_S8_UINT)
1067 att->cpp = 4 * att->samples;
1068 else
1069 att->cpp = vk_format_get_blocksize(att->format) * att->samples;
1070 }
1071
/* Build the command buffer's single-subpass dynamic_pass/dynamic_subpass
 * from a vkCmdBeginRendering info struct, so the rest of the driver can
 * treat dynamic rendering like a regular render pass. Attachment storage
 * is the fixed arrays embedded in the command buffer.
 */
void
tu_setup_dynamic_render_pass(struct tu_cmd_buffer *cmd_buffer,
                             const VkRenderingInfo *info)
{
   struct tu_device *device = cmd_buffer->device;
   struct tu_render_pass *pass = &cmd_buffer->dynamic_pass;
   struct tu_subpass *subpass = &cmd_buffer->dynamic_subpass;

   *pass = {};
   *subpass = {};

   pass->subpass_count = 1;
   pass->attachments = cmd_buffer->dynamic_rp_attachments;

   subpass->color_count = subpass->resolve_count = info->colorAttachmentCount;
   subpass->color_attachments = cmd_buffer->dynamic_color_attachments;
   subpass->resolve_attachments = cmd_buffer->dynamic_resolve_attachments;
   subpass->multiview_mask = info->viewMask;
   subpass->legacy_dithering_enabled = info->flags &
      VK_RENDERING_ENABLE_LEGACY_DITHERING_BIT_EXT;

   /* 'a' is the next free slot in pass->attachments; each used color,
    * resolve, depth/stencil and FDM view claims one slot in turn.
    */
   uint32_t a = 0;
   for (uint32_t i = 0; i < info->colorAttachmentCount; i++) {
      struct tu_render_pass_attachment *att = &pass->attachments[a];
      const VkRenderingAttachmentInfo *att_info = &info->pColorAttachments[i];

      /* Unused color slots still occupy entries in the subpass arrays. */
      if (att_info->imageView == VK_NULL_HANDLE) {
         subpass->color_attachments[i].attachment = VK_ATTACHMENT_UNUSED;
         subpass->resolve_attachments[i].attachment = VK_ATTACHMENT_UNUSED;
         continue;
      }

      VK_FROM_HANDLE(tu_image_view, view, att_info->imageView);
      tu_setup_dynamic_attachment(att, view);
      att->gmem = true;
      att->clear_views = info->viewMask;
      /* Color attachments have no separate stencil ops. */
      attachment_set_ops(device, att, att_info->loadOp,
                         VK_ATTACHMENT_LOAD_OP_DONT_CARE, att_info->storeOp,
                         VK_ATTACHMENT_STORE_OP_DONT_CARE);
      subpass->color_attachments[i].attachment = a++;

      subpass->samples = (VkSampleCountFlagBits) view->image->layout->nr_samples;

      if (vk_format_is_srgb(view->vk.format))
         subpass->srgb_cntl |= 1 << i;

      if (att_info->resolveMode != VK_RESOLVE_MODE_NONE) {
         /* Resolve destinations live in sysmem only (gmem = false) and are
          * always stored, never loaded.
          */
         struct tu_render_pass_attachment *resolve_att = &pass->attachments[a];
         VK_FROM_HANDLE(tu_image_view, resolve_view, att_info->resolveImageView);
         tu_setup_dynamic_attachment(resolve_att, resolve_view);
         resolve_att->gmem = false;
         attachment_set_ops(
            device, resolve_att, VK_ATTACHMENT_LOAD_OP_DONT_CARE,
            VK_ATTACHMENT_LOAD_OP_DONT_CARE, VK_ATTACHMENT_STORE_OP_STORE,
            VK_ATTACHMENT_STORE_OP_DONT_CARE);
         subpass->resolve_attachments[i].attachment = a++;
         att->will_be_resolved = true;
      } else {
         subpass->resolve_attachments[i].attachment = VK_ATTACHMENT_UNUSED;
         att->will_be_resolved = false;
      }
   }

   if (info->pDepthAttachment || info->pStencilAttachment) {
      /* Depth and stencil share one attachment; prefer the depth info
       * struct when it names a view, otherwise fall back to stencil.
       */
      const struct VkRenderingAttachmentInfo *common_info =
         (info->pDepthAttachment &&
          info->pDepthAttachment->imageView != VK_NULL_HANDLE) ?
         info->pDepthAttachment :
         info->pStencilAttachment;

      if (common_info && common_info->imageView != VK_NULL_HANDLE) {
         VK_FROM_HANDLE(tu_image_view, view, common_info->imageView);

         struct tu_render_pass_attachment *att = &pass->attachments[a];
         tu_setup_dynamic_attachment(att, view);
         att->gmem = true;
         att->clear_views = info->viewMask;
         subpass->depth_stencil_attachment.attachment = a++;

         subpass->depth_used = (bool) info->pDepthAttachment;
         subpass->stencil_used = (bool) info->pStencilAttachment;

         /* Each aspect keeps its own load/store op; an absent aspect
          * defaults to the NONE ops.
          */
         attachment_set_ops(
            device, att,
            (info->pDepthAttachment && info->pDepthAttachment->imageView) ?
               info->pDepthAttachment->loadOp : VK_ATTACHMENT_LOAD_OP_NONE_EXT,
            (info->pStencilAttachment && info->pStencilAttachment->imageView) ?
               info->pStencilAttachment->loadOp : VK_ATTACHMENT_LOAD_OP_NONE_EXT,
            (info->pDepthAttachment && info->pDepthAttachment->imageView) ?
               info->pDepthAttachment->storeOp : VK_ATTACHMENT_STORE_OP_NONE_EXT,
            (info->pStencilAttachment && info->pStencilAttachment->imageView) ?
               info->pStencilAttachment->storeOp : VK_ATTACHMENT_STORE_OP_NONE_EXT);

         subpass->samples = (VkSampleCountFlagBits) view->image->layout->nr_samples;

         /* NOTE(review): only common_info's resolveMode is consulted, i.e.
          * the depth attachment's mode wins when both aspects are present
          * -- confirm this matches the intended resolve semantics.
          */
         if (common_info->resolveMode != VK_RESOLVE_MODE_NONE) {
            unsigned i = subpass->resolve_count++;
            struct tu_render_pass_attachment *resolve_att = &pass->attachments[a];
            VK_FROM_HANDLE(tu_image_view, resolve_view,
                           common_info->resolveImageView);
            tu_setup_dynamic_attachment(resolve_att, resolve_view);
            resolve_att->gmem = false;
            attachment_set_ops(device, resolve_att,
                               VK_ATTACHMENT_LOAD_OP_DONT_CARE,
                               VK_ATTACHMENT_LOAD_OP_DONT_CARE,
                               VK_ATTACHMENT_STORE_OP_STORE,
                               VK_ATTACHMENT_STORE_OP_STORE);
            subpass->resolve_attachments[i].attachment = a++;
            att->will_be_resolved = true;
            subpass->resolve_depth_stencil = true;
         } else {
            att->will_be_resolved = false;
         }
      } else {
         subpass->depth_stencil_attachment.attachment = VK_ATTACHMENT_UNUSED;
      }
   } else {
      subpass->depth_stencil_attachment.attachment = VK_ATTACHMENT_UNUSED;
   }

   /* Set here and again below: the FDM attachment (if any) bumps 'a'. */
   pass->attachment_count = a;

   const VkRenderingFragmentDensityMapAttachmentInfoEXT *fdm_info =
      vk_find_struct_const(info->pNext,
                           RENDERING_FRAGMENT_DENSITY_MAP_ATTACHMENT_INFO_EXT);
   if (fdm_info && fdm_info->imageView != VK_NULL_HANDLE &&
       !tu_render_pass_disable_fdm(pass)) {
      VK_FROM_HANDLE(tu_image_view, view, fdm_info->imageView);

      struct tu_render_pass_attachment *att = &pass->attachments[a];
      tu_setup_dynamic_attachment(att, view);
      pass->fragment_density_map.attachment = a++;
      attachment_set_ops(device, att,
                         VK_ATTACHMENT_LOAD_OP_DONT_CARE,
                         VK_ATTACHMENT_LOAD_OP_DONT_CARE,
                         VK_ATTACHMENT_STORE_OP_DONT_CARE,
                         VK_ATTACHMENT_STORE_OP_DONT_CARE);
      pass->has_fdm = true;
   } else {
      pass->fragment_density_map.attachment = VK_ATTACHMENT_UNUSED;
      pass->has_fdm = false;
   }

   /* TU_DEBUG=fdm forces the FDM codepaths even without an FDM view. */
   if (TU_DEBUG(FDM) && !tu_render_pass_disable_fdm(pass))
      pass->has_fdm = true;

   pass->attachment_count = a;

   /* Same derived-config sequence as tu_CreateRenderPass2. */
   tu_render_pass_cond_config(device, pass);
   tu_render_pass_gmem_config(pass, device->physical_device);
   tu_render_pass_bandwidth_config(pass);
   tu_render_pass_calc_views(pass);
   tu_render_pass_calc_hash(pass);
}
1226
1227 void
tu_setup_dynamic_inheritance(struct tu_cmd_buffer * cmd_buffer,const VkCommandBufferInheritanceRenderingInfo * info)1228 tu_setup_dynamic_inheritance(struct tu_cmd_buffer *cmd_buffer,
1229 const VkCommandBufferInheritanceRenderingInfo *info)
1230 {
1231 struct tu_render_pass *pass = &cmd_buffer->dynamic_pass;
1232 struct tu_subpass *subpass = &cmd_buffer->dynamic_subpass;
1233
1234 pass->subpass_count = 1;
1235 pass->attachments = cmd_buffer->dynamic_rp_attachments;
1236 pass->fragment_density_map.attachment = VK_ATTACHMENT_UNUSED;
1237
1238 subpass->color_count = info->colorAttachmentCount;
1239 subpass->resolve_count = 0;
1240 subpass->resolve_depth_stencil = false;
1241 subpass->color_attachments = cmd_buffer->dynamic_color_attachments;
1242 subpass->resolve_attachments = NULL;
1243 subpass->feedback_invalidate = false;
1244 subpass->feedback_loop_ds = subpass->feedback_loop_color = false;
1245 subpass->input_count = 0;
1246 subpass->samples = (VkSampleCountFlagBits) 0;
1247 subpass->srgb_cntl = 0;
1248 subpass->raster_order_attachment_access = false;
1249 subpass->multiview_mask = info->viewMask;
1250 subpass->samples = info->rasterizationSamples;
1251
1252 unsigned a = 0;
1253 for (unsigned i = 0; i < info->colorAttachmentCount; i++) {
1254 struct tu_render_pass_attachment *att = &pass->attachments[a];
1255 VkFormat format = info->pColorAttachmentFormats[i];
1256
1257 if (format == VK_FORMAT_UNDEFINED) {
1258 subpass->color_attachments[i].attachment = VK_ATTACHMENT_UNUSED;
1259 continue;
1260 }
1261
1262 att->format = format;
1263 att->samples = info->rasterizationSamples;
1264 subpass->samples = info->rasterizationSamples;
1265 subpass->color_attachments[i].attachment = a++;
1266
1267 /* conservatively assume that the attachment may be conditionally
1268 * loaded/stored.
1269 */
1270 att->cond_load_allowed = att->cond_store_allowed = true;
1271 }
1272
1273 if (info->depthAttachmentFormat != VK_FORMAT_UNDEFINED ||
1274 info->stencilAttachmentFormat != VK_FORMAT_UNDEFINED) {
1275 struct tu_render_pass_attachment *att = &pass->attachments[a];
1276 att->format = info->depthAttachmentFormat != VK_FORMAT_UNDEFINED ?
1277 info->depthAttachmentFormat : info->stencilAttachmentFormat;
1278 att->samples = info->rasterizationSamples;
1279 subpass->depth_stencil_attachment.attachment = a++;
1280 subpass->depth_used =
1281 info->depthAttachmentFormat != VK_FORMAT_UNDEFINED;
1282 subpass->stencil_used =
1283 info->stencilAttachmentFormat != VK_FORMAT_UNDEFINED;
1284 att->cond_load_allowed = att->cond_store_allowed = true;
1285 } else {
1286 subpass->depth_stencil_attachment.attachment = VK_ATTACHMENT_UNUSED;
1287 subpass->depth_used = false;
1288 subpass->stencil_used = false;
1289 }
1290
1291 tu_render_pass_calc_views(pass);
1292 }
1293
1294 VKAPI_ATTR void VKAPI_CALL
tu_GetRenderAreaGranularity(VkDevice _device,VkRenderPass renderPass,VkExtent2D * pGranularity)1295 tu_GetRenderAreaGranularity(VkDevice _device,
1296 VkRenderPass renderPass,
1297 VkExtent2D *pGranularity)
1298 {
1299 VK_FROM_HANDLE(tu_device, device, _device);
1300 pGranularity->width = device->physical_device->info->gmem_align_w;
1301 pGranularity->height = device->physical_device->info->gmem_align_h;
1302 }
1303
1304 VKAPI_ATTR void VKAPI_CALL
tu_GetRenderingAreaGranularityKHR(VkDevice _device,const VkRenderingAreaInfoKHR * pRenderingAreaInfo,VkExtent2D * pGranularity)1305 tu_GetRenderingAreaGranularityKHR(VkDevice _device,
1306 const VkRenderingAreaInfoKHR *pRenderingAreaInfo,
1307 VkExtent2D *pGranularity)
1308 {
1309 VK_FROM_HANDLE(tu_device, device, _device);
1310 pGranularity->width = device->physical_device->info->gmem_align_w;
1311 pGranularity->height = device->physical_device->info->gmem_align_h;
1312 }
1313
1314 uint32_t
tu_subpass_get_attachment_to_resolve(const struct tu_subpass * subpass,uint32_t index)1315 tu_subpass_get_attachment_to_resolve(const struct tu_subpass *subpass, uint32_t index)
1316 {
1317 if (subpass->resolve_depth_stencil &&
1318 index == (subpass->resolve_count - 1))
1319 return subpass->depth_stencil_attachment.attachment;
1320
1321 return subpass->color_attachments[index].attachment;
1322 }
1323