/*
 * Copyright © 2022 Valve Corporation
 * SPDX-License-Identifier: MIT
 */

/* When using dynamic rendering with the suspend/resume functionality, we
 * sometimes need to merge together multiple suspended render passes
 * dynamically at submit time. This involves combining all the saved-up IBs,
 * emitting the rendering commands usually emitted by
 * CmdEndRenderPass()/CmdEndRendering(), and inserting them in between the
 * user command buffers. This gets tricky, because the same command buffer
 * can be submitted multiple times, each time alongside a different set of
 * command buffers, and with VK_COMMAND_BUFFER_SIMULTANEOUS_USE_BIT this can
 * happen before the previous submission of the same command buffer has
 * finished. At some point we have to free these commands and the BOs they
 * are contained in, and we can't do that when resubmitting the last command
 * buffer in the sequence because it may still be in use. This means we have
 * to make the commands owned by the device and roll our own memory tracking.
 */
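
/* As a rough illustration (A, B, C are example buffers): a submit of
 * {A: suspend, B: resume + suspend, C: resume + end} is rewritten by
 * tu_insert_dynamic_cmdbufs() below into {A, B, X, C}, where X is a
 * device-owned command buffer holding the merged render pass IBs from A, B,
 * and C's pre-chain plus the actual rendering commands. X is recycled once
 * a GPU-written fence indicates it has retired.
 */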

#include "tu_dynamic_rendering.h"

#include "tu_cmd_buffer.h"
#include "tu_cs.h"

struct dynamic_rendering_entry {
   struct tu_cmd_buffer *cmd_buffer;
   uint32_t fence; /* The fence value when cmd_buffer becomes available */
};

static VkResult
get_cmd_buffer(struct tu_device *dev, struct tu_cmd_buffer **cmd_buffer_out)
{
   struct tu6_global *global = dev->global_bo_map;

   /* Note: because QueueSubmit is serialized, we don't need any locks here.
    */
   uint32_t fence = global->dynamic_rendering_fence;

   /* Go through the entries and return the finished ones to the pool,
    * shrinking the array of pending entries.
    */
   struct dynamic_rendering_entry *new_entry =
      (struct dynamic_rendering_entry *) util_dynarray_begin(
         &dev->dynamic_rendering_pending);
   uint32_t entries = 0;
   util_dynarray_foreach(&dev->dynamic_rendering_pending,
                         struct dynamic_rendering_entry, entry) {
      if (entry->fence <= fence) {
         VkCommandBuffer vk_buf = tu_cmd_buffer_to_handle(entry->cmd_buffer);
         vk_common_FreeCommandBuffers(tu_device_to_handle(dev),
                                      dev->dynamic_rendering_pool, 1, &vk_buf);
      } else {
         *new_entry = *entry;
         new_entry++;
         entries++;
      }
   }
   UNUSED void *dummy =
     util_dynarray_resize(&dev->dynamic_rendering_pending,
                          struct dynamic_rendering_entry, entries);

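   /* Allocate a fresh command buffer for the merged commands; the buffers
    * freed above go back to the pool, so their memory can be reused.
    */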
   VkCommandBuffer vk_buf;
   const VkCommandBufferAllocateInfo info = {
      .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
      .pNext = NULL,
      .commandPool = dev->dynamic_rendering_pool,
      .level = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
      .commandBufferCount = 1,
   };
   VkResult result =
      vk_common_AllocateCommandBuffers(tu_device_to_handle(dev), &info, &vk_buf);
   if (result != VK_SUCCESS)
      return result;

   VK_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, vk_buf);

   struct dynamic_rendering_entry entry = {
      .cmd_buffer = cmd_buffer,
      .fence = ++dev->dynamic_rendering_fence,
   };

   util_dynarray_append(&dev->dynamic_rendering_pending,
                        struct dynamic_rendering_entry, entry);
   *cmd_buffer_out = cmd_buffer;

   return VK_SUCCESS;
}

VkResult
tu_init_dynamic_rendering(struct tu_device *dev)
{
   util_dynarray_init(&dev->dynamic_rendering_pending, NULL);
   dev->dynamic_rendering_fence = 0;

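   /* Queue family index 0 here matches turnip's single queue family. */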
   const VkCommandPoolCreateInfo create_info = {
      .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
      .pNext = NULL,
      .flags = 0,
      .queueFamilyIndex = 0,
   };

   return vk_common_CreateCommandPool(tu_device_to_handle(dev), &create_info,
                                      &dev->vk.alloc,
                                      &dev->dynamic_rendering_pool);
}

void
tu_destroy_dynamic_rendering(struct tu_device *dev)
{
   vk_common_DestroyCommandPool(tu_device_to_handle(dev),
                                dev->dynamic_rendering_pool,
                                &dev->vk.alloc);
   util_dynarray_fini(&dev->dynamic_rendering_pending);
}

VkResult
tu_insert_dynamic_cmdbufs(struct tu_device *dev,
                          struct tu_cmd_buffer ***cmds_ptr,
                          uint32_t *size)
{
   struct tu_cmd_buffer **old_cmds = *cmds_ptr;

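   /* Fast path: if no command buffer in the submit takes part in
    * suspend/resume, leave the array untouched.
    */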
   bool has_dynamic = false;
   for (unsigned i = 0; i < *size; i++) {
      if (old_cmds[i]->state.suspend_resume != SR_NONE) {
         has_dynamic = true;
         break;
      }
   }

   if (!has_dynamic)
      return VK_SUCCESS;

   struct util_dynarray cmds = {0};
   struct tu_cmd_buffer *cmd_buffer = NULL;

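   /* Walk the submit in order. The first switch below finishes a suspended
    * chain that ends in this command buffer's pre-chain, inserting the
    * merged, device-owned command buffer before it; the second switch starts
    * or extends a chain that this command buffer participates in.
    */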
   for (unsigned i = 0; i < *size; i++) {
      switch (old_cmds[i]->state.suspend_resume) {
      case SR_NONE:
      case SR_IN_CHAIN:
      case SR_IN_PRE_CHAIN:
         break;

      case SR_AFTER_PRE_CHAIN:
      case SR_IN_CHAIN_AFTER_PRE_CHAIN:
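         /* This command buffer's pre-chain resumed and finished the
          * suspended render pass: pull the pre-chain into the merged command
          * buffer and emit the actual rendering commands.
          */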
         tu_append_pre_chain(cmd_buffer, old_cmds[i]);

         if (!(old_cmds[i]->usage_flags &
               VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT)) {
            u_trace_disable_event_range(old_cmds[i]->pre_chain.trace_renderpass_start,
                                        old_cmds[i]->pre_chain.trace_renderpass_end);
         }

         TU_CALLX(dev, tu_cmd_render)(cmd_buffer);

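         /* Let the CPU know when this merged command buffer retires by
          * having the GPU write the current fence value to the global BO;
          * get_cmd_buffer() recycles every pending entry whose fence is <=
          * the value it reads back.
          */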
         tu_cs_emit_pkt7(&cmd_buffer->cs, CP_MEM_WRITE, 3);
         tu_cs_emit_qw(&cmd_buffer->cs,
                       global_iova(cmd_buffer, dynamic_rendering_fence));
         tu_cs_emit(&cmd_buffer->cs, dev->dynamic_rendering_fence);

         TU_CALLX(dev, tu_EndCommandBuffer)(tu_cmd_buffer_to_handle(cmd_buffer));
         util_dynarray_append(&cmds, struct tu_cmd_buffer *, cmd_buffer);
         cmd_buffer = NULL;
         break;
      }

      util_dynarray_append(&cmds, struct tu_cmd_buffer *, old_cmds[i]);

      switch (old_cmds[i]->state.suspend_resume) {
      case SR_NONE:
      case SR_AFTER_PRE_CHAIN:
         break;
      case SR_IN_CHAIN:
      case SR_IN_CHAIN_AFTER_PRE_CHAIN: {
         assert(!cmd_buffer);
         VkResult result = get_cmd_buffer(dev, &cmd_buffer);
         if (result != VK_SUCCESS)
            return result;

         const VkCommandBufferBeginInfo begin = {
            .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
            .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
         };
         tu_cmd_buffer_begin(cmd_buffer, &begin);

         /* Set up the render pass using the first command buffer involved
          * in the chain, so that it will look like we're inside a render
          * pass for tu_cmd_render().
          */
         tu_restore_suspended_pass(cmd_buffer, old_cmds[i]);
         FALLTHROUGH;
      }
      case SR_IN_PRE_CHAIN:
         assert(cmd_buffer);

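         /* Splice this command buffer's portion of the suspended render
          * pass into the merged command buffer.
          */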
         tu_append_pre_post_chain(cmd_buffer, old_cmds[i]);

         if (old_cmds[i]->usage_flags &
             VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT) {
            u_trace_disable_event_range(old_cmds[i]->trace_renderpass_start,
                                        old_cmds[i]->trace_renderpass_end);
         }

         /* When the merged command buffer is finally recorded, its state
          * needs to match the state of the command buffer before it,
          * because we skip tu6_emit_hw().
          */
         cmd_buffer->state.ccu_state = old_cmds[i]->state.ccu_state;
         break;
      }
   }

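   /* Hand the expanded list back as a plain allocation owned by the caller;
    * the dynarray was only scratch space.
    */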
   struct tu_cmd_buffer **new_cmds = (struct tu_cmd_buffer **)
      vk_alloc(&dev->vk.alloc, cmds.size, alignof(struct tu_cmd_buffer *),
               VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
   if (!new_cmds)
      return VK_ERROR_OUT_OF_HOST_MEMORY;
   memcpy(new_cmds, cmds.data, cmds.size);
   *cmds_ptr = new_cmds;
   *size = util_dynarray_num_elements(&cmds, struct tu_cmd_buffer *);
   util_dynarray_fini(&cmds);

   return VK_SUCCESS;
}