xref: /aosp_15_r20/external/mesa3d/src/panfrost/vulkan/panvk_vX_cmd_desc_state.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright © 2024 Collabora Ltd.
3  *
4  * Derived from tu_cmd_buffer.c which is:
5  * Copyright © 2016 Red Hat.
6  * Copyright © 2016 Bas Nieuwenhuizen
7  * Copyright © 2015 Intel Corporation
8  *
9  * SPDX-License-Identifier: MIT
10  */
11 
12 #include "genxml/gen_macros.h"
13 
14 #include "panvk_buffer.h"
15 #include "panvk_cmd_alloc.h"
16 #include "panvk_cmd_buffer.h"
17 #include "panvk_cmd_desc_state.h"
18 #include "panvk_entrypoints.h"
19 
20 #include "pan_pool.h"
21 
22 #include "util/rounding.h"
23 
24 #include "vk_alloc.h"
25 #include "vk_command_buffer.h"
26 #include "vk_command_pool.h"
27 
28 static void
cmd_desc_state_bind_sets(struct panvk_descriptor_state * desc_state,const VkBindDescriptorSetsInfoKHR * info)29 cmd_desc_state_bind_sets(struct panvk_descriptor_state *desc_state,
30                          const VkBindDescriptorSetsInfoKHR *info)
31 {
32    unsigned dynoffset_idx = 0;
33    for (unsigned i = 0; i < info->descriptorSetCount; ++i) {
34       unsigned set_idx = i + info->firstSet;
35       VK_FROM_HANDLE(panvk_descriptor_set, set, info->pDescriptorSets[i]);
36 
37       /* Invalidate the push set. */
38       if (desc_state->sets[set_idx] &&
39           desc_state->sets[set_idx] == desc_state->push_sets[set_idx])
40          desc_state->push_sets[set_idx]->descs.dev = 0;
41 
42       desc_state->sets[set_idx] = set;
43 
44       if (!set || !set->layout->dyn_buf_count)
45          continue;
46 
47       for (unsigned b = 0; b < set->layout->binding_count; b++) {
48          VkDescriptorType type = set->layout->bindings[b].type;
49 
50          if (type != VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC &&
51              type != VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC)
52             continue;
53 
54          unsigned dyn_buf_idx = set->layout->bindings[b].desc_idx;
55          for (unsigned e = 0; e < set->layout->bindings[b].desc_count; e++) {
56             desc_state->dyn_buf_offsets[set_idx][dyn_buf_idx++] =
57                info->pDynamicOffsets[dynoffset_idx++];
58          }
59       }
60    }
61 
62    assert(dynoffset_idx == info->dynamicOffsetCount);
63 }
64 
65 static struct panvk_descriptor_set *
cmd_get_push_desc_set(struct vk_command_buffer * vk_cmdbuf,struct panvk_descriptor_state * desc_state,uint32_t set_idx)66 cmd_get_push_desc_set(struct vk_command_buffer *vk_cmdbuf,
67                       struct panvk_descriptor_state *desc_state,
68                       uint32_t set_idx)
69 {
70    struct panvk_cmd_buffer *cmdbuf =
71       container_of(vk_cmdbuf, struct panvk_cmd_buffer, vk);
72    struct panvk_cmd_pool *pool =
73       container_of(cmdbuf->vk.pool, struct panvk_cmd_pool, vk);
74    struct panvk_push_set *push_set;
75 
76    assert(set_idx < MAX_SETS);
77 
78    if (likely(desc_state->push_sets[set_idx])) {
79       push_set = container_of(desc_state->push_sets[set_idx],
80                               struct panvk_push_set, set);
81    } else if (!list_is_empty(&pool->push_sets)) {
82       push_set =
83          list_first_entry(&pool->push_sets, struct panvk_push_set, base.node);
84       list_del(&push_set->base.node);
85       list_addtail(&push_set->base.node, &cmdbuf->push_sets);
86       memset(push_set->descs, 0, sizeof(push_set->descs));
87    } else {
88       push_set = vk_zalloc(&pool->vk.alloc, sizeof(*push_set), 8,
89                            VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
90       list_addtail(&push_set->base.node, &cmdbuf->push_sets);
91    }
92 
93    if (unlikely(!push_set)) {
94       vk_command_buffer_set_error(&cmdbuf->vk, VK_ERROR_OUT_OF_HOST_MEMORY);
95       return NULL;
96    }
97 
98    if (desc_state->push_sets[set_idx] == NULL) {
99       desc_state->push_sets[set_idx] = &push_set->set;
100       push_set->set.descs.host = push_set->descs;
101    }
102 
103    struct panvk_descriptor_set *set = desc_state->push_sets[set_idx];
104 
105    /* Pushing descriptors replaces whatever sets are bound */
106    desc_state->sets[set_idx] = set;
107    return set;
108 }
109 
110 #if PAN_ARCH <= 7
111 VkResult
panvk_per_arch(cmd_prepare_dyn_ssbos)112 panvk_per_arch(cmd_prepare_dyn_ssbos)(
113    struct panvk_cmd_buffer *cmdbuf,
114    const struct panvk_descriptor_state *desc_state,
115    const struct panvk_shader *shader,
116    struct panvk_shader_desc_state *shader_desc_state)
117 {
118    if (!shader || !shader->desc_info.dyn_ssbos.count ||
119        shader_desc_state->dyn_ssbos)
120       return VK_SUCCESS;
121 
122    struct panfrost_ptr ptr = panvk_cmd_alloc_dev_mem(
123       cmdbuf, desc, shader->desc_info.dyn_ssbos.count * PANVK_DESCRIPTOR_SIZE,
124       PANVK_DESCRIPTOR_SIZE);
125    if (!ptr.gpu)
126       return VK_ERROR_OUT_OF_DEVICE_MEMORY;
127 
128    struct panvk_ssbo_addr *ssbos = ptr.cpu;
129    for (uint32_t i = 0; i < shader->desc_info.dyn_ssbos.count; i++) {
130       uint32_t src_handle = shader->desc_info.dyn_ssbos.map[i];
131       uint32_t set_idx = COPY_DESC_HANDLE_EXTRACT_TABLE(src_handle);
132       uint32_t dyn_buf_idx = COPY_DESC_HANDLE_EXTRACT_INDEX(src_handle);
133       const struct panvk_descriptor_set *set = desc_state->sets[set_idx];
134       const uint32_t dyn_buf_offset =
135          desc_state->dyn_buf_offsets[set_idx][dyn_buf_idx];
136 
137       assert(set_idx < MAX_SETS);
138       assert(set);
139 
140       ssbos[i] = (struct panvk_ssbo_addr){
141          .base_addr = set->dyn_bufs[dyn_buf_idx].dev_addr + dyn_buf_offset,
142          .size = set->dyn_bufs[dyn_buf_idx].size,
143       };
144    }
145 
146    shader_desc_state->dyn_ssbos = ptr.gpu;
147    return VK_SUCCESS;
148 }
149 
150 static void
panvk_cmd_fill_dyn_ubos(const struct panvk_descriptor_state * desc_state,const struct panvk_shader * shader,struct mali_uniform_buffer_packed * ubos,uint32_t ubo_count)151 panvk_cmd_fill_dyn_ubos(const struct panvk_descriptor_state *desc_state,
152                         const struct panvk_shader *shader,
153                         struct mali_uniform_buffer_packed *ubos,
154                         uint32_t ubo_count)
155 {
156    for (uint32_t i = 0; i < shader->desc_info.dyn_ubos.count; i++) {
157       uint32_t src_handle = shader->desc_info.dyn_ubos.map[i];
158       uint32_t set_idx = COPY_DESC_HANDLE_EXTRACT_TABLE(src_handle);
159       uint32_t dyn_buf_idx = COPY_DESC_HANDLE_EXTRACT_INDEX(src_handle);
160       uint32_t ubo_idx =
161          i + shader->desc_info.others.count[PANVK_BIFROST_DESC_TABLE_UBO];
162       const struct panvk_descriptor_set *set = desc_state->sets[set_idx];
163       const uint32_t dyn_buf_offset =
164          desc_state->dyn_buf_offsets[set_idx][dyn_buf_idx];
165 
166       assert(set_idx < MAX_SETS);
167       assert(set);
168       assert(ubo_idx < ubo_count);
169 
170       pan_pack(&ubos[ubo_idx], UNIFORM_BUFFER, cfg) {
171          cfg.pointer = set->dyn_bufs[dyn_buf_idx].dev_addr + dyn_buf_offset;
172          cfg.entries = DIV_ROUND_UP(set->dyn_bufs[dyn_buf_idx].size, 16);
173       }
174    }
175 }
176 
/*
 * Allocate and fill the per-shader Bifrost descriptor tables (UBO, texture,
 * sampler, image) from the bound descriptor state. Tables that are empty or
 * already emitted are skipped, so the function is safe to call repeatedly.
 *
 * Returns VK_ERROR_OUT_OF_DEVICE_MEMORY when a descriptor-pool allocation
 * fails, VK_SUCCESS otherwise.
 */
VkResult
panvk_per_arch(cmd_prepare_shader_desc_tables)(
   struct panvk_cmd_buffer *cmdbuf,
   const struct panvk_descriptor_state *desc_state,
   const struct panvk_shader *shader,
   struct panvk_shader_desc_state *shader_desc_state)
{
   if (!shader)
      return VK_SUCCESS;

   for (uint32_t i = 0; i < ARRAY_SIZE(shader->desc_info.others.count); i++) {
      /* Dynamic UBOs are appended to the regular UBO table. */
      uint32_t desc_count =
         shader->desc_info.others.count[i] +
         (i == PANVK_BIFROST_DESC_TABLE_UBO ? shader->desc_info.dyn_ubos.count
                                             : 0);
      /* UBO descriptors are 8 bytes; every other table uses the generic
       * descriptor size. */
      uint32_t desc_size =
         i == PANVK_BIFROST_DESC_TABLE_UBO ? 8 : PANVK_DESCRIPTOR_SIZE;

      if (!desc_count || shader_desc_state->tables[i])
         continue;

      struct panfrost_ptr ptr = panvk_cmd_alloc_dev_mem(
         cmdbuf, desc, desc_count * desc_size, PANVK_DESCRIPTOR_SIZE);
      if (!ptr.gpu)
         return VK_ERROR_OUT_OF_DEVICE_MEMORY;

      shader_desc_state->tables[i] = ptr.gpu;

      /* Only the dynamic UBO entries are filled here; the rest of the
       * table contents are copied elsewhere. */
      if (i == PANVK_BIFROST_DESC_TABLE_UBO)
         panvk_cmd_fill_dyn_ubos(desc_state, shader, ptr.cpu, desc_count);

      /* The image table being actually the attribute table, this is handled
       * separately for vertex shaders. */
      if (i == PANVK_BIFROST_DESC_TABLE_IMG &&
          shader->info.stage != MESA_SHADER_VERTEX) {
         assert(!shader_desc_state->img_attrib_table);

         ptr = panvk_cmd_alloc_desc_array(cmdbuf, desc_count, ATTRIBUTE);
         if (!ptr.gpu)
            return VK_ERROR_OUT_OF_DEVICE_MEMORY;

         shader_desc_state->img_attrib_table = ptr.gpu;
      }
   }

   uint32_t tex_count =
      shader->desc_info.others.count[PANVK_BIFROST_DESC_TABLE_TEXTURE];
   uint32_t sampler_count =
      shader->desc_info.others.count[PANVK_BIFROST_DESC_TABLE_SAMPLER];

   /* When the shader uses textures but declares no samplers, point the
    * sampler table at a zero-initialized placeholder sampler. */
   if (tex_count && !sampler_count) {
      struct panfrost_ptr sampler = panvk_cmd_alloc_desc(cmdbuf, SAMPLER);
      if (!sampler.gpu)
         return VK_ERROR_OUT_OF_DEVICE_MEMORY;

      /* Emit a dummy sampler if we have to. */
      pan_pack(sampler.cpu, SAMPLER, _) {
      }

      shader_desc_state->tables[PANVK_BIFROST_DESC_TABLE_SAMPLER] = sampler.gpu;
   }

   return VK_SUCCESS;
}
241 #else
242 void
panvk_per_arch(cmd_fill_dyn_bufs)243 panvk_per_arch(cmd_fill_dyn_bufs)(
244    const struct panvk_descriptor_state *desc_state,
245    const struct panvk_shader *shader, struct mali_buffer_packed *buffers)
246 {
247    if (!shader)
248       return;
249 
250    for (uint32_t i = 0; i < shader->desc_info.dyn_bufs.count; i++) {
251       uint32_t src_handle = shader->desc_info.dyn_bufs.map[i];
252       uint32_t set_idx = COPY_DESC_HANDLE_EXTRACT_TABLE(src_handle);
253       uint32_t dyn_buf_idx = COPY_DESC_HANDLE_EXTRACT_INDEX(src_handle);
254       const struct panvk_descriptor_set *set = desc_state->sets[set_idx];
255       const uint32_t dyn_buf_offset =
256          desc_state->dyn_buf_offsets[set_idx][dyn_buf_idx];
257 
258       assert(set_idx < MAX_SETS);
259       assert(set);
260 
261       pan_pack(&buffers[i], BUFFER, cfg) {
262          cfg.size = set->dyn_bufs[dyn_buf_idx].size;
263          cfg.address = set->dyn_bufs[dyn_buf_idx].dev_addr + dyn_buf_offset;
264       }
265    }
266 }
267 
268 VkResult
panvk_per_arch(cmd_prepare_shader_res_table)269 panvk_per_arch(cmd_prepare_shader_res_table)(
270    struct panvk_cmd_buffer *cmdbuf,
271    const struct panvk_descriptor_state *desc_state,
272    const struct panvk_shader *shader,
273    struct panvk_shader_desc_state *shader_desc_state)
274 {
275    if (!shader || shader_desc_state->res_table)
276       return VK_SUCCESS;
277 
278    uint32_t first_unused_set = util_last_bit(shader->desc_info.used_set_mask);
279    uint32_t res_count = 1 + first_unused_set;
280    struct panfrost_ptr ptr =
281       panvk_cmd_alloc_desc_array(cmdbuf, res_count, RESOURCE);
282    if (!ptr.gpu)
283       return VK_ERROR_OUT_OF_DEVICE_MEMORY;
284 
285    struct mali_resource_packed *res_table = ptr.cpu;
286 
287    /* First entry is the driver set table, where we store the vertex attributes,
288     * the dummy sampler, the dynamic buffers and the vertex buffers. */
289    pan_pack(&res_table[0], RESOURCE, cfg) {
290       cfg.address = shader_desc_state->driver_set.dev_addr;
291       cfg.size = shader_desc_state->driver_set.size;
292       cfg.contains_descriptors = cfg.size > 0;
293    }
294 
295    for (uint32_t i = 0; i < first_unused_set; i++) {
296       const struct panvk_descriptor_set *set = desc_state->sets[i];
297 
298       pan_pack(&res_table[i + 1], RESOURCE, cfg) {
299          if (shader->desc_info.used_set_mask & BITFIELD_BIT(i)) {
300             cfg.address = set->descs.dev;
301             cfg.contains_descriptors = true;
302             cfg.size = set->desc_count * PANVK_DESCRIPTOR_SIZE;
303          } else {
304             cfg.address = 0;
305             cfg.contains_descriptors = false;
306             cfg.size = 0;
307          }
308       }
309    }
310 
311    shader_desc_state->res_table = ptr.gpu | res_count;
312    return VK_SUCCESS;
313 }
314 #endif
315 
316 VkResult
panvk_per_arch(cmd_prepare_push_descs)317 panvk_per_arch(cmd_prepare_push_descs)(struct panvk_cmd_buffer *cmdbuf,
318                                        struct panvk_descriptor_state *desc_state,
319                                        uint32_t used_set_mask)
320 {
321    for (unsigned i = 0; i < ARRAY_SIZE(desc_state->push_sets); i++) {
322       struct panvk_descriptor_set *push_set = desc_state->push_sets[i];
323 
324       if (!(used_set_mask & BITFIELD_BIT(i)) || !push_set ||
325           desc_state->sets[i] != push_set || push_set->descs.dev)
326          continue;
327 
328       struct panfrost_ptr ptr = panvk_cmd_alloc_dev_mem(
329          cmdbuf, desc, push_set->desc_count * PANVK_DESCRIPTOR_SIZE,
330          PANVK_DESCRIPTOR_SIZE);
331       if (!ptr.gpu)
332          return VK_ERROR_OUT_OF_DEVICE_MEMORY;
333 
334       memcpy(ptr.cpu, push_set->descs.host,
335              push_set->desc_count * PANVK_DESCRIPTOR_SIZE);
336       push_set->descs.dev = ptr.gpu;
337    }
338 
339    return VK_SUCCESS;
340 }
341 
342 VKAPI_ATTR void VKAPI_CALL
panvk_per_arch(CmdBindDescriptorSets2KHR)343 panvk_per_arch(CmdBindDescriptorSets2KHR)(
344    VkCommandBuffer commandBuffer,
345    const VkBindDescriptorSetsInfoKHR *pBindDescriptorSetsInfo)
346 {
347    VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
348 
349    /* TODO: Invalidate only if the shader tables are disturbed */
350    if (pBindDescriptorSetsInfo->stageFlags & VK_SHADER_STAGE_ALL_GRAPHICS) {
351       cmd_desc_state_bind_sets(&cmdbuf->state.gfx.desc_state,
352                                pBindDescriptorSetsInfo);
353 
354       memset(&cmdbuf->state.gfx.vs.desc, 0, sizeof(cmdbuf->state.gfx.vs.desc));
355       memset(&cmdbuf->state.gfx.fs.desc, 0, sizeof(cmdbuf->state.gfx.fs.desc));
356    }
357 
358    if (pBindDescriptorSetsInfo->stageFlags & VK_SHADER_STAGE_COMPUTE_BIT) {
359       cmd_desc_state_bind_sets(&cmdbuf->state.compute.desc_state,
360                                pBindDescriptorSetsInfo);
361 
362       memset(&cmdbuf->state.compute.cs.desc, 0,
363              sizeof(cmdbuf->state.compute.cs.desc));
364    }
365 }
366 
367 static void
push_desc_set_write(struct panvk_cmd_buffer * cmd,struct panvk_descriptor_state * desc,const VkPushDescriptorSetInfoKHR * info)368 push_desc_set_write(struct panvk_cmd_buffer *cmd,
369                     struct panvk_descriptor_state *desc,
370                     const VkPushDescriptorSetInfoKHR *info)
371 {
372    VK_FROM_HANDLE(vk_pipeline_layout, playout, info->layout);
373 
374    const struct panvk_descriptor_set_layout *set_layout =
375       to_panvk_descriptor_set_layout(playout->set_layouts[info->set]);
376 
377    struct panvk_descriptor_set *push_set =
378       cmd_get_push_desc_set(&cmd->vk, desc, info->set);
379    if (!push_set)
380       return;
381 
382    push_set->layout = set_layout;
383    push_set->desc_count = set_layout->desc_count;
384 
385    for (uint32_t i = 0; i < info->descriptorWriteCount; i++)
386       panvk_per_arch(descriptor_set_write)(push_set,
387                                            &info->pDescriptorWrites[i], true);
388 
389    push_set->descs.dev = 0;
390    push_set->layout = NULL;
391 }
392 
393 VKAPI_ATTR void VKAPI_CALL
panvk_per_arch(CmdPushDescriptorSet2KHR)394 panvk_per_arch(CmdPushDescriptorSet2KHR)(
395    VkCommandBuffer commandBuffer,
396    const VkPushDescriptorSetInfoKHR *pPushDescriptorSetInfo)
397 {
398    VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
399 
400    if (pPushDescriptorSetInfo->stageFlags & VK_SHADER_STAGE_ALL_GRAPHICS) {
401       push_desc_set_write(cmdbuf, &cmdbuf->state.gfx.desc_state,
402                           pPushDescriptorSetInfo);
403 
404       memset(&cmdbuf->state.gfx.vs.desc, 0, sizeof(cmdbuf->state.gfx.vs.desc));
405       memset(&cmdbuf->state.gfx.fs.desc, 0, sizeof(cmdbuf->state.gfx.fs.desc));
406    }
407 
408    if (pPushDescriptorSetInfo->stageFlags & VK_SHADER_STAGE_COMPUTE_BIT) {
409       push_desc_set_write(cmdbuf, &cmdbuf->state.compute.desc_state,
410                           pPushDescriptorSetInfo);
411 
412       memset(&cmdbuf->state.compute.cs.desc, 0,
413              sizeof(cmdbuf->state.compute.cs.desc));
414    }
415 }
416 
/*
 * Implementation of vkCmdPushDescriptorSetWithTemplate2KHR: writes the
 * templated descriptor data into the push set for the template's bind
 * point, then clears the affected per-shader descriptor tables so they get
 * rebuilt on the next draw/dispatch.
 */
VKAPI_ATTR void VKAPI_CALL
panvk_per_arch(CmdPushDescriptorSetWithTemplate2KHR)(
   VkCommandBuffer commandBuffer, const VkPushDescriptorSetWithTemplateInfoKHR
                                     *pPushDescriptorSetWithTemplateInfo)
{
   VK_FROM_HANDLE(vk_descriptor_update_template, template,
                  pPushDescriptorSetWithTemplateInfo->descriptorUpdateTemplate);
   VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
   VK_FROM_HANDLE(vk_pipeline_layout, playout,
                  pPushDescriptorSetWithTemplateInfo->layout);
   const uint32_t set = pPushDescriptorSetWithTemplateInfo->set;
   const struct panvk_descriptor_set_layout *set_layout =
      to_panvk_descriptor_set_layout(playout->set_layouts[set]);
   struct panvk_descriptor_state *desc_state =
      panvk_cmd_get_desc_state(cmdbuf, template->bind_point);
   struct panvk_descriptor_set *push_set =
      cmd_get_push_desc_set(&cmdbuf->vk, desc_state, set);
   /* cmd_get_push_desc_set() already recorded the OOM error on failure. */
   if (!push_set)
      return;

   /* Temporarily attach the layout so the write helper can resolve
    * bindings. */
   push_set->layout = set_layout;
   push_set->desc_count = set_layout->desc_count;

   panvk_per_arch(descriptor_set_write_template)(
      push_set, template, pPushDescriptorSetWithTemplateInfo->pData, true);

   /* Invalidate the device copy and detach the layout. */
   push_set->descs.dev = 0;
   push_set->layout = NULL;

   if (template->bind_point == VK_PIPELINE_BIND_POINT_GRAPHICS) {
      memset(&cmdbuf->state.gfx.vs.desc, 0, sizeof(cmdbuf->state.gfx.vs.desc));
      memset(&cmdbuf->state.gfx.fs.desc, 0, sizeof(cmdbuf->state.gfx.fs.desc));
   } else {
      memset(&cmdbuf->state.compute.cs.desc, 0,
             sizeof(cmdbuf->state.compute.cs.desc));
   }
}
454