xref: /aosp_15_r20/external/mesa3d/src/virtio/vulkan/vn_feedback.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright 2022 Google LLC
3  * SPDX-License-Identifier: MIT
4  */
5 
6 #include "vn_feedback.h"
7 
8 #include "vn_command_buffer.h"
9 #include "vn_device.h"
10 #include "vn_physical_device.h"
11 #include "vn_query_pool.h"
12 #include "vn_queue.h"
13 
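/* Feedback overview (a summary of what this file implements):
 *
 * A feedback buffer is a host-coherent, persistently mapped VkBuffer that is
 * suballocated into small slots. The cmds recorded below make the GPU write
 * fence/semaphore/event status or query results into those slots, so the
 * driver can poll completion from the mapped memory without a round trip to
 * the renderer.
 *
 * A rough usage sketch, assuming the vn_feedback_get_status() helper from
 * vn_feedback.h (the real call sites live in vn_queue.c and the event code):
 *
 *    struct vn_feedback_slot *slot =
 *       vn_feedback_pool_alloc(&dev->feedback_pool, VN_FEEDBACK_TYPE_FENCE);
 *    ...append a cmd from vn_feedback_cmd_alloc() to the fence's submission...
 *    if (vn_feedback_get_status(slot) == VK_SUCCESS)
 *       ...the fence has signaled...
 */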
14 static uint32_t
15 vn_get_memory_type_index(const VkPhysicalDeviceMemoryProperties *mem_props,
16                          uint32_t mem_type_bits,
17                          VkMemoryPropertyFlags required_mem_flags)
18 {
19    u_foreach_bit(mem_type_index, mem_type_bits)
20    {
21       assert(mem_type_index < mem_props->memoryTypeCount);
22       if ((mem_props->memoryTypes[mem_type_index].propertyFlags &
23            required_mem_flags) == required_mem_flags)
24          return mem_type_index;
25    }
26 
27    return UINT32_MAX;
28 }
29 
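/* Creates a single feedback buffer: a VkBuffer with TRANSFER_SRC/DST usage,
 * backed by host-coherent device memory that stays persistently mapped into
 * fb_buf->data so the host can read slot contents directly.
 */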
30 VkResult
31 vn_feedback_buffer_create(struct vn_device *dev,
32                           uint32_t size,
33                           const VkAllocationCallbacks *alloc,
34                           struct vn_feedback_buffer **out_fb_buf)
35 {
36    const bool exclusive = dev->queue_family_count == 1;
37    const VkPhysicalDeviceMemoryProperties *mem_props =
38       &dev->physical_device->memory_properties;
39    VkDevice dev_handle = vn_device_to_handle(dev);
40    VkResult result;
41 
42    struct vn_feedback_buffer *fb_buf =
43       vk_zalloc(alloc, sizeof(*fb_buf), VN_DEFAULT_ALIGN,
44                 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
45    if (!fb_buf)
46       return VK_ERROR_OUT_OF_HOST_MEMORY;
47 
48    /* Use concurrent sharing mode to avoid explicit queue family ownership
49     * transfers for a device created with queues from multiple families.
50     */
51    const VkBufferCreateInfo buf_create_info = {
52       .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
53       .size = size,
54       /* Feedback for fences and timeline semaphores will write to this buffer
55        * as a DST when signalling. Timeline semaphore feedback will also read
56        * from this buffer as a SRC to retrieve the counter value to signal.
57        */
58       .usage =
59          VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT,
60       .sharingMode =
61          exclusive ? VK_SHARING_MODE_EXCLUSIVE : VK_SHARING_MODE_CONCURRENT,
62       /* below favors the current venus protocol */
63       .queueFamilyIndexCount = exclusive ? 0 : dev->queue_family_count,
64       .pQueueFamilyIndices = exclusive ? NULL : dev->queue_families,
65    };
66    result = vn_CreateBuffer(dev_handle, &buf_create_info, alloc,
67                             &fb_buf->buf_handle);
68    if (result != VK_SUCCESS)
69       goto out_free_feedback_buffer;
70 
71    struct vn_buffer *buf = vn_buffer_from_handle(fb_buf->buf_handle);
72    const VkMemoryRequirements *mem_req =
73       &buf->requirements.memory.memoryRequirements;
74    const uint32_t mem_type_index =
75       vn_get_memory_type_index(mem_props, mem_req->memoryTypeBits,
76                                VK_MEMORY_PROPERTY_HOST_COHERENT_BIT);
77    if (mem_type_index >= mem_props->memoryTypeCount) {
78       result = VK_ERROR_INITIALIZATION_FAILED;
79       goto out_destroy_buffer;
80    }
81 
82    const VkMemoryAllocateInfo mem_alloc_info = {
83       .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
84       .allocationSize = mem_req->size,
85       .memoryTypeIndex = mem_type_index,
86    };
87    result = vn_AllocateMemory(dev_handle, &mem_alloc_info, alloc,
88                               &fb_buf->mem_handle);
89    if (result != VK_SUCCESS)
90       goto out_destroy_buffer;
91 
92    const VkBindBufferMemoryInfo bind_info = {
93       .sType = VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO,
94       .buffer = fb_buf->buf_handle,
95       .memory = fb_buf->mem_handle,
96       .memoryOffset = 0,
97    };
98    result = vn_BindBufferMemory2(dev_handle, 1, &bind_info);
99    if (result != VK_SUCCESS)
100       goto out_free_memory;
101 
102    result = vn_MapMemory(dev_handle, fb_buf->mem_handle, 0, VK_WHOLE_SIZE, 0,
103                          &fb_buf->data);
104    if (result != VK_SUCCESS)
105       goto out_free_memory;
106 
107    *out_fb_buf = fb_buf;
108 
109    return VK_SUCCESS;
110 
111 out_free_memory:
112    vn_FreeMemory(dev_handle, fb_buf->mem_handle, alloc);
113 
114 out_destroy_buffer:
115    vn_DestroyBuffer(dev_handle, fb_buf->buf_handle, alloc);
116 
117 out_free_feedback_buffer:
118    vk_free(alloc, fb_buf);
119 
120    return result;
121 }
122 
123 void
124 vn_feedback_buffer_destroy(struct vn_device *dev,
125                            struct vn_feedback_buffer *fb_buf,
126                            const VkAllocationCallbacks *alloc)
127 {
128    VkDevice dev_handle = vn_device_to_handle(dev);
129 
130    vn_UnmapMemory(dev_handle, fb_buf->mem_handle);
131    vn_FreeMemory(dev_handle, fb_buf->mem_handle, alloc);
132    vn_DestroyBuffer(dev_handle, fb_buf->buf_handle, alloc);
133    vk_free(alloc, fb_buf);
134 }
135 
136 static inline uint32_t
137 vn_get_feedback_buffer_alignment(struct vn_device *dev,
138                                  struct vn_feedback_buffer *fb_buf)
139 {
140    struct vn_buffer *buf = vn_buffer_from_handle(fb_buf->buf_handle);
141    return align(buf->requirements.memory.memoryRequirements.alignment,
142                 dev->physical_device->wa_min_fb_align);
143 }
144 
145 static VkResult
146 vn_feedback_pool_grow_locked(struct vn_feedback_pool *pool)
147 {
148    VN_TRACE_FUNC();
149    struct vn_feedback_buffer *fb_buf = NULL;
150    VkResult result;
151 
152    result =
153       vn_feedback_buffer_create(pool->dev, pool->size, pool->alloc, &fb_buf);
154    if (result != VK_SUCCESS)
155       return result;
156 
157    pool->used = 0;
158    pool->alignment = vn_get_feedback_buffer_alignment(pool->dev, fb_buf);
159 
160    list_add(&fb_buf->head, &pool->fb_bufs);
161 
162    return VK_SUCCESS;
163 }
164 
165 VkResult
166 vn_feedback_pool_init(struct vn_device *dev,
167                       struct vn_feedback_pool *pool,
168                       uint32_t size,
169                       const VkAllocationCallbacks *alloc)
170 {
171    simple_mtx_init(&pool->mutex, mtx_plain);
172 
173    pool->dev = dev;
174    pool->alloc = alloc;
175    pool->size = size;
176    pool->used = size;
177    pool->alignment = 1;
178    list_inithead(&pool->fb_bufs);
179    list_inithead(&pool->free_slots);
180 
181    return VK_SUCCESS;
182 }
183 
184 void
185 vn_feedback_pool_fini(struct vn_feedback_pool *pool)
186 {
187    list_for_each_entry_safe(struct vn_feedback_slot, slot, &pool->free_slots,
188                             head)
189       vk_free(pool->alloc, slot);
190 
191    list_for_each_entry_safe(struct vn_feedback_buffer, fb_buf, &pool->fb_bufs,
192                             head)
193       vn_feedback_buffer_destroy(pool->dev, fb_buf, pool->alloc);
194 
195    simple_mtx_destroy(&pool->mutex);
196 }
197 
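/* Suballocation here is append-only: growing the pool allocates a fresh
 * feedback buffer of pool->size bytes and resets pool->used, abandoning any
 * remaining space in the previous buffer. Older buffers stay on
 * pool->fb_bufs until vn_feedback_pool_fini(); individual slots are instead
 * recycled through pool->free_slots via vn_feedback_pool_free().
 */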
198 static struct vn_feedback_buffer *
199 vn_feedback_pool_alloc_locked(struct vn_feedback_pool *pool,
200                               uint32_t size,
201                               uint32_t *out_offset)
202 {
203    /* The default values of pool->used and pool->alignment trigger the
204     * initial pool grow; both fields are properly initialized after that.
205     */
206    if (unlikely(align(size, pool->alignment) > pool->size - pool->used)) {
207       VkResult result = vn_feedback_pool_grow_locked(pool);
208       if (result != VK_SUCCESS)
209          return NULL;
210 
211       assert(align(size, pool->alignment) <= pool->size - pool->used);
212    }
213 
214    *out_offset = pool->used;
215    pool->used += align(size, pool->alignment);
216 
217    return list_first_entry(&pool->fb_bufs, struct vn_feedback_buffer, head);
218 }
219 
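/* Every slot is allocated with the 8-byte slot_size of the largest payload
 * (the 64-bit timeline counter). The 4-byte fence and event status slots
 * share the same footprint, so any slot recycled onto free_slots can serve
 * any feedback type; only slot->type is rewritten on reuse.
 */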
220 struct vn_feedback_slot *
221 vn_feedback_pool_alloc(struct vn_feedback_pool *pool,
222                        enum vn_feedback_type type)
223 {
224    static const uint32_t slot_size = 8;
225    struct vn_feedback_buffer *fb_buf;
226    uint32_t offset;
227    struct vn_feedback_slot *slot;
228 
229    simple_mtx_lock(&pool->mutex);
230    if (!list_is_empty(&pool->free_slots)) {
231       slot =
232          list_first_entry(&pool->free_slots, struct vn_feedback_slot, head);
233       list_del(&slot->head);
234       simple_mtx_unlock(&pool->mutex);
235 
236       slot->type = type;
237       return slot;
238    }
239 
240    slot = vk_alloc(pool->alloc, sizeof(*slot), VN_DEFAULT_ALIGN,
241                    VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
242    if (!slot) {
243       simple_mtx_unlock(&pool->mutex);
244       return NULL;
245    }
246 
247    fb_buf = vn_feedback_pool_alloc_locked(pool, slot_size, &offset);
248    simple_mtx_unlock(&pool->mutex);
249 
250    if (!fb_buf) {
251       vk_free(pool->alloc, slot);
252       return NULL;
253    }
254 
255    slot->type = type;
256    slot->offset = offset;
257    slot->buf_handle = fb_buf->buf_handle;
258    slot->data = fb_buf->data + offset;
259 
260    return slot;
261 }
262 
263 void
264 vn_feedback_pool_free(struct vn_feedback_pool *pool,
265                       struct vn_feedback_slot *slot)
266 {
267    simple_mtx_lock(&pool->mutex);
268    list_add(&slot->head, &pool->free_slots);
269    simple_mtx_unlock(&pool->mutex);
270 }
271 
272 static inline bool
273 mask_is_32bit(uint64_t x)
274 {
275    return (x & 0xffffffff00000000) == 0;
276 }
277 
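/* Lowers a synchronization2 VkBufferMemoryBarrier2 into the legacy
 * VkBufferMemoryBarrier plus separate stage masks for the non-sync2 path
 * (presumably when the renderer side lacks VK_KHR_synchronization2). The
 * asserts confirm the 64-bit sync2 masks fit in 32 bits so the narrowing is
 * lossless.
 */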
278 static void
279 vn_build_buffer_memory_barrier(const VkDependencyInfo *dep_info,
280                                VkBufferMemoryBarrier *barrier1,
281                                VkPipelineStageFlags *src_stage_mask,
282                                VkPipelineStageFlags *dst_stage_mask)
283 {
284 
285    assert(dep_info->pNext == NULL);
286    assert(dep_info->memoryBarrierCount == 0);
287    assert(dep_info->bufferMemoryBarrierCount == 1);
288    assert(dep_info->imageMemoryBarrierCount == 0);
289 
290    const VkBufferMemoryBarrier2 *barrier2 =
291       &dep_info->pBufferMemoryBarriers[0];
292    assert(barrier2->pNext == NULL);
293    assert(mask_is_32bit(barrier2->srcStageMask));
294    assert(mask_is_32bit(barrier2->srcAccessMask));
295    assert(mask_is_32bit(barrier2->dstStageMask));
296    assert(mask_is_32bit(barrier2->dstAccessMask));
297 
298    *barrier1 = (VkBufferMemoryBarrier){
299       .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
300       .pNext = NULL,
301       .srcAccessMask = barrier2->srcAccessMask,
302       .dstAccessMask = barrier2->dstAccessMask,
303       .srcQueueFamilyIndex = barrier2->srcQueueFamilyIndex,
304       .dstQueueFamilyIndex = barrier2->dstQueueFamilyIndex,
305       .buffer = barrier2->buffer,
306       .offset = barrier2->offset,
307       .size = barrier2->size,
308    };
309 
310    *src_stage_mask = barrier2->srcStageMask;
311    *dst_stage_mask = barrier2->dstStageMask;
312 }
313 
314 static void
315 vn_cmd_buffer_memory_barrier(VkCommandBuffer cmd_handle,
316                              const VkDependencyInfo *dep_info,
317                              bool sync2)
318 {
319    if (sync2)
320       vn_CmdPipelineBarrier2(cmd_handle, dep_info);
321    else {
322       VkBufferMemoryBarrier barrier1;
323       VkPipelineStageFlags src_stage_mask;
324       VkPipelineStageFlags dst_stage_mask;
325 
326       vn_build_buffer_memory_barrier(dep_info, &barrier1, &src_stage_mask,
327                                      &dst_stage_mask);
328       vn_CmdPipelineBarrier(cmd_handle, src_stage_mask, dst_stage_mask,
329                             dep_info->dependencyFlags, 0, NULL, 1, &barrier1,
330                             0, NULL);
331    }
332 }
333 
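/* Records the event feedback update: vkCmdFillBuffer writes the 32-bit
 * VkResult status (typically VK_EVENT_SET or VK_EVENT_RESET from the
 * intercepted cmd) into the event's feedback slot, bracketed by barriers
 * that order it against prior slot writes and flush it to the host.
 */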
334 void
335 vn_event_feedback_cmd_record(VkCommandBuffer cmd_handle,
336                              VkEvent ev_handle,
337                              VkPipelineStageFlags2 src_stage_mask,
338                              VkResult status,
339                              bool sync2)
340 {
341    /* For vkCmdSetEvent and vkCmdResetEvent feedback interception.
342     *
343     * The injection point is after the event call to avoid introducing an
344     * unexpected src stage wait on VK_PIPELINE_STAGE_HOST_BIT and
345     * VK_PIPELINE_STAGE_TRANSFER_BIT if they are not already waited on by
346     * vkCmdSetEvent or vkCmdResetEvent. The resulting delay in the feedback
347     * signal is acceptable given the nature of VkEvent, and the feedback
348     * cmds' lifecycle is guarded by the intercepted command buffer.
349     */
350    struct vn_event *ev = vn_event_from_handle(ev_handle);
351    struct vn_feedback_slot *slot = ev->feedback_slot;
352 
353    if (!slot)
354       return;
355 
356    STATIC_ASSERT(sizeof(*slot->status) == 4);
357 
358    const VkDependencyInfo dep_before = {
359       .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
360       .dependencyFlags = 0,
361       .bufferMemoryBarrierCount = 1,
362       .pBufferMemoryBarriers =
363          (VkBufferMemoryBarrier2[]){
364             {
365                .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2,
366                .srcStageMask = src_stage_mask | VK_PIPELINE_STAGE_HOST_BIT |
367                                VK_PIPELINE_STAGE_TRANSFER_BIT,
368                .srcAccessMask =
369                   VK_ACCESS_HOST_WRITE_BIT | VK_ACCESS_TRANSFER_WRITE_BIT,
370                .dstStageMask = VK_PIPELINE_STAGE_TRANSFER_BIT,
371                .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
372                .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
373                .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
374                .buffer = slot->buf_handle,
375                .offset = slot->offset,
376                .size = 4,
377             },
378          },
379    };
380    vn_cmd_buffer_memory_barrier(cmd_handle, &dep_before, sync2);
381 
382    vn_CmdFillBuffer(cmd_handle, slot->buf_handle, slot->offset, 4, status);
383 
384    const VkDependencyInfo dep_after = {
385       .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
386       .dependencyFlags = 0,
387       .bufferMemoryBarrierCount = 1,
388       .pBufferMemoryBarriers =
389          (VkBufferMemoryBarrier2[]){
390             {
391                .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2,
392                .srcStageMask = VK_PIPELINE_STAGE_TRANSFER_BIT,
393                .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
394                .dstStageMask = VK_PIPELINE_STAGE_HOST_BIT,
395                .dstAccessMask =
396                   VK_ACCESS_HOST_READ_BIT | VK_ACCESS_HOST_WRITE_BIT,
397                .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
398                .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
399                .buffer = slot->buf_handle,
400                .offset = slot->offset,
401                .size = 4,
402             },
403          },
404    };
405    vn_cmd_buffer_memory_barrier(cmd_handle, &dep_after, sync2);
406 }
407 
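/* Makes the transfer write to a feedback slot visible to the host, since the
 * driver reads the slot through the persistently mapped, host-coherent
 * feedback buffer memory.
 */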
408 static inline void
409 vn_feedback_cmd_record_flush_barrier(VkCommandBuffer cmd_handle,
410                                      VkBuffer buffer,
411                                      VkDeviceSize offset,
412                                      VkDeviceSize size)
413 {
414    const VkBufferMemoryBarrier buf_flush_barrier = {
415       .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
416       .pNext = NULL,
417       .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
418       .dstAccessMask = VK_ACCESS_HOST_READ_BIT | VK_ACCESS_HOST_WRITE_BIT,
419       .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
420       .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
421       .buffer = buffer,
422       .offset = offset,
423       .size = size,
424    };
425    vn_CmdPipelineBarrier(cmd_handle, VK_PIPELINE_STAGE_TRANSFER_BIT,
426                          VK_PIPELINE_STAGE_HOST_BIT, 0, 0, NULL, 1,
427                          &buf_flush_barrier, 0, NULL);
428 }
429 
430 static VkResult
431 vn_feedback_cmd_record(VkCommandBuffer cmd_handle,
432                        struct vn_feedback_slot *dst_slot,
433                        struct vn_feedback_slot *src_slot)
434 {
435    STATIC_ASSERT(sizeof(*dst_slot->status) == 4);
436    STATIC_ASSERT(sizeof(*dst_slot->counter) == 8);
437    STATIC_ASSERT(sizeof(*src_slot->counter) == 8);
438 
439    /* The slot size is 8 bytes for a timeline semaphore and 4 bytes for a
440     * fence. src_slot is non-null only for timeline semaphore feedback.
441     */
442    const VkDeviceSize buf_size = src_slot ? 8 : 4;
443 
444    static const VkCommandBufferBeginInfo begin_info = {
445       .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
446       .pNext = NULL,
447       .flags = 0,
448       .pInheritanceInfo = NULL,
449    };
450    VkResult result = vn_BeginCommandBuffer(cmd_handle, &begin_info);
451    if (result != VK_SUCCESS)
452       return result;
453 
454    static const VkMemoryBarrier mem_barrier_before = {
455       .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
456       .pNext = NULL,
457       /* make pending writes available to stay close to signal op */
458       .srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT,
459       /* no need to make all memory visible for feedback update */
460       .dstAccessMask = 0,
461    };
462 
463    const VkBufferMemoryBarrier buf_barrier_before = {
464       .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
465       .pNext = NULL,
466       /* slot memory has been made available via mem_barrier_before */
467       .srcAccessMask = 0,
468       .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
469       .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
470       .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
471       .buffer = dst_slot->buf_handle,
472       .offset = dst_slot->offset,
473       .size = buf_size,
474    };
475 
476    /* Host writes for src_slots should implicitly be made visible upon the
477     * vkQueueSubmit call. */
478    vn_CmdPipelineBarrier(cmd_handle, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
479                          VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 1,
480                          &mem_barrier_before, 1, &buf_barrier_before, 0,
481                          NULL);
482 
483    /* If passed a src_slot, timeline semaphore feedback records a
484     * cmd to copy the counter value from the src slot to the dst slot.
485     * If src_slot is NULL, then fence feedback records a cmd to fill
486     * the dst slot with VK_SUCCESS.
487     */
488    if (src_slot) {
489       assert(src_slot->type == VN_FEEDBACK_TYPE_SEMAPHORE);
490       assert(dst_slot->type == VN_FEEDBACK_TYPE_SEMAPHORE);
491 
492       const VkBufferCopy buffer_copy = {
493          .srcOffset = src_slot->offset,
494          .dstOffset = dst_slot->offset,
495          .size = buf_size,
496       };
497       vn_CmdCopyBuffer(cmd_handle, src_slot->buf_handle, dst_slot->buf_handle,
498                        1, &buffer_copy);
499    } else {
500       assert(dst_slot->type == VN_FEEDBACK_TYPE_FENCE);
501 
502       vn_CmdFillBuffer(cmd_handle, dst_slot->buf_handle, dst_slot->offset,
503                        buf_size, VK_SUCCESS);
504    }
505 
506    vn_feedback_cmd_record_flush_barrier(cmd_handle, dst_slot->buf_handle,
507                                         dst_slot->offset, buf_size);
508 
509    return vn_EndCommandBuffer(cmd_handle);
510 }
511 
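/* Allocates one pre-recorded feedback cmd per queue family, presumably so a
 * matching cmd can be appended to a submission on any queue, plus a src slot
 * that is expected to hold the counter value to signal (written by the
 * driver on the host side, per the buffer-usage comment above).
 */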
512 struct vn_semaphore_feedback_cmd *
513 vn_semaphore_feedback_cmd_alloc(struct vn_device *dev,
514                                 struct vn_feedback_slot *dst_slot)
515 {
516    const VkAllocationCallbacks *alloc = &dev->base.base.alloc;
517    struct vn_semaphore_feedback_cmd *sfb_cmd;
518    VkCommandBuffer *cmd_handles;
519 
520    VK_MULTIALLOC(ma);
521    vk_multialloc_add(&ma, &sfb_cmd, __typeof__(*sfb_cmd), 1);
522    vk_multialloc_add(&ma, &cmd_handles, __typeof__(*cmd_handles),
523                      dev->queue_family_count);
524    if (!vk_multialloc_zalloc(&ma, alloc, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT))
525       return NULL;
526 
527    struct vn_feedback_slot *src_slot =
528       vn_feedback_pool_alloc(&dev->feedback_pool, VN_FEEDBACK_TYPE_SEMAPHORE);
529    if (!src_slot) {
530       vk_free(alloc, sfb_cmd);
531       return NULL;
532    }
533 
534    for (uint32_t i = 0; i < dev->queue_family_count; i++) {
535       VkDevice dev_handle = vn_device_to_handle(dev);
536       VkResult result =
537          vn_feedback_cmd_alloc(dev_handle, &dev->fb_cmd_pools[i], dst_slot,
538                                src_slot, &cmd_handles[i]);
539       if (result != VK_SUCCESS) {
540          for (uint32_t j = 0; j < i; j++) {
541             vn_feedback_cmd_free(dev_handle, &dev->fb_cmd_pools[j],
542                                  cmd_handles[j]);
543          }
544 
545          vn_feedback_pool_free(&dev->feedback_pool, src_slot);
546          vk_free(alloc, sfb_cmd);
547          return NULL;
548       }
549    }
550 
551    sfb_cmd->cmd_handles = cmd_handles;
552    sfb_cmd->src_slot = src_slot;
553    return sfb_cmd;
554 }
555 
556 void
557 vn_semaphore_feedback_cmd_free(struct vn_device *dev,
558                                struct vn_semaphore_feedback_cmd *sfb_cmd)
559 {
560    const VkAllocationCallbacks *alloc = &dev->base.base.alloc;
561 
562    for (uint32_t i = 0; i < dev->queue_family_count; i++) {
563       vn_feedback_cmd_free(vn_device_to_handle(dev), &dev->fb_cmd_pools[i],
564                            sfb_cmd->cmd_handles[i]);
565    }
566 
567    vn_feedback_pool_free(&dev->feedback_pool, sfb_cmd->src_slot);
568    vk_free(alloc, sfb_cmd);
569 }
570 
571 static void
572 vn_query_feedback_cmd_record_internal(VkCommandBuffer cmd_handle,
573                                       VkQueryPool pool_handle,
574                                       uint32_t query,
575                                       uint32_t count,
576                                       bool copy)
577 {
578    struct vn_query_pool *pool = vn_query_pool_from_handle(pool_handle);
579    assert(pool->fb_buf);
580 
581    /* Results are always 64 bit and include the 64-bit availability value */
582    const VkDeviceSize slot_size = (pool->result_array_size * 8) + 8;
583    const VkDeviceSize offset = slot_size * query;
584    const VkDeviceSize buf_size = slot_size * count;
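   /* Example: for a query pool whose result is a single 64-bit value,
    * slot_size is 1 * 8 + 8 = 16 bytes, so query N occupies bytes
    * [16 * N, 16 * N + 16) of the feedback buffer.
    */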
585 
586    /* The first synchronization scope of vkCmdCopyQueryPoolResults does not
587     * include the query feedback buffer. Insert a barrier to ensure ordering
588     * against feedback buffer fill cmd injected in vkCmdResetQueryPool.
589     *
590     * The second synchronization scope of vkCmdResetQueryPool does not include
591     * the query feedback buffer. Insert a barrier to ensure ordering against
592     * prior cmds referencing the queries.
593     *
594     * For srcAccessMask, VK_ACCESS_TRANSFER_WRITE_BIT is sufficient since the
595     * gpu cache invalidation for feedback buffer fill in vkResetQueryPool is
596     * done implicitly via queue submission.
597     */
598    const VkPipelineStageFlags src_stage_mask =
599       copy ? VK_PIPELINE_STAGE_TRANSFER_BIT
600            : VK_PIPELINE_STAGE_ALL_COMMANDS_BIT;
601 
602    const VkBufferMemoryBarrier buf_barrier_before = {
603       .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
604       .pNext = NULL,
605       .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
606       .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
607       .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
608       .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
609       .buffer = pool->fb_buf->buf_handle,
610       .offset = offset,
611       .size = buf_size,
612    };
613    vn_CmdPipelineBarrier(cmd_handle, src_stage_mask,
614                          VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, NULL, 1,
615                          &buf_barrier_before, 0, NULL);
616 
617    if (copy) {
618       /* Per spec: "The first synchronization scope includes all commands
619        * which reference the queries in queryPool indicated by query that
620        * occur earlier in submission order. If flags does not include
621        * VK_QUERY_RESULT_WAIT_BIT, vkCmdEndQueryIndexedEXT,
622        * vkCmdWriteTimestamp2, vkCmdEndQuery, and vkCmdWriteTimestamp are
623        * excluded from this scope."
624        *
625        * Set VK_QUERY_RESULT_WAIT_BIT to ensure ordering after
626        * vkCmdEndQuery or vkCmdWriteTimestamp makes the query available.
627        *
628        * Set VK_QUERY_RESULT_64_BIT as we can convert it to 32 bit if the
629        * app requested that.
630        *
631        * Per spec: "vkCmdCopyQueryPoolResults is considered to be a transfer
632        * operation, and its writes to buffer memory must be synchronized using
633        * VK_PIPELINE_STAGE_TRANSFER_BIT and VK_ACCESS_TRANSFER_WRITE_BIT
634        * before using the results."
635        *
636        * So we can reuse the flush barrier after this copy cmd.
637        */
638       vn_CmdCopyQueryPoolResults(cmd_handle, pool_handle, query, count,
639                                  pool->fb_buf->buf_handle, offset, slot_size,
640                                  VK_QUERY_RESULT_WITH_AVAILABILITY_BIT |
641                                     VK_QUERY_RESULT_64_BIT |
642                                     VK_QUERY_RESULT_WAIT_BIT);
643    } else {
644       vn_CmdFillBuffer(cmd_handle, pool->fb_buf->buf_handle, offset, buf_size,
645                        0);
646    }
647 
648    vn_feedback_cmd_record_flush_barrier(cmd_handle, pool->fb_buf->buf_handle,
649                                         offset, buf_size);
650 }
651 
652 static VkResult
653 vn_query_feedback_cmd_record(VkDevice dev_handle,
654                              struct list_head *query_records,
655                              struct vn_query_feedback_cmd *qfb_cmd)
656 {
657    assert(!list_is_empty(query_records));
658 
659    static const VkCommandBufferBeginInfo begin_info = {
660       .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
661    };
662    VkResult result = vn_BeginCommandBuffer(qfb_cmd->cmd_handle, &begin_info);
663    if (result != VK_SUCCESS)
664       return result;
665 
666    list_for_each_entry_safe(struct vn_cmd_query_record, record, query_records,
667                             head) {
668       vn_query_feedback_cmd_record_internal(
669          qfb_cmd->cmd_handle, vn_query_pool_to_handle(record->query_pool),
670          record->query, record->query_count, record->copy);
671    }
672 
673    return vn_EndCommandBuffer(qfb_cmd->cmd_handle);
674 }
675 
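/* Query feedback cmds are cached per feedback cmd pool: a freed cmd only
 * goes back onto free_qfb_cmds (see vn_query_feedback_cmd_free below) and is
 * reset and re-recorded here on the next allocation, all under the pool
 * mutex.
 */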
676 VkResult
677 vn_query_feedback_cmd_alloc(VkDevice dev_handle,
678                             struct vn_feedback_cmd_pool *fb_cmd_pool,
679                             struct list_head *query_records,
680                             struct vn_query_feedback_cmd **out_qfb_cmd)
681 {
682    struct vn_query_feedback_cmd *qfb_cmd;
683    VkResult result;
684 
685    simple_mtx_lock(&fb_cmd_pool->mutex);
686 
687    if (list_is_empty(&fb_cmd_pool->free_qfb_cmds)) {
688       struct vn_command_pool *cmd_pool =
689          vn_command_pool_from_handle(fb_cmd_pool->pool_handle);
690 
691       qfb_cmd = vk_alloc(&cmd_pool->allocator, sizeof(*qfb_cmd),
692                          VN_DEFAULT_ALIGN, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
693       if (!qfb_cmd) {
694          result = VK_ERROR_OUT_OF_HOST_MEMORY;
695          goto out_unlock;
696       }
697 
698       const VkCommandBufferAllocateInfo info = {
699          .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
700          .commandPool = fb_cmd_pool->pool_handle,
701          .level = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
702          .commandBufferCount = 1,
703       };
704       VkCommandBuffer qfb_cmd_handle;
705       result = vn_AllocateCommandBuffers(dev_handle, &info, &qfb_cmd_handle);
706       if (result != VK_SUCCESS) {
707          vk_free(&cmd_pool->allocator, qfb_cmd);
708          goto out_unlock;
709       }
710 
711       qfb_cmd->fb_cmd_pool = fb_cmd_pool;
712       qfb_cmd->cmd_handle = qfb_cmd_handle;
713    } else {
714       qfb_cmd = list_first_entry(&fb_cmd_pool->free_qfb_cmds,
715                                  struct vn_query_feedback_cmd, head);
716       list_del(&qfb_cmd->head);
717       vn_ResetCommandBuffer(qfb_cmd->cmd_handle, 0);
718    }
719 
720    result = vn_query_feedback_cmd_record(dev_handle, query_records, qfb_cmd);
721    if (result != VK_SUCCESS) {
722       list_add(&qfb_cmd->head, &fb_cmd_pool->free_qfb_cmds);
723       goto out_unlock;
724    }
725 
726    *out_qfb_cmd = qfb_cmd;
727 
728 out_unlock:
729    simple_mtx_unlock(&fb_cmd_pool->mutex);
730 
731    return result;
732 }
733 
734 void
735 vn_query_feedback_cmd_free(struct vn_query_feedback_cmd *qfb_cmd)
736 {
737    simple_mtx_lock(&qfb_cmd->fb_cmd_pool->mutex);
738    list_add(&qfb_cmd->head, &qfb_cmd->fb_cmd_pool->free_qfb_cmds);
739    simple_mtx_unlock(&qfb_cmd->fb_cmd_pool->mutex);
740 }
741 
742 VkResult
743 vn_feedback_cmd_alloc(VkDevice dev_handle,
744                       struct vn_feedback_cmd_pool *fb_cmd_pool,
745                       struct vn_feedback_slot *dst_slot,
746                       struct vn_feedback_slot *src_slot,
747                       VkCommandBuffer *out_cmd_handle)
748 {
749    VkCommandPool cmd_pool_handle = fb_cmd_pool->pool_handle;
750    const VkCommandBufferAllocateInfo info = {
751       .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
752       .pNext = NULL,
753       .commandPool = cmd_pool_handle,
754       .level = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
755       .commandBufferCount = 1,
756    };
757    VkCommandBuffer cmd_handle;
758    VkResult result;
759 
760    simple_mtx_lock(&fb_cmd_pool->mutex);
761    result = vn_AllocateCommandBuffers(dev_handle, &info, &cmd_handle);
762    if (result != VK_SUCCESS)
763       goto out_unlock;
764 
765    result = vn_feedback_cmd_record(cmd_handle, dst_slot, src_slot);
766    if (result != VK_SUCCESS) {
767       vn_FreeCommandBuffers(dev_handle, cmd_pool_handle, 1, &cmd_handle);
768       goto out_unlock;
769    }
770 
771    *out_cmd_handle = cmd_handle;
772 
773 out_unlock:
774    simple_mtx_unlock(&fb_cmd_pool->mutex);
775 
776    return result;
777 }
778 
779 void
780 vn_feedback_cmd_free(VkDevice dev_handle,
781                      struct vn_feedback_cmd_pool *fb_cmd_pool,
782                      VkCommandBuffer cmd_handle)
783 {
784    simple_mtx_lock(&fb_cmd_pool->mutex);
785    vn_FreeCommandBuffers(dev_handle, fb_cmd_pool->pool_handle, 1,
786                          &cmd_handle);
787    simple_mtx_unlock(&fb_cmd_pool->mutex);
788 }
789 
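/* Creates one feedback command pool per queue family so feedback cmd
 * buffers can be allocated for whichever queue family a submission targets.
 * Skipped entirely when fence, semaphore, and query feedback are all
 * disabled via VN_PERF flags.
 */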
790 VkResult
791 vn_feedback_cmd_pools_init(struct vn_device *dev)
792 {
793    const VkAllocationCallbacks *alloc = &dev->base.base.alloc;
794    VkDevice dev_handle = vn_device_to_handle(dev);
795    struct vn_feedback_cmd_pool *fb_cmd_pools;
796    VkCommandPoolCreateInfo info = {
797       .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
798       .pNext = NULL,
799       .flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT,
800    };
801 
802    if (VN_PERF(NO_FENCE_FEEDBACK) && VN_PERF(NO_SEMAPHORE_FEEDBACK) &&
803        VN_PERF(NO_QUERY_FEEDBACK))
804       return VK_SUCCESS;
805 
806    assert(dev->queue_family_count);
807 
808    fb_cmd_pools =
809       vk_zalloc(alloc, sizeof(*fb_cmd_pools) * dev->queue_family_count,
810                 VN_DEFAULT_ALIGN, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
811    if (!fb_cmd_pools)
812       return VK_ERROR_OUT_OF_HOST_MEMORY;
813 
814    for (uint32_t i = 0; i < dev->queue_family_count; i++) {
815       VkResult result;
816 
817       info.queueFamilyIndex = dev->queue_families[i];
818       result = vn_CreateCommandPool(dev_handle, &info, alloc,
819                                     &fb_cmd_pools[i].pool_handle);
820       if (result != VK_SUCCESS) {
821          for (uint32_t j = 0; j < i; j++) {
822             vn_DestroyCommandPool(dev_handle, fb_cmd_pools[j].pool_handle,
823                                   alloc);
824             simple_mtx_destroy(&fb_cmd_pools[j].mutex);
825          }
826 
827          vk_free(alloc, fb_cmd_pools);
828          return result;
829       }
830 
831       simple_mtx_init(&fb_cmd_pools[i].mutex, mtx_plain);
832       list_inithead(&fb_cmd_pools[i].free_qfb_cmds);
833    }
834 
835    dev->fb_cmd_pools = fb_cmd_pools;
836 
837    return VK_SUCCESS;
838 }
839 
840 void
841 vn_feedback_cmd_pools_fini(struct vn_device *dev)
842 {
843    const VkAllocationCallbacks *alloc = &dev->base.base.alloc;
844    VkDevice dev_handle = vn_device_to_handle(dev);
845 
846    if (!dev->fb_cmd_pools)
847       return;
848 
849    for (uint32_t i = 0; i < dev->queue_family_count; i++) {
850       list_for_each_entry_safe(struct vn_query_feedback_cmd, feedback_cmd,
851                                &dev->fb_cmd_pools[i].free_qfb_cmds, head)
852          vk_free(alloc, feedback_cmd);
853 
854       vn_DestroyCommandPool(dev_handle, dev->fb_cmd_pools[i].pool_handle,
855                             alloc);
856       simple_mtx_destroy(&dev->fb_cmd_pools[i].mutex);
857    }
858 
859    vk_free(alloc, dev->fb_cmd_pools);
860 }
861