/*
 * Copyright 2022 Google LLC
 * SPDX-License-Identifier: MIT
 */

#include "vn_feedback.h"

#include "vn_command_buffer.h"
#include "vn_device.h"
#include "vn_physical_device.h"
#include "vn_query_pool.h"
#include "vn_queue.h"

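/* Return the index of the first memory type in mem_type_bits that has all
 * of required_mem_flags, or UINT32_MAX if none qualifies.
 */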
static uint32_t
vn_get_memory_type_index(const VkPhysicalDeviceMemoryProperties *mem_props,
                         uint32_t mem_type_bits,
                         VkMemoryPropertyFlags required_mem_flags)
{
   u_foreach_bit(mem_type_index, mem_type_bits)
   {
      assert(mem_type_index < mem_props->memoryTypeCount);
      if ((mem_props->memoryTypes[mem_type_index].propertyFlags &
           required_mem_flags) == required_mem_flags)
         return mem_type_index;
   }

   return UINT32_MAX;
}

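/* Create a feedback buffer backed by host-coherent memory: create the
 * VkBuffer, allocate and bind its memory, and keep it persistently mapped.
 */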
VkResult
vn_feedback_buffer_create(struct vn_device *dev,
                          uint32_t size,
                          const VkAllocationCallbacks *alloc,
                          struct vn_feedback_buffer **out_fb_buf)
{
   const bool exclusive = dev->queue_family_count == 1;
   const VkPhysicalDeviceMemoryProperties *mem_props =
      &dev->physical_device->memory_properties;
   VkDevice dev_handle = vn_device_to_handle(dev);
   VkResult result;

   struct vn_feedback_buffer *fb_buf =
      vk_zalloc(alloc, sizeof(*fb_buf), VN_DEFAULT_ALIGN,
                VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (!fb_buf)
      return VK_ERROR_OUT_OF_HOST_MEMORY;

   /* use concurrent sharing mode to avoid explicit queue family ownership
    * transfers for a device created with queues from multiple queue families
    */
   const VkBufferCreateInfo buf_create_info = {
      .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
      .size = size,
      /* Feedback for fences and timeline semaphores will write to this buffer
       * as a DST when signalling. Timeline semaphore feedback will also read
       * from this buffer as a SRC to retrieve the counter value to signal.
       */
      .usage =
         VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT,
      .sharingMode =
         exclusive ? VK_SHARING_MODE_EXCLUSIVE : VK_SHARING_MODE_CONCURRENT,
      /* below favors the current venus protocol */
      .queueFamilyIndexCount = exclusive ? 0 : dev->queue_family_count,
      .pQueueFamilyIndices = exclusive ? NULL : dev->queue_families,
   };
   result = vn_CreateBuffer(dev_handle, &buf_create_info, alloc,
                            &fb_buf->buf_handle);
   if (result != VK_SUCCESS)
      goto out_free_feedback_buffer;

   struct vn_buffer *buf = vn_buffer_from_handle(fb_buf->buf_handle);
   const VkMemoryRequirements *mem_req =
      &buf->requirements.memory.memoryRequirements;
   const uint32_t mem_type_index =
      vn_get_memory_type_index(mem_props, mem_req->memoryTypeBits,
                               VK_MEMORY_PROPERTY_HOST_COHERENT_BIT);
   if (mem_type_index >= mem_props->memoryTypeCount) {
      result = VK_ERROR_INITIALIZATION_FAILED;
      goto out_destroy_buffer;
   }

   const VkMemoryAllocateInfo mem_alloc_info = {
      .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
      .allocationSize = mem_req->size,
      .memoryTypeIndex = mem_type_index,
   };
   result = vn_AllocateMemory(dev_handle, &mem_alloc_info, alloc,
                              &fb_buf->mem_handle);
   if (result != VK_SUCCESS)
      goto out_destroy_buffer;

   const VkBindBufferMemoryInfo bind_info = {
      .sType = VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO,
      .buffer = fb_buf->buf_handle,
      .memory = fb_buf->mem_handle,
      .memoryOffset = 0,
   };
   result = vn_BindBufferMemory2(dev_handle, 1, &bind_info);
   if (result != VK_SUCCESS)
      goto out_free_memory;

   result = vn_MapMemory(dev_handle, fb_buf->mem_handle, 0, VK_WHOLE_SIZE, 0,
                         &fb_buf->data);
   if (result != VK_SUCCESS)
      goto out_free_memory;

   *out_fb_buf = fb_buf;

   return VK_SUCCESS;

out_free_memory:
   vn_FreeMemory(dev_handle, fb_buf->mem_handle, alloc);

out_destroy_buffer:
   vn_DestroyBuffer(dev_handle, fb_buf->buf_handle, alloc);

out_free_feedback_buffer:
   vk_free(alloc, fb_buf);

   return result;
}

void
vn_feedback_buffer_destroy(struct vn_device *dev,
                           struct vn_feedback_buffer *fb_buf,
                           const VkAllocationCallbacks *alloc)
{
   VkDevice dev_handle = vn_device_to_handle(dev);

   vn_UnmapMemory(dev_handle, fb_buf->mem_handle);
   vn_FreeMemory(dev_handle, fb_buf->mem_handle, alloc);
   vn_DestroyBuffer(dev_handle, fb_buf->buf_handle, alloc);
   vk_free(alloc, fb_buf);
}

static inline uint32_t
vn_get_feedback_buffer_alignment(struct vn_device *dev,
                                 struct vn_feedback_buffer *fb_buf)
{
   struct vn_buffer *buf = vn_buffer_from_handle(fb_buf->buf_handle);
   return align(buf->requirements.memory.memoryRequirements.alignment,
                dev->physical_device->wa_min_fb_align);
}

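/* Add another feedback buffer of pool->size bytes to the pool and reset the
 * suballocation state. The caller must hold pool->mutex.
 */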
static VkResult
vn_feedback_pool_grow_locked(struct vn_feedback_pool *pool)
{
   VN_TRACE_FUNC();
   struct vn_feedback_buffer *fb_buf = NULL;
   VkResult result;

   result =
      vn_feedback_buffer_create(pool->dev, pool->size, pool->alloc, &fb_buf);
   if (result != VK_SUCCESS)
      return result;

   pool->used = 0;
   pool->alignment = vn_get_feedback_buffer_alignment(pool->dev, fb_buf);

   list_add(&fb_buf->head, &pool->fb_bufs);

   return VK_SUCCESS;
}

VkResult
vn_feedback_pool_init(struct vn_device *dev,
                      struct vn_feedback_pool *pool,
                      uint32_t size,
                      const VkAllocationCallbacks *alloc)
{
   simple_mtx_init(&pool->mutex, mtx_plain);

   pool->dev = dev;
   pool->alloc = alloc;
   pool->size = size;
   pool->used = size;
   pool->alignment = 1;
   list_inithead(&pool->fb_bufs);
   list_inithead(&pool->free_slots);

   return VK_SUCCESS;
}

void
vn_feedback_pool_fini(struct vn_feedback_pool *pool)
{
   list_for_each_entry_safe(struct vn_feedback_slot, slot, &pool->free_slots,
                            head)
      vk_free(pool->alloc, slot);

   list_for_each_entry_safe(struct vn_feedback_buffer, fb_buf, &pool->fb_bufs,
                            head)
      vn_feedback_buffer_destroy(pool->dev, fb_buf, pool->alloc);

   simple_mtx_destroy(&pool->mutex);
}

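/* Suballocate size bytes from the most recently added feedback buffer,
 * growing the pool with a new buffer when the current one is exhausted.
 * The caller must hold pool->mutex.
 */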
static struct vn_feedback_buffer *
vn_feedback_pool_alloc_locked(struct vn_feedback_pool *pool,
                              uint32_t size,
                              uint32_t *out_offset)
{
   /* Default values of pool->used and pool->alignment are used to trigger the
    * initial pool grow, and will be properly initialized after that.
    */
   if (unlikely(align(size, pool->alignment) > pool->size - pool->used)) {
      VkResult result = vn_feedback_pool_grow_locked(pool);
      if (result != VK_SUCCESS)
         return NULL;

      assert(align(size, pool->alignment) <= pool->size - pool->used);
   }

   *out_offset = pool->used;
   pool->used += align(size, pool->alignment);

   return list_first_entry(&pool->fb_bufs, struct vn_feedback_buffer, head);
}

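/* Hand out a feedback slot of the requested type, reusing a previously
 * freed slot when available and otherwise suballocating an 8-byte slot
 * from the pool.
 */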
struct vn_feedback_slot *
vn_feedback_pool_alloc(struct vn_feedback_pool *pool,
                       enum vn_feedback_type type)
{
   static const uint32_t slot_size = 8;
   struct vn_feedback_buffer *fb_buf;
   uint32_t offset;
   struct vn_feedback_slot *slot;

   simple_mtx_lock(&pool->mutex);
   if (!list_is_empty(&pool->free_slots)) {
      slot =
         list_first_entry(&pool->free_slots, struct vn_feedback_slot, head);
      list_del(&slot->head);
      simple_mtx_unlock(&pool->mutex);

      slot->type = type;
      return slot;
   }

   slot = vk_alloc(pool->alloc, sizeof(*slot), VN_DEFAULT_ALIGN,
                   VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (!slot) {
      simple_mtx_unlock(&pool->mutex);
      return NULL;
   }

   fb_buf = vn_feedback_pool_alloc_locked(pool, slot_size, &offset);
   simple_mtx_unlock(&pool->mutex);

   if (!fb_buf) {
      vk_free(pool->alloc, slot);
      return NULL;
   }

   slot->type = type;
   slot->offset = offset;
   slot->buf_handle = fb_buf->buf_handle;
   slot->data = fb_buf->data + offset;

   return slot;
}

void
vn_feedback_pool_free(struct vn_feedback_pool *pool,
                      struct vn_feedback_slot *slot)
{
   simple_mtx_lock(&pool->mutex);
   list_add(&slot->head, &pool->free_slots);
   simple_mtx_unlock(&pool->mutex);
}

static inline bool
mask_is_32bit(uint64_t x)
{
   return (x & 0xffffffff00000000) == 0;
}

static void
vn_build_buffer_memory_barrier(const VkDependencyInfo *dep_info,
                               VkBufferMemoryBarrier *barrier1,
                               VkPipelineStageFlags *src_stage_mask,
                               VkPipelineStageFlags *dst_stage_mask)
{
   assert(dep_info->pNext == NULL);
   assert(dep_info->memoryBarrierCount == 0);
   assert(dep_info->bufferMemoryBarrierCount == 1);
   assert(dep_info->imageMemoryBarrierCount == 0);

   const VkBufferMemoryBarrier2 *barrier2 =
      &dep_info->pBufferMemoryBarriers[0];
   assert(barrier2->pNext == NULL);
   assert(mask_is_32bit(barrier2->srcStageMask));
   assert(mask_is_32bit(barrier2->srcAccessMask));
   assert(mask_is_32bit(barrier2->dstStageMask));
   assert(mask_is_32bit(barrier2->dstAccessMask));

   *barrier1 = (VkBufferMemoryBarrier){
      .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
      .pNext = NULL,
      .srcAccessMask = barrier2->srcAccessMask,
      .dstAccessMask = barrier2->dstAccessMask,
      .srcQueueFamilyIndex = barrier2->srcQueueFamilyIndex,
      .dstQueueFamilyIndex = barrier2->dstQueueFamilyIndex,
      .buffer = barrier2->buffer,
      .offset = barrier2->offset,
      .size = barrier2->size,
   };

   *src_stage_mask = barrier2->srcStageMask;
   *dst_stage_mask = barrier2->dstStageMask;
}

static void
vn_cmd_buffer_memory_barrier(VkCommandBuffer cmd_handle,
                             const VkDependencyInfo *dep_info,
                             bool sync2)
{
   if (sync2)
      vn_CmdPipelineBarrier2(cmd_handle, dep_info);
   else {
      VkBufferMemoryBarrier barrier1;
      VkPipelineStageFlags src_stage_mask;
      VkPipelineStageFlags dst_stage_mask;

      vn_build_buffer_memory_barrier(dep_info, &barrier1, &src_stage_mask,
                                     &dst_stage_mask);
      vn_CmdPipelineBarrier(cmd_handle, src_stage_mask, dst_stage_mask,
                            dep_info->dependencyFlags, 0, NULL, 1, &barrier1,
                            0, NULL);
   }
}

void
vn_event_feedback_cmd_record(VkCommandBuffer cmd_handle,
                             VkEvent ev_handle,
                             VkPipelineStageFlags2 src_stage_mask,
                             VkResult status,
                             bool sync2)
{
   /* For vkCmdSetEvent and vkCmdResetEvent feedback interception.
    *
    * The injection point is after the event call to avoid introducing an
    * unexpected src stage wait on VK_PIPELINE_STAGE_HOST_BIT and
    * VK_PIPELINE_STAGE_TRANSFER_BIT if vkCmdSetEvent or vkCmdResetEvent is
    * not already waiting on them. On the other hand, the delay in the
    * feedback signal is acceptable given the nature of VkEvent, and the
    * lifecycle of the event feedback cmds is guarded by the intercepted
    * command buffer.
    */
   struct vn_event *ev = vn_event_from_handle(ev_handle);
   struct vn_feedback_slot *slot = ev->feedback_slot;

   if (!slot)
      return;

   STATIC_ASSERT(sizeof(*slot->status) == 4);

   const VkDependencyInfo dep_before = {
      .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
      .dependencyFlags = 0,
      .bufferMemoryBarrierCount = 1,
      .pBufferMemoryBarriers =
         (VkBufferMemoryBarrier2[]){
            {
               .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2,
               .srcStageMask = src_stage_mask | VK_PIPELINE_STAGE_HOST_BIT |
                               VK_PIPELINE_STAGE_TRANSFER_BIT,
               .srcAccessMask =
                  VK_ACCESS_HOST_WRITE_BIT | VK_ACCESS_TRANSFER_WRITE_BIT,
               .dstStageMask = VK_PIPELINE_STAGE_TRANSFER_BIT,
               .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
               .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
               .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
               .buffer = slot->buf_handle,
               .offset = slot->offset,
               .size = 4,
            },
         },
   };
   vn_cmd_buffer_memory_barrier(cmd_handle, &dep_before, sync2);

   vn_CmdFillBuffer(cmd_handle, slot->buf_handle, slot->offset, 4, status);

   const VkDependencyInfo dep_after = {
      .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
      .dependencyFlags = 0,
      .bufferMemoryBarrierCount = 1,
      .pBufferMemoryBarriers =
         (VkBufferMemoryBarrier2[]){
            {
               .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2,
               .srcStageMask = VK_PIPELINE_STAGE_TRANSFER_BIT,
               .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
               .dstStageMask = VK_PIPELINE_STAGE_HOST_BIT,
               .dstAccessMask =
                  VK_ACCESS_HOST_READ_BIT | VK_ACCESS_HOST_WRITE_BIT,
               .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
               .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
               .buffer = slot->buf_handle,
               .offset = slot->offset,
               .size = 4,
            },
         },
   };
   vn_cmd_buffer_memory_barrier(cmd_handle, &dep_after, sync2);
}

static inline void
vn_feedback_cmd_record_flush_barrier(VkCommandBuffer cmd_handle,
                                     VkBuffer buffer,
                                     VkDeviceSize offset,
                                     VkDeviceSize size)
{
   const VkBufferMemoryBarrier buf_flush_barrier = {
      .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
      .pNext = NULL,
      .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
      .dstAccessMask = VK_ACCESS_HOST_READ_BIT | VK_ACCESS_HOST_WRITE_BIT,
      .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
      .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
      .buffer = buffer,
      .offset = offset,
      .size = size,
   };
   vn_CmdPipelineBarrier(cmd_handle, VK_PIPELINE_STAGE_TRANSFER_BIT,
                         VK_PIPELINE_STAGE_HOST_BIT, 0, 0, NULL, 1,
                         &buf_flush_barrier, 0, NULL);
}

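/* Record a complete feedback command buffer: begin, barrier the dst slot
 * for transfer writes, write the feedback value (copy or fill), flush for
 * host reads and end.
 */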
static VkResult
vn_feedback_cmd_record(VkCommandBuffer cmd_handle,
                       struct vn_feedback_slot *dst_slot,
                       struct vn_feedback_slot *src_slot)
{
   STATIC_ASSERT(sizeof(*dst_slot->status) == 4);
   STATIC_ASSERT(sizeof(*dst_slot->counter) == 8);
   STATIC_ASSERT(sizeof(*src_slot->counter) == 8);

   /* slot size is 8 bytes for a timeline semaphore and 4 bytes for a fence.
    * src_slot is non-NULL for timeline semaphore feedback.
    */
   const VkDeviceSize buf_size = src_slot ? 8 : 4;

   static const VkCommandBufferBeginInfo begin_info = {
      .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
      .pNext = NULL,
      .flags = 0,
      .pInheritanceInfo = NULL,
   };
   VkResult result = vn_BeginCommandBuffer(cmd_handle, &begin_info);
   if (result != VK_SUCCESS)
      return result;

   static const VkMemoryBarrier mem_barrier_before = {
      .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
      .pNext = NULL,
      /* make pending writes available to stay close to signal op */
      .srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT,
      /* no need to make all memory visible for feedback update */
      .dstAccessMask = 0,
   };

   const VkBufferMemoryBarrier buf_barrier_before = {
      .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
      .pNext = NULL,
      /* slot memory has been made available via mem_barrier_before */
      .srcAccessMask = 0,
      .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
      .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
      .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
      .buffer = dst_slot->buf_handle,
      .offset = dst_slot->offset,
      .size = buf_size,
   };

   /* host writes to the src slot should implicitly be made visible upon the
    * QueueSubmit call */
   vn_CmdPipelineBarrier(cmd_handle, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
                         VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 1,
                         &mem_barrier_before, 1, &buf_barrier_before, 0,
                         NULL);

   /* If passed a src_slot, timeline semaphore feedback records a
    * cmd to copy the counter value from the src slot to the dst slot.
    * If src_slot is NULL, then fence feedback records a cmd to fill
    * the dst slot with VK_SUCCESS.
    */
   if (src_slot) {
      assert(src_slot->type == VN_FEEDBACK_TYPE_SEMAPHORE);
      assert(dst_slot->type == VN_FEEDBACK_TYPE_SEMAPHORE);

      const VkBufferCopy buffer_copy = {
         .srcOffset = src_slot->offset,
         .dstOffset = dst_slot->offset,
         .size = buf_size,
      };
      vn_CmdCopyBuffer(cmd_handle, src_slot->buf_handle, dst_slot->buf_handle,
                       1, &buffer_copy);
   } else {
      assert(dst_slot->type == VN_FEEDBACK_TYPE_FENCE);

      vn_CmdFillBuffer(cmd_handle, dst_slot->buf_handle, dst_slot->offset,
                       buf_size, VK_SUCCESS);
   }

   vn_feedback_cmd_record_flush_barrier(cmd_handle, dst_slot->buf_handle,
                                        dst_slot->offset, buf_size);

   return vn_EndCommandBuffer(cmd_handle);
}

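/* Allocate a semaphore feedback cmd: a src slot from the device feedback
 * pool plus one recorded feedback command buffer per queue family.
 */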
struct vn_semaphore_feedback_cmd *
vn_semaphore_feedback_cmd_alloc(struct vn_device *dev,
                                struct vn_feedback_slot *dst_slot)
{
   const VkAllocationCallbacks *alloc = &dev->base.base.alloc;
   struct vn_semaphore_feedback_cmd *sfb_cmd;
   VkCommandBuffer *cmd_handles;

   VK_MULTIALLOC(ma);
   vk_multialloc_add(&ma, &sfb_cmd, __typeof__(*sfb_cmd), 1);
   vk_multialloc_add(&ma, &cmd_handles, __typeof__(*cmd_handles),
                     dev->queue_family_count);
   if (!vk_multialloc_zalloc(&ma, alloc, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT))
      return NULL;

   struct vn_feedback_slot *src_slot =
      vn_feedback_pool_alloc(&dev->feedback_pool, VN_FEEDBACK_TYPE_SEMAPHORE);
   if (!src_slot) {
      vk_free(alloc, sfb_cmd);
      return NULL;
   }

   for (uint32_t i = 0; i < dev->queue_family_count; i++) {
      VkDevice dev_handle = vn_device_to_handle(dev);
      VkResult result =
         vn_feedback_cmd_alloc(dev_handle, &dev->fb_cmd_pools[i], dst_slot,
                               src_slot, &cmd_handles[i]);
      if (result != VK_SUCCESS) {
         for (uint32_t j = 0; j < i; j++) {
            vn_feedback_cmd_free(dev_handle, &dev->fb_cmd_pools[j],
                                 cmd_handles[j]);
         }

         vn_feedback_pool_free(&dev->feedback_pool, src_slot);
         vk_free(alloc, sfb_cmd);
         return NULL;
      }
   }

   sfb_cmd->cmd_handles = cmd_handles;
   sfb_cmd->src_slot = src_slot;
   return sfb_cmd;
}

void
vn_semaphore_feedback_cmd_free(struct vn_device *dev,
                               struct vn_semaphore_feedback_cmd *sfb_cmd)
{
   const VkAllocationCallbacks *alloc = &dev->base.base.alloc;

   for (uint32_t i = 0; i < dev->queue_family_count; i++) {
      vn_feedback_cmd_free(vn_device_to_handle(dev), &dev->fb_cmd_pools[i],
                           sfb_cmd->cmd_handles[i]);
   }

   vn_feedback_pool_free(&dev->feedback_pool, sfb_cmd->src_slot);
   vk_free(alloc, sfb_cmd);
}

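/* Record feedback for a range of queries into the query pool's feedback
 * buffer: copy the query results when copy is true, or zero-fill the
 * feedback slots for a query reset when copy is false.
 */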
static void
vn_query_feedback_cmd_record_internal(VkCommandBuffer cmd_handle,
                                      VkQueryPool pool_handle,
                                      uint32_t query,
                                      uint32_t count,
                                      bool copy)
{
   struct vn_query_pool *pool = vn_query_pool_from_handle(pool_handle);
   assert(pool->fb_buf);

   /* Results are always 64 bit and include availability bit (also 64 bit) */
   const VkDeviceSize slot_size = (pool->result_array_size * 8) + 8;
   const VkDeviceSize offset = slot_size * query;
   const VkDeviceSize buf_size = slot_size * count;

   /* The first synchronization scope of vkCmdCopyQueryPoolResults does not
    * include the query feedback buffer. Insert a barrier to ensure ordering
    * against the feedback buffer fill cmd injected in vkCmdResetQueryPool.
    *
    * The second synchronization scope of vkCmdResetQueryPool does not include
    * the query feedback buffer. Insert a barrier to ensure ordering against
    * prior cmds referencing the queries.
    *
    * For srcAccessMask, VK_ACCESS_TRANSFER_WRITE_BIT is sufficient since the
    * gpu cache invalidation for the feedback buffer fill in
    * vkCmdResetQueryPool is done implicitly via queue submission.
    */
   const VkPipelineStageFlags src_stage_mask =
      copy ? VK_PIPELINE_STAGE_TRANSFER_BIT
           : VK_PIPELINE_STAGE_ALL_COMMANDS_BIT;

   const VkBufferMemoryBarrier buf_barrier_before = {
      .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
      .pNext = NULL,
      .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
      .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
      .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
      .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
      .buffer = pool->fb_buf->buf_handle,
      .offset = offset,
      .size = buf_size,
   };
   vn_CmdPipelineBarrier(cmd_handle, src_stage_mask,
                         VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, NULL, 1,
                         &buf_barrier_before, 0, NULL);

   if (copy) {
      /* Per spec: "The first synchronization scope includes all commands
       * which reference the queries in queryPool indicated by query that
       * occur earlier in submission order. If flags does not include
       * VK_QUERY_RESULT_WAIT_BIT, vkCmdEndQueryIndexedEXT,
       * vkCmdWriteTimestamp2, vkCmdEndQuery, and vkCmdWriteTimestamp are
       * excluded from this scope."
       *
       * Set VK_QUERY_RESULT_WAIT_BIT to ensure ordering after
       * vkCmdEndQuery or vkCmdWriteTimestamp makes the query available.
       *
       * Set VK_QUERY_RESULT_64_BIT as we can convert it to 32 bit if app
       * requested that.
       *
       * Per spec: "vkCmdCopyQueryPoolResults is considered to be a transfer
       * operation, and its writes to buffer memory must be synchronized using
       * VK_PIPELINE_STAGE_TRANSFER_BIT and VK_ACCESS_TRANSFER_WRITE_BIT
       * before using the results."
       *
       * So we can reuse the flush barrier after this copy cmd.
       */
      vn_CmdCopyQueryPoolResults(cmd_handle, pool_handle, query, count,
                                 pool->fb_buf->buf_handle, offset, slot_size,
                                 VK_QUERY_RESULT_WITH_AVAILABILITY_BIT |
                                    VK_QUERY_RESULT_64_BIT |
                                    VK_QUERY_RESULT_WAIT_BIT);
   } else {
      vn_CmdFillBuffer(cmd_handle, pool->fb_buf->buf_handle, offset, buf_size,
                       0);
   }

   vn_feedback_cmd_record_flush_barrier(cmd_handle, pool->fb_buf->buf_handle,
                                        offset, buf_size);
}

static VkResult
vn_query_feedback_cmd_record(VkDevice dev_handle,
                             struct list_head *query_records,
                             struct vn_query_feedback_cmd *qfb_cmd)
{
   assert(!list_is_empty(query_records));

   static const VkCommandBufferBeginInfo begin_info = {
      .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
   };
   VkResult result = vn_BeginCommandBuffer(qfb_cmd->cmd_handle, &begin_info);
   if (result != VK_SUCCESS)
      return result;

   list_for_each_entry_safe(struct vn_cmd_query_record, record, query_records,
                            head) {
      vn_query_feedback_cmd_record_internal(
         qfb_cmd->cmd_handle, vn_query_pool_to_handle(record->query_pool),
         record->query, record->query_count, record->copy);
   }

   return vn_EndCommandBuffer(qfb_cmd->cmd_handle);
}

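/* Get a query feedback cmd recorded from query_records, reusing one from
 * the cmd pool's free list when possible and allocating a new one
 * otherwise.
 */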
VkResult
vn_query_feedback_cmd_alloc(VkDevice dev_handle,
                            struct vn_feedback_cmd_pool *fb_cmd_pool,
                            struct list_head *query_records,
                            struct vn_query_feedback_cmd **out_qfb_cmd)
{
   struct vn_query_feedback_cmd *qfb_cmd;
   VkResult result;

   simple_mtx_lock(&fb_cmd_pool->mutex);

   if (list_is_empty(&fb_cmd_pool->free_qfb_cmds)) {
      struct vn_command_pool *cmd_pool =
         vn_command_pool_from_handle(fb_cmd_pool->pool_handle);

      qfb_cmd = vk_alloc(&cmd_pool->allocator, sizeof(*qfb_cmd),
                         VN_DEFAULT_ALIGN, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
      if (!qfb_cmd) {
         result = VK_ERROR_OUT_OF_HOST_MEMORY;
         goto out_unlock;
      }

      const VkCommandBufferAllocateInfo info = {
         .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
         .commandPool = fb_cmd_pool->pool_handle,
         .level = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
         .commandBufferCount = 1,
      };
      VkCommandBuffer qfb_cmd_handle;
      result = vn_AllocateCommandBuffers(dev_handle, &info, &qfb_cmd_handle);
      if (result != VK_SUCCESS) {
         vk_free(&cmd_pool->allocator, qfb_cmd);
         goto out_unlock;
      }

      qfb_cmd->fb_cmd_pool = fb_cmd_pool;
      qfb_cmd->cmd_handle = qfb_cmd_handle;
   } else {
      qfb_cmd = list_first_entry(&fb_cmd_pool->free_qfb_cmds,
                                 struct vn_query_feedback_cmd, head);
      list_del(&qfb_cmd->head);
      vn_ResetCommandBuffer(qfb_cmd->cmd_handle, 0);
   }

   result = vn_query_feedback_cmd_record(dev_handle, query_records, qfb_cmd);
   if (result != VK_SUCCESS) {
      list_add(&qfb_cmd->head, &fb_cmd_pool->free_qfb_cmds);
      goto out_unlock;
   }

   *out_qfb_cmd = qfb_cmd;

out_unlock:
   simple_mtx_unlock(&fb_cmd_pool->mutex);

   return result;
}

void
vn_query_feedback_cmd_free(struct vn_query_feedback_cmd *qfb_cmd)
{
   simple_mtx_lock(&qfb_cmd->fb_cmd_pool->mutex);
   list_add(&qfb_cmd->head, &qfb_cmd->fb_cmd_pool->free_qfb_cmds);
   simple_mtx_unlock(&qfb_cmd->fb_cmd_pool->mutex);
}

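/* Allocate a command buffer from the feedback cmd pool and record fence or
 * semaphore feedback into it.
 */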
VkResult
vn_feedback_cmd_alloc(VkDevice dev_handle,
                      struct vn_feedback_cmd_pool *fb_cmd_pool,
                      struct vn_feedback_slot *dst_slot,
                      struct vn_feedback_slot *src_slot,
                      VkCommandBuffer *out_cmd_handle)
{
   VkCommandPool cmd_pool_handle = fb_cmd_pool->pool_handle;
   const VkCommandBufferAllocateInfo info = {
      .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
      .pNext = NULL,
      .commandPool = cmd_pool_handle,
      .level = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
      .commandBufferCount = 1,
   };
   VkCommandBuffer cmd_handle;
   VkResult result;

   simple_mtx_lock(&fb_cmd_pool->mutex);
   result = vn_AllocateCommandBuffers(dev_handle, &info, &cmd_handle);
   if (result != VK_SUCCESS)
      goto out_unlock;

   result = vn_feedback_cmd_record(cmd_handle, dst_slot, src_slot);
   if (result != VK_SUCCESS) {
      vn_FreeCommandBuffers(dev_handle, cmd_pool_handle, 1, &cmd_handle);
      goto out_unlock;
   }

   *out_cmd_handle = cmd_handle;

out_unlock:
   simple_mtx_unlock(&fb_cmd_pool->mutex);

   return result;
}

void
vn_feedback_cmd_free(VkDevice dev_handle,
                     struct vn_feedback_cmd_pool *fb_cmd_pool,
                     VkCommandBuffer cmd_handle)
{
   simple_mtx_lock(&fb_cmd_pool->mutex);
   vn_FreeCommandBuffers(dev_handle, fb_cmd_pool->pool_handle, 1,
                         &cmd_handle);
   simple_mtx_unlock(&fb_cmd_pool->mutex);
}

VkResult
vn_feedback_cmd_pools_init(struct vn_device *dev)
{
   const VkAllocationCallbacks *alloc = &dev->base.base.alloc;
   VkDevice dev_handle = vn_device_to_handle(dev);
   struct vn_feedback_cmd_pool *fb_cmd_pools;
   VkCommandPoolCreateInfo info = {
      .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
      .pNext = NULL,
      .flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT,
   };

   if (VN_PERF(NO_FENCE_FEEDBACK) && VN_PERF(NO_SEMAPHORE_FEEDBACK) &&
       VN_PERF(NO_QUERY_FEEDBACK))
      return VK_SUCCESS;

   assert(dev->queue_family_count);

   fb_cmd_pools =
      vk_zalloc(alloc, sizeof(*fb_cmd_pools) * dev->queue_family_count,
                VN_DEFAULT_ALIGN, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
   if (!fb_cmd_pools)
      return VK_ERROR_OUT_OF_HOST_MEMORY;

   for (uint32_t i = 0; i < dev->queue_family_count; i++) {
      VkResult result;

      info.queueFamilyIndex = dev->queue_families[i];
      result = vn_CreateCommandPool(dev_handle, &info, alloc,
                                    &fb_cmd_pools[i].pool_handle);
      if (result != VK_SUCCESS) {
         for (uint32_t j = 0; j < i; j++) {
            vn_DestroyCommandPool(dev_handle, fb_cmd_pools[j].pool_handle,
                                  alloc);
            simple_mtx_destroy(&fb_cmd_pools[j].mutex);
         }

         vk_free(alloc, fb_cmd_pools);
         return result;
      }

      simple_mtx_init(&fb_cmd_pools[i].mutex, mtx_plain);
      list_inithead(&fb_cmd_pools[i].free_qfb_cmds);
   }

   dev->fb_cmd_pools = fb_cmd_pools;

   return VK_SUCCESS;
}

void
vn_feedback_cmd_pools_fini(struct vn_device *dev)
{
   const VkAllocationCallbacks *alloc = &dev->base.base.alloc;
   VkDevice dev_handle = vn_device_to_handle(dev);

   if (!dev->fb_cmd_pools)
      return;

   for (uint32_t i = 0; i < dev->queue_family_count; i++) {
      list_for_each_entry_safe(struct vn_query_feedback_cmd, feedback_cmd,
                               &dev->fb_cmd_pools[i].free_qfb_cmds, head)
         vk_free(alloc, feedback_cmd);

      vn_DestroyCommandPool(dev_handle, dev->fb_cmd_pools[i].pool_handle,
                            alloc);
      simple_mtx_destroy(&dev->fb_cmd_pools[i].mutex);
   }

   vk_free(alloc, dev->fb_cmd_pools);
}
861