/*
 * Copyright © 2022 Imagination Technologies Ltd.
 *
 * based in part on radv driver which is:
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

/**
 * This file implements VkQueue, VkFence, and VkSemaphore
 */

#include <assert.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <unistd.h>
#include <vulkan/vulkan.h>

#include "pvr_job_compute.h"
#include "pvr_job_context.h"
#include "pvr_job_render.h"
#include "pvr_job_transfer.h"
#include "pvr_limits.h"
#include "pvr_private.h"
#include "util/macros.h"
#include "util/u_atomic.h"
#include "vk_alloc.h"
#include "vk_fence.h"
#include "vk_log.h"
#include "vk_object.h"
#include "vk_queue.h"
#include "vk_semaphore.h"
#include "vk_sync.h"
#include "vk_sync_dummy.h"
#include "vk_util.h"

static VkResult pvr_driver_queue_submit(struct vk_queue *queue,
                                        struct vk_queue_submit *submit);

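/* Initialize a queue: set up the common vk_queue, enable the submit thread
 * when the winsys supports threaded submission, and create the transfer,
 * compute, query and render contexts the queue submits jobs to.
 */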
static VkResult pvr_queue_init(struct pvr_device *device,
                               struct pvr_queue *queue,
                               const VkDeviceQueueCreateInfo *pCreateInfo,
                               uint32_t index_in_family)
{
   struct pvr_transfer_ctx *transfer_ctx;
   struct pvr_compute_ctx *compute_ctx;
   struct pvr_compute_ctx *query_ctx;
   struct pvr_render_ctx *gfx_ctx;
   VkResult result;

   *queue = (struct pvr_queue){ 0 };

   result =
      vk_queue_init(&queue->vk, &device->vk, pCreateInfo, index_in_family);
   if (result != VK_SUCCESS)
      return result;

   if (device->ws->features.supports_threaded_submit) {
      result = vk_queue_enable_submit_thread(&queue->vk);
      if (result != VK_SUCCESS)
         goto err_vk_queue_finish;
   }

   result = pvr_transfer_ctx_create(device,
                                    PVR_WINSYS_CTX_PRIORITY_MEDIUM,
                                    &transfer_ctx);
   if (result != VK_SUCCESS)
      goto err_vk_queue_finish;

   result = pvr_compute_ctx_create(device,
                                   PVR_WINSYS_CTX_PRIORITY_MEDIUM,
                                   &compute_ctx);
   if (result != VK_SUCCESS)
      goto err_transfer_ctx_destroy;

   result = pvr_compute_ctx_create(device,
                                   PVR_WINSYS_CTX_PRIORITY_MEDIUM,
                                   &query_ctx);
   if (result != VK_SUCCESS)
      goto err_compute_ctx_destroy;

   result =
      pvr_render_ctx_create(device, PVR_WINSYS_CTX_PRIORITY_MEDIUM, &gfx_ctx);
   if (result != VK_SUCCESS)
      goto err_query_ctx_destroy;

   queue->device = device;
   queue->gfx_ctx = gfx_ctx;
   queue->compute_ctx = compute_ctx;
   queue->query_ctx = query_ctx;
   queue->transfer_ctx = transfer_ctx;

   queue->vk.driver_submit = pvr_driver_queue_submit;

   return VK_SUCCESS;

err_query_ctx_destroy:
   pvr_compute_ctx_destroy(query_ctx);

err_compute_ctx_destroy:
   pvr_compute_ctx_destroy(compute_ctx);

err_transfer_ctx_destroy:
   pvr_transfer_ctx_destroy(transfer_ctx);

err_vk_queue_finish:
   vk_queue_finish(&queue->vk);

   return result;
}

VkResult pvr_queues_create(struct pvr_device *device,
                           const VkDeviceCreateInfo *pCreateInfo)
{
   VkResult result;

   /* Check requested queue families and queues */
   assert(pCreateInfo->queueCreateInfoCount == 1);
   assert(pCreateInfo->pQueueCreateInfos[0].queueFamilyIndex == 0);
   assert(pCreateInfo->pQueueCreateInfos[0].queueCount <= PVR_MAX_QUEUES);

   const VkDeviceQueueCreateInfo *queue_create =
      &pCreateInfo->pQueueCreateInfos[0];

   device->queues = vk_alloc(&device->vk.alloc,
                             queue_create->queueCount * sizeof(*device->queues),
                             8,
                             VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
   if (!device->queues)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   device->queue_count = 0;

   for (uint32_t i = 0; i < queue_create->queueCount; i++) {
      result = pvr_queue_init(device, &device->queues[i], queue_create, i);
      if (result != VK_SUCCESS)
         goto err_queues_finish;

      device->queue_count++;
   }

   return VK_SUCCESS;

err_queues_finish:
   pvr_queues_destroy(device);
   return result;
}

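/* Tear down a single queue: destroy any per-job-type syncobjs that are still
 * outstanding, destroy the hardware contexts, and finish the common vk_queue.
 */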
static void pvr_queue_finish(struct pvr_queue *queue)
{
   for (uint32_t i = 0; i < ARRAY_SIZE(queue->next_job_wait_sync); i++) {
      if (queue->next_job_wait_sync[i])
         vk_sync_destroy(&queue->device->vk, queue->next_job_wait_sync[i]);
   }

   for (uint32_t i = 0; i < ARRAY_SIZE(queue->last_job_signal_sync); i++) {
      if (queue->last_job_signal_sync[i])
         vk_sync_destroy(&queue->device->vk, queue->last_job_signal_sync[i]);
   }

   pvr_render_ctx_destroy(queue->gfx_ctx);
   pvr_compute_ctx_destroy(queue->query_ctx);
   pvr_compute_ctx_destroy(queue->compute_ctx);
   pvr_transfer_ctx_destroy(queue->transfer_ctx);

   vk_queue_finish(&queue->vk);
}

void pvr_queues_destroy(struct pvr_device *device)
{
   for (uint32_t q_idx = 0; q_idx < device->queue_count; q_idx++)
      pvr_queue_finish(&device->queues[q_idx]);

   vk_free(&device->vk.alloc, device->queues);
}

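/* Record the signal sync of the job that was just submitted. The barrier sync
 * the job waited on (next_job_wait_sync) has now been consumed and is
 * destroyed, and new_signal_sync replaces the previous last_job_signal_sync
 * for that job type.
 */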
static void pvr_update_job_syncs(struct pvr_device *device,
                                 struct pvr_queue *queue,
                                 struct vk_sync *new_signal_sync,
                                 enum pvr_job_type submitted_job_type)
{
   if (queue->next_job_wait_sync[submitted_job_type]) {
      vk_sync_destroy(&device->vk,
                      queue->next_job_wait_sync[submitted_job_type]);
      queue->next_job_wait_sync[submitted_job_type] = NULL;
   }

   if (queue->last_job_signal_sync[submitted_job_type]) {
      vk_sync_destroy(&device->vk,
                      queue->last_job_signal_sync[submitted_job_type]);
   }

   queue->last_job_signal_sync[submitted_job_type] = new_signal_sync;
}

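/* Submit a graphics sub command: create signal syncs for the geometry work
 * and, when fragment work runs, for the fragment work, kick the render job
 * (splitting it in two for multi-layer framebuffers), then update the
 * per-stage syncs for the geometry and fragment job types.
 */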
static VkResult pvr_process_graphics_cmd(struct pvr_device *device,
                                         struct pvr_queue *queue,
                                         struct pvr_cmd_buffer *cmd_buffer,
                                         struct pvr_sub_cmd_gfx *sub_cmd)
{
   pvr_dev_addr_t original_ctrl_stream_addr = { 0 };
   struct vk_sync *geom_signal_sync;
   struct vk_sync *frag_signal_sync = NULL;
   VkResult result;

   result = vk_sync_create(&device->vk,
                           &device->pdevice->ws->syncobj_type,
                           0U,
                           0UL,
                           &geom_signal_sync);
   if (result != VK_SUCCESS)
      return result;

   if (sub_cmd->job.run_frag) {
      result = vk_sync_create(&device->vk,
                              &device->pdevice->ws->syncobj_type,
                              0U,
                              0UL,
                              &frag_signal_sync);
      if (result != VK_SUCCESS)
         goto err_destroy_geom_sync;
   }

   /* FIXME: DoShadowLoadOrStore() */

   /* Perform two render submits when using multiple framebuffer layers. The
    * first submit contains just geometry, while the second only terminates
    * (and triggers the fragment render if originally specified). This is
    * needed because the render target cache gets cleared on terminating
    * submits, which could result in missing primitives.
    */
   if (pvr_sub_cmd_gfx_requires_split_submit(sub_cmd)) {
      /* If fragment work shouldn't be run there's no need for a split,
       * and if geometry_terminate is false this kick can't have a fragment
       * stage without another terminating geometry kick.
       */
      assert(sub_cmd->job.geometry_terminate && sub_cmd->job.run_frag);

      /* First submit must not touch fragment work. */
      sub_cmd->job.geometry_terminate = false;
      sub_cmd->job.run_frag = false;

      result =
         pvr_render_job_submit(queue->gfx_ctx,
                               &sub_cmd->job,
                               queue->next_job_wait_sync[PVR_JOB_TYPE_GEOM],
                               NULL,
                               NULL,
                               NULL);

      sub_cmd->job.geometry_terminate = true;
      sub_cmd->job.run_frag = true;

      if (result != VK_SUCCESS)
         goto err_destroy_frag_sync;

      original_ctrl_stream_addr = sub_cmd->job.ctrl_stream_addr;

      /* Second submit contains only a trivial control stream to terminate the
       * geometry work.
       */
      assert(sub_cmd->terminate_ctrl_stream);
      sub_cmd->job.ctrl_stream_addr =
         sub_cmd->terminate_ctrl_stream->vma->dev_addr;
   }

   result = pvr_render_job_submit(queue->gfx_ctx,
                                  &sub_cmd->job,
                                  queue->next_job_wait_sync[PVR_JOB_TYPE_GEOM],
                                  queue->next_job_wait_sync[PVR_JOB_TYPE_FRAG],
                                  geom_signal_sync,
                                  frag_signal_sync);

   if (original_ctrl_stream_addr.addr > 0)
      sub_cmd->job.ctrl_stream_addr = original_ctrl_stream_addr;

   if (result != VK_SUCCESS)
      goto err_destroy_frag_sync;

   pvr_update_job_syncs(device, queue, geom_signal_sync, PVR_JOB_TYPE_GEOM);

   if (sub_cmd->job.run_frag)
      pvr_update_job_syncs(device, queue, frag_signal_sync, PVR_JOB_TYPE_FRAG);

   /* FIXME: DoShadowLoadOrStore() */

   return VK_SUCCESS;

err_destroy_frag_sync:
   if (frag_signal_sync)
      vk_sync_destroy(&device->vk, frag_signal_sync);
err_destroy_geom_sync:
   vk_sync_destroy(&device->vk, geom_signal_sync);

   return result;
}

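/* Submit a compute sub command on the compute context and record the new
 * signal sync for the compute job type.
 */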
static VkResult pvr_process_compute_cmd(struct pvr_device *device,
                                        struct pvr_queue *queue,
                                        struct pvr_sub_cmd_compute *sub_cmd)
{
   struct vk_sync *sync;
   VkResult result;

   result = vk_sync_create(&device->vk,
                           &device->pdevice->ws->syncobj_type,
                           0U,
                           0UL,
                           &sync);
   if (result != VK_SUCCESS)
      return result;

   result =
      pvr_compute_job_submit(queue->compute_ctx,
                             sub_cmd,
                             queue->next_job_wait_sync[PVR_JOB_TYPE_COMPUTE],
                             sync);
   if (result != VK_SUCCESS) {
      vk_sync_destroy(&device->vk, sync);
      return result;
   }

   pvr_update_job_syncs(device, queue, sync, PVR_JOB_TYPE_COMPUTE);

   return result;
}

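/* Submit a transfer sub command on the transfer context and record the new
 * signal sync for the transfer job type.
 */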
static VkResult pvr_process_transfer_cmds(struct pvr_device *device,
                                          struct pvr_queue *queue,
                                          struct pvr_sub_cmd_transfer *sub_cmd)
{
   struct vk_sync *sync;
   VkResult result;

   result = vk_sync_create(&device->vk,
                           &device->pdevice->ws->syncobj_type,
                           0U,
                           0UL,
                           &sync);
   if (result != VK_SUCCESS)
      return result;

   result =
      pvr_transfer_job_submit(queue->transfer_ctx,
                              sub_cmd,
                              queue->next_job_wait_sync[PVR_JOB_TYPE_TRANSFER],
                              sync);
   if (result != VK_SUCCESS) {
      vk_sync_destroy(&device->vk, sync);
      return result;
   }

   pvr_update_job_syncs(device, queue, sync, PVR_JOB_TYPE_TRANSFER);

   return result;
}

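/* Submit an occlusion query sub command. This runs as a compute job on the
 * dedicated query context and records the new signal sync for the occlusion
 * query job type.
 */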
static VkResult
pvr_process_occlusion_query_cmd(struct pvr_device *device,
                                struct pvr_queue *queue,
                                struct pvr_sub_cmd_compute *sub_cmd)
{
   struct vk_sync *sync;
   VkResult result;

   /* TODO: Currently we add barrier event sub commands to handle the sync
    * necessary for the different occlusion query types. Would we get any
    * speed up in processing the queue by doing that sync here without using
    * event sub commands?
    */

   result = vk_sync_create(&device->vk,
                           &device->pdevice->ws->syncobj_type,
                           0U,
                           0UL,
                           &sync);
   if (result != VK_SUCCESS)
      return result;

   result = pvr_compute_job_submit(
      queue->query_ctx,
      sub_cmd,
      queue->next_job_wait_sync[PVR_JOB_TYPE_OCCLUSION_QUERY],
      sync);
   if (result != VK_SUCCESS) {
      vk_sync_destroy(&device->vk, sync);
      return result;
   }

   pvr_update_job_syncs(device, queue, sync, PVR_JOB_TYPE_OCCLUSION_QUERY);

   return result;
}

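/* Process a barrier event sub command. For each destination stage in the
 * barrier, submit a null job that waits on the last signal syncs of all the
 * source stages (plus any existing barrier for that destination stage) and
 * use the resulting sync as the wait for the next job of that stage.
 */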
static VkResult
pvr_process_event_cmd_barrier(struct pvr_device *device,
                              struct pvr_queue *queue,
                              struct pvr_sub_cmd_event_barrier *sub_cmd)
{
   const uint32_t src_mask = sub_cmd->wait_for_stage_mask;
   const uint32_t dst_mask = sub_cmd->wait_at_stage_mask;
   struct vk_sync_wait wait_syncs[PVR_JOB_TYPE_MAX + 1];
   uint32_t src_wait_count = 0;
   VkResult result;

   assert(!(src_mask & ~(PVR_PIPELINE_STAGE_ALL_BITS |
                         PVR_PIPELINE_STAGE_OCCLUSION_QUERY_BIT)));
   assert(!(dst_mask & ~(PVR_PIPELINE_STAGE_ALL_BITS |
                         PVR_PIPELINE_STAGE_OCCLUSION_QUERY_BIT)));

   u_foreach_bit (stage, src_mask) {
      if (queue->last_job_signal_sync[stage]) {
         wait_syncs[src_wait_count++] = (struct vk_sync_wait){
            .sync = queue->last_job_signal_sync[stage],
            .stage_mask = ~(VkPipelineStageFlags2)0,
            .wait_value = 0,
         };
      }
   }

   /* No previous src jobs that need finishing so no need for a barrier. */
   if (src_wait_count == 0)
      return VK_SUCCESS;

   u_foreach_bit (stage, dst_mask) {
      uint32_t wait_count = src_wait_count;
      struct vk_sync_signal signal;
      struct vk_sync *signal_sync;

      result = vk_sync_create(&device->vk,
                              &device->pdevice->ws->syncobj_type,
                              0U,
                              0UL,
                              &signal_sync);
      if (result != VK_SUCCESS)
         return result;

      signal = (struct vk_sync_signal){
         .sync = signal_sync,
         .stage_mask = ~(VkPipelineStageFlags2)0,
         .signal_value = 0,
      };

      if (queue->next_job_wait_sync[stage]) {
         wait_syncs[wait_count++] = (struct vk_sync_wait){
            .sync = queue->next_job_wait_sync[stage],
            .stage_mask = ~(VkPipelineStageFlags2)0,
            .wait_value = 0,
         };
      }

      result = device->ws->ops->null_job_submit(device->ws,
                                                wait_syncs,
                                                wait_count,
                                                &signal);
      if (result != VK_SUCCESS) {
         vk_sync_destroy(&device->vk, signal_sync);
         return result;
      }

      if (queue->next_job_wait_sync[stage])
         vk_sync_destroy(&device->vk, queue->next_job_wait_sync[stage]);

      queue->next_job_wait_sync[stage] = signal_sync;
   }

   return VK_SUCCESS;
}

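/* Process a set or reset event sub command. A null job is submitted that
 * waits on the last signal syncs of the source stages; its signal sync
 * replaces the event's sync and the event's state is set to new_event_state.
 */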
static VkResult
pvr_process_event_cmd_set_or_reset(struct pvr_device *device,
                                   struct pvr_queue *queue,
                                   struct pvr_sub_cmd_event_set_reset *sub_cmd,
                                   const enum pvr_event_state new_event_state)
{
   /* Sized to PVR_NUM_SYNC_PIPELINE_STAGES rather than PVR_JOB_TYPE_MAX,
    * since the latter also includes PVR_JOB_TYPE_OCCLUSION_QUERY, which can
    * never appear in the src stage mask.
    */
   struct vk_sync_wait waits[PVR_NUM_SYNC_PIPELINE_STAGES];
   struct vk_sync_signal signal;
   struct vk_sync *signal_sync;

   uint32_t wait_count = 0;
   VkResult result;

   assert(!(sub_cmd->wait_for_stage_mask & ~PVR_PIPELINE_STAGE_ALL_BITS));

   u_foreach_bit (stage, sub_cmd->wait_for_stage_mask) {
      if (!queue->last_job_signal_sync[stage])
         continue;

      waits[wait_count++] = (struct vk_sync_wait){
         .sync = queue->last_job_signal_sync[stage],
         .stage_mask = ~(VkPipelineStageFlags2)0,
         .wait_value = 0,
      };
   }

   result = vk_sync_create(&device->vk,
                           &device->pdevice->ws->syncobj_type,
                           0U,
                           0UL,
                           &signal_sync);
   if (result != VK_SUCCESS)
      return result;

   signal = (struct vk_sync_signal){
      .sync = signal_sync,
      .stage_mask = ~(VkPipelineStageFlags2)0,
      .signal_value = 0,
   };

   result =
      device->ws->ops->null_job_submit(device->ws, waits, wait_count, &signal);
   if (result != VK_SUCCESS) {
      vk_sync_destroy(&device->vk, signal_sync);
      return result;
   }

   if (sub_cmd->event->sync)
      vk_sync_destroy(&device->vk, sub_cmd->event->sync);

   sub_cmd->event->sync = signal_sync;
   sub_cmd->event->state = new_event_state;

   return VK_SUCCESS;
}

static inline VkResult
pvr_process_event_cmd_set(struct pvr_device *device,
                          struct pvr_queue *queue,
                          struct pvr_sub_cmd_event_set_reset *sub_cmd)
{
   return pvr_process_event_cmd_set_or_reset(device,
                                             queue,
                                             sub_cmd,
                                             PVR_EVENT_STATE_SET_BY_DEVICE);
}

static inline VkResult
pvr_process_event_cmd_reset(struct pvr_device *device,
                            struct pvr_queue *queue,
                            struct pvr_sub_cmd_event_set_reset *sub_cmd)
{
   return pvr_process_event_cmd_set_or_reset(device,
                                             queue,
                                             sub_cmd,
                                             PVR_EVENT_STATE_RESET_BY_DEVICE);
}

/**
 * \brief Process an event sub command of wait type.
 *
 * This sets up barrier syncobjs to create a dependency from the event
 * syncobjs onto the next job submissions.
 *
 * The barriers are set up by taking into consideration each event's dst stage
 * mask so this is in line with vkCmdWaitEvents2().
 *
 * \param[in]     device  Device to create the syncobjs on.
 * \param[in,out] queue   Queue whose per-stage barrier syncs are updated.
 * \param[in]     sub_cmd Sub command to process.
 */
static VkResult
pvr_process_event_cmd_wait(struct pvr_device *device,
                           struct pvr_queue *queue,
                           struct pvr_sub_cmd_event_wait *sub_cmd)
{
   uint32_t dst_mask = 0;
   VkResult result;

   STACK_ARRAY(struct vk_sync_wait, waits, sub_cmd->count + 1);
   if (!waits)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   for (uint32_t i = 0; i < sub_cmd->count; i++)
      dst_mask |= sub_cmd->wait_at_stage_masks[i];

   u_foreach_bit (stage, dst_mask) {
      struct vk_sync_signal signal;
      struct vk_sync *signal_sync;
      uint32_t wait_count = 0;

      for (uint32_t i = 0; i < sub_cmd->count; i++) {
         if (sub_cmd->wait_at_stage_masks[i] & BITFIELD_BIT(stage)) {
            waits[wait_count++] = (struct vk_sync_wait){
               .sync = sub_cmd->events[i]->sync,
               .stage_mask = ~(VkPipelineStageFlags2)0,
               .wait_value = 0,
            };
         }
      }

      if (!wait_count)
         continue;

      if (queue->next_job_wait_sync[stage]) {
         waits[wait_count++] = (struct vk_sync_wait){
            .sync = queue->next_job_wait_sync[stage],
            .stage_mask = ~(VkPipelineStageFlags2)0,
            .wait_value = 0,
         };
      }

      assert(wait_count <= (sub_cmd->count + 1));

      result = vk_sync_create(&device->vk,
                              &device->pdevice->ws->syncobj_type,
                              0U,
                              0UL,
                              &signal_sync);
      if (result != VK_SUCCESS)
         goto err_free_waits;

      signal = (struct vk_sync_signal){
         .sync = signal_sync,
         .stage_mask = ~(VkPipelineStageFlags2)0,
         .signal_value = 0,
      };

      result = device->ws->ops->null_job_submit(device->ws,
                                                waits,
                                                wait_count,
                                                &signal);
      if (result != VK_SUCCESS) {
         vk_sync_destroy(&device->vk, signal.sync);
         goto err_free_waits;
      }

      if (queue->next_job_wait_sync[stage])
         vk_sync_destroy(&device->vk, queue->next_job_wait_sync[stage]);

      queue->next_job_wait_sync[stage] = signal.sync;
   }

   STACK_ARRAY_FINISH(waits);

   return VK_SUCCESS;

err_free_waits:
   STACK_ARRAY_FINISH(waits);

   return result;
}

static VkResult pvr_process_event_cmd(struct pvr_device *device,
                                      struct pvr_queue *queue,
                                      struct pvr_sub_cmd_event *sub_cmd)
{
   switch (sub_cmd->type) {
   case PVR_EVENT_TYPE_SET:
      return pvr_process_event_cmd_set(device, queue, &sub_cmd->set_reset);
   case PVR_EVENT_TYPE_RESET:
      return pvr_process_event_cmd_reset(device, queue, &sub_cmd->set_reset);
   case PVR_EVENT_TYPE_WAIT:
      return pvr_process_event_cmd_wait(device, queue, &sub_cmd->wait);
   case PVR_EVENT_TYPE_BARRIER:
      return pvr_process_event_cmd_barrier(device, queue, &sub_cmd->barrier);
   default:
      unreachable("Invalid event sub-command type.");
   }
}

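/* Walk the sub commands recorded in a command buffer and submit each one,
 * inserting the implicit barriers needed between job types: fragment work
 * waits on occlusion query and transfer work where required, and transfer
 * work that must serialize with fragment work is bracketed by barriers.
 */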
static VkResult pvr_process_cmd_buffer(struct pvr_device *device,
                                       struct pvr_queue *queue,
                                       struct pvr_cmd_buffer *cmd_buffer)
{
   VkResult result;

   list_for_each_entry_safe (struct pvr_sub_cmd,
                             sub_cmd,
                             &cmd_buffer->sub_cmds,
                             link) {
      switch (sub_cmd->type) {
      case PVR_SUB_CMD_TYPE_GRAPHICS: {
         /* If the fragment job utilizes occlusion queries, for data integrity
          * it needs to wait for the occlusion query to be processed.
          */
         if (sub_cmd->gfx.has_occlusion_query) {
            struct pvr_sub_cmd_event_barrier barrier = {
               .wait_for_stage_mask = PVR_PIPELINE_STAGE_OCCLUSION_QUERY_BIT,
               .wait_at_stage_mask = PVR_PIPELINE_STAGE_FRAG_BIT,
            };

            result = pvr_process_event_cmd_barrier(device, queue, &barrier);
            if (result != VK_SUCCESS)
               break;
         }

         if (sub_cmd->gfx.wait_on_previous_transfer) {
            struct pvr_sub_cmd_event_barrier barrier = {
               .wait_for_stage_mask = PVR_PIPELINE_STAGE_TRANSFER_BIT,
               .wait_at_stage_mask = PVR_PIPELINE_STAGE_FRAG_BIT,
            };

            result = pvr_process_event_cmd_barrier(device, queue, &barrier);
            if (result != VK_SUCCESS)
               break;
         }

         result =
            pvr_process_graphics_cmd(device, queue, cmd_buffer, &sub_cmd->gfx);
         break;
      }

      case PVR_SUB_CMD_TYPE_COMPUTE:
         result = pvr_process_compute_cmd(device, queue, &sub_cmd->compute);
         break;

      case PVR_SUB_CMD_TYPE_TRANSFER: {
         const bool serialize_with_frag = sub_cmd->transfer.serialize_with_frag;

         if (serialize_with_frag) {
            struct pvr_sub_cmd_event_barrier barrier = {
               .wait_for_stage_mask = PVR_PIPELINE_STAGE_FRAG_BIT,
               .wait_at_stage_mask = PVR_PIPELINE_STAGE_TRANSFER_BIT,
            };

            result = pvr_process_event_cmd_barrier(device, queue, &barrier);
            if (result != VK_SUCCESS)
               break;
         }

         result = pvr_process_transfer_cmds(device, queue, &sub_cmd->transfer);

         if (serialize_with_frag) {
            struct pvr_sub_cmd_event_barrier barrier = {
               .wait_for_stage_mask = PVR_PIPELINE_STAGE_TRANSFER_BIT,
               .wait_at_stage_mask = PVR_PIPELINE_STAGE_FRAG_BIT,
            };

            if (result != VK_SUCCESS)
               break;

            result = pvr_process_event_cmd_barrier(device, queue, &barrier);
         }

         break;
      }

      case PVR_SUB_CMD_TYPE_OCCLUSION_QUERY:
         result =
            pvr_process_occlusion_query_cmd(device, queue, &sub_cmd->compute);
         break;

      case PVR_SUB_CMD_TYPE_EVENT:
         result = pvr_process_event_cmd(device, queue, &sub_cmd->event);
         break;

      default:
         mesa_loge("Unsupported sub-command type %d", sub_cmd->type);
         result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
      }

      if (result != VK_SUCCESS)
         return result;

      p_atomic_inc(&device->global_cmd_buffer_submit_count);
   }

   return VK_SUCCESS;
}

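/* Wait for all per-job-type syncs of the previous submission to complete and
 * destroy them, so the queue starts the current submission with clean state.
 */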
static VkResult pvr_clear_last_submits_syncs(struct pvr_queue *queue)
{
   struct vk_sync_wait waits[PVR_JOB_TYPE_MAX * 2];
   uint32_t wait_count = 0;
   VkResult result;

   for (uint32_t i = 0; i < PVR_JOB_TYPE_MAX; i++) {
      if (queue->next_job_wait_sync[i]) {
         waits[wait_count++] = (struct vk_sync_wait){
            .sync = queue->next_job_wait_sync[i],
            .stage_mask = ~(VkPipelineStageFlags2)0,
            .wait_value = 0,
         };
      }

      if (queue->last_job_signal_sync[i]) {
         waits[wait_count++] = (struct vk_sync_wait){
            .sync = queue->last_job_signal_sync[i],
            .stage_mask = ~(VkPipelineStageFlags2)0,
            .wait_value = 0,
         };
      }
   }

   result = vk_sync_wait_many(&queue->device->vk,
                              wait_count,
                              waits,
                              VK_SYNC_WAIT_COMPLETE,
                              UINT64_MAX);

   if (result != VK_SUCCESS)
      return vk_error(queue, result);

   for (uint32_t i = 0; i < PVR_JOB_TYPE_MAX; i++) {
      if (queue->next_job_wait_sync[i]) {
         vk_sync_destroy(&queue->device->vk, queue->next_job_wait_sync[i]);
         queue->next_job_wait_sync[i] = NULL;
      }

      if (queue->last_job_signal_sync[i]) {
         vk_sync_destroy(&queue->device->vk, queue->last_job_signal_sync[i]);
         queue->last_job_signal_sync[i] = NULL;
      }
   }

   return VK_SUCCESS;
}

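/* Attach the submit's signal operations: each signal is handed to a null job
 * that waits on the last signal syncs of the job types covered by the
 * signal's source stage mask (occlusion query jobs are always included since
 * they are internal and never appear in user-provided stage masks).
 */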
static VkResult pvr_process_queue_signals(struct pvr_queue *queue,
                                          struct vk_sync_signal *signals,
                                          uint32_t signal_count)
{
   struct vk_sync_wait signal_waits[PVR_JOB_TYPE_MAX];
   struct pvr_device *device = queue->device;
   VkResult result;

   for (uint32_t signal_idx = 0; signal_idx < signal_count; signal_idx++) {
      struct vk_sync_signal *signal = &signals[signal_idx];
      const enum pvr_pipeline_stage_bits signal_stage_src =
         pvr_stage_mask_src(signal->stage_mask);
      uint32_t wait_count = 0;

      for (uint32_t i = 0; i < PVR_JOB_TYPE_MAX; i++) {
         /* Exception for occlusion query jobs since that's something internal,
          * so the user provided syncs won't ever have it as a source stage.
          */
         if (!(signal_stage_src & BITFIELD_BIT(i)) &&
             i != PVR_JOB_TYPE_OCCLUSION_QUERY)
            continue;

         if (!queue->last_job_signal_sync[i])
            continue;

         signal_waits[wait_count++] = (struct vk_sync_wait){
            .sync = queue->last_job_signal_sync[i],
            .stage_mask = ~(VkPipelineStageFlags2)0,
            .wait_value = 0,
         };
      }

      result = device->ws->ops->null_job_submit(device->ws,
                                                signal_waits,
                                                wait_count,
                                                signal);
      if (result != VK_SUCCESS)
         return result;
   }

   return VK_SUCCESS;
}

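/* Fold the submit's wait semaphores into the queue's per-job-type barriers:
 * for every job type, a null job waits on the submit waits whose stage mask
 * covers that type and signals a fresh next_job_wait_sync for it.
 */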
static VkResult pvr_process_queue_waits(struct pvr_queue *queue,
                                        struct vk_sync_wait *waits,
                                        uint32_t wait_count)
{
   struct pvr_device *device = queue->device;
   VkResult result;

   STACK_ARRAY(struct vk_sync_wait, stage_waits, wait_count);
   if (!stage_waits)
      return vk_error(queue, VK_ERROR_OUT_OF_HOST_MEMORY);

   for (uint32_t i = 0; i < PVR_JOB_TYPE_MAX; i++) {
      struct vk_sync_signal next_job_wait_signal_sync;
      uint32_t stage_wait_count = 0;

      for (uint32_t wait_idx = 0; wait_idx < wait_count; wait_idx++) {
         if (!(pvr_stage_mask(waits[wait_idx].stage_mask) & BITFIELD_BIT(i)))
            continue;

         stage_waits[stage_wait_count++] = (struct vk_sync_wait){
            .sync = waits[wait_idx].sync,
            .stage_mask = ~(VkPipelineStageFlags2)0,
            .wait_value = waits[wait_idx].wait_value,
         };
      }

      result = vk_sync_create(&device->vk,
                              &device->pdevice->ws->syncobj_type,
                              0U,
                              0UL,
                              &queue->next_job_wait_sync[i]);
      if (result != VK_SUCCESS)
         goto err_free_waits;

      next_job_wait_signal_sync = (struct vk_sync_signal){
         .sync = queue->next_job_wait_sync[i],
         .stage_mask = ~(VkPipelineStageFlags2)0,
         .signal_value = 0,
      };

      result = device->ws->ops->null_job_submit(device->ws,
                                                stage_waits,
                                                stage_wait_count,
                                                &next_job_wait_signal_sync);
      if (result != VK_SUCCESS)
         goto err_free_waits;
   }

   STACK_ARRAY_FINISH(stage_waits);

   return VK_SUCCESS;

err_free_waits:
   STACK_ARRAY_FINISH(stage_waits);

   return result;
}

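/* vk_queue::driver_submit entry point. Drain the previous submission's syncs,
 * turn the submit waits into per-job-type barriers, process each command
 * buffer, and then attach the submit's signal operations.
 */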
static VkResult pvr_driver_queue_submit(struct vk_queue *queue,
                                        struct vk_queue_submit *submit)
{
   struct pvr_queue *driver_queue = container_of(queue, struct pvr_queue, vk);
   struct pvr_device *device = driver_queue->device;
   VkResult result;

   result = pvr_clear_last_submits_syncs(driver_queue);
   if (result != VK_SUCCESS)
      return result;

   result =
      pvr_process_queue_waits(driver_queue, submit->waits, submit->wait_count);
   if (result != VK_SUCCESS)
      return result;

   for (uint32_t i = 0U; i < submit->command_buffer_count; i++) {
      result = pvr_process_cmd_buffer(
         device,
         driver_queue,
         container_of(submit->command_buffers[i], struct pvr_cmd_buffer, vk));
      if (result != VK_SUCCESS)
         return result;
   }

   result = pvr_process_queue_signals(driver_queue,
                                      submit->signals,
                                      submit->signal_count);
   if (result != VK_SUCCESS)
      return result;

   return VK_SUCCESS;
}