/*
 * Copyright © 2022 Imagination Technologies Ltd.
 *
 * based in part on radv driver which is:
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

/**
 * This file implements VkQueue, VkFence, and VkSemaphore
 */

#include <assert.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <unistd.h>
#include <vulkan/vulkan.h>

#include "pvr_job_compute.h"
#include "pvr_job_context.h"
#include "pvr_job_render.h"
#include "pvr_job_transfer.h"
#include "pvr_limits.h"
#include "pvr_private.h"
#include "util/macros.h"
#include "util/u_atomic.h"
#include "vk_alloc.h"
#include "vk_fence.h"
#include "vk_log.h"
#include "vk_object.h"
#include "vk_queue.h"
#include "vk_semaphore.h"
#include "vk_sync.h"
#include "vk_sync_dummy.h"
#include "vk_util.h"

static VkResult pvr_driver_queue_submit(struct vk_queue *queue,
                                        struct vk_queue_submit *submit);

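/* Initialize a single queue: set up the common vk_queue state and create the
 * queue's transfer, compute, query and render hardware contexts, all at
 * medium priority.
 */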
static VkResult pvr_queue_init(struct pvr_device *device,
                               struct pvr_queue *queue,
                               const VkDeviceQueueCreateInfo *pCreateInfo,
                               uint32_t index_in_family)
{
   struct pvr_transfer_ctx *transfer_ctx;
   struct pvr_compute_ctx *compute_ctx;
   struct pvr_compute_ctx *query_ctx;
   struct pvr_render_ctx *gfx_ctx;
   VkResult result;

   *queue = (struct pvr_queue){ 0 };

   result =
      vk_queue_init(&queue->vk, &device->vk, pCreateInfo, index_in_family);
   if (result != VK_SUCCESS)
      return result;

   if (device->ws->features.supports_threaded_submit) {
      result = vk_queue_enable_submit_thread(&queue->vk);
      if (result != VK_SUCCESS)
         goto err_vk_queue_finish;
   }

   result = pvr_transfer_ctx_create(device,
                                    PVR_WINSYS_CTX_PRIORITY_MEDIUM,
                                    &transfer_ctx);
   if (result != VK_SUCCESS)
      goto err_vk_queue_finish;

   result = pvr_compute_ctx_create(device,
                                   PVR_WINSYS_CTX_PRIORITY_MEDIUM,
                                   &compute_ctx);
   if (result != VK_SUCCESS)
      goto err_transfer_ctx_destroy;

   result = pvr_compute_ctx_create(device,
                                   PVR_WINSYS_CTX_PRIORITY_MEDIUM,
                                   &query_ctx);
   if (result != VK_SUCCESS)
      goto err_compute_ctx_destroy;

   result =
      pvr_render_ctx_create(device, PVR_WINSYS_CTX_PRIORITY_MEDIUM, &gfx_ctx);
   if (result != VK_SUCCESS)
      goto err_query_ctx_destroy;

   queue->device = device;
   queue->gfx_ctx = gfx_ctx;
   queue->compute_ctx = compute_ctx;
   queue->query_ctx = query_ctx;
   queue->transfer_ctx = transfer_ctx;

   queue->vk.driver_submit = pvr_driver_queue_submit;

   return VK_SUCCESS;

err_query_ctx_destroy:
   pvr_compute_ctx_destroy(query_ctx);

err_compute_ctx_destroy:
   pvr_compute_ctx_destroy(compute_ctx);

err_transfer_ctx_destroy:
   pvr_transfer_ctx_destroy(transfer_ctx);

err_vk_queue_finish:
   vk_queue_finish(&queue->vk);

   return result;
}

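/* Create the queues requested in pCreateInfo. Only one queue family is
 * supported, with at most PVR_MAX_QUEUES queues.
 */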
VkResult pvr_queues_create(struct pvr_device *device,
                           const VkDeviceCreateInfo *pCreateInfo)
{
   VkResult result;

   /* Check requested queue families and queues */
   assert(pCreateInfo->queueCreateInfoCount == 1);
   assert(pCreateInfo->pQueueCreateInfos[0].queueFamilyIndex == 0);
   assert(pCreateInfo->pQueueCreateInfos[0].queueCount <= PVR_MAX_QUEUES);

   const VkDeviceQueueCreateInfo *queue_create =
      &pCreateInfo->pQueueCreateInfos[0];

   device->queues = vk_alloc(&device->vk.alloc,
                             queue_create->queueCount * sizeof(*device->queues),
                             8,
                             VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
   if (!device->queues)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   device->queue_count = 0;

   for (uint32_t i = 0; i < queue_create->queueCount; i++) {
      result = pvr_queue_init(device, &device->queues[i], queue_create, i);
      if (result != VK_SUCCESS)
         goto err_queues_finish;

      device->queue_count++;
   }

   return VK_SUCCESS;

err_queues_finish:
   pvr_queues_destroy(device);
   return result;
}

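/* Destroy the queue's outstanding per-stage syncs and hardware contexts, then
 * finish the common vk_queue state.
 */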
static void pvr_queue_finish(struct pvr_queue *queue)
{
   for (uint32_t i = 0; i < ARRAY_SIZE(queue->next_job_wait_sync); i++) {
      if (queue->next_job_wait_sync[i])
         vk_sync_destroy(&queue->device->vk, queue->next_job_wait_sync[i]);
   }

   for (uint32_t i = 0; i < ARRAY_SIZE(queue->last_job_signal_sync); i++) {
      if (queue->last_job_signal_sync[i])
         vk_sync_destroy(&queue->device->vk, queue->last_job_signal_sync[i]);
   }

   pvr_render_ctx_destroy(queue->gfx_ctx);
   pvr_compute_ctx_destroy(queue->query_ctx);
   pvr_compute_ctx_destroy(queue->compute_ctx);
   pvr_transfer_ctx_destroy(queue->transfer_ctx);

   vk_queue_finish(&queue->vk);
}

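/* Finish all queues created by pvr_queues_create() and free the array. */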
void pvr_queues_destroy(struct pvr_device *device)
{
   for (uint32_t q_idx = 0; q_idx < device->queue_count; q_idx++)
      pvr_queue_finish(&device->queues[q_idx]);

   vk_free(&device->vk.alloc, device->queues);
}

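/* Retire the wait sync consumed by the job that was just submitted and record
 * its signal sync as the last one for that job type.
 */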
static void pvr_update_job_syncs(struct pvr_device *device,
                                 struct pvr_queue *queue,
                                 struct vk_sync *new_signal_sync,
                                 enum pvr_job_type submitted_job_type)
{
   if (queue->next_job_wait_sync[submitted_job_type]) {
      vk_sync_destroy(&device->vk,
                      queue->next_job_wait_sync[submitted_job_type]);
      queue->next_job_wait_sync[submitted_job_type] = NULL;
   }

   if (queue->last_job_signal_sync[submitted_job_type]) {
      vk_sync_destroy(&device->vk,
                      queue->last_job_signal_sync[submitted_job_type]);
   }

   queue->last_job_signal_sync[submitted_job_type] = new_signal_sync;
}

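/* Submit the geometry and, when requested, fragment work of a graphics sub
 * command, creating a signal sync for each stage that runs.
 */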
static VkResult pvr_process_graphics_cmd(struct pvr_device *device,
                                         struct pvr_queue *queue,
                                         struct pvr_cmd_buffer *cmd_buffer,
                                         struct pvr_sub_cmd_gfx *sub_cmd)
{
   pvr_dev_addr_t original_ctrl_stream_addr = { 0 };
   struct vk_sync *geom_signal_sync;
   struct vk_sync *frag_signal_sync = NULL;
   VkResult result;

   result = vk_sync_create(&device->vk,
                           &device->pdevice->ws->syncobj_type,
                           0U,
                           0UL,
                           &geom_signal_sync);
   if (result != VK_SUCCESS)
      return result;

   if (sub_cmd->job.run_frag) {
      result = vk_sync_create(&device->vk,
                              &device->pdevice->ws->syncobj_type,
                              0U,
                              0UL,
                              &frag_signal_sync);
      if (result != VK_SUCCESS)
         goto err_destroy_geom_sync;
   }

   /* FIXME: DoShadowLoadOrStore() */

   /* Perform two render submits when using multiple framebuffer layers. The
    * first submit contains just geometry, while the second only terminates
    * (and triggers the fragment render if originally specified). This is needed
    * because the render target cache gets cleared on terminating submits, which
    * could result in missing primitives.
    */
   if (pvr_sub_cmd_gfx_requires_split_submit(sub_cmd)) {
      /* If fragment work shouldn't be run there's no need for a split,
       * and if geometry_terminate is false this kick can't have a fragment
       * stage without another terminating geometry kick.
       */
      assert(sub_cmd->job.geometry_terminate && sub_cmd->job.run_frag);

      /* First submit must not touch fragment work. */
      sub_cmd->job.geometry_terminate = false;
      sub_cmd->job.run_frag = false;

      result =
         pvr_render_job_submit(queue->gfx_ctx,
                               &sub_cmd->job,
                               queue->next_job_wait_sync[PVR_JOB_TYPE_GEOM],
                               NULL,
                               NULL,
                               NULL);

      sub_cmd->job.geometry_terminate = true;
      sub_cmd->job.run_frag = true;

      if (result != VK_SUCCESS)
         goto err_destroy_frag_sync;

      original_ctrl_stream_addr = sub_cmd->job.ctrl_stream_addr;

      /* Second submit contains only a trivial control stream to terminate the
       * geometry work.
       */
      assert(sub_cmd->terminate_ctrl_stream);
      sub_cmd->job.ctrl_stream_addr =
         sub_cmd->terminate_ctrl_stream->vma->dev_addr;
   }

   result = pvr_render_job_submit(queue->gfx_ctx,
                                  &sub_cmd->job,
                                  queue->next_job_wait_sync[PVR_JOB_TYPE_GEOM],
                                  queue->next_job_wait_sync[PVR_JOB_TYPE_FRAG],
                                  geom_signal_sync,
                                  frag_signal_sync);

   if (original_ctrl_stream_addr.addr > 0)
      sub_cmd->job.ctrl_stream_addr = original_ctrl_stream_addr;

   if (result != VK_SUCCESS)
      goto err_destroy_frag_sync;

   pvr_update_job_syncs(device, queue, geom_signal_sync, PVR_JOB_TYPE_GEOM);

   if (sub_cmd->job.run_frag)
      pvr_update_job_syncs(device, queue, frag_signal_sync, PVR_JOB_TYPE_FRAG);

   /* FIXME: DoShadowLoadOrStore() */

   return VK_SUCCESS;

err_destroy_frag_sync:
   if (frag_signal_sync)
      vk_sync_destroy(&device->vk, frag_signal_sync);
err_destroy_geom_sync:
   vk_sync_destroy(&device->vk, geom_signal_sync);

   return result;
}

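/* Submit a compute sub command on the queue's compute context. */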
static VkResult pvr_process_compute_cmd(struct pvr_device *device,
                                        struct pvr_queue *queue,
                                        struct pvr_sub_cmd_compute *sub_cmd)
{
   struct vk_sync *sync;
   VkResult result;

   result = vk_sync_create(&device->vk,
                           &device->pdevice->ws->syncobj_type,
                           0U,
                           0UL,
                           &sync);
   if (result != VK_SUCCESS)
      return result;

   result =
      pvr_compute_job_submit(queue->compute_ctx,
                             sub_cmd,
                             queue->next_job_wait_sync[PVR_JOB_TYPE_COMPUTE],
                             sync);
   if (result != VK_SUCCESS) {
      vk_sync_destroy(&device->vk, sync);
      return result;
   }

   pvr_update_job_syncs(device, queue, sync, PVR_JOB_TYPE_COMPUTE);

   return result;
}

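/* Submit a transfer sub command on the queue's transfer context. */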
static VkResult pvr_process_transfer_cmds(struct pvr_device *device,
                                          struct pvr_queue *queue,
                                          struct pvr_sub_cmd_transfer *sub_cmd)
{
   struct vk_sync *sync;
   VkResult result;

   result = vk_sync_create(&device->vk,
                           &device->pdevice->ws->syncobj_type,
                           0U,
                           0UL,
                           &sync);
   if (result != VK_SUCCESS)
      return result;

   result =
      pvr_transfer_job_submit(queue->transfer_ctx,
                              sub_cmd,
                              queue->next_job_wait_sync[PVR_JOB_TYPE_TRANSFER],
                              sync);
   if (result != VK_SUCCESS) {
      vk_sync_destroy(&device->vk, sync);
      return result;
   }

   pvr_update_job_syncs(device, queue, sync, PVR_JOB_TYPE_TRANSFER);

   return result;
}

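/* Submit an occlusion query sub command on the queue's query context. */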
static VkResult
pvr_process_occlusion_query_cmd(struct pvr_device *device,
                                struct pvr_queue *queue,
                                struct pvr_sub_cmd_compute *sub_cmd)
{
   struct vk_sync *sync;
   VkResult result;

   /* TODO: Currently we add barrier event sub commands to handle the sync
    * necessary for the different occlusion query types. Would we get any speed
    * up in processing the queue by doing that sync here without using event sub
    * commands?
    */

   result = vk_sync_create(&device->vk,
                           &device->pdevice->ws->syncobj_type,
                           0U,
                           0UL,
                           &sync);
   if (result != VK_SUCCESS)
      return result;

   result = pvr_compute_job_submit(
      queue->query_ctx,
      sub_cmd,
      queue->next_job_wait_sync[PVR_JOB_TYPE_OCCLUSION_QUERY],
      sync);
   if (result != VK_SUCCESS) {
      vk_sync_destroy(&device->vk, sync);
      return result;
   }

   pvr_update_job_syncs(device, queue, sync, PVR_JOB_TYPE_OCCLUSION_QUERY);

   return result;
}

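/* Process a barrier sub command: for each destination stage, submit a null job
 * that waits on the source stages' last signal syncs and install its signal
 * sync as that stage's next_job_wait_sync.
 */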
static VkResult
pvr_process_event_cmd_barrier(struct pvr_device *device,
                              struct pvr_queue *queue,
                              struct pvr_sub_cmd_event_barrier *sub_cmd)
{
   const uint32_t src_mask = sub_cmd->wait_for_stage_mask;
   const uint32_t dst_mask = sub_cmd->wait_at_stage_mask;
   struct vk_sync_wait wait_syncs[PVR_JOB_TYPE_MAX + 1];
   uint32_t src_wait_count = 0;
   VkResult result;

   assert(!(src_mask & ~(PVR_PIPELINE_STAGE_ALL_BITS |
                         PVR_PIPELINE_STAGE_OCCLUSION_QUERY_BIT)));
   assert(!(dst_mask & ~(PVR_PIPELINE_STAGE_ALL_BITS |
                         PVR_PIPELINE_STAGE_OCCLUSION_QUERY_BIT)));

   u_foreach_bit (stage, src_mask) {
      if (queue->last_job_signal_sync[stage]) {
         wait_syncs[src_wait_count++] = (struct vk_sync_wait){
            .sync = queue->last_job_signal_sync[stage],
            .stage_mask = ~(VkPipelineStageFlags2)0,
            .wait_value = 0,
         };
      }
   }

   /* No previous src jobs that need finishing so no need for a barrier. */
   if (src_wait_count == 0)
      return VK_SUCCESS;

   u_foreach_bit (stage, dst_mask) {
      uint32_t wait_count = src_wait_count;
      struct vk_sync_signal signal;
      struct vk_sync *signal_sync;

      result = vk_sync_create(&device->vk,
                              &device->pdevice->ws->syncobj_type,
                              0U,
                              0UL,
                              &signal_sync);
      if (result != VK_SUCCESS)
         return result;

      signal = (struct vk_sync_signal){
         .sync = signal_sync,
         .stage_mask = ~(VkPipelineStageFlags2)0,
         .signal_value = 0,
      };

      if (queue->next_job_wait_sync[stage]) {
         wait_syncs[wait_count++] = (struct vk_sync_wait){
            .sync = queue->next_job_wait_sync[stage],
            .stage_mask = ~(VkPipelineStageFlags2)0,
            .wait_value = 0,
         };
      }

      result = device->ws->ops->null_job_submit(device->ws,
                                                wait_syncs,
                                                wait_count,
                                                &signal);
      if (result != VK_SUCCESS) {
         vk_sync_destroy(&device->vk, signal_sync);
         return result;
      }

      if (queue->next_job_wait_sync[stage])
         vk_sync_destroy(&device->vk, queue->next_job_wait_sync[stage]);

      queue->next_job_wait_sync[stage] = signal_sync;
   }

   return VK_SUCCESS;
}

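/* Set or reset an event from the device side: signal a new sync once the
 * requested source stages have finished and attach it to the event.
 */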
static VkResult
pvr_process_event_cmd_set_or_reset(struct pvr_device *device,
                                   struct pvr_queue *queue,
                                   struct pvr_sub_cmd_event_set_reset *sub_cmd,
                                   const enum pvr_event_state new_event_state)
{
   /* Sized with PVR_NUM_SYNC_PIPELINE_STAGES rather than PVR_JOB_TYPE_MAX
    * since the latter also includes PVR_JOB_TYPE_OCCLUSION_QUERY, which never
    * appears in the src mask.
    */
   struct vk_sync_wait waits[PVR_NUM_SYNC_PIPELINE_STAGES];
   struct vk_sync_signal signal;
   struct vk_sync *signal_sync;

   uint32_t wait_count = 0;
   VkResult result;

   assert(!(sub_cmd->wait_for_stage_mask & ~PVR_PIPELINE_STAGE_ALL_BITS));

   u_foreach_bit (stage, sub_cmd->wait_for_stage_mask) {
      if (!queue->last_job_signal_sync[stage])
         continue;

      waits[wait_count++] = (struct vk_sync_wait){
         .sync = queue->last_job_signal_sync[stage],
         .stage_mask = ~(VkPipelineStageFlags2)0,
         .wait_value = 0,
      };
   }

   result = vk_sync_create(&device->vk,
                           &device->pdevice->ws->syncobj_type,
                           0U,
                           0UL,
                           &signal_sync);
   if (result != VK_SUCCESS)
      return result;

   signal = (struct vk_sync_signal){
      .sync = signal_sync,
      .stage_mask = ~(VkPipelineStageFlags2)0,
      .signal_value = 0,
   };

   result =
      device->ws->ops->null_job_submit(device->ws, waits, wait_count, &signal);
   if (result != VK_SUCCESS) {
      vk_sync_destroy(&device->vk, signal_sync);
      return result;
   }

   if (sub_cmd->event->sync)
      vk_sync_destroy(&device->vk, sub_cmd->event->sync);

   sub_cmd->event->sync = signal_sync;
   sub_cmd->event->state = new_event_state;

   return VK_SUCCESS;
}

static inline VkResult
pvr_process_event_cmd_set(struct pvr_device *device,
                          struct pvr_queue *queue,
                          struct pvr_sub_cmd_event_set_reset *sub_cmd)
{
   return pvr_process_event_cmd_set_or_reset(device,
                                             queue,
                                             sub_cmd,
                                             PVR_EVENT_STATE_SET_BY_DEVICE);
}

static inline VkResult
pvr_process_event_cmd_reset(struct pvr_device *device,
                            struct pvr_queue *queue,
                            struct pvr_sub_cmd_event_set_reset *sub_cmd)
{
   return pvr_process_event_cmd_set_or_reset(device,
                                             queue,
                                             sub_cmd,
                                             PVR_EVENT_STATE_RESET_BY_DEVICE);
}

/**
 * \brief Process an event sub command of wait type.
 *
 * This sets up barrier syncobjs to create a dependency from the event syncobjs
 * onto the next job submissions.
 *
 * The barriers are set up taking each event's dst stage mask into account, so
 * this is in line with vkCmdWaitEvents2().
 *
 * \param[in]     device   Device to create the syncobjs on.
 * \param[in,out] queue    Queue whose next_job_wait_sync entries are updated.
 * \param[in]     sub_cmd  Sub command to process.
 */
static VkResult
pvr_process_event_cmd_wait(struct pvr_device *device,
                           struct pvr_queue *queue,
                           struct pvr_sub_cmd_event_wait *sub_cmd)
{
   uint32_t dst_mask = 0;
   VkResult result;

   STACK_ARRAY(struct vk_sync_wait, waits, sub_cmd->count + 1);
   if (!waits)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   for (uint32_t i = 0; i < sub_cmd->count; i++)
      dst_mask |= sub_cmd->wait_at_stage_masks[i];

   u_foreach_bit (stage, dst_mask) {
      struct vk_sync_signal signal;
      struct vk_sync *signal_sync;
      uint32_t wait_count = 0;

      for (uint32_t i = 0; i < sub_cmd->count; i++) {
         if (sub_cmd->wait_at_stage_masks[i] & stage) {
            waits[wait_count++] = (struct vk_sync_wait){
               .sync = sub_cmd->events[i]->sync,
               .stage_mask = ~(VkPipelineStageFlags2)0,
               .wait_value = 0,
            };
         }
      }

      if (!wait_count)
         continue;

      if (queue->next_job_wait_sync[stage]) {
         waits[wait_count++] = (struct vk_sync_wait){
            .sync = queue->next_job_wait_sync[stage],
            .stage_mask = ~(VkPipelineStageFlags2)0,
            .wait_value = 0,
         };
      }

      assert(wait_count <= (sub_cmd->count + 1));

      result = vk_sync_create(&device->vk,
                              &device->pdevice->ws->syncobj_type,
                              0U,
                              0UL,
                              &signal_sync);
      if (result != VK_SUCCESS)
         goto err_free_waits;

      signal = (struct vk_sync_signal){
         .sync = signal_sync,
         .stage_mask = ~(VkPipelineStageFlags2)0,
         .signal_value = 0,
      };

      result = device->ws->ops->null_job_submit(device->ws,
                                                waits,
                                                wait_count,
                                                &signal);
      if (result != VK_SUCCESS) {
         vk_sync_destroy(&device->vk, signal.sync);
         goto err_free_waits;
      }

      if (queue->next_job_wait_sync[stage])
         vk_sync_destroy(&device->vk, queue->next_job_wait_sync[stage]);

      queue->next_job_wait_sync[stage] = signal.sync;
   }

   STACK_ARRAY_FINISH(waits);

   return VK_SUCCESS;

err_free_waits:
   STACK_ARRAY_FINISH(waits);

   return result;
}

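/* Dispatch an event sub command to its set/reset/wait/barrier handler. */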
static VkResult pvr_process_event_cmd(struct pvr_device *device,
                                      struct pvr_queue *queue,
                                      struct pvr_sub_cmd_event *sub_cmd)
{
   switch (sub_cmd->type) {
   case PVR_EVENT_TYPE_SET:
      return pvr_process_event_cmd_set(device, queue, &sub_cmd->set_reset);
   case PVR_EVENT_TYPE_RESET:
      return pvr_process_event_cmd_reset(device, queue, &sub_cmd->set_reset);
   case PVR_EVENT_TYPE_WAIT:
      return pvr_process_event_cmd_wait(device, queue, &sub_cmd->wait);
   case PVR_EVENT_TYPE_BARRIER:
      return pvr_process_event_cmd_barrier(device, queue, &sub_cmd->barrier);
   default:
      unreachable("Invalid event sub-command type.");
   }
}

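/* Process a command buffer's sub commands in submission order, inserting
 * implicit barriers where occlusion query or transfer/fragment serialization
 * is required.
 */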
static VkResult pvr_process_cmd_buffer(struct pvr_device *device,
                                       struct pvr_queue *queue,
                                       struct pvr_cmd_buffer *cmd_buffer)
{
   VkResult result;

   list_for_each_entry_safe (struct pvr_sub_cmd,
                             sub_cmd,
                             &cmd_buffer->sub_cmds,
                             link) {
      switch (sub_cmd->type) {
      case PVR_SUB_CMD_TYPE_GRAPHICS: {
         /* If the fragment job utilizes occlusion queries, for data integrity
          * it needs to wait for the occlusion query to be processed.
          */
         if (sub_cmd->gfx.has_occlusion_query) {
            struct pvr_sub_cmd_event_barrier barrier = {
               .wait_for_stage_mask = PVR_PIPELINE_STAGE_OCCLUSION_QUERY_BIT,
               .wait_at_stage_mask = PVR_PIPELINE_STAGE_FRAG_BIT,
            };

            result = pvr_process_event_cmd_barrier(device, queue, &barrier);
            if (result != VK_SUCCESS)
               break;
         }

         if (sub_cmd->gfx.wait_on_previous_transfer) {
            struct pvr_sub_cmd_event_barrier barrier = {
               .wait_for_stage_mask = PVR_PIPELINE_STAGE_TRANSFER_BIT,
               .wait_at_stage_mask = PVR_PIPELINE_STAGE_FRAG_BIT,
            };

            result = pvr_process_event_cmd_barrier(device, queue, &barrier);
            if (result != VK_SUCCESS)
               break;
         }

         result =
            pvr_process_graphics_cmd(device, queue, cmd_buffer, &sub_cmd->gfx);
         break;
      }

      case PVR_SUB_CMD_TYPE_COMPUTE:
         result = pvr_process_compute_cmd(device, queue, &sub_cmd->compute);
         break;

      case PVR_SUB_CMD_TYPE_TRANSFER: {
         const bool serialize_with_frag = sub_cmd->transfer.serialize_with_frag;

         if (serialize_with_frag) {
            struct pvr_sub_cmd_event_barrier barrier = {
               .wait_for_stage_mask = PVR_PIPELINE_STAGE_FRAG_BIT,
               .wait_at_stage_mask = PVR_PIPELINE_STAGE_TRANSFER_BIT,
            };

            result = pvr_process_event_cmd_barrier(device, queue, &barrier);
            if (result != VK_SUCCESS)
               break;
         }

         result = pvr_process_transfer_cmds(device, queue, &sub_cmd->transfer);

         if (serialize_with_frag) {
            struct pvr_sub_cmd_event_barrier barrier = {
               .wait_for_stage_mask = PVR_PIPELINE_STAGE_TRANSFER_BIT,
               .wait_at_stage_mask = PVR_PIPELINE_STAGE_FRAG_BIT,
            };

            if (result != VK_SUCCESS)
               break;

            result = pvr_process_event_cmd_barrier(device, queue, &barrier);
         }

         break;
      }

      case PVR_SUB_CMD_TYPE_OCCLUSION_QUERY:
         result =
            pvr_process_occlusion_query_cmd(device, queue, &sub_cmd->compute);
         break;

      case PVR_SUB_CMD_TYPE_EVENT:
         result = pvr_process_event_cmd(device, queue, &sub_cmd->event);
         break;

      default:
         mesa_loge("Unsupported sub-command type %d", sub_cmd->type);
         result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
      }

      if (result != VK_SUCCESS)
         return result;

      p_atomic_inc(&device->global_cmd_buffer_submit_count);
   }

   return VK_SUCCESS;
}

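/* Wait for all syncs of the previous submission to complete, then destroy them
 * so the next submission starts from a clean state.
 */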
static VkResult pvr_clear_last_submits_syncs(struct pvr_queue *queue)
{
   struct vk_sync_wait waits[PVR_JOB_TYPE_MAX * 2];
   uint32_t wait_count = 0;
   VkResult result;

   for (uint32_t i = 0; i < PVR_JOB_TYPE_MAX; i++) {
      if (queue->next_job_wait_sync[i]) {
         waits[wait_count++] = (struct vk_sync_wait){
            .sync = queue->next_job_wait_sync[i],
            .stage_mask = ~(VkPipelineStageFlags2)0,
            .wait_value = 0,
         };
      }

      if (queue->last_job_signal_sync[i]) {
         waits[wait_count++] = (struct vk_sync_wait){
            .sync = queue->last_job_signal_sync[i],
            .stage_mask = ~(VkPipelineStageFlags2)0,
            .wait_value = 0,
         };
      }
   }

   result = vk_sync_wait_many(&queue->device->vk,
                              wait_count,
                              waits,
                              VK_SYNC_WAIT_COMPLETE,
                              UINT64_MAX);

   if (result != VK_SUCCESS)
      return vk_error(queue, result);

   for (uint32_t i = 0; i < PVR_JOB_TYPE_MAX; i++) {
      if (queue->next_job_wait_sync[i]) {
         vk_sync_destroy(&queue->device->vk, queue->next_job_wait_sync[i]);
         queue->next_job_wait_sync[i] = NULL;
      }

      if (queue->last_job_signal_sync[i]) {
         vk_sync_destroy(&queue->device->vk, queue->last_job_signal_sync[i]);
         queue->last_job_signal_sync[i] = NULL;
      }
   }

   return VK_SUCCESS;
}

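/* For each signal operation, submit a null job that waits on the last signal
 * syncs of the relevant source stages and then signals the user's sync.
 */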
static VkResult pvr_process_queue_signals(struct pvr_queue *queue,
                                          struct vk_sync_signal *signals,
                                          uint32_t signal_count)
{
   struct vk_sync_wait signal_waits[PVR_JOB_TYPE_MAX];
   struct pvr_device *device = queue->device;
   VkResult result;

   for (uint32_t signal_idx = 0; signal_idx < signal_count; signal_idx++) {
      struct vk_sync_signal *signal = &signals[signal_idx];
      const enum pvr_pipeline_stage_bits signal_stage_src =
         pvr_stage_mask_src(signal->stage_mask);
      uint32_t wait_count = 0;

      for (uint32_t i = 0; i < PVR_JOB_TYPE_MAX; i++) {
         /* Exception for occlusion query jobs since that's something internal,
          * so the user provided syncs won't ever have it as a source stage.
          */
         if (!(signal_stage_src & BITFIELD_BIT(i)) &&
             i != PVR_JOB_TYPE_OCCLUSION_QUERY)
            continue;

         if (!queue->last_job_signal_sync[i])
            continue;

         signal_waits[wait_count++] = (struct vk_sync_wait){
            .sync = queue->last_job_signal_sync[i],
            .stage_mask = ~(VkPipelineStageFlags2)0,
            .wait_value = 0,
         };
      }

      result = device->ws->ops->null_job_submit(device->ws,
                                                signal_waits,
                                                wait_count,
                                                signal);
      if (result != VK_SUCCESS)
         return result;
   }

   return VK_SUCCESS;
}

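/* Convert the submission's wait operations into per job type
 * next_job_wait_sync syncs by submitting one null job per job type.
 */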
static VkResult pvr_process_queue_waits(struct pvr_queue *queue,
                                        struct vk_sync_wait *waits,
                                        uint32_t wait_count)
{
   struct pvr_device *device = queue->device;
   VkResult result;

   STACK_ARRAY(struct vk_sync_wait, stage_waits, wait_count);
   if (!stage_waits)
      return vk_error(queue, VK_ERROR_OUT_OF_HOST_MEMORY);

   for (uint32_t i = 0; i < PVR_JOB_TYPE_MAX; i++) {
      struct vk_sync_signal next_job_wait_signal_sync;
      uint32_t stage_wait_count = 0;

      for (uint32_t wait_idx = 0; wait_idx < wait_count; wait_idx++) {
         if (!(pvr_stage_mask(waits[wait_idx].stage_mask) & BITFIELD_BIT(i)))
            continue;

         stage_waits[stage_wait_count++] = (struct vk_sync_wait){
            .sync = waits[wait_idx].sync,
            .stage_mask = ~(VkPipelineStageFlags2)0,
            .wait_value = waits[wait_idx].wait_value,
         };
      }

      result = vk_sync_create(&device->vk,
                              &device->pdevice->ws->syncobj_type,
                              0U,
                              0UL,
                              &queue->next_job_wait_sync[i]);
      if (result != VK_SUCCESS)
         goto err_free_waits;

      next_job_wait_signal_sync = (struct vk_sync_signal){
         .sync = queue->next_job_wait_sync[i],
         .stage_mask = ~(VkPipelineStageFlags2)0,
         .signal_value = 0,
      };

      result = device->ws->ops->null_job_submit(device->ws,
                                                stage_waits,
                                                stage_wait_count,
                                                &next_job_wait_signal_sync);
      if (result != VK_SUCCESS)
         goto err_free_waits;
   }

   STACK_ARRAY_FINISH(stage_waits);

   return VK_SUCCESS;

err_free_waits:
   STACK_ARRAY_FINISH(stage_waits);

   return result;
}

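/* vk_queue::driver_submit entry point: clear the previous submission's syncs,
 * set up the per-stage waits, process each command buffer and finally handle
 * the signal operations.
 */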
static VkResult pvr_driver_queue_submit(struct vk_queue *queue,
                                        struct vk_queue_submit *submit)
{
   struct pvr_queue *driver_queue = container_of(queue, struct pvr_queue, vk);
   struct pvr_device *device = driver_queue->device;
   VkResult result;

   result = pvr_clear_last_submits_syncs(driver_queue);
   if (result != VK_SUCCESS)
      return result;

   result =
      pvr_process_queue_waits(driver_queue, submit->waits, submit->wait_count);
   if (result != VK_SUCCESS)
      return result;

   for (uint32_t i = 0U; i < submit->command_buffer_count; i++) {
      result = pvr_process_cmd_buffer(
         device,
         driver_queue,
         container_of(submit->command_buffers[i], struct pvr_cmd_buffer, vk));
      if (result != VK_SUCCESS)
         return result;
   }

   result = pvr_process_queue_signals(driver_queue,
                                      submit->signals,
                                      submit->signal_count);
   if (result != VK_SUCCESS)
      return result;

   return VK_SUCCESS;
}
969