/*
 * Copyright 2024 Valve Corporation
 * Copyright 2024 Alyssa Rosenzweig
 * Copyright 2022-2023 Collabora Ltd. and Red Hat Inc.
 * SPDX-License-Identifier: MIT
 */
#include "hk_queue.h"

#include "agx_bo.h"
#include "agx_device.h"
#include "agx_pack.h"
#include "decode.h"
#include "hk_cmd_buffer.h"
#include "hk_device.h"
#include "hk_physical_device.h"

#include <xf86drm.h>
#include "asahi/lib/unstable_asahi_drm.h"
#include "util/list.h"
#include "vulkan/vulkan_core.h"

#include "vk_drm_syncobj.h"
#include "vk_sync.h"

/*
 * We need to specially handle submits with no control streams. The kernel
 * can't accept empty submits, but we can end up here in Vulkan for
 * synchronization purposes only. Rather than submit a no-op job (slow),
 * we simply tie the fences together.
 */
static VkResult
queue_submit_empty(struct hk_device *dev, struct hk_queue *queue,
                   struct vk_queue_submit *submit)
{
   int fd = dev->dev.fd;

   /* Transfer the waits into the queue timeline. */
   for (unsigned i = 0; i < submit->wait_count; ++i) {
      struct vk_sync_wait *wait = &submit->waits[i];

      assert(vk_sync_type_is_drm_syncobj(wait->sync->type));
      const struct vk_drm_syncobj *syncobj = vk_sync_as_drm_syncobj(wait->sync);

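      /* drmSyncobjTransfer(fd, dst, dst_point, src, src_point, flags) copies
       * the fence from the wait's syncobj at wait_value onto the next point
       * of our queue timeline.
       */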
      drmSyncobjTransfer(fd, queue->drm.syncobj, ++queue->drm.timeline_value,
                         syncobj->syncobj, wait->wait_value, 0);
   }

   /* Transfer the queue timeline into each out fence. They will all be
    * signalled when we reach this point.
    */
   for (unsigned i = 0; i < submit->signal_count; ++i) {
      struct vk_sync_signal *signal = &submit->signals[i];

      assert(vk_sync_type_is_drm_syncobj(signal->sync->type));
      const struct vk_drm_syncobj *syncobj =
         vk_sync_as_drm_syncobj(signal->sync);

      drmSyncobjTransfer(fd, syncobj->syncobj, signal->signal_value,
                         queue->drm.syncobj, queue->drm.timeline_value, 0);
   }

   return VK_SUCCESS;
}

static void
asahi_fill_cdm_command(struct hk_device *dev, struct hk_cs *cs,
                       struct drm_asahi_cmd_compute *cmd)
{
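   /* If the control stream has been linked into multiple chunks, its exact
    * encoded length is not tracked, so a fixed size is used instead (hence
    * the XXX below); otherwise the length is just the bytes encoded so far.
    */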
   size_t len = cs->stream_linked ? 65536 /* XXX */ : (cs->current - cs->start);

   *cmd = (struct drm_asahi_cmd_compute){
      .encoder_ptr = cs->addr,
      .encoder_end = cs->addr + len,

      .sampler_array = dev->samplers.table.bo->va->addr,
      .sampler_count = dev->samplers.table.alloc,
      .sampler_max = dev->samplers.table.alloc + 1,

      .usc_base = dev->dev.shader_base,

      .encoder_id = agx_get_global_id(&dev->dev),
      .cmd_id = agx_get_global_id(&dev->dev),
      .unk_mask = 0xffffffff,
   };

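   /* When the main shader or its preamble needs scratch, point the command at
    * the device-wide helper program and the compute scratch buffer so register
    * spills have backing memory. The preamble flag appears to live in the high
    * half of helper_cfg.
    */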
   if (cs->scratch.cs.main || cs->scratch.cs.preamble) {
      cmd->helper_arg = dev->scratch.cs.buf->va->addr;
      cmd->helper_cfg = cs->scratch.cs.preamble << 16;
      cmd->helper_program = dev->dev.helper->va->addr | 1;
   }
}

static void
asahi_fill_vdm_command(struct hk_device *dev, struct hk_cs *cs,
                       struct drm_asahi_cmd_render *c)
{
#if 0
   bool clear_pipeline_textures =
      agx_tilebuffer_spills(&batch->tilebuffer_layout);

   for (unsigned i = 0; i < batch->key.nr_cbufs; ++i) {
      struct pipe_surface *surf = batch->key.cbufs[i];

      clear_pipeline_textures |=
         surf && surf->texture && !(batch->clear & (PIPE_CLEAR_COLOR0 << i));
   }

#endif
   unsigned cmd_ta_id = agx_get_global_id(&dev->dev);
   unsigned cmd_3d_id = agx_get_global_id(&dev->dev);
   unsigned encoder_id = agx_get_global_id(&dev->dev);

   memset(c, 0, sizeof(*c));

   c->encoder_ptr = cs->addr;
   c->encoder_id = encoder_id;
   c->cmd_3d_id = cmd_3d_id;
   c->cmd_ta_id = cmd_ta_id;
   c->ppp_ctrl = 0x202;

   c->fragment_usc_base = dev->dev.shader_base;
   c->vertex_usc_base = c->fragment_usc_base;

   c->fb_width = cs->cr.width;
   c->fb_height = cs->cr.height;

   c->isp_bgobjdepth = cs->cr.isp_bgobjdepth;
   c->isp_bgobjvals = cs->cr.isp_bgobjvals;

   static_assert(sizeof(c->zls_ctrl) == sizeof(cs->cr.zls_control));
   memcpy(&c->zls_ctrl, &cs->cr.zls_control, sizeof(cs->cr.zls_control));

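   /* Dimensions are packed minus-one, height in the upper bits; the shift by
    * 15 suggests 15-bit fields per dimension.
    */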
   c->depth_dimensions = (cs->cr.width - 1) | ((cs->cr.height - 1) << 15);

   c->depth_buffer_load = cs->cr.depth.buffer;
   c->depth_buffer_store = cs->cr.depth.buffer;
   c->depth_buffer_partial = cs->cr.depth.buffer;

   c->depth_buffer_load_stride = cs->cr.depth.stride;
   c->depth_buffer_store_stride = cs->cr.depth.stride;
   c->depth_buffer_partial_stride = cs->cr.depth.stride;

   c->depth_meta_buffer_load = cs->cr.depth.meta;
   c->depth_meta_buffer_store = cs->cr.depth.meta;
   c->depth_meta_buffer_partial = cs->cr.depth.meta;

   c->depth_meta_buffer_load_stride = cs->cr.depth.stride;
   c->depth_meta_buffer_store_stride = cs->cr.depth.meta_stride;
   c->depth_meta_buffer_partial_stride = cs->cr.depth.meta_stride;

   c->stencil_buffer_load = cs->cr.stencil.buffer;
   c->stencil_buffer_store = cs->cr.stencil.buffer;
   c->stencil_buffer_partial = cs->cr.stencil.buffer;

   c->stencil_buffer_load_stride = cs->cr.stencil.stride;
   c->stencil_buffer_store_stride = cs->cr.stencil.stride;
   c->stencil_buffer_partial_stride = cs->cr.stencil.stride;

   c->stencil_meta_buffer_load = cs->cr.stencil.meta;
   c->stencil_meta_buffer_store = cs->cr.stencil.meta;
   c->stencil_meta_buffer_partial = cs->cr.stencil.meta;

   c->stencil_meta_buffer_load_stride = cs->cr.stencil.stride;
   c->stencil_meta_buffer_store_stride = cs->cr.stencil.meta_stride;
   c->stencil_meta_buffer_partial_stride = cs->cr.stencil.meta_stride;

   c->iogpu_unk_214 = cs->cr.iogpu_unk_214;

#if 0
   if (clear_pipeline_textures)
      c->flags |= ASAHI_RENDER_SET_WHEN_RELOADING_Z_OR_S;
   else
      c->flags |= ASAHI_RENDER_NO_CLEAR_PIPELINE_TEXTURES;

   if (zres && !(batch->clear & PIPE_CLEAR_DEPTH))
      c->flags |= ASAHI_RENDER_SET_WHEN_RELOADING_Z_OR_S;

   if (sres && !(batch->clear & PIPE_CLEAR_STENCIL))
      c->flags |= ASAHI_RENDER_SET_WHEN_RELOADING_Z_OR_S;
#endif

   if (dev->dev.debug & AGX_DBG_NOCLUSTER)
      c->flags |= ASAHI_RENDER_NO_VERTEX_CLUSTERING;

#if 0
   /* XXX is this for just MSAA+Z+S or MSAA+(Z|S)? */
   if (tib->nr_samples > 1 && framebuffer->zsbuf)
      c->flags |= ASAHI_RENDER_MSAA_ZS;
#endif

   c->utile_width = cs->tib.tile_size.width;
   c->utile_height = cs->tib.tile_size.height;

   /* Can be 0 for attachmentless rendering with no draws */
   c->samples = MAX2(cs->tib.nr_samples, 1);
   c->layers = cs->cr.layers;

   c->ppp_multisamplectl = cs->ppp_multisamplectl;
   c->sample_size = cs->tib.sample_size_B;
   c->tib_blocks = ALIGN_POT(agx_tilebuffer_total_size(&cs->tib), 2048) / 2048;

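   /* Merge thresholds derived from tan(60 degrees) over the framebuffer
    * dimensions; this matches what the hardware interface expects, though the
    * exact meaning of the heuristic is not fully understood.
    */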
   float tan_60 = 1.732051f;
   c->merge_upper_x = fui(tan_60 / cs->cr.width);
   c->merge_upper_y = fui(tan_60 / cs->cr.height);

   c->load_pipeline = cs->cr.bg.main.usc | 4;
   c->store_pipeline = cs->cr.eot.main.usc | 4;
   c->partial_reload_pipeline = cs->cr.bg.partial.usc | 4;
   c->partial_store_pipeline = cs->cr.eot.partial.usc | 4;

   memcpy(&c->load_pipeline_bind, &cs->cr.bg.main.counts,
          sizeof(struct agx_counts_packed));

   memcpy(&c->store_pipeline_bind, &cs->cr.eot.main.counts,
          sizeof(struct agx_counts_packed));

   memcpy(&c->partial_reload_pipeline_bind, &cs->cr.bg.partial.counts,
          sizeof(struct agx_counts_packed));

   memcpy(&c->partial_store_pipeline_bind, &cs->cr.eot.partial.counts,
          sizeof(struct agx_counts_packed));

   c->scissor_array = cs->uploaded_scissor;
   c->depth_bias_array = cs->uploaded_zbias;

   c->vertex_sampler_array = dev->samplers.table.bo->va->addr;
   c->vertex_sampler_count = dev->samplers.table.alloc;
   c->vertex_sampler_max = dev->samplers.table.alloc + 1;

   c->fragment_sampler_array = c->vertex_sampler_array;
   c->fragment_sampler_count = c->vertex_sampler_count;
   c->fragment_sampler_max = c->vertex_sampler_max;

   c->visibility_result_buffer = dev->occlusion_queries.bo->va->addr;

   /* If a tile is empty, we do not want to process it, as the redundant
    * roundtrip of memory-->tilebuffer-->memory wastes a tremendous amount of
    * memory bandwidth. Any draw marks a tile as non-empty, so we only need to
    * process empty tiles if the background+EOT programs have a side effect.
    * This is the case exactly when there is an attachment we are clearing (some
    * attachment A in clear and in resolve <==> non-empty intersection).
    *
    * This case matters a LOT for performance in workloads that split batches.
    */
   if (true /* TODO */)
      c->flags |= ASAHI_RENDER_PROCESS_EMPTY_TILES;

   if (cs->scratch.vs.main || cs->scratch.vs.preamble) {
      c->flags |= ASAHI_RENDER_VERTEX_SPILLS;
      c->vertex_helper_arg = dev->scratch.vs.buf->va->addr;
      c->vertex_helper_cfg = cs->scratch.vs.preamble << 16;
      c->vertex_helper_program = dev->dev.helper->va->addr | 1;
   }

   if (cs->scratch.fs.main || cs->scratch.fs.preamble) {
      c->fragment_helper_arg = dev->scratch.fs.buf->va->addr;
      c->fragment_helper_cfg = cs->scratch.fs.preamble << 16;
      c->fragment_helper_program = dev->dev.helper->va->addr | 1;
   }
}

static void
asahi_fill_sync(struct drm_asahi_sync *sync, struct vk_sync *vk_sync,
                uint64_t value)
{
   if (unlikely(!vk_sync_type_is_drm_syncobj(vk_sync->type))) {
      unreachable("Unsupported sync type");
      return;
   }

   const struct vk_drm_syncobj *syncobj = vk_sync_as_drm_syncobj(vk_sync);
   *sync = (struct drm_asahi_sync){.handle = syncobj->syncobj};

   if (vk_sync->flags & VK_SYNC_IS_TIMELINE) {
      sync->sync_type = DRM_ASAHI_SYNC_TIMELINE_SYNCOBJ;
      sync->timeline_value = value;
   } else {
      sync->sync_type = DRM_ASAHI_SYNC_SYNCOBJ;
   }
}

union drm_asahi_cmd {
   struct drm_asahi_cmd_compute compute;
   struct drm_asahi_cmd_render render;
};

/* XXX: Batching multiple commands per submission is causing rare (7ppm) flakes
 * on the CTS once lossless compression is enabled. This needs to be
 * investigated before we can reenable this mechanism. We are likely missing a
 * cache flush or barrier somewhere.
 *
 * TODO: I think the actual maximum is 64. Can we query from the kernel?
 */
#define MAX_COMMANDS_PER_SUBMIT (1)

static VkResult
queue_submit_single(struct agx_device *dev, struct drm_asahi_submit *submit)
{
   int ret = dev->ops.submit(dev, submit, 0);

   /* XXX: don't trap */
   if (ret) {
      fprintf(stderr, "DRM_IOCTL_ASAHI_SUBMIT failed: %m\n");
      assert(0);
   }

   return VK_SUCCESS;
}

/*
 * The kernel/firmware jointly impose a limit on commands per submit ioctl, but
 * we can build up arbitrarily large command buffers. We handle this here by
 * looping the ioctl, submitting slices of the command buffers that are within
 * bounds.
 */
static VkResult
queue_submit_looped(struct agx_device *dev, struct drm_asahi_submit *submit)
{
   struct drm_asahi_command *cmds = (void *)submit->commands;
   unsigned commands_remaining = submit->command_count;
   unsigned submitted_vdm = 0, submitted_cdm = 0;

   while (commands_remaining) {
      bool first = commands_remaining == submit->command_count;
      bool last = commands_remaining <= MAX_COMMANDS_PER_SUBMIT;

      unsigned count = MIN2(commands_remaining, MAX_COMMANDS_PER_SUBMIT);
      commands_remaining -= count;

      assert(!last || commands_remaining == 0);
      assert(count > 0);

      /* We need to fix up the barriers since barriers are ioctl-relative */
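      /* For example, assuming barriers count per-subqueue commands from the
       * start of the original submit: if two render commands went out in
       * earlier slices, a barrier recorded as 3 against the render subqueue
       * becomes 1 within this slice.
       */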
      for (unsigned i = 0; i < count; ++i) {
         assert(cmds[i].barriers[0] >= submitted_vdm);
         assert(cmds[i].barriers[1] >= submitted_cdm);

         cmds[i].barriers[0] -= submitted_vdm;
         cmds[i].barriers[1] -= submitted_cdm;
      }

      /* We can't signal the out-syncobjs until all prior work finishes. Since
       * only the last ioctl will signal, make sure it waits on prior ioctls.
       *
       * TODO: there might be a more performant way to do this.
       */
      if (last && !first) {
         if (cmds[0].barriers[0] == DRM_ASAHI_BARRIER_NONE)
            cmds[0].barriers[0] = 0;

         if (cmds[0].barriers[1] == DRM_ASAHI_BARRIER_NONE)
            cmds[0].barriers[1] = 0;
      }

      struct drm_asahi_submit submit_ioctl = {
         .flags = submit->flags,
         .queue_id = submit->queue_id,
         .result_handle = submit->result_handle,
         .commands = (uint64_t)(uintptr_t)(cmds),
         .command_count = count,
         .in_syncs = first ? submit->in_syncs : 0,
         .in_sync_count = first ? submit->in_sync_count : 0,
         .out_syncs = last ? submit->out_syncs : 0,
         .out_sync_count = last ? submit->out_sync_count : 0,
      };

      VkResult result = queue_submit_single(dev, &submit_ioctl);
      if (result != VK_SUCCESS)
         return result;

      for (unsigned i = 0; i < count; ++i) {
         if (cmds[i].cmd_type == DRM_ASAHI_CMD_COMPUTE)
            submitted_cdm++;
         else if (cmds[i].cmd_type == DRM_ASAHI_CMD_RENDER)
            submitted_vdm++;
         else
            unreachable("unknown subqueue");
      }

      cmds += count;
   }

   return VK_SUCCESS;
}

static VkResult
queue_submit(struct hk_device *dev, struct hk_queue *queue,
             struct vk_queue_submit *submit)
{
   unsigned command_count = 0;

   /* Gather the number of individual commands to submit up front */
   for (unsigned i = 0; i < submit->command_buffer_count; ++i) {
      struct hk_cmd_buffer *cmdbuf =
         (struct hk_cmd_buffer *)submit->command_buffers[i];

      command_count += list_length(&cmdbuf->control_streams);
   }

   if (command_count == 0)
      return queue_submit_empty(dev, queue, submit);

   unsigned wait_count = 0;
   struct drm_asahi_sync *waits =
      alloca(submit->wait_count * sizeof(struct drm_asahi_sync));

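   /* One extra slot for the queue's own timeline signal appended below */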
   struct drm_asahi_sync *signals =
      alloca((submit->signal_count + 1) * sizeof(struct drm_asahi_sync));

   for (unsigned i = 0; i < submit->wait_count; ++i) {
      /* The kernel rejects the submission if we try to wait on the same
       * timeline semaphore at multiple points.
       *
       * TODO: Can we relax the UAPI?
       *
       * XXX: This is quadratic time.
       */
      bool skip = false;
      if (submit->waits[i].sync->flags & VK_SYNC_IS_TIMELINE) {
         uint32_t v1 = submit->waits[i].wait_value;
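         /* Keep only the wait with the largest value for this sync; on a tie,
          * keep the last occurrence so exactly one wait survives.
          */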
         for (unsigned j = 0; j < submit->wait_count; ++j) {
            uint32_t v2 = submit->waits[j].wait_value;
            if (i != j && submit->waits[i].sync == submit->waits[j].sync &&
                (v1 < v2 || (v1 == v2 && i < j))) {
               skip = true;
               break;
            }
         }

         if (skip)
            continue;
      }

      asahi_fill_sync(&waits[wait_count++], submit->waits[i].sync,
                      submit->waits[i].wait_value);
   }

   for (unsigned i = 0; i < submit->signal_count; ++i) {
      asahi_fill_sync(&signals[i], submit->signals[i].sync,
                      submit->signals[i].signal_value);
   }

   /* Signal progress on the queue itself */
   signals[submit->signal_count] = (struct drm_asahi_sync){
      .sync_type = DRM_ASAHI_SYNC_TIMELINE_SYNCOBJ,
      .handle = queue->drm.syncobj,
      .timeline_value = ++queue->drm.timeline_value,
   };

   /* Now setup the command structs */
   struct drm_asahi_command *cmds = alloca(sizeof(*cmds) * command_count);
   union drm_asahi_cmd *cmds_inner =
      alloca(sizeof(*cmds_inner) * command_count);

   unsigned cmd_it = 0;
   unsigned nr_vdm = 0, nr_cdm = 0;

   for (unsigned i = 0; i < submit->command_buffer_count; ++i) {
      struct hk_cmd_buffer *cmdbuf =
         (struct hk_cmd_buffer *)submit->command_buffers[i];

      list_for_each_entry(struct hk_cs, cs, &cmdbuf->control_streams, node) {
         assert(cmd_it < command_count);

         struct drm_asahi_command cmd = {
            .cmd_buffer = (uint64_t)(uintptr_t)&cmds_inner[cmd_it],
            .result_offset = 0 /* TODO */,
            .result_size = 0 /* TODO */,
            /* Barrier on previous command */
            .barriers = {nr_vdm, nr_cdm},
         };

         if (cs->type == HK_CS_CDM) {
            cmd.cmd_type = DRM_ASAHI_CMD_COMPUTE;
            cmd.cmd_buffer_size = sizeof(struct drm_asahi_cmd_compute);
            nr_cdm++;

            asahi_fill_cdm_command(dev, cs, &cmds_inner[cmd_it].compute);
         } else {
            assert(cs->type == HK_CS_VDM);
            cmd.cmd_type = DRM_ASAHI_CMD_RENDER;
            cmd.cmd_buffer_size = sizeof(struct drm_asahi_cmd_render);
            nr_vdm++;

            asahi_fill_vdm_command(dev, cs, &cmds_inner[cmd_it].render);
         }

         cmds[cmd_it++] = cmd;
      }
   }

   assert(cmd_it == command_count);

   if (dev->dev.debug & AGX_DBG_TRACE) {
      for (unsigned i = 0; i < command_count; ++i) {
         if (cmds[i].cmd_type == DRM_ASAHI_CMD_COMPUTE) {
            agxdecode_drm_cmd_compute(dev->dev.agxdecode, &dev->dev.params,
                                      &cmds_inner[i].compute, true);
         } else {
            assert(cmds[i].cmd_type == DRM_ASAHI_CMD_RENDER);
            agxdecode_drm_cmd_render(dev->dev.agxdecode, &dev->dev.params,
                                     &cmds_inner[i].render, true);
         }
      }

      agxdecode_image_heap(dev->dev.agxdecode, dev->images.bo->va->addr,
                           dev->images.alloc);

      agxdecode_next_frame();
   }

   struct drm_asahi_submit submit_ioctl = {
      .flags = 0,
      .queue_id = queue->drm.id,
      .result_handle = 0 /* TODO */,
      .in_sync_count = wait_count,
      .out_sync_count = submit->signal_count + 1,
      .command_count = command_count,
      .in_syncs = (uint64_t)(uintptr_t)(waits),
      .out_syncs = (uint64_t)(uintptr_t)(signals),
      .commands = (uint64_t)(uintptr_t)(cmds),
   };

   if (command_count <= MAX_COMMANDS_PER_SUBMIT)
      return queue_submit_single(&dev->dev, &submit_ioctl);
   else
      return queue_submit_looped(&dev->dev, &submit_ioctl);
}

static VkResult
hk_queue_submit(struct vk_queue *vk_queue, struct vk_queue_submit *submit)
{
   struct hk_queue *queue = container_of(vk_queue, struct hk_queue, vk);
   struct hk_device *dev = hk_queue_device(queue);

   if (vk_queue_is_lost(&queue->vk))
      return VK_ERROR_DEVICE_LOST;

   VkResult result = queue_submit(dev, queue, submit);
   if (result != VK_SUCCESS)
      return vk_queue_set_lost(&queue->vk, "Submit failed");

   return VK_SUCCESS;
}

VkResult
hk_queue_init(struct hk_device *dev, struct hk_queue *queue,
              const VkDeviceQueueCreateInfo *pCreateInfo,
              uint32_t index_in_family)
{
   struct hk_physical_device *pdev = hk_device_physical(dev);
   VkResult result;

   assert(pCreateInfo->queueFamilyIndex < pdev->queue_family_count);

   const VkDeviceQueueGlobalPriorityCreateInfoKHR *priority_info =
      vk_find_struct_const(pCreateInfo->pNext,
                           DEVICE_QUEUE_GLOBAL_PRIORITY_CREATE_INFO_KHR);
   const enum VkQueueGlobalPriorityKHR global_priority =
      priority_info ? priority_info->globalPriority
                    : VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR;

   if (global_priority != VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR) {
      return VK_ERROR_INITIALIZATION_FAILED;
   }

   result = vk_queue_init(&queue->vk, &dev->vk, pCreateInfo, index_in_family);
   if (result != VK_SUCCESS)
      return result;

   queue->vk.driver_submit = hk_queue_submit;

   queue->drm.id = agx_create_command_queue(&dev->dev,
                                            DRM_ASAHI_QUEUE_CAP_RENDER |
                                               DRM_ASAHI_QUEUE_CAP_BLIT |
                                               DRM_ASAHI_QUEUE_CAP_COMPUTE,
                                            2);

   if (drmSyncobjCreate(dev->dev.fd, 0, &queue->drm.syncobj)) {
      mesa_loge("drmSyncobjCreate() failed %d\n", errno);
      agx_destroy_command_queue(&dev->dev, queue->drm.id);
      vk_queue_finish(&queue->vk);

      return vk_errorf(dev, VK_ERROR_OUT_OF_HOST_MEMORY,
                       "DRM_IOCTL_SYNCOBJ_CREATE failed: %m");
   }

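   /* Seed the timeline so point 1 is already signalled; presumably this gives
    * early waits and empty submits a valid signalled point to transfer from.
    */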
   uint64_t initial_value = 1;
   if (drmSyncobjTimelineSignal(dev->dev.fd, &queue->drm.syncobj,
                                &initial_value, 1)) {
      hk_queue_finish(dev, queue);
      return vk_errorf(dev, VK_ERROR_OUT_OF_HOST_MEMORY,
                       "DRM_IOCTL_TIMELINE_SYNCOBJ_SIGNAL failed: %m");
   }

   return VK_SUCCESS;
}

void
hk_queue_finish(struct hk_device *dev, struct hk_queue *queue)
{
   drmSyncobjDestroy(dev->dev.fd, queue->drm.syncobj);
   agx_destroy_command_queue(&dev->dev, queue->drm.id);
   vk_queue_finish(&queue->vk);
}
609