/*
 * Copyright 2024 Valve Corporation
 * Copyright 2024 Alyssa Rosenzweig
 * Copyright 2022-2023 Collabora Ltd. and Red Hat Inc.
 * SPDX-License-Identifier: MIT
 */
#include "hk_queue.h"

#include "agx_bo.h"
#include "agx_device.h"
#include "agx_pack.h"
#include "decode.h"
#include "hk_cmd_buffer.h"
#include "hk_device.h"
#include "hk_physical_device.h"

#include <xf86drm.h>
#include "asahi/lib/unstable_asahi_drm.h"
#include "util/list.h"
#include "vulkan/vulkan_core.h"

#include "vk_drm_syncobj.h"
#include "vk_sync.h"

/*
 * We need to specially handle submits with no control streams. The kernel
 * can't accept empty submits, but we can end up here in Vulkan for
 * synchronization purposes only. Rather than submit a no-op job (slow),
 * we simply tie the fences together.
 */
static VkResult
queue_submit_empty(struct hk_device *dev, struct hk_queue *queue,
                   struct vk_queue_submit *submit)
{
   int fd = dev->dev.fd;

   /* Transfer the waits into the queue timeline. */
   for (unsigned i = 0; i < submit->wait_count; ++i) {
      struct vk_sync_wait *wait = &submit->waits[i];

      assert(vk_sync_type_is_drm_syncobj(wait->sync->type));
      const struct vk_drm_syncobj *syncobj = vk_sync_as_drm_syncobj(wait->sync);

      drmSyncobjTransfer(fd, queue->drm.syncobj, ++queue->drm.timeline_value,
                         syncobj->syncobj, wait->wait_value, 0);
   }

   /* Transfer the queue timeline into each out fence. They will all be
    * signalled when we reach this point.
    */
   for (unsigned i = 0; i < submit->signal_count; ++i) {
      struct vk_sync_signal *signal = &submit->signals[i];

      assert(vk_sync_type_is_drm_syncobj(signal->sync->type));
      const struct vk_drm_syncobj *syncobj =
         vk_sync_as_drm_syncobj(signal->sync);

      drmSyncobjTransfer(fd, syncobj->syncobj, signal->signal_value,
                         queue->drm.syncobj, queue->drm.timeline_value, 0);
   }

   return VK_SUCCESS;
}

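/* Describe a compute (CDM) control stream to the kernel: the encoder bounds,
 * the device-global sampler heap, and, when scratch is in use, the helper
 * program configuration. All of this comes from state already recorded on the
 * hk_cs and hk_device.
 */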
static void
asahi_fill_cdm_command(struct hk_device *dev, struct hk_cs *cs,
                       struct drm_asahi_cmd_compute *cmd)
{
   size_t len = cs->stream_linked ? 65536 /* XXX */ : (cs->current - cs->start);

   *cmd = (struct drm_asahi_cmd_compute){
      .encoder_ptr = cs->addr,
      .encoder_end = cs->addr + len,

      .sampler_array = dev->samplers.table.bo->va->addr,
      .sampler_count = dev->samplers.table.alloc,
      .sampler_max = dev->samplers.table.alloc + 1,

      .usc_base = dev->dev.shader_base,

      .encoder_id = agx_get_global_id(&dev->dev),
      .cmd_id = agx_get_global_id(&dev->dev),
      .unk_mask = 0xffffffff,
   };

   if (cs->scratch.cs.main || cs->scratch.cs.preamble) {
      cmd->helper_arg = dev->scratch.cs.buf->va->addr;
      cmd->helper_cfg = cs->scratch.cs.preamble << 16;
      cmd->helper_program = dev->dev.helper->va->addr | 1;
   }
}

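/* Describe a render (VDM) control stream to the kernel. Most fields are copied
 * from the render state captured at command buffer recording time: framebuffer
 * dimensions, zls (depth/stencil load/store) control, background/EOT
 * pipelines, tilebuffer layout, and per-stage scratch/helper configuration.
 */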
static void
asahi_fill_vdm_command(struct hk_device *dev, struct hk_cs *cs,
                       struct drm_asahi_cmd_render *c)
{
#if 0
   bool clear_pipeline_textures =
      agx_tilebuffer_spills(&batch->tilebuffer_layout);

   for (unsigned i = 0; i < batch->key.nr_cbufs; ++i) {
      struct pipe_surface *surf = batch->key.cbufs[i];

      clear_pipeline_textures |=
         surf && surf->texture && !(batch->clear & (PIPE_CLEAR_COLOR0 << i));
   }

#endif
   unsigned cmd_ta_id = agx_get_global_id(&dev->dev);
   unsigned cmd_3d_id = agx_get_global_id(&dev->dev);
   unsigned encoder_id = agx_get_global_id(&dev->dev);

   memset(c, 0, sizeof(*c));

   c->encoder_ptr = cs->addr;
   c->encoder_id = encoder_id;
   c->cmd_3d_id = cmd_3d_id;
   c->cmd_ta_id = cmd_ta_id;
   c->ppp_ctrl = 0x202;

   c->fragment_usc_base = dev->dev.shader_base;
   c->vertex_usc_base = c->fragment_usc_base;

   c->fb_width = cs->cr.width;
   c->fb_height = cs->cr.height;

   c->isp_bgobjdepth = cs->cr.isp_bgobjdepth;
   c->isp_bgobjvals = cs->cr.isp_bgobjvals;

   static_assert(sizeof(c->zls_ctrl) == sizeof(cs->cr.zls_control));
   memcpy(&c->zls_ctrl, &cs->cr.zls_control, sizeof(cs->cr.zls_control));

   c->depth_dimensions = (cs->cr.width - 1) | ((cs->cr.height - 1) << 15);

   c->depth_buffer_load = cs->cr.depth.buffer;
   c->depth_buffer_store = cs->cr.depth.buffer;
   c->depth_buffer_partial = cs->cr.depth.buffer;

   c->depth_buffer_load_stride = cs->cr.depth.stride;
   c->depth_buffer_store_stride = cs->cr.depth.stride;
   c->depth_buffer_partial_stride = cs->cr.depth.stride;

   c->depth_meta_buffer_load = cs->cr.depth.meta;
   c->depth_meta_buffer_store = cs->cr.depth.meta;
   c->depth_meta_buffer_partial = cs->cr.depth.meta;

   c->depth_meta_buffer_load_stride = cs->cr.depth.stride;
   c->depth_meta_buffer_store_stride = cs->cr.depth.meta_stride;
   c->depth_meta_buffer_partial_stride = cs->cr.depth.meta_stride;

   c->stencil_buffer_load = cs->cr.stencil.buffer;
   c->stencil_buffer_store = cs->cr.stencil.buffer;
   c->stencil_buffer_partial = cs->cr.stencil.buffer;

   c->stencil_buffer_load_stride = cs->cr.stencil.stride;
   c->stencil_buffer_store_stride = cs->cr.stencil.stride;
   c->stencil_buffer_partial_stride = cs->cr.stencil.stride;

   c->stencil_meta_buffer_load = cs->cr.stencil.meta;
   c->stencil_meta_buffer_store = cs->cr.stencil.meta;
   c->stencil_meta_buffer_partial = cs->cr.stencil.meta;

   c->stencil_meta_buffer_load_stride = cs->cr.stencil.stride;
   c->stencil_meta_buffer_store_stride = cs->cr.stencil.meta_stride;
   c->stencil_meta_buffer_partial_stride = cs->cr.stencil.meta_stride;

   c->iogpu_unk_214 = cs->cr.iogpu_unk_214;

#if 0
   if (clear_pipeline_textures)
      c->flags |= ASAHI_RENDER_SET_WHEN_RELOADING_Z_OR_S;
   else
      c->flags |= ASAHI_RENDER_NO_CLEAR_PIPELINE_TEXTURES;

   if (zres && !(batch->clear & PIPE_CLEAR_DEPTH))
      c->flags |= ASAHI_RENDER_SET_WHEN_RELOADING_Z_OR_S;

   if (sres && !(batch->clear & PIPE_CLEAR_STENCIL))
      c->flags |= ASAHI_RENDER_SET_WHEN_RELOADING_Z_OR_S;
#endif

   if (dev->dev.debug & AGX_DBG_NOCLUSTER)
      c->flags |= ASAHI_RENDER_NO_VERTEX_CLUSTERING;

#if 0
   /* XXX is this for just MSAA+Z+S or MSAA+(Z|S)? */
   if (tib->nr_samples > 1 && framebuffer->zsbuf)
      c->flags |= ASAHI_RENDER_MSAA_ZS;
#endif

   c->utile_width = cs->tib.tile_size.width;
   c->utile_height = cs->tib.tile_size.height;

   /* Can be 0 for attachmentless rendering with no draws */
   c->samples = MAX2(cs->tib.nr_samples, 1);
   c->layers = cs->cr.layers;

   c->ppp_multisamplectl = cs->ppp_multisamplectl;
   c->sample_size = cs->tib.sample_size_B;
   c->tib_blocks = ALIGN_POT(agx_tilebuffer_total_size(&cs->tib), 2048) / 2048;

   float tan_60 = 1.732051f;
   c->merge_upper_x = fui(tan_60 / cs->cr.width);
   c->merge_upper_y = fui(tan_60 / cs->cr.height);

   c->load_pipeline = cs->cr.bg.main.usc | 4;
   c->store_pipeline = cs->cr.eot.main.usc | 4;
   c->partial_reload_pipeline = cs->cr.bg.partial.usc | 4;
   c->partial_store_pipeline = cs->cr.eot.partial.usc | 4;

   memcpy(&c->load_pipeline_bind, &cs->cr.bg.main.counts,
          sizeof(struct agx_counts_packed));

   memcpy(&c->store_pipeline_bind, &cs->cr.eot.main.counts,
          sizeof(struct agx_counts_packed));

   memcpy(&c->partial_reload_pipeline_bind, &cs->cr.bg.partial.counts,
          sizeof(struct agx_counts_packed));

   memcpy(&c->partial_store_pipeline_bind, &cs->cr.eot.partial.counts,
          sizeof(struct agx_counts_packed));

   c->scissor_array = cs->uploaded_scissor;
   c->depth_bias_array = cs->uploaded_zbias;

   c->vertex_sampler_array = dev->samplers.table.bo->va->addr;
   c->vertex_sampler_count = dev->samplers.table.alloc;
   c->vertex_sampler_max = dev->samplers.table.alloc + 1;

   c->fragment_sampler_array = c->vertex_sampler_array;
   c->fragment_sampler_count = c->vertex_sampler_count;
   c->fragment_sampler_max = c->vertex_sampler_max;

   c->visibility_result_buffer = dev->occlusion_queries.bo->va->addr;

   /* If a tile is empty, we do not want to process it, as the redundant
    * roundtrip of memory-->tilebuffer-->memory wastes a tremendous amount of
    * memory bandwidth. Any draw marks a tile as non-empty, so we only need to
    * process empty tiles if the background+EOT programs have a side effect.
    * This is the case exactly when there is an attachment we are clearing (some
    * attachment A in clear and in resolve <==> non-empty intersection).
    *
    * This case matters a LOT for performance in workloads that split batches.
    */
   if (true /* TODO */)
      c->flags |= ASAHI_RENDER_PROCESS_EMPTY_TILES;

   if (cs->scratch.vs.main || cs->scratch.vs.preamble) {
      c->flags |= ASAHI_RENDER_VERTEX_SPILLS;
      c->vertex_helper_arg = dev->scratch.vs.buf->va->addr;
      c->vertex_helper_cfg = cs->scratch.vs.preamble << 16;
      c->vertex_helper_program = dev->dev.helper->va->addr | 1;
   }

   if (cs->scratch.fs.main || cs->scratch.fs.preamble) {
      c->fragment_helper_arg = dev->scratch.fs.buf->va->addr;
      c->fragment_helper_cfg = cs->scratch.fs.preamble << 16;
      c->fragment_helper_program = dev->dev.helper->va->addr | 1;
   }
}

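/* Convert a vk_sync wait or signal into the kernel's drm_asahi_sync
 * representation, distinguishing binary from timeline syncobjs. Only DRM
 * syncobj-backed sync types are supported.
 */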
static void
asahi_fill_sync(struct drm_asahi_sync *sync, struct vk_sync *vk_sync,
                uint64_t value)
{
   if (unlikely(!vk_sync_type_is_drm_syncobj(vk_sync->type))) {
      unreachable("Unsupported sync type");
      return;
   }

   const struct vk_drm_syncobj *syncobj = vk_sync_as_drm_syncobj(vk_sync);
   *sync = (struct drm_asahi_sync){.handle = syncobj->syncobj};

   if (vk_sync->flags & VK_SYNC_IS_TIMELINE) {
      sync->sync_type = DRM_ASAHI_SYNC_TIMELINE_SYNCOBJ;
      sync->timeline_value = value;
   } else {
      sync->sync_type = DRM_ASAHI_SYNC_SYNCOBJ;
   }
}

union drm_asahi_cmd {
   struct drm_asahi_cmd_compute compute;
   struct drm_asahi_cmd_render render;
};

/* XXX: Batching multiple commands per submission is causing rare (7ppm) flakes
 * on the CTS once lossless compression is enabled. This needs to be
 * investigated before we can reenable this mechanism. We are likely missing a
 * cache flush or barrier somewhere.
 *
 * TODO: I think the actual maximum is 64. Can we query it from the kernel?
 */
#define MAX_COMMANDS_PER_SUBMIT (1)

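/* Issue one submit ioctl for a batch of commands. Failures are currently
 * fatal (see the XXX below).
 */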
static VkResult
queue_submit_single(struct agx_device *dev, struct drm_asahi_submit *submit)
{
   int ret = dev->ops.submit(dev, submit, 0);

   /* XXX: don't trap */
   if (ret) {
      fprintf(stderr, "DRM_IOCTL_ASAHI_SUBMIT failed: %m\n");
      assert(0);
   }

   return VK_SUCCESS;
}

/*
 * The kernel/firmware jointly impose a limit on commands per submit ioctl, but
 * we can build up arbitrarily large command buffers. We handle this here by
 * looping the ioctl, submitting slices of the command buffers that are within
 * bounds.
 */
static VkResult
queue_submit_looped(struct agx_device *dev, struct drm_asahi_submit *submit)
{
   struct drm_asahi_command *cmds = (void *)submit->commands;
   unsigned commands_remaining = submit->command_count;
   unsigned submitted_vdm = 0, submitted_cdm = 0;

   while (commands_remaining) {
      bool first = commands_remaining == submit->command_count;
      bool last = commands_remaining <= MAX_COMMANDS_PER_SUBMIT;

      unsigned count = MIN2(commands_remaining, MAX_COMMANDS_PER_SUBMIT);
      commands_remaining -= count;

      assert(!last || commands_remaining == 0);
      assert(count > 0);

      /* We need to fix up the barriers since barriers are ioctl-relative */
      for (unsigned i = 0; i < count; ++i) {
         assert(cmds[i].barriers[0] >= submitted_vdm);
         assert(cmds[i].barriers[1] >= submitted_cdm);

         cmds[i].barriers[0] -= submitted_vdm;
         cmds[i].barriers[1] -= submitted_cdm;
      }

      /* We can't signal the out-syncobjs until all prior work finishes. Since
       * only the last ioctl will signal, make sure it waits on prior ioctls.
       *
       * TODO: there might be a more performant way to do this.
       */
      if (last && !first) {
         if (cmds[0].barriers[0] == DRM_ASAHI_BARRIER_NONE)
            cmds[0].barriers[0] = 0;

         if (cmds[0].barriers[1] == DRM_ASAHI_BARRIER_NONE)
            cmds[0].barriers[1] = 0;
      }

      struct drm_asahi_submit submit_ioctl = {
         .flags = submit->flags,
         .queue_id = submit->queue_id,
         .result_handle = submit->result_handle,
         .commands = (uint64_t)(uintptr_t)(cmds),
         .command_count = count,
         .in_syncs = first ? submit->in_syncs : 0,
         .in_sync_count = first ? submit->in_sync_count : 0,
         .out_syncs = last ? submit->out_syncs : 0,
         .out_sync_count = last ? submit->out_sync_count : 0,
      };

      VkResult result = queue_submit_single(dev, &submit_ioctl);
      if (result != VK_SUCCESS)
         return result;

      for (unsigned i = 0; i < count; ++i) {
         if (cmds[i].cmd_type == DRM_ASAHI_CMD_COMPUTE)
            submitted_cdm++;
         else if (cmds[i].cmd_type == DRM_ASAHI_CMD_RENDER)
            submitted_vdm++;
         else
            unreachable("unknown subqueue");
      }

      cmds += count;
   }

   return VK_SUCCESS;
}

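/* Build and submit the kernel commands for a vk_queue_submit: count the
 * control streams, translate waits/signals into drm_asahi_syncs (plus the
 * queue's own timeline syncobj), fill one drm_asahi_command per control
 * stream, and hand everything to the kernel, splitting into multiple ioctls
 * when the command count exceeds MAX_COMMANDS_PER_SUBMIT.
 */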
static VkResult
queue_submit(struct hk_device *dev, struct hk_queue *queue,
             struct vk_queue_submit *submit)
{
   unsigned command_count = 0;

   /* Gather the number of individual commands to submit up front */
   for (unsigned i = 0; i < submit->command_buffer_count; ++i) {
      struct hk_cmd_buffer *cmdbuf =
         (struct hk_cmd_buffer *)submit->command_buffers[i];

      command_count += list_length(&cmdbuf->control_streams);
   }

   if (command_count == 0)
      return queue_submit_empty(dev, queue, submit);

   unsigned wait_count = 0;
   struct drm_asahi_sync *waits =
      alloca(submit->wait_count * sizeof(struct drm_asahi_sync));

   struct drm_asahi_sync *signals =
      alloca((submit->signal_count + 1) * sizeof(struct drm_asahi_sync));

   for (unsigned i = 0; i < submit->wait_count; ++i) {
      /* The kernel rejects the submission if we try to wait on the same
       * timeline semaphore at multiple points.
       *
       * TODO: Can we relax the UAPI?
       *
       * XXX: This is quadratic time.
       */
      bool skip = false;
      if (submit->waits[i].sync->flags & VK_SYNC_IS_TIMELINE) {
         uint64_t v1 = submit->waits[i].wait_value;
         for (unsigned j = 0; j < submit->wait_count; ++j) {
            uint64_t v2 = submit->waits[j].wait_value;
            if (i != j && submit->waits[i].sync == submit->waits[j].sync &&
                (v1 < v2 || (v1 == v2 && i < j))) {
               skip = true;
               break;
            }
         }

         if (skip)
            continue;
      }

      asahi_fill_sync(&waits[wait_count++], submit->waits[i].sync,
                      submit->waits[i].wait_value);
   }

   for (unsigned i = 0; i < submit->signal_count; ++i) {
      asahi_fill_sync(&signals[i], submit->signals[i].sync,
                      submit->signals[i].signal_value);
   }

   /* Signal progress on the queue itself */
   signals[submit->signal_count] = (struct drm_asahi_sync){
      .sync_type = DRM_ASAHI_SYNC_TIMELINE_SYNCOBJ,
      .handle = queue->drm.syncobj,
      .timeline_value = ++queue->drm.timeline_value,
   };

   /* Now setup the command structs */
   struct drm_asahi_command *cmds = alloca(sizeof(*cmds) * command_count);
   union drm_asahi_cmd *cmds_inner =
      alloca(sizeof(*cmds_inner) * command_count);

   unsigned cmd_it = 0;
   unsigned nr_vdm = 0, nr_cdm = 0;

   for (unsigned i = 0; i < submit->command_buffer_count; ++i) {
      struct hk_cmd_buffer *cmdbuf =
         (struct hk_cmd_buffer *)submit->command_buffers[i];

      list_for_each_entry(struct hk_cs, cs, &cmdbuf->control_streams, node) {
         assert(cmd_it < command_count);

         struct drm_asahi_command cmd = {
            .cmd_buffer = (uint64_t)(uintptr_t)&cmds_inner[cmd_it],
            .result_offset = 0 /* TODO */,
            .result_size = 0 /* TODO */,
            /* Barrier on previous command */
            .barriers = {nr_vdm, nr_cdm},
         };

         if (cs->type == HK_CS_CDM) {
            cmd.cmd_type = DRM_ASAHI_CMD_COMPUTE;
            cmd.cmd_buffer_size = sizeof(struct drm_asahi_cmd_compute);
            nr_cdm++;

            asahi_fill_cdm_command(dev, cs, &cmds_inner[cmd_it].compute);
         } else {
            assert(cs->type == HK_CS_VDM);
            cmd.cmd_type = DRM_ASAHI_CMD_RENDER;
            cmd.cmd_buffer_size = sizeof(struct drm_asahi_cmd_render);
            nr_vdm++;

            asahi_fill_vdm_command(dev, cs, &cmds_inner[cmd_it].render);
         }

         cmds[cmd_it++] = cmd;
      }
   }

   assert(cmd_it == command_count);

   if (dev->dev.debug & AGX_DBG_TRACE) {
      for (unsigned i = 0; i < command_count; ++i) {
         if (cmds[i].cmd_type == DRM_ASAHI_CMD_COMPUTE) {
            agxdecode_drm_cmd_compute(dev->dev.agxdecode, &dev->dev.params,
                                      &cmds_inner[i].compute, true);
         } else {
            assert(cmds[i].cmd_type == DRM_ASAHI_CMD_RENDER);
            agxdecode_drm_cmd_render(dev->dev.agxdecode, &dev->dev.params,
                                     &cmds_inner[i].render, true);
         }
      }

      agxdecode_image_heap(dev->dev.agxdecode, dev->images.bo->va->addr,
                           dev->images.alloc);

      agxdecode_next_frame();
   }

   struct drm_asahi_submit submit_ioctl = {
      .flags = 0,
      .queue_id = queue->drm.id,
      .result_handle = 0 /* TODO */,
      .in_sync_count = wait_count,
      .out_sync_count = submit->signal_count + 1,
      .command_count = command_count,
      .in_syncs = (uint64_t)(uintptr_t)(waits),
      .out_syncs = (uint64_t)(uintptr_t)(signals),
      .commands = (uint64_t)(uintptr_t)(cmds),
   };

   if (command_count <= MAX_COMMANDS_PER_SUBMIT)
      return queue_submit_single(&dev->dev, &submit_ioctl);
   else
      return queue_submit_looped(&dev->dev, &submit_ioctl);
}

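/* vk_queue submit hook. If the underlying submission fails, the queue is
 * marked lost so subsequent submits return VK_ERROR_DEVICE_LOST.
 */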
static VkResult
hk_queue_submit(struct vk_queue *vk_queue, struct vk_queue_submit *submit)
{
   struct hk_queue *queue = container_of(vk_queue, struct hk_queue, vk);
   struct hk_device *dev = hk_queue_device(queue);

   if (vk_queue_is_lost(&queue->vk))
      return VK_ERROR_DEVICE_LOST;

   VkResult result = queue_submit(dev, queue, submit);
   if (result != VK_SUCCESS)
      return vk_queue_set_lost(&queue->vk, "Submit failed");

   return VK_SUCCESS;
}

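/* Initialize the queue: create a kernel command queue capable of render,
 * blit, and compute, plus a timeline syncobj (starting at 1) used to track
 * the queue's progress.
 */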
VkResult
hk_queue_init(struct hk_device *dev, struct hk_queue *queue,
              const VkDeviceQueueCreateInfo *pCreateInfo,
              uint32_t index_in_family)
{
   struct hk_physical_device *pdev = hk_device_physical(dev);
   VkResult result;

   assert(pCreateInfo->queueFamilyIndex < pdev->queue_family_count);

   const VkDeviceQueueGlobalPriorityCreateInfoKHR *priority_info =
      vk_find_struct_const(pCreateInfo->pNext,
                           DEVICE_QUEUE_GLOBAL_PRIORITY_CREATE_INFO_KHR);
   const enum VkQueueGlobalPriorityKHR global_priority =
      priority_info ? priority_info->globalPriority
                    : VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR;

   if (global_priority != VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR) {
      return VK_ERROR_INITIALIZATION_FAILED;
   }

   result = vk_queue_init(&queue->vk, &dev->vk, pCreateInfo, index_in_family);
   if (result != VK_SUCCESS)
      return result;

   queue->vk.driver_submit = hk_queue_submit;

   queue->drm.id = agx_create_command_queue(&dev->dev,
                                            DRM_ASAHI_QUEUE_CAP_RENDER |
                                               DRM_ASAHI_QUEUE_CAP_BLIT |
                                               DRM_ASAHI_QUEUE_CAP_COMPUTE,
                                            2);

   if (drmSyncobjCreate(dev->dev.fd, 0, &queue->drm.syncobj)) {
      mesa_loge("drmSyncobjCreate() failed %d\n", errno);
      agx_destroy_command_queue(&dev->dev, queue->drm.id);
      vk_queue_finish(&queue->vk);

      return vk_errorf(dev, VK_ERROR_OUT_OF_HOST_MEMORY,
                       "DRM_IOCTL_SYNCOBJ_CREATE failed: %m");
   }

   uint64_t initial_value = 1;
   if (drmSyncobjTimelineSignal(dev->dev.fd, &queue->drm.syncobj,
                                &initial_value, 1)) {
      hk_queue_finish(dev, queue);
      return vk_errorf(dev, VK_ERROR_OUT_OF_HOST_MEMORY,
                       "DRM_IOCTL_TIMELINE_SYNCOBJ_SIGNAL failed: %m");
   }

   return VK_SUCCESS;
}

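/* Destroy the queue's syncobj and kernel command queue, then finish the
 * common vk_queue state.
 */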
void
hk_queue_finish(struct hk_device *dev, struct hk_queue *queue)
{
   drmSyncobjDestroy(dev->dev.fd, queue->drm.syncobj);
   agx_destroy_command_queue(&dev->dev, queue->drm.id);
   vk_queue_finish(&queue->vk);
}