1 /*
2 * Copyright © 2020 Google, Inc.
3 * SPDX-License-Identifier: MIT
4 */
5
6 #include "tu_knl.h"
7
8 #include <errno.h>
9 #include <fcntl.h>
10 #include <poll.h>
11 #include <stdint.h>
12 #include <sys/ioctl.h>
13 #include <sys/mman.h>
14 #include <linux/dma-heap.h>
15
16 #include "msm_kgsl.h"
17 #include "ion/ion.h"
18 #include "ion/ion_4.19.h"
19
20 #include "vk_util.h"
21
22 #include "util/os_file.h"
23 #include "util/u_debug.h"
24 #include "util/u_vector.h"
25 #include "util/libsync.h"
26 #include "util/timespec.h"
27
28 #include "tu_cmd_buffer.h"
29 #include "tu_cs.h"
30 #include "tu_device.h"
31 #include "tu_dynamic_rendering.h"
32 #include "tu_rmv.h"
33
34 /* ION_HEAP(ION_SYSTEM_HEAP_ID) */
35 #define KGSL_ION_SYSTEM_HEAP_MASK (1u << 25)
36
37
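/* ioctl() wrapper that retries automatically when the call is interrupted
 * (EINTR) or transiently fails (EAGAIN).
 */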
38 static int
39 safe_ioctl(int fd, unsigned long request, void *arg)
40 {
41 int ret;
42
43 do {
44 ret = ioctl(fd, request, arg);
45 } while (ret == -1 && (errno == EINTR || errno == EAGAIN));
46
47 return ret;
48 }
49
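/* A "submit queue" on kgsl is a draw context; create one and return its id.
 * Note that the priority argument is currently ignored.
 */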
50 static int
51 kgsl_submitqueue_new(struct tu_device *dev,
52 int priority,
53 uint32_t *queue_id)
54 {
55 struct kgsl_drawctxt_create req = {
56 .flags = KGSL_CONTEXT_SAVE_GMEM |
57 KGSL_CONTEXT_NO_GMEM_ALLOC |
58 KGSL_CONTEXT_PREAMBLE,
59 };
60
61 int ret = safe_ioctl(dev->physical_device->local_fd, IOCTL_KGSL_DRAWCTXT_CREATE, &req);
62 if (ret)
63 return ret;
64
65 *queue_id = req.drawctxt_id;
66
67 return 0;
68 }
69
70 static void
71 kgsl_submitqueue_close(struct tu_device *dev, uint32_t queue_id)
72 {
73 struct kgsl_drawctxt_destroy req = {
74 .drawctxt_id = queue_id,
75 };
76
77 safe_ioctl(dev->physical_device->local_fd, IOCTL_KGSL_DRAWCTXT_DESTROY, &req);
78 }
79
80 static void kgsl_bo_finish(struct tu_device *dev, struct tu_bo *bo);
81
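/* The following helpers allocate exportable memory from a DMA allocator
 * (dma-heap, new-style ION, or legacy ION, depending on what the system
 * provides) and then import the resulting dma-buf fd as a regular BO.
 */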
82 static VkResult
83 bo_init_new_dmaheap(struct tu_device *dev, struct tu_bo **out_bo, uint64_t size,
84 enum tu_bo_alloc_flags flags)
85 {
86 struct dma_heap_allocation_data alloc = {
87 .len = size,
88 .fd_flags = O_RDWR | O_CLOEXEC,
89 };
90
91 int ret;
92 ret = safe_ioctl(dev->physical_device->kgsl_dma_fd, DMA_HEAP_IOCTL_ALLOC,
93 &alloc);
94
95 if (ret) {
96 return vk_errorf(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY,
97 "DMA_HEAP_IOCTL_ALLOC failed (%s)", strerror(errno));
98 }
99
100 return tu_bo_init_dmabuf(dev, out_bo, -1, alloc.fd);
101 }
102
103 static VkResult
104 bo_init_new_ion(struct tu_device *dev, struct tu_bo **out_bo, uint64_t size,
105 enum tu_bo_alloc_flags flags)
106 {
107 struct ion_new_allocation_data alloc = {
108 .len = size,
109 .heap_id_mask = KGSL_ION_SYSTEM_HEAP_MASK,
110 .flags = 0,
111 .fd = -1,
112 };
113
114 int ret;
115 ret = safe_ioctl(dev->physical_device->kgsl_dma_fd, ION_IOC_NEW_ALLOC, &alloc);
116 if (ret) {
117 return vk_errorf(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY,
118 "ION_IOC_NEW_ALLOC failed (%s)", strerror(errno));
119 }
120
121 return tu_bo_init_dmabuf(dev, out_bo, -1, alloc.fd);
122 }
123
124 static VkResult
125 bo_init_new_ion_legacy(struct tu_device *dev, struct tu_bo **out_bo, uint64_t size,
126 enum tu_bo_alloc_flags flags)
127 {
128 struct ion_allocation_data alloc = {
129 .len = size,
130 .align = 4096,
131 .heap_id_mask = KGSL_ION_SYSTEM_HEAP_MASK,
132 .flags = 0,
133 .handle = -1,
134 };
135
136 int ret;
137 ret = safe_ioctl(dev->physical_device->kgsl_dma_fd, ION_IOC_ALLOC, &alloc);
138 if (ret) {
139 return vk_errorf(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY,
140 "ION_IOC_ALLOC failed (%s)", strerror(errno));
141 }
142
143 struct ion_fd_data share = {
144 .handle = alloc.handle,
145 .fd = -1,
146 };
147
148 ret = safe_ioctl(dev->physical_device->kgsl_dma_fd, ION_IOC_SHARE, &share);
149 if (ret) {
150 return vk_errorf(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY,
151 "ION_IOC_SHARE failed (%s)", strerror(errno));
152 }
153
154 struct ion_handle_data free = {
155 .handle = alloc.handle,
156 };
157 ret = safe_ioctl(dev->physical_device->kgsl_dma_fd, ION_IOC_FREE, &free);
158 if (ret) {
159 return vk_errorf(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY,
160 "ION_IOC_FREE failed (%s)", strerror(errno));
161 }
162
163 return tu_bo_init_dmabuf(dev, out_bo, -1, share.fd);
164 }
165
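/* Allocate a BO. Shareable BOs go through the dma-heap/ION paths above so
 * they can later be exported as dma-bufs; everything else is allocated with
 * IOCTL_KGSL_GPUMEM_ALLOC_ID. For TU_BO_ALLOC_REPLAYABLE allocations the BO
 * is created with KGSL_MEMFLAGS_USE_CPU_MAP and mmap()ed, so the GPU address
 * mirrors the CPU mapping (SVM) and a fixed client_iova can be honored.
 */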
166 static VkResult
167 kgsl_bo_init(struct tu_device *dev,
168 struct vk_object_base *base,
169 struct tu_bo **out_bo,
170 uint64_t size,
171 uint64_t client_iova,
172 VkMemoryPropertyFlags mem_property,
173 enum tu_bo_alloc_flags flags,
174 const char *name)
175 {
176 if (flags & TU_BO_ALLOC_SHAREABLE) {
177 /* The Vulkan spec doesn't forbid allocating exportable memory with a
178 * fixed address, only imported memory, but on kgsl we can't sensibly
179 * implement it so just always reject it.
180 */
181 if (client_iova) {
182 return vk_errorf(dev, VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS,
183 "cannot allocate an exportable BO with a fixed address");
184 }
185
186 switch(dev->physical_device->kgsl_dma_type) {
187 case TU_KGSL_DMA_TYPE_DMAHEAP:
188 return bo_init_new_dmaheap(dev, out_bo, size, flags);
189 case TU_KGSL_DMA_TYPE_ION:
190 return bo_init_new_ion(dev, out_bo, size, flags);
191 case TU_KGSL_DMA_TYPE_ION_LEGACY:
192 return bo_init_new_ion_legacy(dev, out_bo, size, flags);
193 }
194 }
195
196 struct kgsl_gpumem_alloc_id req = {
197 .size = size,
198 };
199
200 if (mem_property & VK_MEMORY_PROPERTY_HOST_CACHED_BIT) {
201 if (mem_property & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) {
202 req.flags |= KGSL_MEMFLAGS_IOCOHERENT;
203 }
204
205 req.flags |= KGSL_CACHEMODE_WRITEBACK << KGSL_CACHEMODE_SHIFT;
206 } else {
207 req.flags |= KGSL_CACHEMODE_WRITECOMBINE << KGSL_CACHEMODE_SHIFT;
208 }
209
210 if (flags & TU_BO_ALLOC_GPU_READ_ONLY)
211 req.flags |= KGSL_MEMFLAGS_GPUREADONLY;
212
213 if (flags & TU_BO_ALLOC_REPLAYABLE)
214 req.flags |= KGSL_MEMFLAGS_USE_CPU_MAP;
215
216 int ret;
217
218 ret = safe_ioctl(dev->physical_device->local_fd,
219 IOCTL_KGSL_GPUMEM_ALLOC_ID, &req);
220 if (ret) {
221 return vk_errorf(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY,
222 "GPUMEM_ALLOC_ID failed (%s)", strerror(errno));
223 }
224
225 struct tu_bo* bo = tu_device_lookup_bo(dev, req.id);
226 assert(bo && bo->gem_handle == 0);
227
228 *bo = (struct tu_bo) {
229 .gem_handle = req.id,
230 .size = req.mmapsize,
231 .iova = req.gpuaddr,
232 .name = tu_debug_bos_add(dev, req.mmapsize, name),
233 .refcnt = 1,
234 .shared_fd = -1,
235 .base = base,
236 };
237
238 if (flags & TU_BO_ALLOC_REPLAYABLE) {
239 uint64_t offset = req.id << 12;
240 void *map = mmap((void *)client_iova, bo->size, PROT_READ | PROT_WRITE,
241 MAP_SHARED, dev->physical_device->local_fd, offset);
242 if (map == MAP_FAILED) {
243 kgsl_bo_finish(dev, bo);
244
245 return vk_errorf(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY,
246 "mmap failed (%s)", strerror(errno));
247 }
248
249 if (client_iova && (uint64_t)map != client_iova) {
250 kgsl_bo_finish(dev, bo);
251
252 return vk_errorf(dev, VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS,
253 "mmap could not map the given address");
254 }
255
256 bo->map = map;
257 bo->iova = (uint64_t)map;
258
259 /* Because we're using SVM, the CPU mapping and GPU mapping are the same
260 * and the CPU mapping must stay fixed for the lifetime of the BO.
261 */
262 bo->never_unmap = true;
263
264 }
265
266
267 *out_bo = bo;
268
269 TU_RMV(bo_allocate, dev, bo);
270 if (flags & TU_BO_ALLOC_INTERNAL_RESOURCE) {
271 TU_RMV(internal_resource_create, dev, bo);
272 TU_RMV(resource_name, dev, bo, name);
273 }
274
275 return VK_SUCCESS;
276 }
277
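/* Import a dma-buf fd as a BO: wrap it in a KGSL GPU object and query its
 * size and GPU address. The fd is duplicated so it can be re-exported later.
 */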
278 static VkResult
279 kgsl_bo_init_dmabuf(struct tu_device *dev,
280 struct tu_bo **out_bo,
281 uint64_t size,
282 int fd)
283 {
284 struct kgsl_gpuobj_import_dma_buf import_dmabuf = {
285 .fd = fd,
286 };
287 struct kgsl_gpuobj_import req = {
288 .priv = (uintptr_t)&import_dmabuf,
289 .priv_len = sizeof(import_dmabuf),
290 .flags = 0,
291 .type = KGSL_USER_MEM_TYPE_DMABUF,
292 };
293 int ret;
294
295 ret = safe_ioctl(dev->physical_device->local_fd,
296 IOCTL_KGSL_GPUOBJ_IMPORT, &req);
297 if (ret)
298 return vk_errorf(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY,
299 "Failed to import dma-buf (%s)\n", strerror(errno));
300
301 struct kgsl_gpuobj_info info_req = {
302 .id = req.id,
303 };
304
305 ret = safe_ioctl(dev->physical_device->local_fd,
306 IOCTL_KGSL_GPUOBJ_INFO, &info_req);
307 if (ret)
308 return vk_errorf(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY,
309 "Failed to get dma-buf info (%s)\n", strerror(errno));
310
311 struct tu_bo* bo = tu_device_lookup_bo(dev, req.id);
312 assert(bo && bo->gem_handle == 0);
313
314 *bo = (struct tu_bo) {
315 .gem_handle = req.id,
316 .size = info_req.size,
317 .iova = info_req.gpuaddr,
318 .name = tu_debug_bos_add(dev, info_req.size, "dmabuf"),
319 .refcnt = 1,
320 .shared_fd = os_dupfd_cloexec(fd),
321 };
322
323 *out_bo = bo;
324
325 return VK_SUCCESS;
326 }
327
328 static int
329 kgsl_bo_export_dmabuf(struct tu_device *dev, struct tu_bo *bo)
330 {
331 assert(bo->shared_fd != -1);
332 return os_dupfd_cloexec(bo->shared_fd);
333 }
334
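/* Map a BO for CPU access. Native allocations are mapped through the device
 * fd at offset gem_handle << 12; imported dma-bufs are mapped through their
 * own fd. A non-NULL placed_addr requests a fixed placement via MAP_FIXED.
 */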
335 static VkResult
336 kgsl_bo_map(struct tu_device *dev, struct tu_bo *bo, void *placed_addr)
337 {
338 void *map = MAP_FAILED;
339 if (bo->shared_fd == -1) {
340 uint64_t offset = bo->gem_handle << 12;
341 map = mmap(placed_addr, bo->size, PROT_READ | PROT_WRITE,
342 MAP_SHARED | (placed_addr != NULL ? MAP_FIXED : 0),
343 dev->physical_device->local_fd, offset);
344 } else {
345 map = mmap(placed_addr, bo->size, PROT_READ | PROT_WRITE,
346 MAP_SHARED | (placed_addr != NULL ? MAP_FIXED : 0),
347 bo->shared_fd, 0);
348 }
349
350 if (map == MAP_FAILED)
351 return vk_error(dev, VK_ERROR_MEMORY_MAP_FAILED);
352
353 bo->map = map;
354 TU_RMV(bo_map, dev, bo);
355
356 return VK_SUCCESS;
357 }
358
359 static void
360 kgsl_bo_allow_dump(struct tu_device *dev, struct tu_bo *bo)
361 {
362 }
363
364 static void
365 kgsl_bo_finish(struct tu_device *dev, struct tu_bo *bo)
366 {
367 assert(bo->gem_handle);
368
369 if (!p_atomic_dec_zero(&bo->refcnt))
370 return;
371
372 if (bo->map) {
373 TU_RMV(bo_unmap, dev, bo);
374 munmap(bo->map, bo->size);
375 }
376
377 if (bo->shared_fd != -1)
378 close(bo->shared_fd);
379
380 TU_RMV(bo_destroy, dev, bo);
381
382 struct kgsl_gpumem_free_id req = {
383 .id = bo->gem_handle
384 };
385
386 /* Tell sparse array that entry is free */
387 memset(bo, 0, sizeof(*bo));
388
389 safe_ioctl(dev->physical_device->local_fd, IOCTL_KGSL_GPUMEM_FREE_ID, &req);
390 }
391
392 static VkResult
393 get_kgsl_prop(int fd, unsigned int type, void *value, size_t size)
394 {
395 struct kgsl_device_getproperty getprop = {
396 .type = type,
397 .value = value,
398 .sizebytes = size,
399 };
400
401 return safe_ioctl(fd, IOCTL_KGSL_DEVICE_GETPROPERTY, &getprop)
402 ? VK_ERROR_UNKNOWN
403 : VK_SUCCESS;
404 }
405
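/* Probe whether a combination of KGSL memory flags is supported by doing a
 * small throwaway allocation with those flags.
 */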
406 static bool
407 kgsl_is_memory_type_supported(int fd, uint32_t flags)
408 {
409 struct kgsl_gpumem_alloc_id req_alloc = {
410 .flags = flags,
411 .size = 0x1000,
412 };
413
414 int ret = safe_ioctl(fd, IOCTL_KGSL_GPUMEM_ALLOC_ID, &req_alloc);
415 if (ret) {
416 return false;
417 }
418
419 struct kgsl_gpumem_free_id req_free = { .id = req_alloc.id };
420
421 safe_ioctl(fd, IOCTL_KGSL_GPUMEM_FREE_ID, &req_free);
422
423 return true;
424 }
425
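/* kgsl has no kernel syncobj object, so binary syncobjs are emulated in
 * userspace: a syncobj is either unsignaled, already signaled, backed by a
 * (queue, timestamp) pair from a previous submission, or backed by a sync
 * file fd.
 */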
426 enum kgsl_syncobj_state {
427 KGSL_SYNCOBJ_STATE_UNSIGNALED,
428 KGSL_SYNCOBJ_STATE_SIGNALED,
429 KGSL_SYNCOBJ_STATE_TS,
430 KGSL_SYNCOBJ_STATE_FD,
431 };
432
433 struct kgsl_syncobj
434 {
435 struct vk_object_base base;
436 enum kgsl_syncobj_state state;
437
438 struct tu_queue *queue;
439 uint32_t timestamp;
440
441 int fd;
442 };
443
444 struct tu_u_trace_syncobj
445 {
446 uint32_t msm_queue_id;
447 uint32_t timestamp;
448 };
449
450 static void
451 kgsl_syncobj_init(struct kgsl_syncobj *s, bool signaled)
452 {
453 s->state =
454 signaled ? KGSL_SYNCOBJ_STATE_SIGNALED : KGSL_SYNCOBJ_STATE_UNSIGNALED;
455
456 s->timestamp = UINT32_MAX;
457 s->fd = -1;
458 }
459
460 static void
461 kgsl_syncobj_reset(struct kgsl_syncobj *s)
462 {
463 if (s->state == KGSL_SYNCOBJ_STATE_FD && s->fd >= 0) {
464 ASSERTED int ret = close(s->fd);
465 assert(ret == 0);
466 s->fd = -1;
467 } else if (s->state == KGSL_SYNCOBJ_STATE_TS) {
468 s->timestamp = UINT32_MAX;
469 }
470
471 s->state = KGSL_SYNCOBJ_STATE_UNSIGNALED;
472 }
473
474 static void
475 kgsl_syncobj_destroy(struct kgsl_syncobj *s)
476 {
477 kgsl_syncobj_reset(s);
478 }
479
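/* Turn a (context, timestamp) pair into a sync file fd via
 * IOCTL_KGSL_TIMESTAMP_EVENT, or return -1 on failure.
 */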
480 static int
481 timestamp_to_fd(struct tu_queue *queue, uint32_t timestamp)
482 {
483 int fd;
484 struct kgsl_timestamp_event event = {
485 .type = KGSL_TIMESTAMP_EVENT_FENCE,
486 .timestamp = timestamp,
487 .context_id = queue->msm_queue_id,
488 .priv = &fd,
489 .len = sizeof(fd),
490 };
491
492 int ret = safe_ioctl(queue->device->fd, IOCTL_KGSL_TIMESTAMP_EVENT, &event);
493 if (ret)
494 return -1;
495
496 return fd;
497 }
498
499 static int
500 kgsl_syncobj_ts_to_fd(const struct kgsl_syncobj *syncobj)
501 {
502 assert(syncobj->state == KGSL_SYNCOBJ_STATE_TS);
503 return timestamp_to_fd(syncobj->queue, syncobj->timestamp);
504 }
505
506 /* return true if timestamp a is greater (more recent) than b
507 * this relies on timestamps never having a difference > (1<<31)
508 */
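/* For example, timestamp_cmp(2, UINT32_MAX) is true: the timestamp counter
 * has wrapped around and 2 is the more recent value.
 */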
509 static inline bool
510 timestamp_cmp(uint32_t a, uint32_t b)
511 {
512 return (int32_t) (a - b) >= 0;
513 }
514
515 static uint32_t
516 max_ts(uint32_t a, uint32_t b)
517 {
518 return timestamp_cmp(a, b) ? a : b;
519 }
520
521 static uint32_t
522 min_ts(uint32_t a, uint32_t b)
523 {
524 return timestamp_cmp(a, b) ? b : a;
525 }
526
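/* Convert an absolute timeout in nanoseconds to the relative millisecond
 * timeout expected by poll() and the KGSL wait ioctl, where -1 means wait
 * forever.
 */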
527 static int
528 get_relative_ms(uint64_t abs_timeout_ns)
529 {
530 if (abs_timeout_ns >= INT64_MAX)
531 /* We can assume that a wait with a value this high is a forever wait
532 * and return -1 here as it's the infinite timeout for poll() while
533 * being the highest unsigned integer value for the wait KGSL IOCTL
534 */
535 return -1;
536
537 uint64_t cur_time_ms = os_time_get_nano() / 1000000;
538 uint64_t abs_timeout_ms = abs_timeout_ns / 1000000;
539 if (abs_timeout_ms <= cur_time_ms)
540 return 0;
541
542 return abs_timeout_ms - cur_time_ms;
543 }
544
545 /* safe_ioctl is not enough as restarted waits would not adjust the timeout
546 * which could lead to waiting substantially longer than requested
547 */
548 static int
549 wait_timestamp_safe(int fd,
550 unsigned int context_id,
551 unsigned int timestamp,
552 uint64_t abs_timeout_ns)
553 {
554 struct kgsl_device_waittimestamp_ctxtid wait = {
555 .context_id = context_id,
556 .timestamp = timestamp,
557 .timeout = get_relative_ms(abs_timeout_ns),
558 };
559
560 while (true) {
561 int ret = ioctl(fd, IOCTL_KGSL_DEVICE_WAITTIMESTAMP_CTXTID, &wait);
562
563 if (ret == -1 && (errno == EINTR || errno == EAGAIN)) {
564 int timeout_ms = get_relative_ms(abs_timeout_ns);
565
566 /* update timeout to consider time that has passed since the start */
567 if (timeout_ms == 0) {
568 errno = ETIME;
569 return -1;
570 }
571
572 wait.timeout = timeout_ms;
573 } else if (ret == -1 && errno == ETIMEDOUT) {
574 /* The kernel returns ETIMEDOUT if the timeout is reached, but
575 * we want to return ETIME instead.
576 */
577 errno = ETIME;
578 return -1;
579 } else {
580 return ret;
581 }
582 }
583 }
584
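/* Wait for a single syncobj to signal. An unsignaled syncobj first waits on
 * the device's timeline condition variable until a submission attaches a
 * timestamp or fd to it, then the wait is forwarded to the kernel.
 */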
585 static VkResult
586 kgsl_syncobj_wait(struct tu_device *device,
587 struct kgsl_syncobj *s,
588 uint64_t abs_timeout_ns)
589 {
590 if (s->state == KGSL_SYNCOBJ_STATE_UNSIGNALED) {
591 /* If this syncobj is unsignaled we need to wait for it to resolve to a
592 * valid syncobj prior to letting the rest of the wait continue, this
593 * avoids needing kernel support for wait-before-signal semantics.
594 */
595
596 if (abs_timeout_ns == 0)
597 return VK_TIMEOUT; // If this is a simple poll then we can return early
598
599 pthread_mutex_lock(&device->submit_mutex);
600 struct timespec abstime;
601 timespec_from_nsec(&abstime, abs_timeout_ns);
602
603 while (s->state == KGSL_SYNCOBJ_STATE_UNSIGNALED) {
604 int ret;
605 if (abs_timeout_ns == UINT64_MAX) {
606 ret = pthread_cond_wait(&device->timeline_cond,
607 &device->submit_mutex);
608 } else {
609 ret = pthread_cond_timedwait(&device->timeline_cond,
610 &device->submit_mutex, &abstime);
611 }
612 if (ret != 0) {
613 assert(ret == ETIMEDOUT);
614 pthread_mutex_unlock(&device->submit_mutex);
615 return VK_TIMEOUT;
616 }
617 }
618
619 pthread_mutex_unlock(&device->submit_mutex);
620 }
621
622 switch (s->state) {
623 case KGSL_SYNCOBJ_STATE_SIGNALED:
624 return VK_SUCCESS;
625
626 case KGSL_SYNCOBJ_STATE_UNSIGNALED:
627 return VK_TIMEOUT;
628
629 case KGSL_SYNCOBJ_STATE_TS: {
630 int ret = wait_timestamp_safe(device->fd, s->queue->msm_queue_id,
631 s->timestamp, abs_timeout_ns);
632 if (ret) {
633 assert(errno == ETIME);
634 return VK_TIMEOUT;
635 } else {
636 return VK_SUCCESS;
637 }
638 }
639
640 case KGSL_SYNCOBJ_STATE_FD: {
641 int ret = sync_wait(s->fd, get_relative_ms(abs_timeout_ns));
642 if (ret) {
643 assert(errno == ETIME);
644 return VK_TIMEOUT;
645 } else {
646 return VK_SUCCESS;
647 }
648 }
649
650 default:
651 unreachable("invalid syncobj state");
652 }
653 }
654
655 #define kgsl_syncobj_foreach_state(syncobjs, filter) \
656 for (uint32_t i = 0; i < count && (sync = syncobjs[i], true); i++) \
657 if (sync->state == filter)
658
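/* Wait until any of the given syncobjs signals. Timestamps from a single
 * queue can be compared directly, so waiting on the earliest one is enough;
 * timestamps from different queues are converted to sync file fds and
 * poll()ed together with any fd-backed syncobjs.
 */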
659 static VkResult
660 kgsl_syncobj_wait_any(struct tu_device *device, struct kgsl_syncobj **syncobjs, uint32_t count, uint64_t abs_timeout_ns)
661 {
662 if (count == 0)
663 return VK_TIMEOUT;
664 else if (count == 1)
665 return kgsl_syncobj_wait(device, syncobjs[0], abs_timeout_ns);
666
667 uint32_t num_fds = 0;
668 struct tu_queue *queue = NULL;
669 struct kgsl_syncobj *sync = NULL;
670
671 /* Simple case, we already have a signaled one */
672 kgsl_syncobj_foreach_state(syncobjs, KGSL_SYNCOBJ_STATE_SIGNALED)
673 return VK_SUCCESS;
674
675 kgsl_syncobj_foreach_state(syncobjs, KGSL_SYNCOBJ_STATE_FD)
676 num_fds++;
677
678 /* If we have TS from different queues we cannot compare them and would
679 * have to convert them into FDs
680 */
681 bool convert_ts_to_fd = false;
682 kgsl_syncobj_foreach_state(syncobjs, KGSL_SYNCOBJ_STATE_TS) {
683 if (queue != NULL && sync->queue != queue) {
684 convert_ts_to_fd = true;
685 break;
686 }
687 queue = sync->queue;
688 }
689
690 /* If we have no FD nor TS syncobjs then we can return immediately */
691 if (num_fds == 0 && queue == NULL)
692 return VK_TIMEOUT;
693
694 VkResult result = VK_TIMEOUT;
695
696 struct u_vector poll_fds = { 0 };
697 uint32_t lowest_timestamp = 0;
698
699 if (convert_ts_to_fd || num_fds > 0)
700 u_vector_init(&poll_fds, 4, sizeof(struct pollfd));
701
702 if (convert_ts_to_fd) {
703 kgsl_syncobj_foreach_state(syncobjs, KGSL_SYNCOBJ_STATE_TS) {
704 struct pollfd *poll_fd = (struct pollfd *) u_vector_add(&poll_fds);
705 poll_fd->fd = timestamp_to_fd(sync->queue, sync->timestamp);
706 poll_fd->events = POLLIN;
707 }
708 } else {
709 /* TSs could be merged by finding the one with the lowest timestamp */
710 bool first_ts = true;
711 kgsl_syncobj_foreach_state(syncobjs, KGSL_SYNCOBJ_STATE_TS) {
712 if (first_ts || timestamp_cmp(sync->timestamp, lowest_timestamp)) {
713 first_ts = false;
714 lowest_timestamp = sync->timestamp;
715 }
716 }
717
718 if (num_fds) {
719 struct pollfd *poll_fd = (struct pollfd *) u_vector_add(&poll_fds);
720 poll_fd->fd = timestamp_to_fd(queue, lowest_timestamp);
721 poll_fd->events = POLLIN;
722 }
723 }
724
725 if (num_fds) {
726 kgsl_syncobj_foreach_state(syncobjs, KGSL_SYNCOBJ_STATE_FD) {
727 struct pollfd *poll_fd = (struct pollfd *) u_vector_add(&poll_fds);
728 poll_fd->fd = sync->fd;
729 poll_fd->events = POLLIN;
730 }
731 }
732
733 if (u_vector_length(&poll_fds) == 0) {
734 int ret = wait_timestamp_safe(device->fd, queue->msm_queue_id,
735 lowest_timestamp, MIN2(abs_timeout_ns, INT64_MAX));
736 if (ret) {
737 assert(errno == ETIME);
738 result = VK_TIMEOUT;
739 } else {
740 result = VK_SUCCESS;
741 }
742 } else {
743 int ret, i;
744
745 struct pollfd *fds = (struct pollfd *) poll_fds.data;
746 uint32_t fds_count = u_vector_length(&poll_fds);
747 do {
748 ret = poll(fds, fds_count, get_relative_ms(abs_timeout_ns));
749 if (ret > 0) {
750 for (i = 0; i < fds_count; i++) {
751 if (fds[i].revents & (POLLERR | POLLNVAL)) {
752 errno = EINVAL;
753 ret = -1;
754 break;
755 }
756 }
757 break;
758 } else if (ret == 0) {
759 errno = ETIME;
760 break;
761 }
762 } while (ret == -1 && (errno == EINTR || errno == EAGAIN));
763
764 for (uint32_t i = 0; i < fds_count - num_fds; i++)
765 close(fds[i].fd);
766
767 if (ret != 0) {
768 assert(errno == ETIME);
769 result = VK_TIMEOUT;
770 } else {
771 result = VK_SUCCESS;
772 }
773 }
774
775 u_vector_finish(&poll_fds);
776 return result;
777 }
778
779 static VkResult
780 kgsl_syncobj_export(struct kgsl_syncobj *s, int *pFd)
781 {
782 if (!pFd)
783 return VK_SUCCESS;
784
785 switch (s->state) {
786 case KGSL_SYNCOBJ_STATE_SIGNALED:
787 case KGSL_SYNCOBJ_STATE_UNSIGNALED:
788 /* Getting a sync FD from an unsignaled syncobj is UB in Vulkan */
789 *pFd = -1;
790 return VK_SUCCESS;
791
792 case KGSL_SYNCOBJ_STATE_FD:
793 if (s->fd < 0)
794 *pFd = -1;
795 else
796 *pFd = dup(s->fd);
797 return VK_SUCCESS;
798
799 case KGSL_SYNCOBJ_STATE_TS:
800 *pFd = kgsl_syncobj_ts_to_fd(s);
801 return VK_SUCCESS;
802
803 default:
804 unreachable("Invalid syncobj state");
805 }
806 }
807
808 static VkResult
809 kgsl_syncobj_import(struct kgsl_syncobj *s, int fd)
810 {
811 kgsl_syncobj_reset(s);
812 if (fd >= 0) {
813 s->state = KGSL_SYNCOBJ_STATE_FD;
814 s->fd = fd;
815 } else {
816 s->state = KGSL_SYNCOBJ_STATE_SIGNALED;
817 }
818
819 return VK_SUCCESS;
820 }
821
822 static int
823 sync_merge_close(const char *name, int fd1, int fd2, bool close_fd2)
824 {
825 int fd = sync_merge(name, fd1, fd2);
826 if (fd < 0)
827 return -1;
828
829 close(fd1);
830 if (close_fd2)
831 close(fd2);
832
833 return fd;
834 }
835
836 /* Merges multiple kgsl_syncobjs into a single one which is only signalled
837 * after all submitted syncobjs are signalled
838 */
839 static struct kgsl_syncobj
840 kgsl_syncobj_merge(const struct kgsl_syncobj **syncobjs, uint32_t count)
841 {
842 struct kgsl_syncobj ret;
843 kgsl_syncobj_init(&ret, true);
844
845 if (count == 0)
846 return ret;
847
848 for (uint32_t i = 0; i < count; ++i) {
849 const struct kgsl_syncobj *sync = syncobjs[i];
850
851 switch (sync->state) {
852 case KGSL_SYNCOBJ_STATE_SIGNALED:
853 break;
854
855 case KGSL_SYNCOBJ_STATE_UNSIGNALED:
856 kgsl_syncobj_reset(&ret);
857 return ret;
858
859 case KGSL_SYNCOBJ_STATE_TS:
860 if (ret.state == KGSL_SYNCOBJ_STATE_TS) {
861 if (ret.queue == sync->queue) {
862 ret.timestamp = max_ts(ret.timestamp, sync->timestamp);
863 } else {
864 ret.state = KGSL_SYNCOBJ_STATE_FD;
865 int sync_fd = kgsl_syncobj_ts_to_fd(sync);
866 ret.fd = sync_merge_close("tu_sync", ret.fd, sync_fd, true);
867 assert(ret.fd >= 0);
868 }
869 } else if (ret.state == KGSL_SYNCOBJ_STATE_FD) {
870 int sync_fd = kgsl_syncobj_ts_to_fd(sync);
871 ret.fd = sync_merge_close("tu_sync", ret.fd, sync_fd, true);
872 assert(ret.fd >= 0);
873 } else {
874 ret = *sync;
875 }
876 break;
877
878 case KGSL_SYNCOBJ_STATE_FD:
879 if (ret.state == KGSL_SYNCOBJ_STATE_FD) {
880 ret.fd = sync_merge_close("tu_sync", ret.fd, sync->fd, false);
881 assert(ret.fd >= 0);
882 } else if (ret.state == KGSL_SYNCOBJ_STATE_TS) {
883 ret.state = KGSL_SYNCOBJ_STATE_FD;
884 int sync_fd = kgsl_syncobj_ts_to_fd(sync);
885 ret.fd = sync_merge_close("tu_sync", ret.fd, sync_fd, true);
886 assert(ret.fd >= 0);
887 } else {
888 ret = *sync;
889 ret.fd = dup(ret.fd);
890 assert(ret.fd >= 0);
891 }
892 break;
893
894 default:
895 unreachable("invalid syncobj state");
896 }
897 }
898
899 return ret;
900 }
901
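/* vk_sync implementation wrapping kgsl_syncobj so the common Vulkan
 * synchronization framework can drive the kgsl-specific code above.
 */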
902 struct vk_kgsl_syncobj
903 {
904 struct vk_sync vk;
905 struct kgsl_syncobj syncobj;
906 };
907
908 static VkResult
909 vk_kgsl_sync_init(struct vk_device *device,
910 struct vk_sync *sync,
911 uint64_t initial_value)
912 {
913 struct vk_kgsl_syncobj *s = container_of(sync, struct vk_kgsl_syncobj, vk);
914 kgsl_syncobj_init(&s->syncobj, initial_value != 0);
915 return VK_SUCCESS;
916 }
917
918 static void
919 vk_kgsl_sync_finish(struct vk_device *device, struct vk_sync *sync)
920 {
921 struct vk_kgsl_syncobj *s = container_of(sync, struct vk_kgsl_syncobj, vk);
922 kgsl_syncobj_destroy(&s->syncobj);
923 }
924
925 static VkResult
926 vk_kgsl_sync_reset(struct vk_device *device, struct vk_sync *sync)
927 {
928 struct vk_kgsl_syncobj *s = container_of(sync, struct vk_kgsl_syncobj, vk);
929 kgsl_syncobj_reset(&s->syncobj);
930 return VK_SUCCESS;
931 }
932
933 static VkResult
934 vk_kgsl_sync_move(struct vk_device *device,
935 struct vk_sync *dst,
936 struct vk_sync *src)
937 {
938 struct vk_kgsl_syncobj *d = container_of(dst, struct vk_kgsl_syncobj, vk);
939 struct vk_kgsl_syncobj *s = container_of(src, struct vk_kgsl_syncobj, vk);
940 kgsl_syncobj_reset(&d->syncobj);
941 d->syncobj = s->syncobj;
942 kgsl_syncobj_init(&s->syncobj, false);
943 return VK_SUCCESS;
944 }
945
946 static VkResult
947 vk_kgsl_sync_wait(struct vk_device *_device,
948 struct vk_sync *sync,
949 uint64_t wait_value,
950 enum vk_sync_wait_flags wait_flags,
951 uint64_t abs_timeout_ns)
952 {
953 struct tu_device *device = container_of(_device, struct tu_device, vk);
954 struct vk_kgsl_syncobj *s = container_of(sync, struct vk_kgsl_syncobj, vk);
955
956 if (wait_flags & VK_SYNC_WAIT_PENDING)
957 return VK_SUCCESS;
958
959 return kgsl_syncobj_wait(device, &s->syncobj, abs_timeout_ns);
960 }
961
962 static VkResult
963 vk_kgsl_sync_wait_many(struct vk_device *_device,
964 uint32_t wait_count,
965 const struct vk_sync_wait *waits,
966 enum vk_sync_wait_flags wait_flags,
967 uint64_t abs_timeout_ns)
968 {
969 struct tu_device *device = container_of(_device, struct tu_device, vk);
970
971 if (wait_flags & VK_SYNC_WAIT_PENDING)
972 return VK_SUCCESS;
973
974 if (wait_flags & VK_SYNC_WAIT_ANY) {
975 struct kgsl_syncobj *syncobjs[wait_count];
976 for (uint32_t i = 0; i < wait_count; i++) {
977 syncobjs[i] =
978 &container_of(waits[i].sync, struct vk_kgsl_syncobj, vk)->syncobj;
979 }
980
981 return kgsl_syncobj_wait_any(device, syncobjs, wait_count,
982 abs_timeout_ns);
983 } else {
984 for (uint32_t i = 0; i < wait_count; i++) {
985 struct vk_kgsl_syncobj *s =
986 container_of(waits[i].sync, struct vk_kgsl_syncobj, vk);
987
988 VkResult result =
989 kgsl_syncobj_wait(device, &s->syncobj, abs_timeout_ns);
990 if (result != VK_SUCCESS)
991 return result;
992 }
993 return VK_SUCCESS;
994 }
995 }
996
997 static VkResult
998 vk_kgsl_sync_import_sync_file(struct vk_device *device,
999 struct vk_sync *sync,
1000 int fd)
1001 {
1002 struct vk_kgsl_syncobj *s = container_of(sync, struct vk_kgsl_syncobj, vk);
1003 if (fd >= 0) {
1004 fd = dup(fd);
1005 if (fd < 0) {
1006 mesa_loge("vk_kgsl_sync_import_sync_file: dup failed: %s",
1007 strerror(errno));
1008 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
1009 }
1010 }
1011 return kgsl_syncobj_import(&s->syncobj, fd);
1012 }
1013
1014 static VkResult
1015 vk_kgsl_sync_export_sync_file(struct vk_device *device,
1016 struct vk_sync *sync,
1017 int *pFd)
1018 {
1019 struct vk_kgsl_syncobj *s = container_of(sync, struct vk_kgsl_syncobj, vk);
1020 return kgsl_syncobj_export(&s->syncobj, pFd);
1021 }
1022
1023 const struct vk_sync_type vk_kgsl_sync_type = {
1024 .size = sizeof(struct vk_kgsl_syncobj),
1025 .features = (enum vk_sync_features)
1026 (VK_SYNC_FEATURE_BINARY |
1027 VK_SYNC_FEATURE_GPU_WAIT |
1028 VK_SYNC_FEATURE_GPU_MULTI_WAIT |
1029 VK_SYNC_FEATURE_CPU_WAIT |
1030 VK_SYNC_FEATURE_CPU_RESET |
1031 VK_SYNC_FEATURE_WAIT_ANY |
1032 VK_SYNC_FEATURE_WAIT_PENDING),
1033 .init = vk_kgsl_sync_init,
1034 .finish = vk_kgsl_sync_finish,
1035 .reset = vk_kgsl_sync_reset,
1036 .move = vk_kgsl_sync_move,
1037 .wait = vk_kgsl_sync_wait,
1038 .wait_many = vk_kgsl_sync_wait_many,
1039 .import_sync_file = vk_kgsl_sync_import_sync_file,
1040 .export_sync_file = vk_kgsl_sync_export_sync_file,
1041 };
1042
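/* Build a kgsl_gpu_command from the submitted command buffers (plus optional
 * perf-counter, u_trace and autotune IBs), attach the merged wait syncobj as
 * a syncpoint, and submit it with IOCTL_KGSL_GPU_COMMAND. Signal syncobjs
 * are then pointed at the returned timestamp. Submits without command
 * buffers only forward the merged wait state to the signal syncobjs.
 */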
1043 static VkResult
1044 kgsl_queue_submit(struct tu_queue *queue, struct vk_queue_submit *vk_submit)
1045 {
1046 MESA_TRACE_FUNC();
1047
1048 bool u_trace_enabled = u_trace_should_process(&queue->device->trace_context);
1049 bool has_trace_points = false;
1050
1051 if (vk_submit->command_buffer_count == 0) {
1052 pthread_mutex_lock(&queue->device->submit_mutex);
1053
1054 const struct kgsl_syncobj *wait_semaphores[vk_submit->wait_count + 1];
1055 for (uint32_t i = 0; i < vk_submit->wait_count; i++) {
1056 wait_semaphores[i] = &container_of(vk_submit->waits[i].sync,
1057 struct vk_kgsl_syncobj, vk)
1058 ->syncobj;
1059 }
1060
1061 struct kgsl_syncobj last_submit_sync;
1062 if (queue->fence >= 0)
1063 last_submit_sync = (struct kgsl_syncobj) {
1064 .state = KGSL_SYNCOBJ_STATE_TS,
1065 .queue = queue,
1066 .timestamp = queue->fence,
1067 };
1068 else
1069 last_submit_sync = (struct kgsl_syncobj) {
1070 .state = KGSL_SYNCOBJ_STATE_SIGNALED,
1071 };
1072
1073 wait_semaphores[vk_submit->wait_count] = &last_submit_sync;
1074
1075 struct kgsl_syncobj wait_sync =
1076 kgsl_syncobj_merge(wait_semaphores, vk_submit->wait_count + 1);
1077 assert(wait_sync.state !=
1078 KGSL_SYNCOBJ_STATE_UNSIGNALED); // Would wait forever
1079
1080 for (uint32_t i = 0; i < vk_submit->signal_count; i++) {
1081 struct kgsl_syncobj *signal_sync =
1082 &container_of(vk_submit->signals[i].sync, struct vk_kgsl_syncobj,
1083 vk)
1084 ->syncobj;
1085
1086 kgsl_syncobj_reset(signal_sync);
1087 *signal_sync = wait_sync;
1088 }
1089
1090 pthread_mutex_unlock(&queue->device->submit_mutex);
1091 pthread_cond_broadcast(&queue->device->timeline_cond);
1092
1093 return VK_SUCCESS;
1094 }
1095
1096 uint32_t perf_pass_index =
1097 queue->device->perfcntrs_pass_cs ? vk_submit->perf_pass_index : ~0;
1098
1099 if (TU_DEBUG(LOG_SKIP_GMEM_OPS))
1100 tu_dbg_log_gmem_load_store_skips(queue->device);
1101
1102 VkResult result = VK_SUCCESS;
1103
1104 pthread_mutex_lock(&queue->device->submit_mutex);
1105
1106 struct tu_cmd_buffer **cmd_buffers =
1107 (struct tu_cmd_buffer **) vk_submit->command_buffers;
1108 static_assert(offsetof(struct tu_cmd_buffer, vk) == 0,
1109 "vk must be first member of tu_cmd_buffer");
1110 uint32_t cmdbuf_count = vk_submit->command_buffer_count;
1111
1112 result =
1113 tu_insert_dynamic_cmdbufs(queue->device, &cmd_buffers, &cmdbuf_count);
1114 if (result != VK_SUCCESS) {
1115 pthread_mutex_unlock(&queue->device->submit_mutex);
1116 return result;
1117 }
1118
1119 uint32_t entry_count = 0;
1120 for (uint32_t i = 0; i < cmdbuf_count; ++i) {
1121 struct tu_cmd_buffer *cmd_buffer = cmd_buffers[i];
1122
1123 if (perf_pass_index != ~0)
1124 entry_count++;
1125
1126 entry_count += cmd_buffer->cs.entry_count;
1127
1128 if (u_trace_enabled && u_trace_has_points(&cmd_buffers[i]->trace)) {
1129 if (!(cmd_buffers[i]->usage_flags &
1130 VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT))
1131 entry_count++;
1132
1133 has_trace_points = true;
1134 }
1135 }
1136
1137 if (tu_autotune_submit_requires_fence(cmd_buffers, cmdbuf_count))
1138 entry_count++;
1139
1140 struct kgsl_command_object *cmds = (struct kgsl_command_object *)
1141 vk_alloc(&queue->device->vk.alloc, sizeof(*cmds) * entry_count,
1142 alignof(*cmds), VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
1143 if (cmds == NULL) {
1144 pthread_mutex_unlock(&queue->device->submit_mutex);
1145 return vk_error(queue, VK_ERROR_OUT_OF_HOST_MEMORY);
1146 }
1147
1148 uint32_t obj_count = 0;
1149 if (has_trace_points)
1150 obj_count++;
1151
1152 struct kgsl_command_object *objs = (struct kgsl_command_object *)
1153 vk_alloc(&queue->device->vk.alloc, sizeof(*objs) * obj_count,
1154 alignof(*objs), VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
1155
1156 struct tu_u_trace_submission_data *u_trace_submission_data = NULL;
1157 if (has_trace_points) {
1158 tu_u_trace_submission_data_create(
1159 queue->device, cmd_buffers, cmdbuf_count, &u_trace_submission_data);
1160
1161 mtx_lock(&queue->device->kgsl_profiling_mutex);
1162 tu_suballoc_bo_alloc(&u_trace_submission_data->kgsl_timestamp_bo,
1163 &queue->device->kgsl_profiling_suballoc,
1164 sizeof(struct kgsl_cmdbatch_profiling_buffer), 4);
1165 mtx_unlock(&queue->device->kgsl_profiling_mutex);
1166 }
1167
1168 uint32_t entry_idx = 0;
1169 for (uint32_t i = 0; i < cmdbuf_count; i++) {
1170 struct tu_cmd_buffer *cmd_buffer = cmd_buffers[i];
1171 struct tu_cs *cs = &cmd_buffer->cs;
1172
1173 if (perf_pass_index != ~0) {
1174 struct tu_cs_entry *perf_cs_entry =
1175 &cmd_buffer->device->perfcntrs_pass_cs_entries[perf_pass_index];
1176
1177 cmds[entry_idx++] = (struct kgsl_command_object) {
1178 .gpuaddr = perf_cs_entry->bo->iova + perf_cs_entry->offset,
1179 .size = perf_cs_entry->size,
1180 .flags = KGSL_CMDLIST_IB,
1181 .id = perf_cs_entry->bo->gem_handle,
1182 };
1183 }
1184
1185 for (uint32_t j = 0; j < cs->entry_count; j++) {
1186 cmds[entry_idx++] = (struct kgsl_command_object) {
1187 .gpuaddr = cs->entries[j].bo->iova + cs->entries[j].offset,
1188 .size = cs->entries[j].size,
1189 .flags = KGSL_CMDLIST_IB,
1190 .id = cs->entries[j].bo->gem_handle,
1191 };
1192 }
1193
1194 if (u_trace_submission_data &&
1195 u_trace_submission_data->cmd_trace_data[i].timestamp_copy_cs) {
1196 struct tu_cs_entry *trace_cs_entry =
1197 &u_trace_submission_data->cmd_trace_data[i]
1198 .timestamp_copy_cs->entries[0];
1199 cmds[entry_idx++] = (struct kgsl_command_object) {
1200 .offset = trace_cs_entry->offset,
1201 .gpuaddr = trace_cs_entry->bo->iova,
1202 .size = trace_cs_entry->size,
1203 .flags = KGSL_CMDLIST_IB,
1204 .id = trace_cs_entry->bo->gem_handle,
1205 };
1206 }
1207 }
1208
1209 struct kgsl_cmdbatch_profiling_buffer *profiling_buffer = NULL;
1210 uint32_t obj_idx = 0;
1211 if (u_trace_submission_data) {
1212 struct tu_suballoc_bo *bo = &u_trace_submission_data->kgsl_timestamp_bo;
1213
1214 objs[obj_idx++] = (struct kgsl_command_object) {
1215 .offset = bo->iova - bo->bo->iova,
1216 .gpuaddr = bo->bo->iova,
1217 .size = sizeof(struct kgsl_cmdbatch_profiling_buffer),
1218 .flags = KGSL_OBJLIST_MEMOBJ | KGSL_OBJLIST_PROFILE,
1219 .id = bo->bo->gem_handle,
1220 };
1221 profiling_buffer =
1222 (struct kgsl_cmdbatch_profiling_buffer *) tu_suballoc_bo_map(bo);
1223 memset(profiling_buffer, 0, sizeof(*profiling_buffer));
1224 }
1225
1226 if (tu_autotune_submit_requires_fence(cmd_buffers, cmdbuf_count)) {
1227 struct tu_cs *autotune_cs = tu_autotune_on_submit(
1228 queue->device, &queue->device->autotune, cmd_buffers, cmdbuf_count);
1229 cmds[entry_idx++] = (struct kgsl_command_object) {
1230 .gpuaddr =
1231 autotune_cs->entries[0].bo->iova + autotune_cs->entries[0].offset,
1232 .size = autotune_cs->entries[0].size,
1233 .flags = KGSL_CMDLIST_IB,
1234 .id = autotune_cs->entries[0].bo->gem_handle,
1235 };
1236 }
1237
1238 const struct kgsl_syncobj *wait_semaphores[vk_submit->wait_count];
1239 for (uint32_t i = 0; i < vk_submit->wait_count; i++) {
1240 wait_semaphores[i] =
1241 &container_of(vk_submit->waits[i].sync, struct vk_kgsl_syncobj, vk)
1242 ->syncobj;
1243 }
1244
1245 struct kgsl_syncobj wait_sync =
1246 kgsl_syncobj_merge(wait_semaphores, vk_submit->wait_count);
1247 assert(wait_sync.state !=
1248 KGSL_SYNCOBJ_STATE_UNSIGNALED); // Would wait forever
1249
1250 struct kgsl_cmd_syncpoint_timestamp ts;
1251 struct kgsl_cmd_syncpoint_fence fn;
1252 struct kgsl_command_syncpoint sync = { 0 };
1253 bool has_sync = false;
1254 switch (wait_sync.state) {
1255 case KGSL_SYNCOBJ_STATE_SIGNALED:
1256 break;
1257
1258 case KGSL_SYNCOBJ_STATE_TS:
1259 ts.context_id = wait_sync.queue->msm_queue_id;
1260 ts.timestamp = wait_sync.timestamp;
1261
1262 has_sync = true;
1263 sync.type = KGSL_CMD_SYNCPOINT_TYPE_TIMESTAMP;
1264 sync.priv = (uintptr_t) &ts;
1265 sync.size = sizeof(ts);
1266 break;
1267
1268 case KGSL_SYNCOBJ_STATE_FD:
1269 fn.fd = wait_sync.fd;
1270
1271 has_sync = true;
1272 sync.type = KGSL_CMD_SYNCPOINT_TYPE_FENCE;
1273 sync.priv = (uintptr_t) &fn;
1274 sync.size = sizeof(fn);
1275 break;
1276
1277 default:
1278 unreachable("invalid syncobj state");
1279 }
1280
1281 struct kgsl_gpu_command req = {
1282 .flags = KGSL_CMDBATCH_SUBMIT_IB_LIST,
1283 .cmdlist = (uintptr_t) cmds,
1284 .cmdsize = sizeof(struct kgsl_command_object),
1285 .numcmds = entry_idx,
1286 .synclist = (uintptr_t) &sync,
1287 .syncsize = sizeof(sync),
1288 .numsyncs = has_sync != 0 ? 1 : 0,
1289 .context_id = queue->msm_queue_id,
1290 };
1291
1292 if (obj_idx) {
1293 req.flags |= KGSL_CMDBATCH_PROFILING;
1294 req.objlist = (uintptr_t) objs;
1295 req.objsize = sizeof(struct kgsl_command_object);
1296 req.numobjs = obj_idx;
1297 }
1298
1299 int ret = safe_ioctl(queue->device->physical_device->local_fd,
1300 IOCTL_KGSL_GPU_COMMAND, &req);
1301
1302 uint64_t gpu_offset = 0;
1303 #if HAVE_PERFETTO
1304 if (profiling_buffer) {
1305 /* We need to wait for KGSL to queue the GPU command before we can read
1306 * the timestamp. Since this is just for profiling and doesn't take too
1307 * long, we can just busy-wait for it.
1308 */
1309 while (p_atomic_read(&profiling_buffer->gpu_ticks_queued) == 0);
1310
1311 struct kgsl_perfcounter_read_group perf = {
1312 .groupid = KGSL_PERFCOUNTER_GROUP_ALWAYSON,
1313 .countable = 0,
1314 .value = 0
1315 };
1316
1317 struct kgsl_perfcounter_read req = {
1318 .reads = &perf,
1319 .count = 1,
1320 };
1321
1322 ret = safe_ioctl(queue->device->fd, IOCTL_KGSL_PERFCOUNTER_READ, &req);
1323 /* Older KGSL has some kind of garbage in upper 32 bits */
1324 uint64_t offseted_gpu_ts = perf.value & 0xffffffff;
1325
1326 gpu_offset = tu_device_ticks_to_ns(
1327 queue->device, offseted_gpu_ts - profiling_buffer->gpu_ticks_queued);
1328
1329 struct tu_perfetto_clocks clocks = {
1330 .cpu = profiling_buffer->wall_clock_ns,
1331 .gpu_ts = tu_device_ticks_to_ns(queue->device,
1332 profiling_buffer->gpu_ticks_queued),
1333 .gpu_ts_offset = gpu_offset,
1334 };
1335
1336 clocks = tu_perfetto_submit(queue->device, queue->device->submit_count, &clocks);
1337 gpu_offset = clocks.gpu_ts_offset;
1338 }
1339 #endif
1340
1341 kgsl_syncobj_destroy(&wait_sync);
1342
1343 if (ret) {
1344 result = vk_device_set_lost(&queue->device->vk, "submit failed: %s\n",
1345 strerror(errno));
1346 goto fail_submit;
1347 }
1348
1349 p_atomic_set(&queue->fence, req.timestamp);
1350
1351 for (uint32_t i = 0; i < vk_submit->signal_count; i++) {
1352 struct kgsl_syncobj *signal_sync =
1353 &container_of(vk_submit->signals[i].sync, struct vk_kgsl_syncobj, vk)
1354 ->syncobj;
1355
1356 kgsl_syncobj_reset(signal_sync);
1357 signal_sync->state = KGSL_SYNCOBJ_STATE_TS;
1358 signal_sync->queue = queue;
1359 signal_sync->timestamp = req.timestamp;
1360 }
1361
1362 if (u_trace_submission_data) {
1363 struct tu_u_trace_submission_data *submission_data =
1364 u_trace_submission_data;
1365 submission_data->submission_id = queue->device->submit_count;
1366 submission_data->gpu_ts_offset = gpu_offset;
1367 /* We have to allocate it here since it is different between drm/kgsl */
1368 submission_data->syncobj = (struct tu_u_trace_syncobj *)
1369 vk_alloc(&queue->device->vk.alloc, sizeof(struct tu_u_trace_syncobj),
1370 8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
1371 submission_data->syncobj->timestamp = req.timestamp;
1372 submission_data->syncobj->msm_queue_id = queue->msm_queue_id;
1373
1374 u_trace_submission_data = NULL;
1375
1376 for (uint32_t i = 0; i < submission_data->cmd_buffer_count; i++) {
1377 bool free_data = i == submission_data->last_buffer_with_tracepoints;
1378 if (submission_data->cmd_trace_data[i].trace)
1379 u_trace_flush(submission_data->cmd_trace_data[i].trace,
1380 submission_data, queue->device->vk.current_frame,
1381 free_data);
1382
1383 if (!submission_data->cmd_trace_data[i].timestamp_copy_cs) {
1384 /* u_trace is owned by cmd_buffer */
1385 submission_data->cmd_trace_data[i].trace = NULL;
1386 }
1387 }
1388 }
1389
1390 queue->device->submit_count++;
1391
1392 pthread_mutex_unlock(&queue->device->submit_mutex);
1393 pthread_cond_broadcast(&queue->device->timeline_cond);
1394
1395 u_trace_context_process(&queue->device->trace_context, false);
1396
1397 if (cmd_buffers != (struct tu_cmd_buffer **) vk_submit->command_buffers)
1398 vk_free(&queue->device->vk.alloc, cmd_buffers);
1399
1400 vk_free(&queue->device->vk.alloc, cmds);
1401
1402 return VK_SUCCESS;
1403
1404 fail_submit:
1405 pthread_mutex_unlock(&queue->device->submit_mutex);
1406
1407 if (result != VK_SUCCESS) {
1408 mtx_lock(&queue->device->kgsl_profiling_mutex);
1409 tu_suballoc_bo_free(&queue->device->kgsl_profiling_suballoc,
1410 &u_trace_submission_data->kgsl_timestamp_bo);
1411 mtx_unlock(&queue->device->kgsl_profiling_mutex);
1412 }
1413
1414 if (cmd_buffers != (struct tu_cmd_buffer **) vk_submit->command_buffers)
1415 vk_free(&queue->device->vk.alloc, cmd_buffers);
1416
1417 vk_free(&queue->device->vk.alloc, cmds);
1418
1419 return result;
1420 }
1421
1422 static VkResult
1423 kgsl_device_wait_u_trace(struct tu_device *dev, struct tu_u_trace_syncobj *syncobj)
1424 {
1425 struct kgsl_device_waittimestamp_ctxtid req = {
1426 .context_id = syncobj->msm_queue_id,
1427 .timestamp = syncobj->timestamp,
1428 .timeout = 5000, // 5s
1429 };
1430
1431 int ret = safe_ioctl(dev->fd, IOCTL_KGSL_DEVICE_WAITTIMESTAMP_CTXTID, &req);
1432
1433 if (ret) {
1434 assert(errno == ETIME);
1435 return VK_TIMEOUT;
1436 }
1437
1438 return VK_SUCCESS;
1439 }
1440
1441 static VkResult
1442 kgsl_device_init(struct tu_device *dev)
1443 {
1444 dev->fd = dev->physical_device->local_fd;
1445 return VK_SUCCESS;
1446 }
1447
1448 static void
1449 kgsl_device_finish(struct tu_device *dev)
1450 {
1451 /* No-op */
1452 }
1453
1454 static int
1455 kgsl_device_get_gpu_timestamp(struct tu_device *dev, uint64_t *ts)
1456 {
1457 unreachable("");
1458 return 0;
1459 }
1460
1461 static int
1462 kgsl_device_get_suspend_count(struct tu_device *dev, uint64_t *suspend_count)
1463 {
1464 /* kgsl doesn't have a way to query the suspend count */
1465 *suspend_count = 0;
1466 return 0;
1467 }
1468
1469 static VkResult
1470 kgsl_device_check_status(struct tu_device *device)
1471 {
1472 for (unsigned i = 0; i < TU_MAX_QUEUE_FAMILIES; i++) {
1473 for (unsigned q = 0; q < device->queue_count[i]; q++) {
1474 /* KGSL's KGSL_PROP_GPU_RESET_STAT takes the u32 msm_queue_id and returns a
1475 * KGSL_CTX_STAT_* for the worst reset that happened since the last time it
1476 * was queried on that queue.
1477 */
1478 uint32_t value = device->queues[i][q].msm_queue_id;
1479 VkResult status = get_kgsl_prop(device->fd, KGSL_PROP_GPU_RESET_STAT,
1480 &value, sizeof(value));
1481 if (status != VK_SUCCESS)
1482 return vk_device_set_lost(&device->vk, "Failed to get GPU reset status");
1483
1484 if (value != KGSL_CTX_STAT_NO_ERROR &&
1485 value != KGSL_CTX_STAT_INNOCENT_CONTEXT_RESET_EXT) {
1486 return vk_device_set_lost(&device->vk, "GPU faulted or hung");
1487 }
1488 }
1489 }
1490
1491 return VK_SUCCESS;
1492 }
1493
1494 static const struct tu_knl kgsl_knl_funcs = {
1495 .name = "kgsl",
1496
1497 .device_init = kgsl_device_init,
1498 .device_finish = kgsl_device_finish,
1499 .device_get_gpu_timestamp = kgsl_device_get_gpu_timestamp,
1500 .device_get_suspend_count = kgsl_device_get_suspend_count,
1501 .device_check_status = kgsl_device_check_status,
1502 .submitqueue_new = kgsl_submitqueue_new,
1503 .submitqueue_close = kgsl_submitqueue_close,
1504 .bo_init = kgsl_bo_init,
1505 .bo_init_dmabuf = kgsl_bo_init_dmabuf,
1506 .bo_export_dmabuf = kgsl_bo_export_dmabuf,
1507 .bo_map = kgsl_bo_map,
1508 .bo_allow_dump = kgsl_bo_allow_dump,
1509 .bo_finish = kgsl_bo_finish,
1510 .device_wait_u_trace = kgsl_device_wait_u_trace,
1511 .queue_submit = kgsl_queue_submit,
1512 };
1513
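/* Entry point for the kgsl backend: pick a DMA allocation device for
 * exportable memory, query the device info and GMEM layout from kgsl, and
 * initialize the physical device with the function table above.
 */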
1514 VkResult
1515 tu_knl_kgsl_load(struct tu_instance *instance, int fd)
1516 {
1517 if (instance->vk.enabled_extensions.KHR_display) {
1518 return vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
1519 "I can't KHR_display");
1520 }
1521
1522 struct tu_physical_device *device = (struct tu_physical_device *)
1523 vk_zalloc(&instance->vk.alloc, sizeof(*device), 8,
1524 VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
1525 if (!device) {
1526 close(fd);
1527 return vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY);
1528 }
1529
1530 static const char dma_heap_path[] = "/dev/dma_heap/system";
1531 static const char ion_path[] = "/dev/ion";
1532 int dma_fd;
1533
1534 dma_fd = open(dma_heap_path, O_RDONLY);
1535 if (dma_fd >= 0) {
1536 device->kgsl_dma_type = TU_KGSL_DMA_TYPE_DMAHEAP;
1537 } else {
1538 dma_fd = open(ion_path, O_RDONLY);
1539 if (dma_fd >= 0) {
1540 /* ION_IOC_FREE is only available on legacy ION */
1541 struct ion_handle_data free = { .handle = 0 };
1542 if (safe_ioctl(dma_fd, ION_IOC_FREE, &free) >= 0 || errno != ENOTTY)
1543 device->kgsl_dma_type = TU_KGSL_DMA_TYPE_ION_LEGACY;
1544 else
1545 device->kgsl_dma_type = TU_KGSL_DMA_TYPE_ION;
1546 } else {
1547 mesa_logw(
1548 "Unable to open neither %s nor %s, VK_KHR_external_memory_fd would be "
1549 "unavailable: %s",
1550 dma_heap_path, ion_path, strerror(errno));
1551 }
1552 }
1553
1554 VkResult result = VK_ERROR_INITIALIZATION_FAILED;
1555
1556 struct kgsl_devinfo info;
1557 if (get_kgsl_prop(fd, KGSL_PROP_DEVICE_INFO, &info, sizeof(info)))
1558 goto fail;
1559
1560 uint64_t gmem_iova;
1561 if (get_kgsl_prop(fd, KGSL_PROP_UCHE_GMEM_VADDR, &gmem_iova, sizeof(gmem_iova)))
1562 goto fail;
1563
1564 /* kgsl version check? */
1565
1566 device->instance = instance;
1567 device->master_fd = -1;
1568 device->local_fd = fd;
1569 device->kgsl_dma_fd = dma_fd;
1570
1571 device->dev_id.gpu_id =
1572 ((info.chip_id >> 24) & 0xff) * 100 +
1573 ((info.chip_id >> 16) & 0xff) * 10 +
1574 ((info.chip_id >> 8) & 0xff);
1575 device->dev_id.chip_id = info.chip_id;
1576 device->gmem_size = debug_get_num_option("TU_GMEM", info.gmem_sizebytes);
1577 device->gmem_base = gmem_iova;
1578
1579 device->submitqueue_priority_count = 1;
1580
1581 device->timeline_type = vk_sync_timeline_get_type(&vk_kgsl_sync_type);
1582
1583 device->sync_types[0] = &vk_kgsl_sync_type;
1584 device->sync_types[1] = &device->timeline_type.sync;
1585 device->sync_types[2] = NULL;
1586
1587 device->heap.size = tu_get_system_heap_size(device);
1588 device->heap.used = 0u;
1589 device->heap.flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT;
1590
1591 device->has_set_iova = kgsl_is_memory_type_supported(
1592 fd, KGSL_MEMFLAGS_USE_CPU_MAP);
1593
1594 /* Even if the kernel is new enough, the GPU itself may not support it. */
1595 device->has_cached_coherent_memory = kgsl_is_memory_type_supported(
1596 fd, KGSL_MEMFLAGS_IOCOHERENT |
1597 (KGSL_CACHEMODE_WRITEBACK << KGSL_CACHEMODE_SHIFT));
1598
1599 instance->knl = &kgsl_knl_funcs;
1600
1601 result = tu_physical_device_init(device, instance);
1602 if (result != VK_SUCCESS)
1603 goto fail;
1604
1605 list_addtail(&device->vk.link, &instance->vk.physical_devices.list);
1606
1607 return VK_SUCCESS;
1608
1609 fail:
1610 vk_free(&instance->vk.alloc, device);
1611 close(fd);
1612 if (dma_fd >= 0)
1613 close(dma_fd);
1614 return result;
1615 }
1616