xref: /aosp_15_r20/external/mesa3d/src/freedreno/vulkan/tu_knl_kgsl.cc (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright © 2020 Google, Inc.
3  * SPDX-License-Identifier: MIT
4  */
5 
6 #include "tu_knl.h"
7 
#include <errno.h>
#include <fcntl.h>
#include <limits.h>
#include <poll.h>
#include <stdint.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <linux/dma-heap.h>
15 
16 #include "msm_kgsl.h"
17 #include "ion/ion.h"
18 #include "ion/ion_4.19.h"
19 
20 #include "vk_util.h"
21 
22 #include "util/os_file.h"
23 #include "util/u_debug.h"
24 #include "util/u_vector.h"
25 #include "util/libsync.h"
26 #include "util/timespec.h"
27 
28 #include "tu_cmd_buffer.h"
29 #include "tu_cs.h"
30 #include "tu_device.h"
31 #include "tu_dynamic_rendering.h"
32 #include "tu_rmv.h"
33 
34 /* ION_HEAP(ION_SYSTEM_HEAP_ID) */
35 #define KGSL_ION_SYSTEM_HEAP_MASK (1u << 25)
36 
37 
/* ioctl(2) wrapper that transparently retries calls interrupted by a
 * signal (EINTR) or transiently failing with EAGAIN.
 */
static int
safe_ioctl(int fd, unsigned long request, void *arg)
{
   int r = ioctl(fd, request, arg);
   while (r == -1 && (errno == EINTR || errno == EAGAIN))
      r = ioctl(fd, request, arg);

   return r;
}
49 
50 static int
kgsl_submitqueue_new(struct tu_device * dev,int priority,uint32_t * queue_id)51 kgsl_submitqueue_new(struct tu_device *dev,
52                      int priority,
53                      uint32_t *queue_id)
54 {
55    struct kgsl_drawctxt_create req = {
56       .flags = KGSL_CONTEXT_SAVE_GMEM |
57               KGSL_CONTEXT_NO_GMEM_ALLOC |
58               KGSL_CONTEXT_PREAMBLE,
59    };
60 
61    int ret = safe_ioctl(dev->physical_device->local_fd, IOCTL_KGSL_DRAWCTXT_CREATE, &req);
62    if (ret)
63       return ret;
64 
65    *queue_id = req.drawctxt_id;
66 
67    return 0;
68 }
69 
70 static void
kgsl_submitqueue_close(struct tu_device * dev,uint32_t queue_id)71 kgsl_submitqueue_close(struct tu_device *dev, uint32_t queue_id)
72 {
73    struct kgsl_drawctxt_destroy req = {
74       .drawctxt_id = queue_id,
75    };
76 
77    safe_ioctl(dev->physical_device->local_fd, IOCTL_KGSL_DRAWCTXT_DESTROY, &req);
78 }
79 
80 static void kgsl_bo_finish(struct tu_device *dev, struct tu_bo *bo);
81 
82 static VkResult
bo_init_new_dmaheap(struct tu_device * dev,struct tu_bo ** out_bo,uint64_t size,enum tu_bo_alloc_flags flags)83 bo_init_new_dmaheap(struct tu_device *dev, struct tu_bo **out_bo, uint64_t size,
84                 enum tu_bo_alloc_flags flags)
85 {
86    struct dma_heap_allocation_data alloc = {
87       .len = size,
88       .fd_flags = O_RDWR | O_CLOEXEC,
89    };
90 
91    int ret;
92    ret = safe_ioctl(dev->physical_device->kgsl_dma_fd, DMA_HEAP_IOCTL_ALLOC,
93                     &alloc);
94 
95    if (ret) {
96       return vk_errorf(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY,
97                        "DMA_HEAP_IOCTL_ALLOC failed (%s)", strerror(errno));
98    }
99 
100    return tu_bo_init_dmabuf(dev, out_bo, -1, alloc.fd);
101 }
102 
103 static VkResult
bo_init_new_ion(struct tu_device * dev,struct tu_bo ** out_bo,uint64_t size,enum tu_bo_alloc_flags flags)104 bo_init_new_ion(struct tu_device *dev, struct tu_bo **out_bo, uint64_t size,
105                 enum tu_bo_alloc_flags flags)
106 {
107    struct ion_new_allocation_data alloc = {
108       .len = size,
109       .heap_id_mask = KGSL_ION_SYSTEM_HEAP_MASK,
110       .flags = 0,
111       .fd = -1,
112    };
113 
114    int ret;
115    ret = safe_ioctl(dev->physical_device->kgsl_dma_fd, ION_IOC_NEW_ALLOC, &alloc);
116    if (ret) {
117       return vk_errorf(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY,
118                        "ION_IOC_NEW_ALLOC failed (%s)", strerror(errno));
119    }
120 
121    return tu_bo_init_dmabuf(dev, out_bo, -1, alloc.fd);
122 }
123 
124 static VkResult
bo_init_new_ion_legacy(struct tu_device * dev,struct tu_bo ** out_bo,uint64_t size,enum tu_bo_alloc_flags flags)125 bo_init_new_ion_legacy(struct tu_device *dev, struct tu_bo **out_bo, uint64_t size,
126                        enum tu_bo_alloc_flags flags)
127 {
128    struct ion_allocation_data alloc = {
129       .len = size,
130       .align = 4096,
131       .heap_id_mask = KGSL_ION_SYSTEM_HEAP_MASK,
132       .flags = 0,
133       .handle = -1,
134    };
135 
136    int ret;
137    ret = safe_ioctl(dev->physical_device->kgsl_dma_fd, ION_IOC_ALLOC, &alloc);
138    if (ret) {
139       return vk_errorf(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY,
140                        "ION_IOC_ALLOC failed (%s)", strerror(errno));
141    }
142 
143    struct ion_fd_data share = {
144       .handle = alloc.handle,
145       .fd = -1,
146    };
147 
148    ret = safe_ioctl(dev->physical_device->kgsl_dma_fd, ION_IOC_SHARE, &share);
149    if (ret) {
150       return vk_errorf(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY,
151                        "ION_IOC_SHARE failed (%s)", strerror(errno));
152    }
153 
154    struct ion_handle_data free = {
155       .handle = alloc.handle,
156    };
157    ret = safe_ioctl(dev->physical_device->kgsl_dma_fd, ION_IOC_FREE, &free);
158    if (ret) {
159       return vk_errorf(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY,
160                        "ION_IOC_FREE failed (%s)", strerror(errno));
161    }
162 
163    return tu_bo_init_dmabuf(dev, out_bo, -1, share.fd);
164 }
165 
/* Allocate a new BO and register it in the device's BO table.
 *
 * Shareable BOs are allocated through dma-heap/ion so they can later be
 * exported as dma-bufs; all other BOs use IOCTL_KGSL_GPUMEM_ALLOC_ID.
 * Replayable BOs (capture/replay) are additionally CPU-mapped here,
 * optionally at the caller-requested client_iova.
 *
 * Returns VK_SUCCESS and stores the BO in *out_bo, or an appropriate
 * VK_ERROR_* on failure.
 */
static VkResult
kgsl_bo_init(struct tu_device *dev,
             struct vk_object_base *base,
             struct tu_bo **out_bo,
             uint64_t size,
             uint64_t client_iova,
             VkMemoryPropertyFlags mem_property,
             enum tu_bo_alloc_flags flags,
             const char *name)
{
   if (flags & TU_BO_ALLOC_SHAREABLE) {
      /* The Vulkan spec doesn't forbid allocating exportable memory with a
       * fixed address, only imported memory, but on kgsl we can't sensibly
       * implement it so just always reject it.
       */
      if (client_iova) {
         return vk_errorf(dev, VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS,
                          "cannot allocate an exportable BO with a fixed address");
      }

      /* Dispatch to whichever DMA allocation interface was detected for
       * this physical device.
       */
      switch(dev->physical_device->kgsl_dma_type) {
      case TU_KGSL_DMA_TYPE_DMAHEAP:
         return bo_init_new_dmaheap(dev, out_bo, size, flags);
      case TU_KGSL_DMA_TYPE_ION:
         return bo_init_new_ion(dev, out_bo, size, flags);
      case TU_KGSL_DMA_TYPE_ION_LEGACY:
         return bo_init_new_ion_legacy(dev, out_bo, size, flags);
      }
   }

   struct kgsl_gpumem_alloc_id req = {
      .size = size,
   };

   /* Map the Vulkan memory property onto a KGSL cache mode: host-cached
    * memory becomes writeback (optionally IO-coherent), everything else
    * write-combined.
    */
   if (mem_property & VK_MEMORY_PROPERTY_HOST_CACHED_BIT) {
      if (mem_property & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) {
         req.flags |= KGSL_MEMFLAGS_IOCOHERENT;
      }

      req.flags |= KGSL_CACHEMODE_WRITEBACK << KGSL_CACHEMODE_SHIFT;
   } else {
      req.flags |= KGSL_CACHEMODE_WRITECOMBINE << KGSL_CACHEMODE_SHIFT;
   }

   if (flags & TU_BO_ALLOC_GPU_READ_ONLY)
      req.flags |= KGSL_MEMFLAGS_GPUREADONLY;

   /* USE_CPU_MAP makes the GPU address follow the CPU mapping (SVM),
    * which is what lets us honor client_iova below.
    */
   if (flags & TU_BO_ALLOC_REPLAYABLE)
      req.flags |= KGSL_MEMFLAGS_USE_CPU_MAP;

   int ret;

   ret = safe_ioctl(dev->physical_device->local_fd,
                    IOCTL_KGSL_GPUMEM_ALLOC_ID, &req);
   if (ret) {
      return vk_errorf(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY,
                       "GPUMEM_ALLOC_ID failed (%s)", strerror(errno));
   }

   /* BOs live in a sparse array indexed by their kernel-assigned id;
    * a zero gem_handle marks a free slot.
    */
   struct tu_bo* bo = tu_device_lookup_bo(dev, req.id);
   assert(bo && bo->gem_handle == 0);

   *bo = (struct tu_bo) {
      .gem_handle = req.id,
      .size = req.mmapsize,
      .iova = req.gpuaddr,
      .name = tu_debug_bos_add(dev, req.mmapsize, name),
      .refcnt = 1,
      .shared_fd = -1,
      .base = base,
   };

   if (flags & TU_BO_ALLOC_REPLAYABLE) {
      /* The device fd exposes each BO for mmap at an offset derived from
       * its id (same convention as kgsl_bo_map).
       * NOTE(review): req.id << 12 is a 32-bit shift; an id >= 1 << 20
       * would wrap before the widening assignment — confirm ids stay
       * below that in practice.
       */
      uint64_t offset = req.id << 12;
      void *map = mmap((void *)client_iova, bo->size, PROT_READ | PROT_WRITE,
                       MAP_SHARED, dev->physical_device->local_fd, offset);
      if (map == MAP_FAILED) {
         kgsl_bo_finish(dev, bo);

         return vk_errorf(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY,
                          "mmap failed (%s)", strerror(errno));
      }

      /* Without MAP_FIXED the requested address is only a hint, so check
       * that the kernel actually honored it.
       */
      if (client_iova && (uint64_t)map != client_iova) {
         kgsl_bo_finish(dev, bo);

         return vk_errorf(dev, VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS,
                          "mmap could not map the given address");
      }

      bo->map = map;
      bo->iova = (uint64_t)map;

      /* Because we're using SVM, the CPU mapping and GPU mapping are the same
       * and the CPU mapping must stay fixed for the lifetime of the BO.
       */
      bo->never_unmap = true;

   }


   *out_bo = bo;

   TU_RMV(bo_allocate, dev, bo);
   if (flags & TU_BO_ALLOC_INTERNAL_RESOURCE) {
      TU_RMV(internal_resource_create, dev, bo);
      TU_RMV(resource_name, dev, bo, name);
   }

   return VK_SUCCESS;
}
277 
/* Import a dma-buf fd as a BO.
 *
 * The kernel assigns an id on import; the BO's size and GPU address are
 * then queried with IOCTL_KGSL_GPUOBJ_INFO. A private dup of the fd is
 * kept in shared_fd for later export and CPU mapping; the caller retains
 * ownership of the fd it passed in.
 */
static VkResult
kgsl_bo_init_dmabuf(struct tu_device *dev,
                    struct tu_bo **out_bo,
                    uint64_t size,
                    int fd)
{
   struct kgsl_gpuobj_import_dma_buf import_dmabuf = {
      .fd = fd,
   };
   struct kgsl_gpuobj_import req = {
      .priv = (uintptr_t)&import_dmabuf,
      .priv_len = sizeof(import_dmabuf),
      .flags = 0,
      .type = KGSL_USER_MEM_TYPE_DMABUF,
   };
   int ret;

   ret = safe_ioctl(dev->physical_device->local_fd,
                    IOCTL_KGSL_GPUOBJ_IMPORT, &req);
   if (ret)
      return vk_errorf(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY,
                       "Failed to import dma-buf (%s)\n", strerror(errno));

   struct kgsl_gpuobj_info info_req = {
      .id = req.id,
   };

   /* NOTE(review): if this query fails, the object imported above is not
    * released — verify whether a GPUOBJ free is needed on this path.
    */
   ret = safe_ioctl(dev->physical_device->local_fd,
                    IOCTL_KGSL_GPUOBJ_INFO, &info_req);
   if (ret)
      return vk_errorf(dev, VK_ERROR_OUT_OF_DEVICE_MEMORY,
                       "Failed to get dma-buf info (%s)\n", strerror(errno));

   /* BOs live in a sparse array indexed by their kernel-assigned id;
    * a zero gem_handle marks a free slot.
    */
   struct tu_bo* bo = tu_device_lookup_bo(dev, req.id);
   assert(bo && bo->gem_handle == 0);

   *bo = (struct tu_bo) {
      .gem_handle = req.id,
      .size = info_req.size,
      .iova = info_req.gpuaddr,
      .name = tu_debug_bos_add(dev, info_req.size, "dmabuf"),
      .refcnt = 1,
      /* keep our own reference; the caller still owns `fd` */
      .shared_fd = os_dupfd_cloexec(fd),
   };

   *out_bo = bo;

   return VK_SUCCESS;
}
327 
328 static int
kgsl_bo_export_dmabuf(struct tu_device * dev,struct tu_bo * bo)329 kgsl_bo_export_dmabuf(struct tu_device *dev, struct tu_bo *bo)
330 {
331    assert(bo->shared_fd != -1);
332    return os_dupfd_cloexec(bo->shared_fd);
333 }
334 
335 static VkResult
kgsl_bo_map(struct tu_device * dev,struct tu_bo * bo,void * placed_addr)336 kgsl_bo_map(struct tu_device *dev, struct tu_bo *bo, void *placed_addr)
337 {
338    void *map = MAP_FAILED;
339    if (bo->shared_fd == -1) {
340       uint64_t offset = bo->gem_handle << 12;
341       map = mmap(placed_addr, bo->size, PROT_READ | PROT_WRITE,
342                  MAP_SHARED | (placed_addr != NULL ? MAP_FIXED : 0),
343                  dev->physical_device->local_fd, offset);
344    } else {
345       map = mmap(placed_addr, bo->size, PROT_READ | PROT_WRITE,
346                  MAP_SHARED | (placed_addr != NULL ? MAP_FIXED : 0),
347                  bo->shared_fd, 0);
348    }
349 
350    if (map == MAP_FAILED)
351       return vk_error(dev, VK_ERROR_MEMORY_MAP_FAILED);
352 
353    bo->map = map;
354    TU_RMV(bo_map, dev, bo);
355 
356    return VK_SUCCESS;
357 }
358 
/* Backend hook for marking a BO to be included in GPU crash dumps.
 * Intentionally a no-op on kgsl — there is no per-BO flag set here.
 */
static void
kgsl_bo_allow_dump(struct tu_device *dev, struct tu_bo *bo)
{
}
363 
/* Drop a reference to a BO and destroy it when the refcount reaches
 * zero: unmap the CPU mapping, close any dma-buf fd, clear the
 * sparse-array slot and finally release the kernel allocation.
 */
static void
kgsl_bo_finish(struct tu_device *dev, struct tu_bo *bo)
{
   assert(bo->gem_handle);

   if (!p_atomic_dec_zero(&bo->refcnt))
      return;

   if (bo->map) {
      TU_RMV(bo_unmap, dev, bo);
      munmap(bo->map, bo->size);
   }

   if (bo->shared_fd != -1)
      close(bo->shared_fd);

   TU_RMV(bo_destroy, dev, bo);

   /* Capture the id before the entry is wiped below. */
   struct kgsl_gpumem_free_id req = {
      .id = bo->gem_handle
   };

   /* Tell sparse array that entry is free */
   memset(bo, 0, sizeof(*bo));

   safe_ioctl(dev->physical_device->local_fd, IOCTL_KGSL_GPUMEM_FREE_ID, &req);
}
391 
392 static VkResult
get_kgsl_prop(int fd,unsigned int type,void * value,size_t size)393 get_kgsl_prop(int fd, unsigned int type, void *value, size_t size)
394 {
395    struct kgsl_device_getproperty getprop = {
396       .type = type,
397       .value = value,
398       .sizebytes = size,
399    };
400 
401    return safe_ioctl(fd, IOCTL_KGSL_DEVICE_GETPROPERTY, &getprop)
402              ? VK_ERROR_UNKNOWN
403              : VK_SUCCESS;
404 }
405 
406 static bool
kgsl_is_memory_type_supported(int fd,uint32_t flags)407 kgsl_is_memory_type_supported(int fd, uint32_t flags)
408 {
409    struct kgsl_gpumem_alloc_id req_alloc = {
410       .flags = flags,
411       .size = 0x1000,
412    };
413 
414    int ret = safe_ioctl(fd, IOCTL_KGSL_GPUMEM_ALLOC_ID, &req_alloc);
415    if (ret) {
416       return false;
417    }
418 
419    struct kgsl_gpumem_free_id req_free = { .id = req_alloc.id };
420 
421    safe_ioctl(fd, IOCTL_KGSL_GPUMEM_FREE_ID, &req_free);
422 
423    return true;
424 }
425 
/* Lifecycle states of a kgsl_syncobj payload. */
enum kgsl_syncobj_state {
   KGSL_SYNCOBJ_STATE_UNSIGNALED,  /* no payload attached yet */
   KGSL_SYNCOBJ_STATE_SIGNALED,    /* permanently signaled */
   KGSL_SYNCOBJ_STATE_TS,          /* waits on a (queue, timestamp) pair */
   KGSL_SYNCOBJ_STATE_FD,          /* waits on a sync-file fd */
};

/* kgsl has no kernel syncobj primitive, so fences are tracked in
 * userspace as either a (queue, timestamp) pair or a sync-file fd,
 * depending on `state`.
 */
struct kgsl_syncobj
{
   struct vk_object_base base;
   enum kgsl_syncobj_state state;

   /* valid when state == KGSL_SYNCOBJ_STATE_TS */
   struct tu_queue *queue;
   uint32_t timestamp;

   /* valid when state == KGSL_SYNCOBJ_STATE_FD; -1 when unset */
   int fd;
};

/* (context id, timestamp) pair identifying a point on a queue's
 * timeline for u_trace.
 */
struct tu_u_trace_syncobj
{
   uint32_t msm_queue_id;
   uint32_t timestamp;
};
449 
450 static void
kgsl_syncobj_init(struct kgsl_syncobj * s,bool signaled)451 kgsl_syncobj_init(struct kgsl_syncobj *s, bool signaled)
452 {
453    s->state =
454       signaled ? KGSL_SYNCOBJ_STATE_SIGNALED : KGSL_SYNCOBJ_STATE_UNSIGNALED;
455 
456    s->timestamp = UINT32_MAX;
457    s->fd = -1;
458 }
459 
460 static void
kgsl_syncobj_reset(struct kgsl_syncobj * s)461 kgsl_syncobj_reset(struct kgsl_syncobj *s)
462 {
463    if (s->state == KGSL_SYNCOBJ_STATE_FD && s->fd >= 0) {
464       ASSERTED int ret = close(s->fd);
465       assert(ret == 0);
466       s->fd = -1;
467    } else if (s->state == KGSL_SYNCOBJ_STATE_TS) {
468       s->timestamp = UINT32_MAX;
469    }
470 
471    s->state = KGSL_SYNCOBJ_STATE_UNSIGNALED;
472 }
473 
/* Destroying a syncobj only needs to release its payload: there is no
 * kernel object backing it beyond the optional sync-file fd.
 */
static void
kgsl_syncobj_destroy(struct kgsl_syncobj *s)
{
   kgsl_syncobj_reset(s);
}
479 
480 static int
timestamp_to_fd(struct tu_queue * queue,uint32_t timestamp)481 timestamp_to_fd(struct tu_queue *queue, uint32_t timestamp)
482 {
483    int fd;
484    struct kgsl_timestamp_event event = {
485       .type = KGSL_TIMESTAMP_EVENT_FENCE,
486       .timestamp = timestamp,
487       .context_id = queue->msm_queue_id,
488       .priv = &fd,
489       .len = sizeof(fd),
490    };
491 
492    int ret = safe_ioctl(queue->device->fd, IOCTL_KGSL_TIMESTAMP_EVENT, &event);
493    if (ret)
494       return -1;
495 
496    return fd;
497 }
498 
/* Convert a timestamp-state syncobj into a sync-file fd owned by the
 * caller. Only valid for KGSL_SYNCOBJ_STATE_TS.
 */
static int
kgsl_syncobj_ts_to_fd(const struct kgsl_syncobj *syncobj)
{
   assert(syncobj->state == KGSL_SYNCOBJ_STATE_TS);
   return timestamp_to_fd(syncobj->queue, syncobj->timestamp);
}
505 
/* Returns true if timestamp a is at least as recent as b, using modular
 * arithmetic so wraparound is handled correctly. Relies on timestamps
 * never differing by more than (1 << 31).
 */
static inline bool
timestamp_cmp(uint32_t a, uint32_t b)
{
   uint32_t delta = a - b;
   return (int32_t) delta >= 0;
}
514 
/* Return the more recent of two wrapping timestamps (see timestamp_cmp
 * for the wraparound rule).
 */
static uint32_t
max_ts(uint32_t a, uint32_t b)
{
   return (int32_t) (a - b) >= 0 ? a : b;
}
520 
/* Return the earlier of two wrapping timestamps (see timestamp_cmp for
 * the wraparound rule).
 */
static uint32_t
min_ts(uint32_t a, uint32_t b)
{
   return (int32_t) (a - b) >= 0 ? b : a;
}
526 
527 static int
get_relative_ms(uint64_t abs_timeout_ns)528 get_relative_ms(uint64_t abs_timeout_ns)
529 {
530    if (abs_timeout_ns >= INT64_MAX)
531       /* We can assume that a wait with a value this high is a forever wait
532        * and return -1 here as it's the infinite timeout for ppoll() while
533        * being the highest unsigned integer value for the wait KGSL IOCTL
534        */
535       return -1;
536 
537    uint64_t cur_time_ms = os_time_get_nano() / 1000000;
538    uint64_t abs_timeout_ms = abs_timeout_ns / 1000000;
539    if (abs_timeout_ms <= cur_time_ms)
540       return 0;
541 
542    return abs_timeout_ms - cur_time_ms;
543 }
544 
/* safe_ioctl is not enough as restarted waits would not adjust the timeout
 * which could lead to waiting substantially longer than requested
 */
/* Wait for `context_id` to retire `timestamp`, bounded by an absolute
 * deadline. Returns 0 on success; on timeout returns -1 with errno set
 * to ETIME (normalizing the kernel's ETIMEDOUT).
 */
static int
wait_timestamp_safe(int fd,
                    unsigned int context_id,
                    unsigned int timestamp,
                    uint64_t abs_timeout_ns)
{
   struct kgsl_device_waittimestamp_ctxtid wait = {
      .context_id = context_id,
      .timestamp = timestamp,
      .timeout = get_relative_ms(abs_timeout_ns),
   };

   while (true) {
      int ret = ioctl(fd, IOCTL_KGSL_DEVICE_WAITTIMESTAMP_CTXTID, &wait);

      if (ret == -1 && (errno == EINTR || errno == EAGAIN)) {
         int timeout_ms = get_relative_ms(abs_timeout_ns);

         /* update timeout to consider time that has passed since the start */
         if (timeout_ms == 0) {
            errno = ETIME;
            return -1;
         }

         /* Retry with the shrunken remaining timeout. */
         wait.timeout = timeout_ms;
      } else if (ret == -1 && errno == ETIMEDOUT) {
         /* The kernel returns ETIMEDOUT if the timeout is reached, but
          * we want to return ETIME instead.
          */
         errno = ETIME;
         return -1;
      } else {
         return ret;
      }
   }
}
584 
/* Block until the syncobj signals or the absolute deadline passes.
 * Returns VK_SUCCESS or VK_TIMEOUT.
 *
 * An unsignaled syncobj has no payload to wait on yet, so we first wait
 * on the device's timeline condition variable for a submit to attach
 * one, then wait on whatever payload it resolved to.
 */
static VkResult
kgsl_syncobj_wait(struct tu_device *device,
                  struct kgsl_syncobj *s,
                  uint64_t abs_timeout_ns)
{
   if (s->state == KGSL_SYNCOBJ_STATE_UNSIGNALED) {
      /* If this syncobj is unsignaled we need to wait for it to resolve to a
       * valid syncobj prior to letting the rest of the wait continue, this
       * avoids needing kernel support for wait-before-signal semantics.
       */

      if (abs_timeout_ns == 0)
         return VK_TIMEOUT; // If this is a simple poll then we can return early

      pthread_mutex_lock(&device->submit_mutex);
      struct timespec abstime;
      timespec_from_nsec(&abstime, abs_timeout_ns);

      /* s->state is updated by submits under submit_mutex; each submit
       * broadcasts timeline_cond.
       */
      while (s->state == KGSL_SYNCOBJ_STATE_UNSIGNALED) {
         int ret;
         if (abs_timeout_ns == UINT64_MAX) {
            /* Forever wait: no need for the timed variant. */
            ret = pthread_cond_wait(&device->timeline_cond,
                                    &device->submit_mutex);
         } else {
            ret = pthread_cond_timedwait(&device->timeline_cond,
                                         &device->submit_mutex, &abstime);
         }
         if (ret != 0) {
            assert(ret == ETIMEDOUT);
            pthread_mutex_unlock(&device->submit_mutex);
            return VK_TIMEOUT;
         }
      }

      pthread_mutex_unlock(&device->submit_mutex);
   }

   switch (s->state) {
   case KGSL_SYNCOBJ_STATE_SIGNALED:
      return VK_SUCCESS;

   case KGSL_SYNCOBJ_STATE_UNSIGNALED:
      return VK_TIMEOUT;

   case KGSL_SYNCOBJ_STATE_TS: {
      /* Timestamp payload: wait on the owning queue's kernel context. */
      int ret = wait_timestamp_safe(device->fd, s->queue->msm_queue_id,
                                    s->timestamp, abs_timeout_ns);
      if (ret) {
         assert(errno == ETIME);
         return VK_TIMEOUT;
      } else {
         return VK_SUCCESS;
      }
   }

   case KGSL_SYNCOBJ_STATE_FD: {
      /* Sync-file payload: wait on the fd. */
      int ret = sync_wait(s->fd, get_relative_ms(abs_timeout_ns));
      if (ret) {
         assert(errno == ETIME);
         return VK_TIMEOUT;
      } else {
         return VK_SUCCESS;
      }
   }

   default:
      unreachable("invalid syncobj state");
   }
}
654 
/* Iterate `sync` over syncobjs[0..count) visiting only entries whose
 * state matches `filter`. Requires locals `sync` (pointer) and `count`
 * (uint32_t) to be in scope at the use site.
 *
 * Bug fix: the previous condition `sync = syncobjs[i], i < count`
 * loaded syncobjs[count] — one element past the end — before testing
 * the bound; the bound is now checked first.
 */
#define kgsl_syncobj_foreach_state(syncobjs, filter) \
   for (uint32_t i = 0; i < count && (sync = syncobjs[i], true); i++) \
      if (sync->state == filter)
658 
659 static VkResult
kgsl_syncobj_wait_any(struct tu_device * device,struct kgsl_syncobj ** syncobjs,uint32_t count,uint64_t abs_timeout_ns)660 kgsl_syncobj_wait_any(struct tu_device* device, struct kgsl_syncobj **syncobjs, uint32_t count, uint64_t abs_timeout_ns)
661 {
662    if (count == 0)
663       return VK_TIMEOUT;
664    else if (count == 1)
665       return kgsl_syncobj_wait(device, syncobjs[0], abs_timeout_ns);
666 
667    uint32_t num_fds = 0;
668    struct tu_queue *queue = NULL;
669    struct kgsl_syncobj *sync = NULL;
670 
671    /* Simple case, we already have a signal one */
672    kgsl_syncobj_foreach_state(syncobjs, KGSL_SYNCOBJ_STATE_SIGNALED)
673       return VK_SUCCESS;
674 
675    kgsl_syncobj_foreach_state(syncobjs, KGSL_SYNCOBJ_STATE_FD)
676       num_fds++;
677 
678    /* If we have TS from different queues we cannot compare them and would
679     * have to convert them into FDs
680     */
681    bool convert_ts_to_fd = false;
682    kgsl_syncobj_foreach_state(syncobjs, KGSL_SYNCOBJ_STATE_TS) {
683       if (queue != NULL && sync->queue != queue) {
684          convert_ts_to_fd = true;
685          break;
686       }
687       queue = sync->queue;
688    }
689 
690    /* If we have no FD nor TS syncobjs then we can return immediately */
691    if (num_fds == 0 && queue == NULL)
692       return VK_TIMEOUT;
693 
694    VkResult result = VK_TIMEOUT;
695 
696    struct u_vector poll_fds = { 0 };
697    uint32_t lowest_timestamp = 0;
698 
699    if (convert_ts_to_fd || num_fds > 0)
700       u_vector_init(&poll_fds, 4, sizeof(struct pollfd));
701 
702    if (convert_ts_to_fd) {
703       kgsl_syncobj_foreach_state(syncobjs, KGSL_SYNCOBJ_STATE_TS) {
704          struct pollfd *poll_fd = (struct pollfd *) u_vector_add(&poll_fds);
705          poll_fd->fd = timestamp_to_fd(sync->queue, sync->timestamp);
706          poll_fd->events = POLLIN;
707       }
708    } else {
709       /* TSs could be merged by finding the one with the lowest timestamp */
710       bool first_ts = true;
711       kgsl_syncobj_foreach_state(syncobjs, KGSL_SYNCOBJ_STATE_TS) {
712          if (first_ts || timestamp_cmp(sync->timestamp, lowest_timestamp)) {
713             first_ts = false;
714             lowest_timestamp = sync->timestamp;
715          }
716       }
717 
718       if (num_fds) {
719          struct pollfd *poll_fd = (struct pollfd *) u_vector_add(&poll_fds);
720          poll_fd->fd = timestamp_to_fd(queue, lowest_timestamp);
721          poll_fd->events = POLLIN;
722       }
723    }
724 
725    if (num_fds) {
726       kgsl_syncobj_foreach_state(syncobjs, KGSL_SYNCOBJ_STATE_FD) {
727          struct pollfd *poll_fd = (struct pollfd *) u_vector_add(&poll_fds);
728          poll_fd->fd = sync->fd;
729          poll_fd->events = POLLIN;
730       }
731    }
732 
733    if (u_vector_length(&poll_fds) == 0) {
734       int ret = wait_timestamp_safe(device->fd, queue->msm_queue_id,
735                                     lowest_timestamp, MIN2(abs_timeout_ns, INT64_MAX));
736       if (ret) {
737          assert(errno == ETIME);
738          result = VK_TIMEOUT;
739       } else {
740          result = VK_SUCCESS;
741       }
742    } else {
743       int ret, i;
744 
745       struct pollfd *fds = (struct pollfd *) poll_fds.data;
746       uint32_t fds_count = u_vector_length(&poll_fds);
747       do {
748          ret = poll(fds, fds_count, get_relative_ms(abs_timeout_ns));
749          if (ret > 0) {
750             for (i = 0; i < fds_count; i++) {
751                if (fds[i].revents & (POLLERR | POLLNVAL)) {
752                   errno = EINVAL;
753                   ret = -1;
754                   break;
755                }
756             }
757             break;
758          } else if (ret == 0) {
759             errno = ETIME;
760             break;
761          }
762       } while (ret == -1 && (errno == EINTR || errno == EAGAIN));
763 
764       for (uint32_t i = 0; i < fds_count - num_fds; i++)
765          close(fds[i].fd);
766 
767       if (ret != 0) {
768          assert(errno == ETIME);
769          result = VK_TIMEOUT;
770       } else {
771          result = VK_SUCCESS;
772       }
773    }
774 
775    u_vector_finish(&poll_fds);
776    return result;
777 }
778 
779 static VkResult
kgsl_syncobj_export(struct kgsl_syncobj * s,int * pFd)780 kgsl_syncobj_export(struct kgsl_syncobj *s, int *pFd)
781 {
782    if (!pFd)
783       return VK_SUCCESS;
784 
785    switch (s->state) {
786    case KGSL_SYNCOBJ_STATE_SIGNALED:
787    case KGSL_SYNCOBJ_STATE_UNSIGNALED:
788       /* Getting a sync FD from an unsignaled syncobj is UB in Vulkan */
789       *pFd = -1;
790       return VK_SUCCESS;
791 
792    case KGSL_SYNCOBJ_STATE_FD:
793       if (s->fd < 0)
794          *pFd = -1;
795       else
796          *pFd = dup(s->fd);
797       return VK_SUCCESS;
798 
799    case KGSL_SYNCOBJ_STATE_TS:
800       *pFd = kgsl_syncobj_ts_to_fd(s);
801       return VK_SUCCESS;
802 
803    default:
804       unreachable("Invalid syncobj state");
805    }
806 }
807 
808 static VkResult
kgsl_syncobj_import(struct kgsl_syncobj * s,int fd)809 kgsl_syncobj_import(struct kgsl_syncobj *s, int fd)
810 {
811    kgsl_syncobj_reset(s);
812    if (fd >= 0) {
813       s->state = KGSL_SYNCOBJ_STATE_FD;
814       s->fd = fd;
815    } else {
816       s->state = KGSL_SYNCOBJ_STATE_SIGNALED;
817    }
818 
819    return VK_SUCCESS;
820 }
821 
822 static int
sync_merge_close(const char * name,int fd1,int fd2,bool close_fd2)823 sync_merge_close(const char *name, int fd1, int fd2, bool close_fd2)
824 {
825    int fd = sync_merge(name, fd1, fd2);
826    if (fd < 0)
827       return -1;
828 
829    close(fd1);
830    if (close_fd2)
831       close(fd2);
832 
833    return fd;
834 }
835 
836 /* Merges multiple kgsl_syncobjs into a single one which is only signalled
837  * after all submitted syncobjs are signalled
838  */
839 static struct kgsl_syncobj
kgsl_syncobj_merge(const struct kgsl_syncobj ** syncobjs,uint32_t count)840 kgsl_syncobj_merge(const struct kgsl_syncobj **syncobjs, uint32_t count)
841 {
842    struct kgsl_syncobj ret;
843    kgsl_syncobj_init(&ret, true);
844 
845    if (count == 0)
846       return ret;
847 
848    for (uint32_t i = 0; i < count; ++i) {
849       const struct kgsl_syncobj *sync = syncobjs[i];
850 
851       switch (sync->state) {
852       case KGSL_SYNCOBJ_STATE_SIGNALED:
853          break;
854 
855       case KGSL_SYNCOBJ_STATE_UNSIGNALED:
856          kgsl_syncobj_reset(&ret);
857          return ret;
858 
859       case KGSL_SYNCOBJ_STATE_TS:
860          if (ret.state == KGSL_SYNCOBJ_STATE_TS) {
861             if (ret.queue == sync->queue) {
862                ret.timestamp = max_ts(ret.timestamp, sync->timestamp);
863             } else {
864                ret.state = KGSL_SYNCOBJ_STATE_FD;
865                int sync_fd = kgsl_syncobj_ts_to_fd(sync);
866                ret.fd = sync_merge_close("tu_sync", ret.fd, sync_fd, true);
867                assert(ret.fd >= 0);
868             }
869          } else if (ret.state == KGSL_SYNCOBJ_STATE_FD) {
870             int sync_fd = kgsl_syncobj_ts_to_fd(sync);
871             ret.fd = sync_merge_close("tu_sync", ret.fd, sync_fd, true);
872             assert(ret.fd >= 0);
873          } else {
874             ret = *sync;
875          }
876          break;
877 
878       case KGSL_SYNCOBJ_STATE_FD:
879          if (ret.state == KGSL_SYNCOBJ_STATE_FD) {
880             ret.fd = sync_merge_close("tu_sync", ret.fd, sync->fd, false);
881             assert(ret.fd >= 0);
882          } else if (ret.state == KGSL_SYNCOBJ_STATE_TS) {
883             ret.state = KGSL_SYNCOBJ_STATE_FD;
884             int sync_fd = kgsl_syncobj_ts_to_fd(sync);
885             ret.fd = sync_merge_close("tu_sync", ret.fd, sync_fd, true);
886             assert(ret.fd >= 0);
887          } else {
888             ret = *sync;
889             ret.fd = dup(ret.fd);
890             assert(ret.fd >= 0);
891          }
892          break;
893 
894       default:
895          unreachable("invalid syncobj state");
896       }
897    }
898 
899    return ret;
900 }
901 
/* vk_sync implementation wrapper embedding the kgsl backend payload. */
struct vk_kgsl_syncobj
{
   struct vk_sync vk;           /* common vk_sync base */
   struct kgsl_syncobj syncobj; /* kgsl-specific fence state */
};
907 
908 static VkResult
vk_kgsl_sync_init(struct vk_device * device,struct vk_sync * sync,uint64_t initial_value)909 vk_kgsl_sync_init(struct vk_device *device,
910                   struct vk_sync *sync,
911                   uint64_t initial_value)
912 {
913    struct vk_kgsl_syncobj *s = container_of(sync, struct vk_kgsl_syncobj, vk);
914    kgsl_syncobj_init(&s->syncobj, initial_value != 0);
915    return VK_SUCCESS;
916 }
917 
918 static void
vk_kgsl_sync_finish(struct vk_device * device,struct vk_sync * sync)919 vk_kgsl_sync_finish(struct vk_device *device, struct vk_sync *sync)
920 {
921    struct vk_kgsl_syncobj *s = container_of(sync, struct vk_kgsl_syncobj, vk);
922    kgsl_syncobj_destroy(&s->syncobj);
923 }
924 
925 static VkResult
vk_kgsl_sync_reset(struct vk_device * device,struct vk_sync * sync)926 vk_kgsl_sync_reset(struct vk_device *device, struct vk_sync *sync)
927 {
928    struct vk_kgsl_syncobj *s = container_of(sync, struct vk_kgsl_syncobj, vk);
929    kgsl_syncobj_reset(&s->syncobj);
930    return VK_SUCCESS;
931 }
932 
933 static VkResult
vk_kgsl_sync_move(struct vk_device * device,struct vk_sync * dst,struct vk_sync * src)934 vk_kgsl_sync_move(struct vk_device *device,
935                   struct vk_sync *dst,
936                   struct vk_sync *src)
937 {
938    struct vk_kgsl_syncobj *d = container_of(dst, struct vk_kgsl_syncobj, vk);
939    struct vk_kgsl_syncobj *s = container_of(src, struct vk_kgsl_syncobj, vk);
940    kgsl_syncobj_reset(&d->syncobj);
941    d->syncobj = s->syncobj;
942    kgsl_syncobj_init(&s->syncobj, false);
943    return VK_SUCCESS;
944 }
945 
946 static VkResult
vk_kgsl_sync_wait(struct vk_device * _device,struct vk_sync * sync,uint64_t wait_value,enum vk_sync_wait_flags wait_flags,uint64_t abs_timeout_ns)947 vk_kgsl_sync_wait(struct vk_device *_device,
948                   struct vk_sync *sync,
949                   uint64_t wait_value,
950                   enum vk_sync_wait_flags wait_flags,
951                   uint64_t abs_timeout_ns)
952 {
953    struct tu_device *device = container_of(_device, struct tu_device, vk);
954    struct vk_kgsl_syncobj *s = container_of(sync, struct vk_kgsl_syncobj, vk);
955 
956    if (wait_flags & VK_SYNC_WAIT_PENDING)
957       return VK_SUCCESS;
958 
959    return kgsl_syncobj_wait(device, &s->syncobj, abs_timeout_ns);
960 }
961 
962 static VkResult
vk_kgsl_sync_wait_many(struct vk_device * _device,uint32_t wait_count,const struct vk_sync_wait * waits,enum vk_sync_wait_flags wait_flags,uint64_t abs_timeout_ns)963 vk_kgsl_sync_wait_many(struct vk_device *_device,
964                        uint32_t wait_count,
965                        const struct vk_sync_wait *waits,
966                        enum vk_sync_wait_flags wait_flags,
967                        uint64_t abs_timeout_ns)
968 {
969    struct tu_device *device = container_of(_device, struct tu_device, vk);
970 
971    if (wait_flags & VK_SYNC_WAIT_PENDING)
972       return VK_SUCCESS;
973 
974    if (wait_flags & VK_SYNC_WAIT_ANY) {
975       struct kgsl_syncobj *syncobjs[wait_count];
976       for (uint32_t i = 0; i < wait_count; i++) {
977          syncobjs[i] =
978             &container_of(waits[i].sync, struct vk_kgsl_syncobj, vk)->syncobj;
979       }
980 
981       return kgsl_syncobj_wait_any(device, syncobjs, wait_count,
982                                    abs_timeout_ns);
983    } else {
984       for (uint32_t i = 0; i < wait_count; i++) {
985          struct vk_kgsl_syncobj *s =
986             container_of(waits[i].sync, struct vk_kgsl_syncobj, vk);
987 
988          VkResult result =
989             kgsl_syncobj_wait(device, &s->syncobj, abs_timeout_ns);
990          if (result != VK_SUCCESS)
991             return result;
992       }
993       return VK_SUCCESS;
994    }
995 }
996 
997 static VkResult
vk_kgsl_sync_import_sync_file(struct vk_device * device,struct vk_sync * sync,int fd)998 vk_kgsl_sync_import_sync_file(struct vk_device *device,
999                               struct vk_sync *sync,
1000                               int fd)
1001 {
1002    struct vk_kgsl_syncobj *s = container_of(sync, struct vk_kgsl_syncobj, vk);
1003    if (fd >= 0) {
1004       fd = dup(fd);
1005       if (fd < 0) {
1006          mesa_loge("vk_kgsl_sync_import_sync_file: dup failed: %s",
1007                    strerror(errno));
1008          return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
1009       }
1010    }
1011    return kgsl_syncobj_import(&s->syncobj, fd);
1012 }
1013 
1014 static VkResult
vk_kgsl_sync_export_sync_file(struct vk_device * device,struct vk_sync * sync,int * pFd)1015 vk_kgsl_sync_export_sync_file(struct vk_device *device,
1016                               struct vk_sync *sync,
1017                               int *pFd)
1018 {
1019    struct vk_kgsl_syncobj *s = container_of(sync, struct vk_kgsl_syncobj, vk);
1020    return kgsl_syncobj_export(&s->syncobj, pFd);
1021 }
1022 
1023 const struct vk_sync_type vk_kgsl_sync_type = {
1024    .size = sizeof(struct vk_kgsl_syncobj),
1025    .features = (enum vk_sync_features)
1026                (VK_SYNC_FEATURE_BINARY |
1027                 VK_SYNC_FEATURE_GPU_WAIT |
1028                 VK_SYNC_FEATURE_GPU_MULTI_WAIT |
1029                 VK_SYNC_FEATURE_CPU_WAIT |
1030                 VK_SYNC_FEATURE_CPU_RESET |
1031                 VK_SYNC_FEATURE_WAIT_ANY |
1032                 VK_SYNC_FEATURE_WAIT_PENDING),
1033    .init = vk_kgsl_sync_init,
1034    .finish = vk_kgsl_sync_finish,
1035    .reset = vk_kgsl_sync_reset,
1036    .move = vk_kgsl_sync_move,
1037    .wait = vk_kgsl_sync_wait,
1038    .wait_many = vk_kgsl_sync_wait_many,
1039    .import_sync_file = vk_kgsl_sync_import_sync_file,
1040    .export_sync_file = vk_kgsl_sync_export_sync_file,
1041 };
1042 
1043 static VkResult
kgsl_queue_submit(struct tu_queue * queue,struct vk_queue_submit * vk_submit)1044 kgsl_queue_submit(struct tu_queue *queue, struct vk_queue_submit *vk_submit)
1045 {
1046    MESA_TRACE_FUNC();
1047 
1048    bool u_trace_enabled = u_trace_should_process(&queue->device->trace_context);
1049    bool has_trace_points = false;
1050 
1051    if (vk_submit->command_buffer_count == 0) {
1052       pthread_mutex_lock(&queue->device->submit_mutex);
1053 
1054       const struct kgsl_syncobj *wait_semaphores[vk_submit->wait_count + 1];
1055       for (uint32_t i = 0; i < vk_submit->wait_count; i++) {
1056          wait_semaphores[i] = &container_of(vk_submit->waits[i].sync,
1057                                             struct vk_kgsl_syncobj, vk)
1058                                   ->syncobj;
1059       }
1060 
1061       struct kgsl_syncobj last_submit_sync;
1062       if (queue->fence >= 0)
1063          last_submit_sync = (struct kgsl_syncobj) {
1064             .state = KGSL_SYNCOBJ_STATE_TS,
1065             .queue = queue,
1066             .timestamp = queue->fence,
1067          };
1068       else
1069          last_submit_sync = (struct kgsl_syncobj) {
1070             .state = KGSL_SYNCOBJ_STATE_SIGNALED,
1071          };
1072 
1073       wait_semaphores[vk_submit->wait_count] = &last_submit_sync;
1074 
1075       struct kgsl_syncobj wait_sync =
1076          kgsl_syncobj_merge(wait_semaphores, vk_submit->wait_count + 1);
1077       assert(wait_sync.state !=
1078              KGSL_SYNCOBJ_STATE_UNSIGNALED); // Would wait forever
1079 
1080       for (uint32_t i = 0; i < vk_submit->signal_count; i++) {
1081          struct kgsl_syncobj *signal_sync =
1082             &container_of(vk_submit->signals[i].sync, struct vk_kgsl_syncobj,
1083                           vk)
1084                 ->syncobj;
1085 
1086          kgsl_syncobj_reset(signal_sync);
1087          *signal_sync = wait_sync;
1088       }
1089 
1090       pthread_mutex_unlock(&queue->device->submit_mutex);
1091       pthread_cond_broadcast(&queue->device->timeline_cond);
1092 
1093       return VK_SUCCESS;
1094    }
1095 
1096    uint32_t perf_pass_index =
1097       queue->device->perfcntrs_pass_cs ? vk_submit->perf_pass_index : ~0;
1098 
1099    if (TU_DEBUG(LOG_SKIP_GMEM_OPS))
1100       tu_dbg_log_gmem_load_store_skips(queue->device);
1101 
1102    VkResult result = VK_SUCCESS;
1103 
1104    pthread_mutex_lock(&queue->device->submit_mutex);
1105 
1106    struct tu_cmd_buffer **cmd_buffers =
1107       (struct tu_cmd_buffer **) vk_submit->command_buffers;
1108    static_assert(offsetof(struct tu_cmd_buffer, vk) == 0,
1109                  "vk must be first member of tu_cmd_buffer");
1110    uint32_t cmdbuf_count = vk_submit->command_buffer_count;
1111 
1112    result =
1113       tu_insert_dynamic_cmdbufs(queue->device, &cmd_buffers, &cmdbuf_count);
1114    if (result != VK_SUCCESS) {
1115       pthread_mutex_unlock(&queue->device->submit_mutex);
1116       return result;
1117    }
1118 
1119    uint32_t entry_count = 0;
1120    for (uint32_t i = 0; i < cmdbuf_count; ++i) {
1121       struct tu_cmd_buffer *cmd_buffer = cmd_buffers[i];
1122 
1123       if (perf_pass_index != ~0)
1124          entry_count++;
1125 
1126       entry_count += cmd_buffer->cs.entry_count;
1127 
1128       if (u_trace_enabled && u_trace_has_points(&cmd_buffers[i]->trace)) {
1129          if (!(cmd_buffers[i]->usage_flags &
1130                VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT))
1131             entry_count++;
1132 
1133          has_trace_points = true;
1134       }
1135    }
1136 
1137    if (tu_autotune_submit_requires_fence(cmd_buffers, cmdbuf_count))
1138       entry_count++;
1139 
1140    struct kgsl_command_object *cmds = (struct kgsl_command_object *)
1141       vk_alloc(&queue->device->vk.alloc, sizeof(*cmds) * entry_count,
1142                alignof(*cmds), VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
1143    if (cmds == NULL) {
1144       pthread_mutex_unlock(&queue->device->submit_mutex);
1145       return vk_error(queue, VK_ERROR_OUT_OF_HOST_MEMORY);
1146    }
1147 
1148    uint32_t obj_count = 0;
1149    if (has_trace_points)
1150       obj_count++;
1151 
1152    struct kgsl_command_object *objs = (struct kgsl_command_object *)
1153       vk_alloc(&queue->device->vk.alloc, sizeof(*objs) * obj_count,
1154                alignof(*objs), VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
1155 
1156    struct tu_u_trace_submission_data *u_trace_submission_data = NULL;
1157    if (has_trace_points) {
1158       tu_u_trace_submission_data_create(
1159          queue->device, cmd_buffers, cmdbuf_count, &u_trace_submission_data);
1160 
1161       mtx_lock(&queue->device->kgsl_profiling_mutex);
1162       tu_suballoc_bo_alloc(&u_trace_submission_data->kgsl_timestamp_bo,
1163                            &queue->device->kgsl_profiling_suballoc,
1164                            sizeof(struct kgsl_cmdbatch_profiling_buffer), 4);
1165       mtx_unlock(&queue->device->kgsl_profiling_mutex);
1166    }
1167 
1168    uint32_t entry_idx = 0;
1169    for (uint32_t i = 0; i < cmdbuf_count; i++) {
1170       struct tu_cmd_buffer *cmd_buffer = cmd_buffers[i];
1171       struct tu_cs *cs = &cmd_buffer->cs;
1172 
1173       if (perf_pass_index != ~0) {
1174          struct tu_cs_entry *perf_cs_entry =
1175             &cmd_buffer->device->perfcntrs_pass_cs_entries[perf_pass_index];
1176 
1177          cmds[entry_idx++] = (struct kgsl_command_object) {
1178             .gpuaddr = perf_cs_entry->bo->iova + perf_cs_entry->offset,
1179             .size = perf_cs_entry->size,
1180             .flags = KGSL_CMDLIST_IB,
1181             .id = perf_cs_entry->bo->gem_handle,
1182          };
1183       }
1184 
1185       for (uint32_t j = 0; j < cs->entry_count; j++) {
1186          cmds[entry_idx++] = (struct kgsl_command_object) {
1187             .gpuaddr = cs->entries[j].bo->iova + cs->entries[j].offset,
1188             .size = cs->entries[j].size,
1189             .flags = KGSL_CMDLIST_IB,
1190             .id = cs->entries[j].bo->gem_handle,
1191          };
1192       }
1193 
1194       if (u_trace_submission_data &&
1195           u_trace_submission_data->cmd_trace_data[i].timestamp_copy_cs) {
1196          struct tu_cs_entry *trace_cs_entry =
1197             &u_trace_submission_data->cmd_trace_data[i]
1198                 .timestamp_copy_cs->entries[0];
1199          cmds[entry_idx++] = (struct kgsl_command_object) {
1200             .offset = trace_cs_entry->offset,
1201             .gpuaddr = trace_cs_entry->bo->iova,
1202             .size = trace_cs_entry->size,
1203             .flags = KGSL_CMDLIST_IB,
1204             .id = trace_cs_entry->bo->gem_handle,
1205          };
1206       }
1207    }
1208 
1209    struct kgsl_cmdbatch_profiling_buffer *profiling_buffer = NULL;
1210    uint32_t obj_idx = 0;
1211    if (u_trace_submission_data) {
1212       struct tu_suballoc_bo *bo = &u_trace_submission_data->kgsl_timestamp_bo;
1213 
1214       objs[obj_idx++] = (struct kgsl_command_object) {
1215          .offset = bo->iova - bo->bo->iova,
1216          .gpuaddr = bo->bo->iova,
1217          .size = sizeof(struct kgsl_cmdbatch_profiling_buffer),
1218          .flags = KGSL_OBJLIST_MEMOBJ | KGSL_OBJLIST_PROFILE,
1219          .id = bo->bo->gem_handle,
1220       };
1221       profiling_buffer =
1222          (struct kgsl_cmdbatch_profiling_buffer *) tu_suballoc_bo_map(bo);
1223       memset(profiling_buffer, 0, sizeof(*profiling_buffer));
1224    }
1225 
1226    if (tu_autotune_submit_requires_fence(cmd_buffers, cmdbuf_count)) {
1227       struct tu_cs *autotune_cs = tu_autotune_on_submit(
1228          queue->device, &queue->device->autotune, cmd_buffers, cmdbuf_count);
1229       cmds[entry_idx++] = (struct kgsl_command_object) {
1230          .gpuaddr =
1231             autotune_cs->entries[0].bo->iova + autotune_cs->entries[0].offset,
1232          .size = autotune_cs->entries[0].size,
1233          .flags = KGSL_CMDLIST_IB,
1234          .id = autotune_cs->entries[0].bo->gem_handle,
1235       };
1236    }
1237 
1238    const struct kgsl_syncobj *wait_semaphores[vk_submit->wait_count];
1239    for (uint32_t i = 0; i < vk_submit->wait_count; i++) {
1240       wait_semaphores[i] =
1241          &container_of(vk_submit->waits[i].sync, struct vk_kgsl_syncobj, vk)
1242              ->syncobj;
1243    }
1244 
1245    struct kgsl_syncobj wait_sync =
1246       kgsl_syncobj_merge(wait_semaphores, vk_submit->wait_count);
1247    assert(wait_sync.state !=
1248           KGSL_SYNCOBJ_STATE_UNSIGNALED); // Would wait forever
1249 
1250    struct kgsl_cmd_syncpoint_timestamp ts;
1251    struct kgsl_cmd_syncpoint_fence fn;
1252    struct kgsl_command_syncpoint sync = { 0 };
1253    bool has_sync = false;
1254    switch (wait_sync.state) {
1255    case KGSL_SYNCOBJ_STATE_SIGNALED:
1256       break;
1257 
1258    case KGSL_SYNCOBJ_STATE_TS:
1259       ts.context_id = wait_sync.queue->msm_queue_id;
1260       ts.timestamp = wait_sync.timestamp;
1261 
1262       has_sync = true;
1263       sync.type = KGSL_CMD_SYNCPOINT_TYPE_TIMESTAMP;
1264       sync.priv = (uintptr_t) &ts;
1265       sync.size = sizeof(ts);
1266       break;
1267 
1268    case KGSL_SYNCOBJ_STATE_FD:
1269       fn.fd = wait_sync.fd;
1270 
1271       has_sync = true;
1272       sync.type = KGSL_CMD_SYNCPOINT_TYPE_FENCE;
1273       sync.priv = (uintptr_t) &fn;
1274       sync.size = sizeof(fn);
1275       break;
1276 
1277    default:
1278       unreachable("invalid syncobj state");
1279    }
1280 
1281    struct kgsl_gpu_command req = {
1282       .flags = KGSL_CMDBATCH_SUBMIT_IB_LIST,
1283       .cmdlist = (uintptr_t) cmds,
1284       .cmdsize = sizeof(struct kgsl_command_object),
1285       .numcmds = entry_idx,
1286       .synclist = (uintptr_t) &sync,
1287       .syncsize = sizeof(sync),
1288       .numsyncs = has_sync != 0 ? 1 : 0,
1289       .context_id = queue->msm_queue_id,
1290    };
1291 
1292    if (obj_idx) {
1293       req.flags |= KGSL_CMDBATCH_PROFILING;
1294       req.objlist = (uintptr_t) objs;
1295       req.objsize = sizeof(struct kgsl_command_object);
1296       req.numobjs = obj_idx;
1297    }
1298 
1299    int ret = safe_ioctl(queue->device->physical_device->local_fd,
1300                         IOCTL_KGSL_GPU_COMMAND, &req);
1301 
1302    uint64_t gpu_offset = 0;
1303 #if HAVE_PERFETTO
1304    if (profiling_buffer) {
1305       /* We need to wait for KGSL to queue the GPU command before we can read
1306        * the timestamp. Since this is just for profiling and doesn't take too
1307        * long, we can just busy-wait for it.
1308        */
1309       while (p_atomic_read(&profiling_buffer->gpu_ticks_queued) == 0);
1310 
1311       struct kgsl_perfcounter_read_group perf = {
1312          .groupid = KGSL_PERFCOUNTER_GROUP_ALWAYSON,
1313          .countable = 0,
1314          .value = 0
1315       };
1316 
1317       struct kgsl_perfcounter_read req = {
1318          .reads = &perf,
1319          .count = 1,
1320       };
1321 
1322       ret = safe_ioctl(queue->device->fd, IOCTL_KGSL_PERFCOUNTER_READ, &req);
1323       /* Older KGSL has some kind of garbage in upper 32 bits */
1324       uint64_t offseted_gpu_ts = perf.value & 0xffffffff;
1325 
1326       gpu_offset = tu_device_ticks_to_ns(
1327          queue->device, offseted_gpu_ts - profiling_buffer->gpu_ticks_queued);
1328 
1329       struct tu_perfetto_clocks clocks = {
1330          .cpu = profiling_buffer->wall_clock_ns,
1331          .gpu_ts = tu_device_ticks_to_ns(queue->device,
1332                                          profiling_buffer->gpu_ticks_queued),
1333          .gpu_ts_offset = gpu_offset,
1334       };
1335 
1336       clocks = tu_perfetto_submit(queue->device, queue->device->submit_count, &clocks);
1337       gpu_offset = clocks.gpu_ts_offset;
1338    }
1339 #endif
1340 
1341    kgsl_syncobj_destroy(&wait_sync);
1342 
1343    if (ret) {
1344       result = vk_device_set_lost(&queue->device->vk, "submit failed: %s\n",
1345                                   strerror(errno));
1346       goto fail_submit;
1347    }
1348 
1349    p_atomic_set(&queue->fence, req.timestamp);
1350 
1351    for (uint32_t i = 0; i < vk_submit->signal_count; i++) {
1352       struct kgsl_syncobj *signal_sync =
1353          &container_of(vk_submit->signals[i].sync, struct vk_kgsl_syncobj, vk)
1354              ->syncobj;
1355 
1356       kgsl_syncobj_reset(signal_sync);
1357       signal_sync->state = KGSL_SYNCOBJ_STATE_TS;
1358       signal_sync->queue = queue;
1359       signal_sync->timestamp = req.timestamp;
1360    }
1361 
1362    if (u_trace_submission_data) {
1363       struct tu_u_trace_submission_data *submission_data =
1364          u_trace_submission_data;
1365       submission_data->submission_id = queue->device->submit_count;
1366       submission_data->gpu_ts_offset = gpu_offset;
1367       /* We have to allocate it here since it is different between drm/kgsl */
1368       submission_data->syncobj = (struct tu_u_trace_syncobj *)
1369          vk_alloc(&queue->device->vk.alloc, sizeof(struct tu_u_trace_syncobj),
1370                8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
1371          submission_data->syncobj->timestamp = req.timestamp;
1372          submission_data->syncobj->msm_queue_id = queue->msm_queue_id;
1373 
1374       u_trace_submission_data = NULL;
1375 
1376       for (uint32_t i = 0; i < submission_data->cmd_buffer_count; i++) {
1377          bool free_data = i == submission_data->last_buffer_with_tracepoints;
1378          if (submission_data->cmd_trace_data[i].trace)
1379             u_trace_flush(submission_data->cmd_trace_data[i].trace,
1380                           submission_data, queue->device->vk.current_frame,
1381                           free_data);
1382 
1383          if (!submission_data->cmd_trace_data[i].timestamp_copy_cs) {
1384             /* u_trace is owned by cmd_buffer */
1385             submission_data->cmd_trace_data[i].trace = NULL;
1386          }
1387       }
1388    }
1389 
1390    queue->device->submit_count++;
1391 
1392    pthread_mutex_unlock(&queue->device->submit_mutex);
1393    pthread_cond_broadcast(&queue->device->timeline_cond);
1394 
1395    u_trace_context_process(&queue->device->trace_context, false);
1396 
1397    if (cmd_buffers != (struct tu_cmd_buffer **) vk_submit->command_buffers)
1398       vk_free(&queue->device->vk.alloc, cmd_buffers);
1399 
1400    vk_free(&queue->device->vk.alloc, cmds);
1401 
1402    return VK_SUCCESS;
1403 
1404 fail_submit:
1405    pthread_mutex_unlock(&queue->device->submit_mutex);
1406 
1407    if (result != VK_SUCCESS) {
1408       mtx_lock(&queue->device->kgsl_profiling_mutex);
1409       tu_suballoc_bo_free(&queue->device->kgsl_profiling_suballoc,
1410                           &u_trace_submission_data->kgsl_timestamp_bo);
1411       mtx_unlock(&queue->device->kgsl_profiling_mutex);
1412    }
1413 
1414    if (cmd_buffers != (struct tu_cmd_buffer **) vk_submit->command_buffers)
1415       vk_free(&queue->device->vk.alloc, cmd_buffers);
1416 
1417    vk_free(&queue->device->vk.alloc, cmds);
1418 
1419    return result;
1420 }
1421 
1422 static VkResult
kgsl_device_wait_u_trace(struct tu_device * dev,struct tu_u_trace_syncobj * syncobj)1423 kgsl_device_wait_u_trace(struct tu_device *dev, struct tu_u_trace_syncobj *syncobj)
1424 {
1425    struct kgsl_device_waittimestamp_ctxtid req = {
1426       .context_id = syncobj->msm_queue_id,
1427       .timestamp = syncobj->timestamp,
1428       .timeout = 5000, // 5s
1429    };
1430 
1431    int ret = safe_ioctl(dev->fd, IOCTL_KGSL_DEVICE_WAITTIMESTAMP_CTXTID, &req);
1432 
1433    if (ret) {
1434       assert(errno == ETIME);
1435       return VK_TIMEOUT;
1436    }
1437 
1438    return VK_SUCCESS;
1439 }
1440 
1441 static VkResult
kgsl_device_init(struct tu_device * dev)1442 kgsl_device_init(struct tu_device *dev)
1443 {
1444    dev->fd = dev->physical_device->local_fd;
1445    return VK_SUCCESS;
1446 }
1447 
/* tu_knl::device_finish hook: nothing to tear down on the KGSL path (the
 * fd belongs to the physical device).
 */
static void
kgsl_device_finish(struct tu_device *dev)
{
}
1453 
1454 static int
kgsl_device_get_gpu_timestamp(struct tu_device * dev,uint64_t * ts)1455 kgsl_device_get_gpu_timestamp(struct tu_device *dev, uint64_t *ts)
1456 {
1457    unreachable("");
1458    return 0;
1459 }
1460 
/* tu_knl::device_get_suspend_count hook: report zero suspends. */
static int
kgsl_device_get_suspend_count(struct tu_device *dev, uint64_t *suspend_count)
{
   /* kgsl doesn't have a way to get it */
   *suspend_count = 0;
   return 0;
}
1468 
1469 static VkResult
kgsl_device_check_status(struct tu_device * device)1470 kgsl_device_check_status(struct tu_device *device)
1471 {
1472    for (unsigned i = 0; i < TU_MAX_QUEUE_FAMILIES; i++) {
1473       for (unsigned q = 0; q < device->queue_count[i]; q++) {
1474          /* KGSL's KGSL_PROP_GPU_RESET_STAT takes the u32 msm_queue_id and returns a
1475          * KGSL_CTX_STAT_* for the worst reset that happened since the last time it
1476          * was queried on that queue.
1477          */
1478          uint32_t value = device->queues[i][q].msm_queue_id;
1479          VkResult status = get_kgsl_prop(device->fd, KGSL_PROP_GPU_RESET_STAT,
1480                                        &value, sizeof(value));
1481          if (status != VK_SUCCESS)
1482             return vk_device_set_lost(&device->vk, "Failed to get GPU reset status");
1483 
1484          if (value != KGSL_CTX_STAT_NO_ERROR &&
1485             value != KGSL_CTX_STAT_INNOCENT_CONTEXT_RESET_EXT) {
1486             return vk_device_set_lost(&device->vk, "GPU faulted or hung");
1487          }
1488       }
1489    }
1490 
1491    return VK_SUCCESS;
1492 }
1493 
1494 static const struct tu_knl kgsl_knl_funcs = {
1495       .name = "kgsl",
1496 
1497       .device_init = kgsl_device_init,
1498       .device_finish = kgsl_device_finish,
1499       .device_get_gpu_timestamp = kgsl_device_get_gpu_timestamp,
1500       .device_get_suspend_count = kgsl_device_get_suspend_count,
1501       .device_check_status = kgsl_device_check_status,
1502       .submitqueue_new = kgsl_submitqueue_new,
1503       .submitqueue_close = kgsl_submitqueue_close,
1504       .bo_init = kgsl_bo_init,
1505       .bo_init_dmabuf = kgsl_bo_init_dmabuf,
1506       .bo_export_dmabuf = kgsl_bo_export_dmabuf,
1507       .bo_map = kgsl_bo_map,
1508       .bo_allow_dump = kgsl_bo_allow_dump,
1509       .bo_finish = kgsl_bo_finish,
1510       .device_wait_u_trace = kgsl_device_wait_u_trace,
1511       .queue_submit = kgsl_queue_submit,
1512 };
1513 
1514 VkResult
tu_knl_kgsl_load(struct tu_instance * instance,int fd)1515 tu_knl_kgsl_load(struct tu_instance *instance, int fd)
1516 {
1517    if (instance->vk.enabled_extensions.KHR_display) {
1518       return vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
1519                        "I can't KHR_display");
1520    }
1521 
1522    struct tu_physical_device *device = (struct tu_physical_device *)
1523       vk_zalloc(&instance->vk.alloc, sizeof(*device), 8,
1524                 VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
1525    if (!device) {
1526       close(fd);
1527       return vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY);
1528    }
1529 
1530    static const char dma_heap_path[] = "/dev/dma_heap/system";
1531    static const char ion_path[] = "/dev/ion";
1532    int dma_fd;
1533 
1534    dma_fd = open(dma_heap_path, O_RDONLY);
1535    if (dma_fd >= 0) {
1536       device->kgsl_dma_type = TU_KGSL_DMA_TYPE_DMAHEAP;
1537    } else {
1538       dma_fd = open(ion_path, O_RDONLY);
1539       if (dma_fd >= 0) {
1540          /* ION_IOC_FREE available only for legacy ION */
1541          struct ion_handle_data free = { .handle = 0 };
1542          if (safe_ioctl(dma_fd, ION_IOC_FREE, &free) >= 0 || errno != ENOTTY)
1543             device->kgsl_dma_type = TU_KGSL_DMA_TYPE_ION_LEGACY;
1544          else
1545             device->kgsl_dma_type = TU_KGSL_DMA_TYPE_ION;
1546       } else {
1547          mesa_logw(
1548             "Unable to open neither %s nor %s, VK_KHR_external_memory_fd would be "
1549             "unavailable: %s",
1550             dma_heap_path, ion_path, strerror(errno));
1551       }
1552    }
1553 
1554    VkResult result = VK_ERROR_INITIALIZATION_FAILED;
1555 
1556    struct kgsl_devinfo info;
1557    if (get_kgsl_prop(fd, KGSL_PROP_DEVICE_INFO, &info, sizeof(info)))
1558       goto fail;
1559 
1560    uint64_t gmem_iova;
1561    if (get_kgsl_prop(fd, KGSL_PROP_UCHE_GMEM_VADDR, &gmem_iova, sizeof(gmem_iova)))
1562       goto fail;
1563 
1564    /* kgsl version check? */
1565 
1566    device->instance = instance;
1567    device->master_fd = -1;
1568    device->local_fd = fd;
1569    device->kgsl_dma_fd = dma_fd;
1570 
1571    device->dev_id.gpu_id =
1572       ((info.chip_id >> 24) & 0xff) * 100 +
1573       ((info.chip_id >> 16) & 0xff) * 10 +
1574       ((info.chip_id >>  8) & 0xff);
1575    device->dev_id.chip_id = info.chip_id;
1576    device->gmem_size = debug_get_num_option("TU_GMEM", info.gmem_sizebytes);
1577    device->gmem_base = gmem_iova;
1578 
1579    device->submitqueue_priority_count = 1;
1580 
1581    device->timeline_type = vk_sync_timeline_get_type(&vk_kgsl_sync_type);
1582 
1583    device->sync_types[0] = &vk_kgsl_sync_type;
1584    device->sync_types[1] = &device->timeline_type.sync;
1585    device->sync_types[2] = NULL;
1586 
1587    device->heap.size = tu_get_system_heap_size(device);
1588    device->heap.used = 0u;
1589    device->heap.flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT;
1590 
1591    device->has_set_iova = kgsl_is_memory_type_supported(
1592       fd, KGSL_MEMFLAGS_USE_CPU_MAP);
1593 
1594    /* Even if kernel is new enough, the GPU itself may not support it. */
1595    device->has_cached_coherent_memory = kgsl_is_memory_type_supported(
1596       fd, KGSL_MEMFLAGS_IOCOHERENT |
1597              (KGSL_CACHEMODE_WRITEBACK << KGSL_CACHEMODE_SHIFT));
1598 
1599    instance->knl = &kgsl_knl_funcs;
1600 
1601    result = tu_physical_device_init(device, instance);
1602    if (result != VK_SUCCESS)
1603       goto fail;
1604 
1605    list_addtail(&device->vk.link, &instance->vk.physical_devices.list);
1606 
1607    return VK_SUCCESS;
1608 
1609 fail:
1610    vk_free(&instance->vk.alloc, device);
1611    close(fd);
1612    if (dma_fd >= 0)
1613       close(dma_fd);
1614    return result;
1615 }
1616