1 /*
2 * Copyright 2019 Collabora, Ltd.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 *
23 * Authors (Collabora):
24 * Alyssa Rosenzweig <[email protected]>
25 */
26 #include <errno.h>
27 #include <fcntl.h>
28 #include <pthread.h>
29 #include <stdio.h>
30 #include <xf86drm.h>
31
32 #include "pan_bo.h"
33 #include "pan_device.h"
34 #include "pan_util.h"
35 #include "wrap.h"
36
37 #include "util/os_mman.h"
38
39 #include "util/u_inlines.h"
40 #include "util/u_math.h"
41
/* This file implements a userspace BO cache. Allocating and freeing
 * GPU-visible buffers is very expensive, and even the extra kernel roundtrips
 * add more work than we would like at this point. So caching BOs in userspace
 * solves both of these problems and does not require kernel updates.
 *
 * Cached BOs are sorted into a bucket based on rounding their size down to the
 * nearest power-of-two. Each bucket contains a linked list of free panfrost_bo
 * objects. Putting a BO into the cache is accomplished by adding it to the
 * corresponding bucket. Getting a BO from the cache consists of finding the
 * appropriate bucket and searching it for a suitable BO. A cache eviction is
 * a kernel-level free of a BO and removing it from the bucket. We special-case
 * evicting all BOs from the cache, since that is what is helpful in practice
 * and avoids extra logic around the linked list.
 */
56
57 static uint32_t
to_kmod_bo_flags(uint32_t flags)58 to_kmod_bo_flags(uint32_t flags)
59 {
60 uint32_t kmod_bo_flags = 0;
61
62 if (flags & PAN_BO_EXECUTE)
63 kmod_bo_flags |= PAN_KMOD_BO_FLAG_EXECUTABLE;
64 if (flags & PAN_BO_GROWABLE)
65 kmod_bo_flags |= PAN_KMOD_BO_FLAG_ALLOC_ON_FAULT;
66 if (flags & PAN_BO_INVISIBLE)
67 kmod_bo_flags |= PAN_KMOD_BO_FLAG_NO_MMAP;
68
69 return kmod_bo_flags;
70 }
71
72 static struct panfrost_bo *
panfrost_bo_alloc(struct panfrost_device * dev,size_t size,uint32_t flags,const char * label)73 panfrost_bo_alloc(struct panfrost_device *dev, size_t size, uint32_t flags,
74 const char *label)
75 {
76 struct pan_kmod_vm *exclusive_vm =
77 !(flags & PAN_BO_SHAREABLE) ? dev->kmod.vm : NULL;
78 struct pan_kmod_bo *kmod_bo;
79 struct panfrost_bo *bo;
80
81 kmod_bo = pan_kmod_bo_alloc(dev->kmod.dev, exclusive_vm, size,
82 to_kmod_bo_flags(flags));
83
84 if (kmod_bo == NULL)
85 goto err_alloc;
86
87 bo = pan_lookup_bo(dev, kmod_bo->handle);
88 assert(!memcmp(bo, &((struct panfrost_bo){0}), sizeof(*bo)));
89 bo->kmod_bo = kmod_bo;
90
91 struct pan_kmod_vm_op vm_op = {
92 .type = PAN_KMOD_VM_OP_TYPE_MAP,
93 .va =
94 {
95 .start = PAN_KMOD_VM_MAP_AUTO_VA,
96 .size = bo->kmod_bo->size,
97 },
98 .map =
99 {
100 .bo = bo->kmod_bo,
101 .bo_offset = 0,
102 },
103 };
104
105 int ret =
106 pan_kmod_vm_bind(dev->kmod.vm, PAN_KMOD_VM_OP_MODE_IMMEDIATE, &vm_op, 1);
107
108 if (ret)
109 goto err_bind;
110
111 bo->ptr.gpu = vm_op.va.start;
112 bo->flags = flags;
113 bo->dev = dev;
114 bo->label = label;
115 return bo;
116 err_bind:
117 pan_kmod_bo_put(kmod_bo);
118 /* BO will be freed with the sparse array, but zero to indicate free */
119 memset(bo, 0, sizeof(*bo));
120 err_alloc:
121 return NULL;
122 }
123
124 static void
panfrost_bo_free(struct panfrost_bo * bo)125 panfrost_bo_free(struct panfrost_bo *bo)
126 {
127 struct pan_kmod_bo *kmod_bo = bo->kmod_bo;
128 struct pan_kmod_vm *vm = bo->dev->kmod.vm;
129 uint64_t gpu_va = bo->ptr.gpu;
130
131 /* BO will be freed with the sparse array, but zero to indicate free */
132 memset(bo, 0, sizeof(*bo));
133
134 struct pan_kmod_vm_op vm_op = {
135 .type = PAN_KMOD_VM_OP_TYPE_UNMAP,
136 .va =
137 {
138 .start = gpu_va,
139 .size = kmod_bo->size,
140 },
141 };
142
143 ASSERTED int ret = pan_kmod_vm_bind(
144 vm, PAN_KMOD_VM_OP_MODE_DEFER_TO_NEXT_IDLE_POINT, &vm_op, 1);
145 assert(!ret);
146
147 pan_kmod_bo_put(kmod_bo);
148 }
149
150 /* Returns true if the BO is ready, false otherwise.
151 * access_type is encoding the type of access one wants to ensure is done.
152 * Waiting is always done for writers, but if wait_readers is set then readers
153 * are also waited for.
154 */
155 bool
panfrost_bo_wait(struct panfrost_bo * bo,int64_t timeout_ns,bool wait_readers)156 panfrost_bo_wait(struct panfrost_bo *bo, int64_t timeout_ns, bool wait_readers)
157 {
158 /* If the BO has been exported or imported we can't rely on the cached
159 * state, we need to call the WAIT_BO ioctl.
160 */
161 if (!(bo->flags & PAN_BO_SHARED)) {
162 /* If ->gpu_access is 0, the BO is idle, no need to wait. */
163 if (!bo->gpu_access)
164 return true;
165
166 /* If the caller only wants to wait for writers and no
167 * writes are pending, we don't have to wait.
168 */
169 if (!wait_readers && !(bo->gpu_access & PAN_BO_ACCESS_WRITE))
170 return true;
171 }
172
173 if (pan_kmod_bo_wait(bo->kmod_bo, timeout_ns, !wait_readers)) {
174 /* Set gpu_access to 0 so that the next call to bo_wait()
175 * doesn't have to call the WAIT_BO ioctl.
176 */
177 bo->gpu_access = 0;
178 return true;
179 }
180
181 return false;
182 }
183
184 /* Helper to calculate the bucket index of a BO */
185
186 static unsigned
pan_bucket_index(unsigned size)187 pan_bucket_index(unsigned size)
188 {
189 /* Round down to POT to compute a bucket index */
190
191 unsigned bucket_index = util_logbase2(size);
192
193 /* Clamp the bucket index; all huge allocations will be
194 * sorted into the largest bucket */
195
196 bucket_index = CLAMP(bucket_index, MIN_BO_CACHE_BUCKET, MAX_BO_CACHE_BUCKET);
197
198 /* Reindex from 0 */
199 return (bucket_index - MIN_BO_CACHE_BUCKET);
200 }
201
202 static struct list_head *
pan_bucket(struct panfrost_device * dev,unsigned size)203 pan_bucket(struct panfrost_device *dev, unsigned size)
204 {
205 return &dev->bo_cache.buckets[pan_bucket_index(size)];
206 }
207
208 /* Tries to fetch a BO of sufficient size with the appropriate flags from the
209 * BO cache. If it succeeds, it returns that BO and removes the BO from the
210 * cache. If it fails, it returns NULL signaling the caller to allocate a new
211 * BO. */
212
213 static struct panfrost_bo *
panfrost_bo_cache_fetch(struct panfrost_device * dev,size_t size,uint32_t flags,const char * label,bool dontwait)214 panfrost_bo_cache_fetch(struct panfrost_device *dev, size_t size,
215 uint32_t flags, const char *label, bool dontwait)
216 {
217 pthread_mutex_lock(&dev->bo_cache.lock);
218 struct list_head *bucket = pan_bucket(dev, size);
219 struct panfrost_bo *bo = NULL;
220
221 /* Iterate the bucket looking for something suitable */
222 list_for_each_entry_safe(struct panfrost_bo, entry, bucket, bucket_link) {
223 if (panfrost_bo_size(entry) < size || entry->flags != flags)
224 continue;
225
226 /* If the oldest BO in the cache is busy, likely so is
227 * everything newer, so bail. */
228 if (!panfrost_bo_wait(entry, dontwait ? 0 : INT64_MAX, true))
229 break;
230
231 /* This one works, splice it out of the cache */
232 list_del(&entry->bucket_link);
233 list_del(&entry->lru_link);
234
235 if (!pan_kmod_bo_make_unevictable(entry->kmod_bo)) {
236 panfrost_bo_free(entry);
237 continue;
238 }
239 /* Let's go! */
240 bo = entry;
241 bo->label = label;
242 break;
243 }
244 pthread_mutex_unlock(&dev->bo_cache.lock);
245
246 return bo;
247 }
248
249 static void
panfrost_bo_cache_evict_stale_bos(struct panfrost_device * dev)250 panfrost_bo_cache_evict_stale_bos(struct panfrost_device *dev)
251 {
252 struct timespec time;
253
254 clock_gettime(CLOCK_MONOTONIC, &time);
255 list_for_each_entry_safe(struct panfrost_bo, entry, &dev->bo_cache.lru,
256 lru_link) {
257 /* We want all entries that have been used more than 1 sec
258 * ago to be dropped, others can be kept.
259 * Note the <= 2 check and not <= 1. It's here to account for
260 * the fact that we're only testing ->tv_sec, not ->tv_nsec.
261 * That means we might keep entries that are between 1 and 2
262 * seconds old, but we don't really care, as long as unused BOs
263 * are dropped at some point.
264 */
265 if (time.tv_sec - entry->last_used <= 2)
266 break;
267
268 list_del(&entry->bucket_link);
269 list_del(&entry->lru_link);
270 panfrost_bo_free(entry);
271 }
272 }
273
274 /* Tries to add a BO to the cache. Returns if it was
275 * successful */
276
277 static bool
panfrost_bo_cache_put(struct panfrost_bo * bo)278 panfrost_bo_cache_put(struct panfrost_bo *bo)
279 {
280 struct panfrost_device *dev = bo->dev;
281
282 if (bo->flags & PAN_BO_SHARED || dev->debug & PAN_DBG_NO_CACHE)
283 return false;
284
285 /* Must be first */
286 pthread_mutex_lock(&dev->bo_cache.lock);
287
288 struct list_head *bucket = pan_bucket(dev, MAX2(panfrost_bo_size(bo), 4096));
289 struct timespec time;
290
291 pan_kmod_bo_make_evictable(bo->kmod_bo);
292
293 /* Add us to the bucket */
294 list_addtail(&bo->bucket_link, bucket);
295
296 /* Add us to the LRU list and update the last_used field. */
297 list_addtail(&bo->lru_link, &dev->bo_cache.lru);
298 clock_gettime(CLOCK_MONOTONIC, &time);
299 bo->last_used = time.tv_sec;
300
301 /* Let's do some cleanup in the BO cache while we hold the
302 * lock.
303 */
304 panfrost_bo_cache_evict_stale_bos(dev);
305
306 /* Update the label to help debug BO cache memory usage issues */
307 bo->label = "Unused (BO cache)";
308
309 /* Must be last */
310 pthread_mutex_unlock(&dev->bo_cache.lock);
311 return true;
312 }
313
314 /* Evicts all BOs from the cache. Called during context
315 * destroy or during low-memory situations (to free up
316 * memory that may be unused by us just sitting in our
317 * cache, but still reserved from the perspective of the
318 * OS) */
319
320 void
panfrost_bo_cache_evict_all(struct panfrost_device * dev)321 panfrost_bo_cache_evict_all(struct panfrost_device *dev)
322 {
323 pthread_mutex_lock(&dev->bo_cache.lock);
324 for (unsigned i = 0; i < ARRAY_SIZE(dev->bo_cache.buckets); ++i) {
325 struct list_head *bucket = &dev->bo_cache.buckets[i];
326
327 list_for_each_entry_safe(struct panfrost_bo, entry, bucket, bucket_link) {
328 list_del(&entry->bucket_link);
329 list_del(&entry->lru_link);
330 panfrost_bo_free(entry);
331 }
332 }
333 pthread_mutex_unlock(&dev->bo_cache.lock);
334 }
335
336 void
panfrost_bo_mmap(struct panfrost_bo * bo)337 panfrost_bo_mmap(struct panfrost_bo *bo)
338 {
339 if (bo->ptr.cpu)
340 return;
341
342 bo->ptr.cpu = pan_kmod_bo_mmap(bo->kmod_bo, 0, panfrost_bo_size(bo),
343 PROT_READ | PROT_WRITE, MAP_SHARED, NULL);
344 if (bo->ptr.cpu == MAP_FAILED) {
345 bo->ptr.cpu = NULL;
346 fprintf(stderr, "mmap failed: result=%p size=0x%llx\n", bo->ptr.cpu,
347 (long long)panfrost_bo_size(bo));
348 }
349 }
350
351 static void
panfrost_bo_munmap(struct panfrost_bo * bo)352 panfrost_bo_munmap(struct panfrost_bo *bo)
353 {
354 if (!bo->ptr.cpu)
355 return;
356
357 if (os_munmap((void *)(uintptr_t)bo->ptr.cpu, panfrost_bo_size(bo))) {
358 perror("munmap");
359 abort();
360 }
361
362 bo->ptr.cpu = NULL;
363 }
364
365 struct panfrost_bo *
panfrost_bo_create(struct panfrost_device * dev,size_t size,uint32_t flags,const char * label)366 panfrost_bo_create(struct panfrost_device *dev, size_t size, uint32_t flags,
367 const char *label)
368 {
369 struct panfrost_bo *bo;
370
371 if (dev->debug & PAN_DBG_DUMP) {
372 /* Make sure to CPU-map all BOs except growable ones, so that
373 we can dump them when PAN_MESA_DEBUG=dump. */
374 if (!(flags & PAN_BO_GROWABLE)) {
375 flags &= ~PAN_BO_INVISIBLE;
376 }
377 flags &= ~PAN_BO_DELAY_MMAP;
378 }
379 /* Kernel will fail (confusingly) with EPERM otherwise */
380 assert(size > 0);
381
382 /* To maximize BO cache usage, don't allocate tiny BOs */
383 size = ALIGN_POT(size, 4096);
384
385 /* GROWABLE BOs cannot be mmapped */
386 if (flags & PAN_BO_GROWABLE)
387 assert(flags & PAN_BO_INVISIBLE);
388
389 /* Ideally, we get a BO that's ready in the cache, or allocate a fresh
390 * BO. If allocation fails, we can try waiting for something in the
391 * cache. But if there's no nothing suitable, we should flush the cache
392 * to make space for the new allocation.
393 */
394 bo = panfrost_bo_cache_fetch(dev, size, flags, label, true);
395 if (!bo)
396 bo = panfrost_bo_alloc(dev, size, flags, label);
397 if (!bo)
398 bo = panfrost_bo_cache_fetch(dev, size, flags, label, false);
399 if (!bo) {
400 panfrost_bo_cache_evict_all(dev);
401 bo = panfrost_bo_alloc(dev, size, flags, label);
402 }
403
404 if (!bo)
405 return NULL;
406
407 /* Only mmap now if we know we need to. For CPU-invisible buffers, we
408 * never map since we don't care about their contents; they're purely
409 * for GPU-internal use. But we do trace them anyway. */
410
411 if (!(flags & (PAN_BO_INVISIBLE | PAN_BO_DELAY_MMAP)))
412 panfrost_bo_mmap(bo);
413
414 p_atomic_set(&bo->refcnt, 1);
415
416 if (dev->debug & (PAN_DBG_TRACE | PAN_DBG_SYNC)) {
417 if (flags & PAN_BO_INVISIBLE)
418 pandecode_inject_mmap(dev->decode_ctx, bo->ptr.gpu, NULL,
419 panfrost_bo_size(bo), NULL);
420 else if (!(flags & PAN_BO_DELAY_MMAP))
421 pandecode_inject_mmap(dev->decode_ctx, bo->ptr.gpu, bo->ptr.cpu,
422 panfrost_bo_size(bo), NULL);
423 }
424
425 return bo;
426 }
427
428 void
panfrost_bo_reference(struct panfrost_bo * bo)429 panfrost_bo_reference(struct panfrost_bo *bo)
430 {
431 if (bo) {
432 ASSERTED int count = p_atomic_inc_return(&bo->refcnt);
433 assert(count != 1);
434 }
435 }
436
437 void
panfrost_bo_unreference(struct panfrost_bo * bo)438 panfrost_bo_unreference(struct panfrost_bo *bo)
439 {
440 if (!bo)
441 return;
442
443 /* Don't return to cache if there are still references */
444 assert(p_atomic_read(&bo->refcnt) > 0);
445 if (p_atomic_dec_return(&bo->refcnt))
446 return;
447
448 struct panfrost_device *dev = bo->dev;
449
450 pthread_mutex_lock(&dev->bo_map_lock);
451
452 /* Someone might have imported this BO while we were waiting for the
453 * lock, let's make sure it's still not referenced before freeing it.
454 */
455 if (p_atomic_read(&bo->refcnt) == 0) {
456 /* When the reference count goes to zero, we need to cleanup */
457 panfrost_bo_munmap(bo);
458
459 if (dev->debug & (PAN_DBG_TRACE | PAN_DBG_SYNC))
460 pandecode_inject_free(dev->decode_ctx, bo->ptr.gpu,
461 panfrost_bo_size(bo));
462
463 /* Rather than freeing the BO now, we'll cache the BO for later
464 * allocations if we're allowed to.
465 */
466 if (!panfrost_bo_cache_put(bo))
467 panfrost_bo_free(bo);
468 }
469 pthread_mutex_unlock(&dev->bo_map_lock);
470 }
471
472 struct panfrost_bo *
panfrost_bo_import(struct panfrost_device * dev,int fd)473 panfrost_bo_import(struct panfrost_device *dev, int fd)
474 {
475 struct panfrost_bo *bo;
476 ASSERTED int ret;
477 unsigned gem_handle;
478
479 pthread_mutex_lock(&dev->bo_map_lock);
480 ret = drmPrimeFDToHandle(dev->kmod.dev->fd, fd, &gem_handle);
481 assert(!ret);
482
483 bo = pan_lookup_bo(dev, gem_handle);
484
485 if (!bo->dev) {
486 bo->dev = dev;
487 bo->kmod_bo = pan_kmod_bo_import(dev->kmod.dev, fd, 0);
488
489 struct pan_kmod_vm_op vm_op = {
490 .type = PAN_KMOD_VM_OP_TYPE_MAP,
491 .va =
492 {
493 .start = PAN_KMOD_VM_MAP_AUTO_VA,
494 .size = bo->kmod_bo->size,
495 },
496 .map =
497 {
498 .bo = bo->kmod_bo,
499 .bo_offset = 0,
500 },
501 };
502
503 ASSERTED int ret = pan_kmod_vm_bind(
504 dev->kmod.vm, PAN_KMOD_VM_OP_MODE_IMMEDIATE, &vm_op, 1);
505 assert(!ret);
506
507 bo->ptr.gpu = vm_op.va.start;
508 bo->flags = PAN_BO_SHARED;
509 p_atomic_set(&bo->refcnt, 1);
510
511 /* mmap imported BOs when PAN_MESA_DEBUG=dump */
512 if (dev->debug & PAN_DBG_DUMP)
513 panfrost_bo_mmap(bo);
514 } else {
515 /* bo->refcnt == 0 can happen if the BO
516 * was being released but panfrost_bo_import() acquired the
517 * lock before panfrost_bo_unreference(). In that case, refcnt
518 * is 0 and we can't use panfrost_bo_reference() directly, we
519 * have to re-initialize the refcnt().
520 * Note that panfrost_bo_unreference() checks
521 * refcnt value just after acquiring the lock to
522 * make sure the object is not freed if panfrost_bo_import()
523 * acquired it in the meantime.
524 */
525 if (p_atomic_read(&bo->refcnt) == 0)
526 p_atomic_set(&bo->refcnt, 1);
527 else
528 panfrost_bo_reference(bo);
529 }
530 pthread_mutex_unlock(&dev->bo_map_lock);
531
532 return bo;
533 }
534
535 int
panfrost_bo_export(struct panfrost_bo * bo)536 panfrost_bo_export(struct panfrost_bo *bo)
537 {
538 int ret = pan_kmod_bo_export(bo->kmod_bo);
539 if (ret >= 0)
540 bo->flags |= PAN_BO_SHARED;
541
542 return ret;
543 }
544
545 struct panfrost_bo *
panfrost_bo_from_kmod_bo(struct panfrost_device * dev,struct pan_kmod_bo * kmod_bo)546 panfrost_bo_from_kmod_bo(struct panfrost_device *dev,
547 struct pan_kmod_bo *kmod_bo)
548 {
549 if (!kmod_bo)
550 return NULL;
551
552 struct panfrost_bo *bo = pan_lookup_bo(dev, pan_kmod_bo_handle(kmod_bo));
553 assert(bo->kmod_bo == kmod_bo);
554
555 return bo;
556 }
557