/*
 * Copyright © 2017 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

/**
 * @file iris_bufmgr.c
 *
 * The Iris buffer manager.
 *
 * XXX: write better comments
 * - BOs
 * - Explain BO cache
 * - main interface to GEM in the kernel
 */

#include <util/u_atomic.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <assert.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <stdbool.h>
#include <time.h>
#include <unistd.h>

#include "errno.h"
#include "common/intel_aux_map.h"
#include "common/intel_mem.h"
#include "c99_alloca.h"
#include "dev/intel_debug.h"
#include "common/intel_common.h"
#include "common/intel_gem.h"
#include "dev/intel_device_info.h"
#include "drm-uapi/dma-buf.h"
#include "isl/isl.h"
#include "util/os_mman.h"
#include "util/u_debug.h"
#include "util/macros.h"
#include "util/hash_table.h"
#include "util/list.h"
#include "util/os_file.h"
#include "util/u_dynarray.h"
#include "util/vma.h"
#include "iris_bufmgr.h"
#include "iris_context.h"
#include "string.h"
#include "iris_kmd_backend.h"
#include "i915/iris_bufmgr.h"
#include "xe/iris_bufmgr.h"

#include <xf86drm.h>

#ifdef HAVE_VALGRIND
#include <valgrind.h>
#include <memcheck.h>
#define VG(x) x
#else
#define VG(x)
#endif

/* VALGRIND_FREELIKE_BLOCK unfortunately does not actually undo the earlier
 * VALGRIND_MALLOCLIKE_BLOCK but instead leaves vg convinced the memory is
 * leaked. All because it does not call VG(cli_free) from its
 * VG_USERREQ__FREELIKE_BLOCK handler. Instead of treating the memory like
 * an allocation, we mark it available for use upon mmapping and remove
 * it upon unmapping.
 */
#define VG_DEFINED(ptr, size) VG(VALGRIND_MAKE_MEM_DEFINED(ptr, size))
#define VG_NOACCESS(ptr, size) VG(VALGRIND_MAKE_MEM_NOACCESS(ptr, size))

/* On FreeBSD PAGE_SIZE is already defined in
 * /usr/include/machine/param.h that is indirectly
 * included here.
 */
#ifndef PAGE_SIZE
#define PAGE_SIZE 4096
#endif

#define WARN_ONCE(cond, fmt...) do {                            \
   if (unlikely(cond)) {                                        \
      static bool _warned = false;                              \
      if (!_warned) {                                           \
         fprintf(stderr, "WARNING: ");                          \
         fprintf(stderr, fmt);                                  \
         _warned = true;                                        \
      }                                                         \
   }                                                            \
} while (0)

#define FILE_DEBUG_FLAG DEBUG_BUFMGR

/**
 * For debugging purposes, this returns a time in seconds.
 */
static double
get_time(void)
{
   struct timespec tp;

   clock_gettime(CLOCK_MONOTONIC, &tp);

   return tp.tv_sec + tp.tv_nsec / 1000000000.0;
}

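/* Atomically add "add" to *v unless *v equals "unless".  Returns true if
 * *v already equaled "unless" and therefore no addition was performed.
 */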
static inline int
atomic_add_unless(int *v, int add, int unless)
{
   int c, old;
   c = p_atomic_read(v);
   while (c != unless && (old = p_atomic_cmpxchg(v, c, c + add)) != c)
      c = old;
   return c == unless;
}

static const char *
memzone_name(enum iris_memory_zone memzone)
{
   const char *names[] = {
      [IRIS_MEMZONE_SHADER]   = "shader",
      [IRIS_MEMZONE_BINDER]   = "binder",
      [IRIS_MEMZONE_SCRATCH]  = "scratchsurf",
      [IRIS_MEMZONE_SURFACE]  = "surface",
      [IRIS_MEMZONE_DYNAMIC]  = "dynamic",
      [IRIS_MEMZONE_OTHER]    = "other",
      [IRIS_MEMZONE_BORDER_COLOR_POOL] = "bordercolor",
   };
   assert(memzone < ARRAY_SIZE(names));
   return names[memzone];
}

struct bo_cache_bucket {
   /** List of cached BOs. */
   struct list_head head;

   /** Size of this bucket, in bytes. */
   uint64_t size;
};

struct bo_export {
   /** File descriptor associated with a handle export. */
   int drm_fd;

   /** GEM handle in drm_fd */
   uint32_t gem_handle;

   struct list_head link;
};

struct iris_memregion {
   struct intel_memory_class_instance *region;
   uint64_t size;
};

#define NUM_SLAB_ALLOCATORS 3

struct iris_slab {
   struct pb_slab base;

   /** The BO representing the entire slab */
   struct iris_bo *bo;

   /** Array of iris_bo structs representing BOs allocated out of this slab */
   struct iris_bo *entries;
};

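/* 14 size rows of 4 cache buckets each; with 4 KiB pages, the indexing math
 * in bucket_for_size() puts the last row at up to 32768 pages (128 MiB).
 */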
#define BUCKET_ARRAY_SIZE (14 * 4)

struct iris_bucket_cache {
   struct bo_cache_bucket bucket[BUCKET_ARRAY_SIZE];
   int num_buckets;
};

struct iris_bufmgr {
   /**
    * Link in the global list of buffer managers (global_bufmgr_list).
    */
   struct list_head link;

   uint32_t refcount;

   int fd;

   simple_mtx_t lock;
   simple_mtx_t bo_deps_lock;

   /** Array of lists of cached gem objects of power-of-two sizes */
   struct iris_bucket_cache *bucket_cache;

   time_t time;

   struct hash_table *name_table;
   struct hash_table *handle_table;

   /**
    * List of BOs which we've effectively freed, but are hanging on to
    * until they're idle before closing and returning the VMA.
    */
   struct list_head zombie_list;

   struct util_vma_heap vma_allocator[IRIS_MEMZONE_COUNT];

   struct iris_memregion vram, sys;

   /* Used only when use_global_vm is true. */
   uint32_t global_vm_id;

   int next_screen_id;

   struct intel_device_info devinfo;
   const struct iris_kmd_backend *kmd_backend;
   struct intel_bind_timeline bind_timeline; /* Xe only */
   bool bo_reuse:1;
   bool use_global_vm:1;

   struct intel_aux_map_context *aux_map_ctx;

   struct pb_slabs bo_slabs[NUM_SLAB_ALLOCATORS];

   struct iris_border_color_pool border_color_pool;

   struct iris_bo *dummy_aux_bo;
};

static simple_mtx_t global_bufmgr_list_mutex = SIMPLE_MTX_INITIALIZER;
static struct list_head global_bufmgr_list = {
   .next = &global_bufmgr_list,
   .prev = &global_bufmgr_list,
};

static void bo_free(struct iris_bo *bo);

static struct iris_bo *
find_and_ref_external_bo(struct hash_table *ht, unsigned int key)
{
   struct hash_entry *entry = _mesa_hash_table_search(ht, &key);
   struct iris_bo *bo = entry ? entry->data : NULL;

   if (bo) {
      assert(iris_bo_is_external(bo));
      assert(iris_bo_is_real(bo));
      assert(!bo->real.reusable);

      /* Being non-reusable, the BO cannot be in the cache lists, but it
       * may be in the zombie list if it had reached zero references, but
       * we hadn't yet closed it...and then reimported the same BO.  If it
       * is, then remove it since it's now been resurrected.
       */
      if (list_is_linked(&bo->head))
         list_del(&bo->head);

      iris_bo_reference(bo);
   }

   return bo;
}

/**
 * Finds the cache bucket that fits the given size.
 *
 * The bucket index is computed directly from the requested size in O(1)
 * time rather than by iterating over all the buckets.
 */
static struct bo_cache_bucket *
bucket_for_size(struct iris_bufmgr *bufmgr, uint64_t size,
                enum iris_heap heap, unsigned flags)
{
   if (flags & BO_ALLOC_PROTECTED)
      return NULL;

   /* TODO: Enable bo cache for compressed bos
    * https://gitlab.freedesktop.org/mesa/mesa/-/issues/11362
    */
   if (bufmgr->devinfo.verx10 == 200 && (flags & BO_ALLOC_COMPRESSED))
      return NULL;

   const struct intel_device_info *devinfo = &bufmgr->devinfo;
   struct iris_bucket_cache *cache = &bufmgr->bucket_cache[heap];

   if (devinfo->kmd_type == INTEL_KMD_TYPE_XE &&
       (flags & (BO_ALLOC_SHARED | BO_ALLOC_SCANOUT)))
      return NULL;

   /* Calculating the pages and rounding up to the page size. */
   const unsigned pages = (size + PAGE_SIZE - 1) / PAGE_SIZE;

   /* Row  Bucket sizes    clz((x-1) | 3)   Row    Column
    *        in pages                      stride   size
    *   0:   1  2  3  4 -> 30 30 30 30        4       1
    *   1:   5  6  7  8 -> 29 29 29 29        4       1
    *   2:  10 12 14 16 -> 28 28 28 28        8       2
    *   3:  20 24 28 32 -> 27 27 27 27       16       4
    */
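   /* Worked example: a 56 KiB request is 14 pages, so
    *   row  = 30 - clz(13 | 3) = 30 - clz(15) = 2
    *   prev_row_max_pages = (16 / 2) & ~2 = 8
    *   col  = (14 - 8 + 1) >> 1 = 3
    * giving index = 2 * 4 + (3 - 1) = 10, the 14-page bucket above.
    */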
   const unsigned row = 30 - __builtin_clz((pages - 1) | 3);
   const unsigned row_max_pages = 4 << row;

   /* The '& ~2' is the special case for row 1. In row 1, max pages /
    * 2 is 2, but the previous row maximum is zero (because there is
    * no previous row). All row maximum sizes are power of 2, so that
    * is the only case where that bit will be set.
    */
   const unsigned prev_row_max_pages = (row_max_pages / 2) & ~2;
   int col_size_log2 = row - 1;
   col_size_log2 += (col_size_log2 < 0);

   const unsigned col = (pages - prev_row_max_pages +
                        ((1 << col_size_log2) - 1)) >> col_size_log2;

   /* Calculating the index based on the row and column. */
   const unsigned index = (row * 4) + (col - 1);

   return (index < cache->num_buckets) ? &cache->bucket[index] : NULL;
}

enum iris_memory_zone
iris_memzone_for_address(uint64_t address)
{
   STATIC_ASSERT(IRIS_MEMZONE_OTHER_START    > IRIS_MEMZONE_DYNAMIC_START);
   STATIC_ASSERT(IRIS_MEMZONE_SURFACE_START  > IRIS_MEMZONE_SCRATCH_START);
   STATIC_ASSERT(IRIS_MEMZONE_SCRATCH_START == IRIS_MEMZONE_BINDER_START);
   STATIC_ASSERT(IRIS_MEMZONE_BINDER_START   > IRIS_MEMZONE_SHADER_START);
   STATIC_ASSERT(IRIS_MEMZONE_DYNAMIC_START  > IRIS_MEMZONE_SURFACE_START);
   STATIC_ASSERT(IRIS_BORDER_COLOR_POOL_ADDRESS == IRIS_MEMZONE_DYNAMIC_START);

   if (address >= IRIS_MEMZONE_OTHER_START)
      return IRIS_MEMZONE_OTHER;

   if (address == IRIS_BORDER_COLOR_POOL_ADDRESS)
      return IRIS_MEMZONE_BORDER_COLOR_POOL;

   if (address > IRIS_MEMZONE_DYNAMIC_START)
      return IRIS_MEMZONE_DYNAMIC;

   if (address >= IRIS_MEMZONE_SURFACE_START)
      return IRIS_MEMZONE_SURFACE;

   if (address >= (IRIS_MEMZONE_BINDER_START + IRIS_SCRATCH_ZONE_SIZE))
      return IRIS_MEMZONE_BINDER;

   if (address >= IRIS_MEMZONE_SCRATCH_START)
      return IRIS_MEMZONE_SCRATCH;

   return IRIS_MEMZONE_SHADER;
}

/**
 * Allocate a section of virtual memory for a buffer, assigning an address.
 *
 * This uses either the bucket allocator for the given size, or the large
 * object allocator (util_vma).
 */
static uint64_t
vma_alloc(struct iris_bufmgr *bufmgr,
          enum iris_memory_zone memzone,
          uint64_t size,
          uint64_t alignment)
{
   simple_mtx_assert_locked(&bufmgr->lock);

   const unsigned _2mb = 2 * 1024 * 1024;

   /* Force minimum alignment based on device requirements */
   assert((alignment & (alignment - 1)) == 0);
   alignment = MAX2(alignment, bufmgr->devinfo.mem_alignment);

   /* If the allocation is a multiple of 2MB, ensure the virtual address is
    * aligned to 2MB, so that it's possible for the kernel to use 64K pages.
    */
   if (size % _2mb == 0)
      alignment = MAX2(alignment, _2mb);

   if (memzone == IRIS_MEMZONE_BORDER_COLOR_POOL)
      return IRIS_BORDER_COLOR_POOL_ADDRESS;

   uint64_t addr =
      util_vma_heap_alloc(&bufmgr->vma_allocator[memzone], size, alignment);

   assert((addr >> 48ull) == 0);
   assert((addr % alignment) == 0);

   return intel_canonical_address(addr);
}

static void
vma_free(struct iris_bufmgr *bufmgr,
         uint64_t address,
         uint64_t size)
{
   simple_mtx_assert_locked(&bufmgr->lock);

   if (address == IRIS_BORDER_COLOR_POOL_ADDRESS)
      return;

   /* Un-canonicalize the address. */
   address = intel_48b_address(address);

   if (address == 0ull)
      return;

   enum iris_memory_zone memzone = iris_memzone_for_address(address);

   assert(memzone < ARRAY_SIZE(bufmgr->vma_allocator));

   util_vma_heap_free(&bufmgr->vma_allocator[memzone], address, size);
}

/* Exports a BO's implicit synchronization state to a drm_syncobj, returning
 * its wrapping iris_syncobj. A new drm_syncobj is created; the caller must
 * destroy it after the execbuf ioctl.
 */
struct iris_syncobj *
iris_bo_export_sync_state(struct iris_bo *bo)
{
   struct iris_bufmgr *bufmgr = bo->bufmgr;
   int drm_fd = iris_bufmgr_get_fd(bufmgr);

   struct iris_syncobj *iris_syncobj = iris_create_syncobj(bufmgr);

   struct dma_buf_export_sync_file export_sync_file_ioctl = {
      .flags = DMA_BUF_SYNC_RW, /* TODO */
      .fd = -1,
   };
   if (intel_ioctl(bo->real.prime_fd, DMA_BUF_IOCTL_EXPORT_SYNC_FILE,
                   &export_sync_file_ioctl)) {
      fprintf(stderr, "DMA_BUF_IOCTL_EXPORT_SYNC_FILE ioctl failed (%d)\n",
              errno);
      goto error_export;
   }

   int sync_file_fd = export_sync_file_ioctl.fd;
   assert(sync_file_fd >= 0);

   struct drm_syncobj_handle syncobj_import_ioctl = {
      .handle = iris_syncobj->handle,
      .flags = DRM_SYNCOBJ_FD_TO_HANDLE_FLAGS_IMPORT_SYNC_FILE,
      .fd = sync_file_fd,
   };
   if (intel_ioctl(drm_fd, DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE,
                   &syncobj_import_ioctl)) {
      fprintf(stderr, "DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE ioctl failed (%d)\n",
              errno);
   }

   close(sync_file_fd);

   return iris_syncobj;
error_export:
   iris_syncobj_destroy(bufmgr, iris_syncobj);
   return NULL;
}

/* Import the state of a sync_file_fd (which we should have gotten from
 * batch_syncobj_to_sync_file_fd) into a BO as its implicit synchronization
 * state.
 */
void
iris_bo_import_sync_state(struct iris_bo *bo, int sync_file_fd)
{
   struct dma_buf_import_sync_file import_sync_file_ioctl = {
      .flags = DMA_BUF_SYNC_WRITE,
      .fd = sync_file_fd,
   };
   if (intel_ioctl(bo->real.prime_fd, DMA_BUF_IOCTL_IMPORT_SYNC_FILE,
                   &import_sync_file_ioctl))
      fprintf(stderr, "DMA_BUF_IOCTL_IMPORT_SYNC_FILE ioctl failed (%d)\n",
              errno);
}

/* A timeout of 0 just checks for busyness. */
static int
iris_bo_wait_syncobj(struct iris_bo *bo, int64_t timeout_ns)
{
   int ret = 0;
   struct iris_bufmgr *bufmgr = bo->bufmgr;
   const bool is_external = iris_bo_is_real(bo) && bo->real.prime_fd != -1;
   struct iris_syncobj *external_implicit_syncobj = NULL;

   /* If we know it's idle, don't bother with the kernel round trip.
    * Can't do that for Xe KMD with external BOs since we have to check the
    * implicit synchronization information.
    */
   if (!is_external && bo->idle)
      return 0;

   simple_mtx_lock(&bufmgr->bo_deps_lock);

   const int handles_len = bo->deps_size * IRIS_BATCH_COUNT * 2 + is_external;
   uint32_t *handles = handles_len <= 32 ?
                        (uint32_t *)alloca(handles_len * sizeof(*handles)) :
                        (uint32_t *)malloc(handles_len * sizeof(*handles));
   int handle_count = 0;

   if (is_external) {
      external_implicit_syncobj = iris_bo_export_sync_state(bo);
      if (external_implicit_syncobj)
         handles[handle_count++] = external_implicit_syncobj->handle;
   }

   for (int d = 0; d < bo->deps_size; d++) {
      for (int b = 0; b < IRIS_BATCH_COUNT; b++) {
         struct iris_syncobj *r = bo->deps[d].read_syncobjs[b];
         struct iris_syncobj *w = bo->deps[d].write_syncobjs[b];
         if (r)
            handles[handle_count++] = r->handle;
         if (w)
            handles[handle_count++] = w->handle;
      }
   }

   if (handle_count == 0)
      goto out;

   /* Unlike the gem wait, negative values are not infinite here. */
   int64_t timeout_abs = os_time_get_absolute_timeout(timeout_ns);
   if (timeout_abs < 0)
      timeout_abs = INT64_MAX;

   struct drm_syncobj_wait args = {
      .handles = (uintptr_t) handles,
      .timeout_nsec = timeout_abs,
      .count_handles = handle_count,
      .flags = DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL,
   };

   ret = intel_ioctl(bufmgr->fd, DRM_IOCTL_SYNCOBJ_WAIT, &args);
   if (ret != 0) {
      ret = -errno;
      goto out;
   }

   /* We just waited on everything, so clean out all the deps. */
   for (int d = 0; d < bo->deps_size; d++) {
      for (int b = 0; b < IRIS_BATCH_COUNT; b++) {
         iris_syncobj_reference(bufmgr, &bo->deps[d].write_syncobjs[b], NULL);
         iris_syncobj_reference(bufmgr, &bo->deps[d].read_syncobjs[b], NULL);
      }
   }

out:
   if (handles_len > 32)
      free(handles);
   if (external_implicit_syncobj)
      iris_syncobj_reference(bufmgr, &external_implicit_syncobj, NULL);

   simple_mtx_unlock(&bufmgr->bo_deps_lock);
   return ret;
}

static bool
iris_bo_busy_syncobj(struct iris_bo *bo)
{
   return iris_bo_wait_syncobj(bo, 0) == -ETIME;
}

bool
iris_bo_busy(struct iris_bo *bo)
{
   bool busy;

   switch (iris_bufmgr_get_device_info(bo->bufmgr)->kmd_type) {
   case INTEL_KMD_TYPE_I915:
      if (iris_bo_is_external(bo))
         busy = iris_i915_bo_busy_gem(bo);
      else
         busy = iris_bo_busy_syncobj(bo);
      break;
   case INTEL_KMD_TYPE_XE:
      busy = iris_bo_busy_syncobj(bo);
      break;
   default:
      unreachable("missing");
      busy = true;
   }

   bo->idle = !busy;

   return busy;
}

/**
 * Specify the volatility of the buffer.
 * \param bo Buffer to mark
 * \param state The purgeable status
 *
 * Use IRIS_MADVICE_DONT_NEED to mark the buffer as purgeable, and it will be
 * reclaimed under memory pressure. If you subsequently require the buffer,
 * then you must pass IRIS_MADVICE_WILL_NEED to mark the buffer as required.
 *
 * Returns true if the buffer was retained, or false if it was discarded
 * whilst marked as IRIS_MADVICE_DONT_NEED.
 */
static inline bool
iris_bo_madvise(struct iris_bo *bo, enum iris_madvice state)
{
   /* We can't madvise suballocated BOs. */
   assert(iris_bo_is_real(bo));

   return bo->bufmgr->kmd_backend->bo_madvise(bo, state);
}

static struct iris_bo *
bo_calloc(void)
{
   struct iris_bo *bo = calloc(1, sizeof(*bo));
   if (!bo)
      return NULL;

   list_inithead(&bo->real.exports);

   bo->hash = _mesa_hash_pointer(bo);

   return bo;
}

static void
bo_unmap(struct iris_bo *bo)
{
   assert(iris_bo_is_real(bo));

   VG_NOACCESS(bo->real.map, bo->size);
   os_munmap(bo->real.map, bo->size);
   bo->real.map = NULL;
}

static struct pb_slabs *
get_slabs(struct iris_bufmgr *bufmgr, uint64_t size)
{
   for (unsigned i = 0; i < NUM_SLAB_ALLOCATORS; i++) {
      struct pb_slabs *slabs = &bufmgr->bo_slabs[i];

      if (size <= 1ull << (slabs->min_order + slabs->num_orders - 1))
         return slabs;
   }

   unreachable("should have found a valid slab for this size");
}

/* Return the power of two size of a slab entry matching the input size. */
static unsigned
get_slab_pot_entry_size(struct iris_bufmgr *bufmgr, unsigned size)
{
   unsigned entry_size = util_next_power_of_two(size);
   unsigned min_entry_size = 1 << bufmgr->bo_slabs[0].min_order;

   return MAX2(entry_size, min_entry_size);
}

/* Return the slab entry alignment. */
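/* For example, a 96 KiB entry (3/4 of 128 KiB) gets a 32 KiB alignment,
 * while a full 128 KiB entry keeps the 128 KiB alignment (assuming the
 * smallest slab order is below 128 KiB).
 */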
static unsigned
get_slab_entry_alignment(struct iris_bufmgr *bufmgr, unsigned size)
{
   unsigned entry_size = get_slab_pot_entry_size(bufmgr, size);

   if (size <= entry_size * 3 / 4)
      return entry_size / 4;

   return entry_size;
}

static bool
iris_can_reclaim_slab(void *priv, struct pb_slab_entry *entry)
{
   struct iris_bo *bo = container_of(entry, struct iris_bo, slab.entry);

   return !iris_bo_busy(bo);
}

static void
iris_slab_free(void *priv, struct pb_slab *pslab)
{
   struct iris_bufmgr *bufmgr = priv;
   struct iris_slab *slab = (void *) pslab;
   struct intel_aux_map_context *aux_map_ctx = bufmgr->aux_map_ctx;

   assert(!slab->bo->aux_map_address);

   /* Since we're freeing the whole slab, all buffers allocated out of it
    * must be reclaimable.  We require buffers to be idle to be reclaimed
    * (see iris_can_reclaim_slab()), so we know all entries must be idle.
    * Therefore, we can safely unmap their aux table entries.
    */
   for (unsigned i = 0; i < pslab->num_entries; i++) {
      struct iris_bo *bo = &slab->entries[i];
      if (aux_map_ctx && bo->aux_map_address) {
         intel_aux_map_unmap_range(aux_map_ctx, bo->address, bo->size);
         bo->aux_map_address = 0;
      }

      /* Unref read/write dependency syncobjs and free the array. */
      for (int d = 0; d < bo->deps_size; d++) {
         for (int b = 0; b < IRIS_BATCH_COUNT; b++) {
            iris_syncobj_reference(bufmgr, &bo->deps[d].write_syncobjs[b], NULL);
            iris_syncobj_reference(bufmgr, &bo->deps[d].read_syncobjs[b], NULL);
         }
      }
      free(bo->deps);
   }

   iris_bo_unreference(slab->bo);

   free(slab->entries);
   free(slab);
}

static struct pb_slab *
iris_slab_alloc(void *priv,
                unsigned heap,
                unsigned entry_size,
                unsigned group_index)
{
   struct iris_bufmgr *bufmgr = priv;
   struct iris_slab *slab = calloc(1, sizeof(struct iris_slab));
   uint32_t flags = BO_ALLOC_NO_SUBALLOC;
   unsigned slab_size = 0;
   /* We only support slab allocation for IRIS_MEMZONE_OTHER */
   enum iris_memory_zone memzone = IRIS_MEMZONE_OTHER;

   if (!slab)
      return NULL;

   struct pb_slabs *slabs = bufmgr->bo_slabs;

   /* Determine the slab buffer size. */
   for (unsigned i = 0; i < NUM_SLAB_ALLOCATORS; i++) {
      unsigned max_entry_size =
         1 << (slabs[i].min_order + slabs[i].num_orders - 1);

      if (entry_size <= max_entry_size) {
         /* The slab size is twice the size of the largest possible entry. */
         slab_size = max_entry_size * 2;

         if (!util_is_power_of_two_nonzero(entry_size)) {
            assert(util_is_power_of_two_nonzero(entry_size * 4 / 3));

            /* If the entry size is 3/4 of a power of two, we would waste
             * space and not gain anything if we allocated only twice the
             * power of two for the backing buffer:
             *
             *    2 * 3/4 = 1.5 usable with buffer size 2
             *
             * Allocating 5 times the entry size leads us to the next power
             * of two and results in a much better memory utilization:
             *
             *    5 * 3/4 = 3.75 usable with buffer size 4
             */
            if (entry_size * 5 > slab_size)
               slab_size = util_next_power_of_two(entry_size * 5);
         }

         /* The largest slab should have the same size as the PTE fragment
          * size to get faster address translation.
          *
          * TODO: move this to intel_device_info?
          */
         const unsigned pte_size = 2 * 1024 * 1024;

         if (i == NUM_SLAB_ALLOCATORS - 1 && slab_size < pte_size)
            slab_size = pte_size;

         break;
      }
   }
   assert(slab_size != 0);

   switch (heap) {
   case IRIS_HEAP_SYSTEM_MEMORY_UNCACHED_COMPRESSED:
   case IRIS_HEAP_DEVICE_LOCAL_COMPRESSED:
      flags |= BO_ALLOC_COMPRESSED;
      break;
   case IRIS_HEAP_SYSTEM_MEMORY_CACHED_COHERENT:
   case IRIS_HEAP_SYSTEM_MEMORY_UNCACHED:
      flags |= BO_ALLOC_SMEM;
      break;
   case IRIS_HEAP_DEVICE_LOCAL:
      flags |= BO_ALLOC_LMEM;
      break;
   case IRIS_HEAP_DEVICE_LOCAL_CPU_VISIBLE_SMALL_BAR:
      flags |= BO_ALLOC_LMEM | BO_ALLOC_CPU_VISIBLE;
      break;
   default:
      flags |= BO_ALLOC_PLAIN;
   }

   slab->bo =
      iris_bo_alloc(bufmgr, "slab", slab_size, slab_size, memzone, flags);
   if (!slab->bo)
      goto fail;

   slab_size = slab->bo->size;

   slab->base.num_entries = slab_size / entry_size;
   slab->base.num_free = slab->base.num_entries;
   slab->base.group_index = group_index;
   slab->base.entry_size = entry_size;
   slab->entries = calloc(slab->base.num_entries, sizeof(*slab->entries));
   if (!slab->entries)
      goto fail_bo;

   list_inithead(&slab->base.free);

   for (unsigned i = 0; i < slab->base.num_entries; i++) {
      struct iris_bo *bo = &slab->entries[i];

      bo->size = entry_size;
      bo->bufmgr = bufmgr;
      bo->hash = _mesa_hash_pointer(bo);
      bo->gem_handle = 0;
      bo->address = intel_canonical_address(slab->bo->address + i * entry_size);
      bo->aux_map_address = 0;
      bo->index = -1;
      bo->refcount = 0;
      bo->idle = true;
      bo->zeroed = slab->bo->zeroed;

      bo->slab.entry.slab = &slab->base;

      bo->slab.real = iris_get_backing_bo(slab->bo);

      list_addtail(&bo->slab.entry.head, &slab->base.free);
   }

   return &slab->base;

fail_bo:
   iris_bo_unreference(slab->bo);
fail:
   free(slab);
   return NULL;
}

/**
 * Selects a heap for the given buffer allocation flags.
 *
 * This determines the cacheability, coherency, and mmap mode settings.
 */
static enum iris_heap
flags_to_heap(struct iris_bufmgr *bufmgr, unsigned flags)
{
   const struct intel_device_info *devinfo = &bufmgr->devinfo;

   if (bufmgr->vram.size > 0) {
      if (flags & BO_ALLOC_COMPRESSED)
         return IRIS_HEAP_DEVICE_LOCAL_COMPRESSED;

      /* Discrete GPUs currently always snoop CPU caches. */
      if ((flags & BO_ALLOC_SMEM) || (flags & BO_ALLOC_COHERENT))
         return IRIS_HEAP_SYSTEM_MEMORY_CACHED_COHERENT;

      if ((flags & BO_ALLOC_LMEM) ||
          ((flags & BO_ALLOC_SCANOUT) && !(flags & BO_ALLOC_SHARED))) {

         if ((flags & BO_ALLOC_CPU_VISIBLE) && !intel_vram_all_mappable(devinfo))
            return IRIS_HEAP_DEVICE_LOCAL_CPU_VISIBLE_SMALL_BAR;

         return IRIS_HEAP_DEVICE_LOCAL;
      }

      return IRIS_HEAP_DEVICE_LOCAL_PREFERRED;
   } else if (devinfo->has_llc) {
      assert(!(flags & BO_ALLOC_LMEM));

      if (flags & (BO_ALLOC_SCANOUT | BO_ALLOC_SHARED))
         return IRIS_HEAP_SYSTEM_MEMORY_UNCACHED;

      return IRIS_HEAP_SYSTEM_MEMORY_CACHED_COHERENT;
   } else {
      assert(!devinfo->has_llc);
      assert(!(flags & BO_ALLOC_LMEM));

      if (flags & BO_ALLOC_COMPRESSED)
         return IRIS_HEAP_SYSTEM_MEMORY_UNCACHED_COMPRESSED;

      if (flags & BO_ALLOC_COHERENT)
         return IRIS_HEAP_SYSTEM_MEMORY_CACHED_COHERENT;

      return IRIS_HEAP_SYSTEM_MEMORY_UNCACHED;
   }
}

static bool
zero_bo(struct iris_bufmgr *bufmgr,
        unsigned flags,
        struct iris_bo *bo)
{
   assert(flags & BO_ALLOC_ZEROED);

   if (bo->zeroed)
      return true;

   if (bufmgr->devinfo.has_flat_ccs && (flags & BO_ALLOC_LMEM)) {
      /* With flat CCS, all allocations in LMEM have memory ranges with
       * corresponding CCS elements. These elements are only accessible
       * through GPU commands, but we don't issue GPU commands here.
       */
      return false;
   }

   void *map = iris_bo_map(NULL, bo, MAP_WRITE | MAP_RAW);
   if (!map)
      return false;

   memset(map, 0, bo->size);
   bo->zeroed = true;
   return true;
}

static struct iris_bo *
alloc_bo_from_slabs(struct iris_bufmgr *bufmgr,
                    const char *name,
                    uint64_t size,
                    uint32_t alignment,
                    unsigned flags)
{
   if (flags & BO_ALLOC_NO_SUBALLOC)
      return NULL;

   struct pb_slabs *last_slab = &bufmgr->bo_slabs[NUM_SLAB_ALLOCATORS - 1];
   unsigned max_slab_entry_size =
      1 << (last_slab->min_order + last_slab->num_orders - 1);

   if (size > max_slab_entry_size)
      return NULL;

   struct pb_slab_entry *entry;

   enum iris_heap heap = flags_to_heap(bufmgr, flags);

   unsigned alloc_size = size;

   /* Always use slabs for sizes less than 4 KB because the kernel aligns
    * everything to 4 KB.
    */
   if (size < alignment && alignment <= 4 * 1024)
      alloc_size = alignment;

   if (alignment > get_slab_entry_alignment(bufmgr, alloc_size)) {
      /* 3/4 allocations can return too small alignment.
       * Try again with a power of two allocation size.
       */
      unsigned pot_size = get_slab_pot_entry_size(bufmgr, alloc_size);

      if (alignment <= pot_size) {
         /* This size works but wastes some memory to fulfill the alignment. */
         alloc_size = pot_size;
      } else {
         /* can't fulfill alignment requirements */
         return NULL;
      }
   }

   struct pb_slabs *slabs = get_slabs(bufmgr, alloc_size);
   entry = pb_slab_alloc(slabs, alloc_size, heap);
   if (!entry) {
      /* Clean up and try again... */
      pb_slabs_reclaim(slabs);

      entry = pb_slab_alloc(slabs, alloc_size, heap);
   }
   if (!entry)
      return NULL;

   struct iris_bo *bo = container_of(entry, struct iris_bo, slab.entry);

   if (bo->aux_map_address && bo->bufmgr->aux_map_ctx) {
      /* This buffer was associated with an aux-buffer range.  We only allow
       * slab allocated buffers to be reclaimed when idle (not in use by an
       * executing batch).  (See iris_can_reclaim_slab().)  So we know that
       * our previous aux mapping is no longer in use, and we can safely
       * remove it.
       */
      intel_aux_map_unmap_range(bo->bufmgr->aux_map_ctx, bo->address,
                                bo->size);
      bo->aux_map_address = 0;
   }

   p_atomic_set(&bo->refcount, 1);
   bo->name = name;
   bo->size = size;

   /* Zero the contents if necessary.  If this fails, fall back to
    * allocating a fresh BO, which will always be zeroed by the kernel.
    */
   if ((flags & BO_ALLOC_ZEROED) && !zero_bo(bufmgr, flags, bo)) {
      pb_slab_free(slabs, &bo->slab.entry);
      return NULL;
   }

   return bo;
}

static struct iris_bo *
alloc_bo_from_cache(struct iris_bufmgr *bufmgr,
                    struct bo_cache_bucket *bucket,
                    uint32_t alignment,
                    enum iris_memory_zone memzone,
                    enum iris_mmap_mode mmap_mode,
                    unsigned flags,
                    bool match_zone)
{
   if (!bucket)
      return NULL;

   struct iris_bo *bo = NULL;

   simple_mtx_assert_locked(&bufmgr->lock);

   list_for_each_entry_safe(struct iris_bo, cur, &bucket->head, head) {
      assert(iris_bo_is_real(cur));

      /* Find one that's got the right mapping type.  We used to swap maps
       * around but the kernel doesn't allow this on discrete GPUs.
       */
      if (mmap_mode != cur->real.mmap_mode)
         continue;

      /* Try a little harder to find one that's already in the right memzone */
      if (match_zone && memzone != iris_memzone_for_address(cur->address))
         continue;

      if (cur->real.capture != !!(flags & BO_ALLOC_CAPTURE))
         continue;

      /* If the last BO in the cache is busy, there are no idle BOs.  Bail,
       * either falling back to a non-matching memzone, or if that fails,
       * allocating a fresh buffer.
       */
      if (iris_bo_busy(cur))
         return NULL;

      list_del(&cur->head);

      /* Tell the kernel we need this BO and check whether it still exists. */
      if (!iris_bo_madvise(cur, IRIS_MADVICE_WILL_NEED)) {
         /* This BO was purged, throw it out and keep looking. */
         bo_free(cur);
         continue;
      }

      if (cur->aux_map_address) {
         /* This buffer was associated with an aux-buffer range. We make sure
          * that buffers are not reused from the cache while the buffer is (busy)
          * being used by an executing batch. Since we are here, the buffer is no
          * longer being used by a batch and the buffer was deleted (in order to
          * end up in the cache). Therefore its old aux-buffer range can be
          * removed from the aux-map.
          */
         if (cur->bufmgr->aux_map_ctx)
            intel_aux_map_unmap_range(cur->bufmgr->aux_map_ctx, cur->address,
                                      cur->size);
         cur->aux_map_address = 0;
      }

      /* If the cached BO isn't in the right memory zone, or the alignment
       * isn't sufficient, free the old memory and assign it a new address.
       */
      if (memzone != iris_memzone_for_address(cur->address) ||
          cur->address % alignment != 0) {
         if (!bufmgr->kmd_backend->gem_vm_unbind(cur)) {
            DBG("Unable to unbind vm of buf %u\n", cur->gem_handle);
            bo_free(cur);
            continue;
         }

         vma_free(bufmgr, cur->address, cur->size);
         cur->address = 0ull;
      }

      bo = cur;
      break;
   }

   if (!bo)
      return NULL;

   /* Zero the contents if necessary.  If this fails, fall back to
    * allocating a fresh BO, which will always be zeroed by the kernel.
    */
   assert(bo->zeroed == false);
   if ((flags & BO_ALLOC_ZEROED) && !zero_bo(bufmgr, flags, bo)) {
      bo_free(bo);
      return NULL;
   }

   return bo;
}

static struct iris_bo *
alloc_fresh_bo(struct iris_bufmgr *bufmgr, uint64_t bo_size, unsigned flags)
{
   struct iris_bo *bo = bo_calloc();
   if (!bo)
      return NULL;

   /* Try to allocate memory in multiples of 2MB, as this allows us to use
    * 64K pages rather than the less-efficient 4K pages.  Most BOs smaller
    * than 64MB should hit the BO cache or slab allocations anyway, so this
    * shouldn't waste too much memory.  We do exclude small (< 1MB) sizes to
    * be defensive in case any of those bypass the caches and end up here.
    */
   if (bo_size >= 1024 * 1024)
      bo_size = align64(bo_size, 2 * 1024 * 1024);

   bo->real.heap = flags_to_heap(bufmgr, flags);

   const struct intel_memory_class_instance *regions[2];
   uint16_t num_regions = 0;

   if (bufmgr->vram.size > 0) {
      switch (bo->real.heap) {
      case IRIS_HEAP_DEVICE_LOCAL_PREFERRED:
         /* For vram allocations, still use system memory as a fallback. */
         regions[num_regions++] = bufmgr->vram.region;
         regions[num_regions++] = bufmgr->sys.region;
         break;
      case IRIS_HEAP_DEVICE_LOCAL:
      case IRIS_HEAP_DEVICE_LOCAL_CPU_VISIBLE_SMALL_BAR:
      case IRIS_HEAP_DEVICE_LOCAL_COMPRESSED:
         regions[num_regions++] = bufmgr->vram.region;
         break;
      case IRIS_HEAP_SYSTEM_MEMORY_CACHED_COHERENT:
         regions[num_regions++] = bufmgr->sys.region;
         break;
      case IRIS_HEAP_SYSTEM_MEMORY_UNCACHED_COMPRESSED:
         /* not valid, compressed in discrete is always created with
          * IRIS_HEAP_DEVICE_LOCAL_PREFERRED_COMPRESSED
          */
      case IRIS_HEAP_SYSTEM_MEMORY_UNCACHED:
         /* not valid; discrete cards always enable snooping */
      case IRIS_HEAP_MAX:
         unreachable("invalid heap for BO");
      }
   } else {
      regions[num_regions++] = bufmgr->sys.region;
   }

   bo->gem_handle = bufmgr->kmd_backend->gem_create(bufmgr, regions,
                                                    num_regions, bo_size,
                                                    bo->real.heap, flags);
   if (bo->gem_handle == 0) {
      free(bo);
      return NULL;
   }
   bo->bufmgr = bufmgr;
   bo->size = bo_size;
   bo->idle = true;
   bo->zeroed = true;
   bo->real.capture = (flags & BO_ALLOC_CAPTURE) != 0;

   return bo;
}

const char *
iris_heap_to_string[IRIS_HEAP_MAX] = {
   [IRIS_HEAP_SYSTEM_MEMORY_CACHED_COHERENT] = "system-cached-coherent",
   [IRIS_HEAP_SYSTEM_MEMORY_UNCACHED] = "system-uncached",
   [IRIS_HEAP_SYSTEM_MEMORY_UNCACHED_COMPRESSED] = "system-uncached-compressed",
   [IRIS_HEAP_DEVICE_LOCAL] = "local",
   [IRIS_HEAP_DEVICE_LOCAL_COMPRESSED] = "local-compressed",
   [IRIS_HEAP_DEVICE_LOCAL_PREFERRED] = "local-preferred",
   [IRIS_HEAP_DEVICE_LOCAL_CPU_VISIBLE_SMALL_BAR] = "local-cpu-visible-small-bar",
};

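/* Cached, coherent system memory gets a write-back CPU mapping; the other
 * CPU-mappable heaps get write-combined mappings, and compressed or
 * non-CPU-visible local memory cannot be mapped at all.
 */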
static enum iris_mmap_mode
heap_to_mmap_mode(struct iris_bufmgr *bufmgr, enum iris_heap heap)
{
   const struct intel_device_info *devinfo = &bufmgr->devinfo;

   switch (heap) {
   case IRIS_HEAP_DEVICE_LOCAL:
      return intel_vram_all_mappable(devinfo) ? IRIS_MMAP_WC : IRIS_MMAP_NONE;
   case IRIS_HEAP_DEVICE_LOCAL_CPU_VISIBLE_SMALL_BAR:
   case IRIS_HEAP_DEVICE_LOCAL_PREFERRED:
      return IRIS_MMAP_WC;
   case IRIS_HEAP_SYSTEM_MEMORY_CACHED_COHERENT:
      return IRIS_MMAP_WB;
   case IRIS_HEAP_SYSTEM_MEMORY_UNCACHED:
      return IRIS_MMAP_WC;
   case IRIS_HEAP_SYSTEM_MEMORY_UNCACHED_COMPRESSED:
   case IRIS_HEAP_DEVICE_LOCAL_COMPRESSED:
      /* compressed bos are not mmaped */
      return IRIS_MMAP_NONE;
   default:
      unreachable("invalid heap");
   }
}

struct iris_bo *
iris_bo_alloc(struct iris_bufmgr *bufmgr,
              const char *name,
              uint64_t size,
              uint32_t alignment,
              enum iris_memory_zone memzone,
              unsigned flags)
{
   struct iris_bo *bo;
   unsigned int page_size = getpagesize();
   enum iris_heap heap = flags_to_heap(bufmgr, flags);
   struct bo_cache_bucket *bucket =
      bucket_for_size(bufmgr, size, heap, flags);

   if (memzone != IRIS_MEMZONE_OTHER || (flags & BO_ALLOC_COHERENT))
      flags |= BO_ALLOC_NO_SUBALLOC;

   /* By default, capture all driver-internal buffers like shader kernels,
    * surface states, dynamic states, border colors, and so on.
    */
   if (memzone < IRIS_MEMZONE_OTHER || INTEL_DEBUG(DEBUG_CAPTURE_ALL))
      flags |= BO_ALLOC_CAPTURE;

   bo = alloc_bo_from_slabs(bufmgr, name, size, alignment, flags);

   if (bo)
      return bo;

   /* Round the size up to the bucket size, or if we don't have caching
    * at this size, a multiple of the page size.
    */
   uint64_t bo_size =
      bucket ? bucket->size : MAX2(align64(size, page_size), page_size);
   enum iris_mmap_mode mmap_mode = heap_to_mmap_mode(bufmgr, heap);

   simple_mtx_lock(&bufmgr->lock);

   /* Get a buffer out of the cache if available.  First, we try to find
    * one with a matching memory zone so we can avoid reallocating VMA.
    */
   bo = alloc_bo_from_cache(bufmgr, bucket, alignment, memzone, mmap_mode,
                            flags, true);

   /* If that fails, we try for any cached BO, without matching memzone. */
   if (!bo) {
      bo = alloc_bo_from_cache(bufmgr, bucket, alignment, memzone, mmap_mode,
                               flags, false);
   }

   simple_mtx_unlock(&bufmgr->lock);

   if (!bo) {
      bo = alloc_fresh_bo(bufmgr, bo_size, flags);
      if (!bo)
         return NULL;
   }

   if (bo->address == 0ull) {
      simple_mtx_lock(&bufmgr->lock);
      bo->address = vma_alloc(bufmgr, memzone, bo->size, alignment);
      simple_mtx_unlock(&bufmgr->lock);

      if (bo->address == 0ull)
         goto err_free;

      if (!bufmgr->kmd_backend->gem_vm_bind(bo))
         goto err_vm_alloc;
   }

   bo->name = name;
   p_atomic_set(&bo->refcount, 1);
   bo->real.reusable = bucket && bufmgr->bo_reuse;
   bo->real.protected = flags & BO_ALLOC_PROTECTED;
   bo->index = -1;
   bo->real.prime_fd = -1;

   assert(bo->real.map == NULL || bo->real.mmap_mode == mmap_mode);
   bo->real.mmap_mode = mmap_mode;

   /* On integrated GPUs, enable snooping to ensure coherency if needed.
    * For discrete, we instead use SMEM and avoid WB maps for coherency.
    */
   if ((flags & BO_ALLOC_COHERENT) &&
       !bufmgr->devinfo.has_llc && bufmgr->devinfo.has_caching_uapi) {
      if (bufmgr->kmd_backend->bo_set_caching(bo, true) != 0)
         goto err_free;
   }

   DBG("bo_create: buf %d (%s) (%s memzone) (%s) %llub\n", bo->gem_handle,
       bo->name, memzone_name(memzone), iris_heap_to_string[bo->real.heap],
       (unsigned long long) size);

   return bo;

err_vm_alloc:
   simple_mtx_lock(&bufmgr->lock);
   vma_free(bufmgr, bo->address, bo->size);
   simple_mtx_unlock(&bufmgr->lock);
err_free:
   simple_mtx_lock(&bufmgr->lock);
   bo_free(bo);
   simple_mtx_unlock(&bufmgr->lock);
   return NULL;
}

static int
iris_bo_close(int fd, uint32_t gem_handle)
{
   struct drm_gem_close close = {
      .handle = gem_handle,
   };
   return intel_ioctl(fd, DRM_IOCTL_GEM_CLOSE, &close);
}

struct iris_bo *
iris_bo_create_userptr(struct iris_bufmgr *bufmgr, const char *name,
                       void *ptr, size_t size,
                       enum iris_memory_zone memzone)
{
   struct iris_bo *bo;

   bo = bo_calloc();
   if (!bo)
      return NULL;

   bo->gem_handle = bufmgr->kmd_backend->gem_create_userptr(bufmgr, ptr, size);
   if (bo->gem_handle == 0)
      goto err_free;

   bo->name = name;
   bo->size = size;
   bo->real.map = ptr;
   bo->real.userptr = true;

   bo->bufmgr = bufmgr;

   if (INTEL_DEBUG(DEBUG_CAPTURE_ALL))
      bo->real.capture = true;

   simple_mtx_lock(&bufmgr->lock);
   bo->address = vma_alloc(bufmgr, memzone, size, 1);
   simple_mtx_unlock(&bufmgr->lock);

   if (bo->address == 0ull)
      goto err_close;

   p_atomic_set(&bo->refcount, 1);
   bo->index = -1;
   bo->idle = true;
   bo->real.heap = IRIS_HEAP_SYSTEM_MEMORY_CACHED_COHERENT;
   bo->real.mmap_mode = heap_to_mmap_mode(bufmgr, bo->real.heap);
   bo->real.prime_fd = -1;

   if (!bufmgr->kmd_backend->gem_vm_bind(bo))
      goto err_vma_free;

   return bo;

err_vma_free:
   simple_mtx_lock(&bufmgr->lock);
   vma_free(bufmgr, bo->address, bo->size);
   simple_mtx_unlock(&bufmgr->lock);
err_close:
   bufmgr->kmd_backend->gem_close(bufmgr, bo);
err_free:
   free(bo);
   return NULL;
}

static bool
needs_prime_fd(struct iris_bufmgr *bufmgr)
{
   return bufmgr->devinfo.kmd_type == INTEL_KMD_TYPE_XE;
}

static bool
iris_bo_set_prime_fd(struct iris_bo *bo)
{
   struct iris_bufmgr *bufmgr = bo->bufmgr;

   if (needs_prime_fd(bufmgr) && bo->real.prime_fd == -1) {
      if (drmPrimeHandleToFD(bufmgr->fd, bo->gem_handle,
                             DRM_CLOEXEC | DRM_RDWR, &bo->real.prime_fd)) {
         fprintf(stderr, "Failed to get prime fd for bo %s/%u\n",
                 bo->name, bo->gem_handle);
         return false;
      }
   }

   return true;
}

/**
 * Returns an iris_bo wrapping the given buffer object handle.
 *
 * This can be used when one application needs to pass a buffer object
 * to another.
 */
struct iris_bo *
iris_bo_gem_create_from_name(struct iris_bufmgr *bufmgr,
                             const char *name, unsigned int handle)
{
   struct iris_bo *bo;

   /* At the moment most applications only have a few named BOs.
    * For instance, in a DRI client only the render buffers passed
    * between X and the client are named. And since X returns the
    * alternating names for the front/back buffer, a linear search
    * provides a sufficiently fast match.
    */
1418    simple_mtx_lock(&bufmgr->lock);
1419    bo = find_and_ref_external_bo(bufmgr->name_table, handle);
1420    if (bo)
1421       goto out;
1422 
1423    struct drm_gem_open open_arg = { .name = handle };
1424    int ret = intel_ioctl(bufmgr->fd, DRM_IOCTL_GEM_OPEN, &open_arg);
1425    if (ret != 0) {
1426       DBG("Couldn't reference %s handle 0x%08x: %s\n",
1427           name, handle, strerror(errno));
1428       bo = NULL;
1429       goto out;
1430    }
1431    /* Now see if someone has used a prime handle to get this
1432     * object from the kernel before by looking through the list
1433     * again for a matching gem_handle
1434     */
1435    bo = find_and_ref_external_bo(bufmgr->handle_table, open_arg.handle);
1436    if (bo)
1437       goto out;
1438 
1439    bo = bo_calloc();
1440    if (!bo) {
1441       struct iris_bo close_bo = {
1442             .gem_handle = open_arg.handle,
1443       };
1444       bufmgr->kmd_backend->gem_close(bufmgr, &close_bo);
1445       goto out;
1446    }
1447 
1448    p_atomic_set(&bo->refcount, 1);
1449 
1450    bo->size = open_arg.size;
1451    bo->bufmgr = bufmgr;
1452    bo->gem_handle = open_arg.handle;
1453    bo->name = name;
1454    bo->index = -1;
1455    bo->real.global_name = handle;
1456    bo->real.prime_fd = -1;
1457    bo->real.reusable = false;
1458    bo->real.imported = true;
1459    /* Xe KMD expects at least 1-way coherency for imports */
1460    bo->real.heap = IRIS_HEAP_SYSTEM_MEMORY_CACHED_COHERENT;
1461    bo->real.mmap_mode = IRIS_MMAP_NONE;
1462    if (INTEL_DEBUG(DEBUG_CAPTURE_ALL))
1463       bo->real.capture = true;
1464    bo->address = vma_alloc(bufmgr, IRIS_MEMZONE_OTHER, bo->size, 1);
1465    if (bo->address == 0ull)
1466       goto err_free;
1467 
1468    if (!iris_bo_set_prime_fd(bo))
1469       goto err_vm_alloc;
1470 
1471    if (!bufmgr->kmd_backend->gem_vm_bind(bo))
1472       goto err_vm_alloc;
1473 
1474    _mesa_hash_table_insert(bufmgr->handle_table, &bo->gem_handle, bo);
1475    _mesa_hash_table_insert(bufmgr->name_table, &bo->real.global_name, bo);
1476 
1477    DBG("bo_create_from_handle: %d (%s)\n", handle, bo->name);
1478 
1479 out:
1480    simple_mtx_unlock(&bufmgr->lock);
1481    return bo;
1482 
1483 err_vm_alloc:
1484    vma_free(bufmgr, bo->address, bo->size);
1485 err_free:
1486    bo_free(bo);
1487    simple_mtx_unlock(&bufmgr->lock);
1488    return NULL;
1489 }
1490 
1491 static void
bo_close(struct iris_bo * bo)1492 bo_close(struct iris_bo *bo)
1493 {
1494    struct iris_bufmgr *bufmgr = bo->bufmgr;
1495 
1496    simple_mtx_assert_locked(&bufmgr->lock);
1497    assert(iris_bo_is_real(bo));
1498 
1499    if (iris_bo_is_external(bo)) {
1500       struct hash_entry *entry;
1501 
1502       if (bo->real.global_name) {
1503          entry = _mesa_hash_table_search(bufmgr->name_table,
1504                                          &bo->real.global_name);
1505          _mesa_hash_table_remove(bufmgr->name_table, entry);
1506       }
1507 
1508       entry = _mesa_hash_table_search(bufmgr->handle_table, &bo->gem_handle);
1509       _mesa_hash_table_remove(bufmgr->handle_table, entry);
1510 
1511       list_for_each_entry_safe(struct bo_export, export, &bo->real.exports, link) {
1512          iris_bo_close(export->drm_fd, export->gem_handle);
1513 
1514          list_del(&export->link);
1515          free(export);
1516       }
1517    } else {
1518       assert(list_is_empty(&bo->real.exports));
1519    }
1520 
1521    /* Unbind and return the VMA for reuse */
1522    if (bufmgr->kmd_backend->gem_vm_unbind(bo))
1523       vma_free(bo->bufmgr, bo->address, bo->size);
1524    else
1525       DBG("Unable to unbind vm of buf %u\n", bo->gem_handle);
1526 
1527    if (bo->real.prime_fd != -1)
1528       close(bo->real.prime_fd);
1529 
1530    /* Close this object */
1531    if (bufmgr->kmd_backend->gem_close(bufmgr, bo) != 0) {
1532       DBG("DRM_IOCTL_GEM_CLOSE %d failed (%s): %s\n",
1533           bo->gem_handle, bo->name, strerror(errno));
1534    }
1535 
1536    if (bo->aux_map_address && bo->bufmgr->aux_map_ctx) {
1537       intel_aux_map_unmap_range(bo->bufmgr->aux_map_ctx, bo->address,
1538                                 bo->size);
1539    }
1540 
1541    for (int d = 0; d < bo->deps_size; d++) {
1542       for (int b = 0; b < IRIS_BATCH_COUNT; b++) {
1543          iris_syncobj_reference(bufmgr, &bo->deps[d].write_syncobjs[b], NULL);
1544          iris_syncobj_reference(bufmgr, &bo->deps[d].read_syncobjs[b], NULL);
1545       }
1546    }
1547    free(bo->deps);
1548 
1549    free(bo);
1550 }
1551 
1552 static void
bo_free(struct iris_bo * bo)1553 bo_free(struct iris_bo *bo)
1554 {
1555    struct iris_bufmgr *bufmgr = bo->bufmgr;
1556 
1557    simple_mtx_assert_locked(&bufmgr->lock);
1558    assert(iris_bo_is_real(bo));
1559 
1560    if (!bo->real.userptr && bo->real.map)
1561       bo_unmap(bo);
1562 
1563    if (bo->idle || !iris_bo_busy(bo)) {
1564       bo_close(bo);
1565    } else {
1566       /* Defer closing the GEM BO and returning the VMA for reuse until the
1567        * BO is idle.  Just move it to the dead list for now.
1568        */
1569       list_addtail(&bo->head, &bufmgr->zombie_list);
1570    }
1571 }
1572 
1573 static enum iris_heap
iris_get_heap_max(struct iris_bufmgr * bufmgr)1574 iris_get_heap_max(struct iris_bufmgr *bufmgr)
1575 {
1576    if (bufmgr->vram.size) {
1577       return intel_vram_all_mappable(&bufmgr->devinfo) ?
1578              IRIS_HEAP_MAX_LARGE_BAR : IRIS_HEAP_MAX;
1579    }
1580 
1581    return bufmgr->devinfo.ver >= 20 ? IRIS_HEAP_MAX_NO_VRAM :
1582                                       IRIS_HEAP_SYSTEM_MEMORY_UNCACHED_COMPRESSED;
1583 }
1584 
1585 /** Frees all cached buffers significantly older than @time. */
1586 static void
cleanup_bo_cache(struct iris_bufmgr * bufmgr,time_t time)1587 cleanup_bo_cache(struct iris_bufmgr *bufmgr, time_t time)
1588 {
1589    simple_mtx_assert_locked(&bufmgr->lock);
1590 
1591    if (bufmgr->time == time)
1592       return;
1593 
1594    for (int h = 0; h < iris_get_heap_max(bufmgr); h++) {
1595       struct iris_bucket_cache *cache = &bufmgr->bucket_cache[h];
1596 
1597       for (int i = 0; i < cache->num_buckets; i++) {
1598          struct bo_cache_bucket *bucket = &cache->bucket[i];
1599 
1600          list_for_each_entry_safe(struct iris_bo, bo, &bucket->head, head) {
1601             if (time - bo->real.free_time <= 1)
1602                break;
1603 
1604             list_del(&bo->head);
1605 
1606             bo_free(bo);
1607          }
1608       }
1609    }
1610 
1611    list_for_each_entry_safe(struct iris_bo, bo, &bufmgr->zombie_list, head) {
1612       /* Stop once we reach a busy BO - all others past this point were
1613        * freed more recently so are likely also busy.
1614        */
1615       if (!bo->idle && iris_bo_busy(bo))
1616          break;
1617 
1618       list_del(&bo->head);
1619       bo_close(bo);
1620    }
1621 
1622    bufmgr->time = time;
1623 }
1624 
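/* Runs once the last reference to a real BO is dropped: reusable BOs are
 * madvised as DONTNEED and parked in the matching size bucket for reuse;
 * anything else is freed immediately.
 */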
1625 static void
1626 bo_unreference_final(struct iris_bo *bo, time_t time)
1627 {
1628    struct iris_bufmgr *bufmgr = bo->bufmgr;
1629 
1630    DBG("bo_unreference final: %d (%s)\n", bo->gem_handle, bo->name);
1631 
1632    assert(iris_bo_is_real(bo));
1633 
1634    struct bo_cache_bucket *bucket = !bo->real.reusable ? NULL :
1635       bucket_for_size(bufmgr, bo->size, bo->real.heap, 0);
1636 
1637    /* Put the buffer into our internal cache for reuse if we can. */
1638    if (bucket && iris_bo_madvise(bo, IRIS_MADVICE_DONT_NEED)) {
1639       bo->real.free_time = time;
1640       bo->name = NULL;
1641 
1642       list_addtail(&bo->head, &bucket->head);
1643    } else {
1644       bo_free(bo);
1645    }
1646 }
1647 
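/* Drops a reference to @bo, caching or freeing it once the count reaches
 * zero.  Safe to call with NULL.  A typical pairing with allocation looks
 * like this (illustrative only):
 *
 *    struct iris_bo *bo = iris_bo_alloc(bufmgr, "scratch", 4096, 4096,
 *                                       IRIS_MEMZONE_OTHER, BO_ALLOC_PLAIN);
 *    ...
 *    iris_bo_unreference(bo);
 */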
1648 void
1649 iris_bo_unreference(struct iris_bo *bo)
1650 {
1651    if (bo == NULL)
1652       return;
1653 
1654    assert(p_atomic_read(&bo->refcount) > 0);
1655 
1656    if (atomic_add_unless(&bo->refcount, -1, 1)) {
1657       struct iris_bufmgr *bufmgr = bo->bufmgr;
1658       struct timespec time;
1659 
1660       clock_gettime(CLOCK_MONOTONIC, &time);
1661 
1662       bo->zeroed = false;
1663       if (bo->gem_handle == 0) {
1664          pb_slab_free(get_slabs(bufmgr, bo->size), &bo->slab.entry);
1665       } else {
1666          simple_mtx_lock(&bufmgr->lock);
1667 
1668          if (p_atomic_dec_zero(&bo->refcount)) {
1669             bo_unreference_final(bo, time.tv_sec);
1670             cleanup_bo_cache(bufmgr, time.tv_sec);
1671          }
1672 
1673          simple_mtx_unlock(&bufmgr->lock);
1674       }
1675    }
1676 }
1677 
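/* Waits for rendering to @bo to complete and, if a debug callback was
 * provided and the BO was busy, reports stalls longer than roughly 0.01 ms
 * through perf_debug so applications can spot synchronous-map stalls.
 */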
1678 static void
1679 bo_wait_with_stall_warning(struct util_debug_callback *dbg,
1680                            struct iris_bo *bo,
1681                            const char *action)
1682 {
1683    bool busy = dbg && !bo->idle;
1684    double elapsed = unlikely(busy) ? -get_time() : 0.0;
1685 
1686    iris_bo_wait_rendering(bo);
1687 
1688    if (unlikely(busy)) {
1689       elapsed += get_time();
1690       if (elapsed > 1e-5) /* 0.01ms */ {
1691          perf_debug(dbg, "%s a busy \"%s\" BO stalled and took %.03f ms.\n",
1692                     action, bo->name, elapsed * 1000);
1693       }
1694    }
1695 }
1696 
1697 static void
1698 print_flags(unsigned flags)
1699 {
1700    if (flags & MAP_READ)
1701       DBG("READ ");
1702    if (flags & MAP_WRITE)
1703       DBG("WRITE ");
1704    if (flags & MAP_ASYNC)
1705       DBG("ASYNC ");
1706    if (flags & MAP_PERSISTENT)
1707       DBG("PERSISTENT ");
1708    if (flags & MAP_COHERENT)
1709       DBG("COHERENT ");
1710    if (flags & MAP_RAW)
1711       DBG("RAW ");
1712    DBG("\n");
1713 }
1714 
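/* Returns a CPU pointer for @bo, mapping it on first use and caching the
 * mapping for subsequent calls.  Suballocated (slab) BOs are mapped through
 * their backing BO at the right offset.  Unless MAP_ASYNC is set, the call
 * stalls until any pending GPU work on the buffer has finished.
 *
 * Illustrative usage, mirroring how the aux-map allocation below maps its
 * buffer:
 *
 *    void *ptr = iris_bo_map(NULL, bo, MAP_WRITE | MAP_RAW);
 */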
1715 void *
1716 iris_bo_map(struct util_debug_callback *dbg,
1717             struct iris_bo *bo, unsigned flags)
1718 {
1719    struct iris_bufmgr *bufmgr = bo->bufmgr;
1720    void *map = NULL;
1721 
1722    if (bo->gem_handle == 0) {
1723       struct iris_bo *real = iris_get_backing_bo(bo);
1724       uint64_t offset = bo->address - real->address;
1725       map = iris_bo_map(dbg, real, flags | MAP_ASYNC) + offset;
1726    } else {
1727       assert(bo->real.mmap_mode != IRIS_MMAP_NONE);
1728       if (bo->real.mmap_mode == IRIS_MMAP_NONE)
1729          return NULL;
1730 
1731       if (!bo->real.map) {
1732          DBG("iris_bo_map: %d (%s)\n", bo->gem_handle, bo->name);
1733          map = bufmgr->kmd_backend->gem_mmap(bufmgr, bo);
1734          if (!map) {
1735             return NULL;
1736          }
1737 
1738          VG_DEFINED(map, bo->size);
1739 
1740          if (p_atomic_cmpxchg(&bo->real.map, NULL, map)) {
1741             VG_NOACCESS(map, bo->size);
1742             os_munmap(map, bo->size);
1743          }
1744       }
1745       assert(bo->real.map);
1746       map = bo->real.map;
1747    }
1748 
1749    DBG("iris_bo_map: %d (%s) -> %p\n",
1750        bo->gem_handle, bo->name, map);
1751    print_flags(flags);
1752 
1753    if (!(flags & MAP_ASYNC)) {
1754       bo_wait_with_stall_warning(dbg, bo, "memory mapping");
1755    }
1756 
1757    return map;
1758 }
1759 
1760 /**
1761  * Waits on a BO for the given amount of time.
1762  *
1763  * @bo: buffer object to wait for
1764  * @timeout_ns: amount of time to wait in nanoseconds.
1765  *   If value is less than 0, an infinite wait will occur.
1766  *
1767  * Returns 0 if the wait was successful, i.e. the last batch referencing the
1768  * object has completed within the allotted time.  Otherwise, a negative
1769  * value describes the error; of particular interest is -ETIME, which means
1770  * the wait timed out before the object became idle.
1771  *
1772  * Similar to iris_bo_wait_rendering, except that a timeout parameter allows
1773  * the operation to give up after a certain amount of time.  Another subtle
1774  * difference is that the locking semantics differ: this variant does not
1775  * hold the lock for the duration of the wait, which makes the wait subject
1776  * to a larger userspace race window.
1777  *
1778  * The implementation shall wait until the object is no longer actively
1779  * referenced within a batch buffer at the time of the call.  The wait does
1780  * not guarantee that the buffer will not be re-issued via another thread or
1781  * a flinked handle in the meantime.  Userspace must make sure this race does
1782  * not occur if such precision is important.
1783  *
1784  * Note that some kernels have broken the promise of an infinite wait for
1785  * negative values; upgrade to the latest stable kernel if this is the case.
1786  */
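/* Illustrative only: a timeout of 0 effectively polls, so a caller can do a
 * non-blocking busy check along the lines of
 *
 *    bool still_busy = iris_bo_wait(bo, 0) == -ETIME;
 */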
1787 static inline int
1788 iris_bo_wait(struct iris_bo *bo, int64_t timeout_ns)
1789 {
1790    int ret;
1791 
1792    switch (iris_bufmgr_get_device_info(bo->bufmgr)->kmd_type) {
1793    case INTEL_KMD_TYPE_I915:
1794       if (iris_bo_is_external(bo))
1795          ret = iris_i915_bo_wait_gem(bo, timeout_ns);
1796       else
1797          ret = iris_bo_wait_syncobj(bo, timeout_ns);
1798       break;
1799    case INTEL_KMD_TYPE_XE:
1800       ret = iris_bo_wait_syncobj(bo, timeout_ns);
1801       break;
1802    default:
1803       unreachable("missing");
1804       ret = -1;
1805    }
1806 
1807    bo->idle = ret == 0;
1808 
1809    return ret;
1810 }
1811 
1812 /** Waits for all GPU rendering with the object to have completed. */
1813 void
1814 iris_bo_wait_rendering(struct iris_bo *bo)
1815 {
1816    /* We require a kernel recent enough for WAIT_IOCTL support.
1817     * See intel_init_bufmgr()
1818     */
1819    iris_bo_wait(bo, -1);
1820 }
1821 
1822 static void
1823 iris_bufmgr_destroy_global_vm(struct iris_bufmgr *bufmgr)
1824 {
1825    switch (bufmgr->devinfo.kmd_type) {
1826    case INTEL_KMD_TYPE_I915:
1827       /* Nothing to do in i915 */
1828       break;
1829    case INTEL_KMD_TYPE_XE:
1830       intel_bind_timeline_finish(&bufmgr->bind_timeline, bufmgr->fd);
1831       iris_xe_destroy_global_vm(bufmgr);
1832       break;
1833    default:
1834       unreachable("missing");
1835    }
1836 }
1837 
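/* Tears down the buffer manager: the aux-map, slab allocators, per-heap
 * reuse caches, zombie BOs, name/handle tables, and VMA heaps, followed by
 * the global VM and the duplicated DRM fd.
 */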
1838 static void
1839 iris_bufmgr_destroy(struct iris_bufmgr *bufmgr)
1840 {
1841    iris_bo_unreference(bufmgr->dummy_aux_bo);
1842 
1843    iris_destroy_border_color_pool(&bufmgr->border_color_pool);
1844 
1845    /* Free aux-map buffers */
1846    intel_aux_map_finish(bufmgr->aux_map_ctx);
1847 
1848    /* bufmgr will no longer try to free VMA entries in the aux-map */
1849    bufmgr->aux_map_ctx = NULL;
1850 
1851    for (int i = 0; i < NUM_SLAB_ALLOCATORS; i++) {
1852       if (bufmgr->bo_slabs[i].groups)
1853          pb_slabs_deinit(&bufmgr->bo_slabs[i]);
1854    }
1855 
1856    simple_mtx_lock(&bufmgr->lock);
1857 
1858    /* Free any cached buffer objects we were going to reuse */
1859    for (int h = 0; h < iris_get_heap_max(bufmgr); h++) {
1860       struct iris_bucket_cache *cache = &bufmgr->bucket_cache[h];
1861 
1862       for (int i = 0; i < cache->num_buckets; i++) {
1863          struct bo_cache_bucket *bucket = &cache->bucket[i];
1864 
1865          list_for_each_entry_safe(struct iris_bo, bo, &bucket->head, head) {
1866             list_del(&bo->head);
1867 
1868             bo_free(bo);
1869          }
1870       }
1871    }
1872    free(bufmgr->bucket_cache);
1873 
1874    /* Close any buffer objects on the dead list. */
1875    list_for_each_entry_safe(struct iris_bo, bo, &bufmgr->zombie_list, head) {
1876       list_del(&bo->head);
1877       bo_close(bo);
1878    }
1879 
1880    _mesa_hash_table_destroy(bufmgr->name_table, NULL);
1881    _mesa_hash_table_destroy(bufmgr->handle_table, NULL);
1882 
1883    for (int z = 0; z < IRIS_MEMZONE_COUNT; z++)
1884       util_vma_heap_finish(&bufmgr->vma_allocator[z]);
1885 
1886    iris_bufmgr_destroy_global_vm(bufmgr);
1887 
1888    close(bufmgr->fd);
1889 
1890    simple_mtx_unlock(&bufmgr->lock);
1891 
1892    simple_mtx_destroy(&bufmgr->lock);
1893    simple_mtx_destroy(&bufmgr->bo_deps_lock);
1894 
1895    free(bufmgr);
1896 }
1897 
1898 int
1899 iris_gem_get_tiling(struct iris_bo *bo, uint32_t *tiling)
1900 {
1901    struct iris_bufmgr *bufmgr = bo->bufmgr;
1902 
1903    if (!bufmgr->devinfo.has_tiling_uapi) {
1904       *tiling = 0;
1905       return 0;
1906    }
1907 
1908    assert(iris_bufmgr_get_device_info(bo->bufmgr)->kmd_type == INTEL_KMD_TYPE_I915);
1909    return iris_i915_bo_get_tiling(bo, tiling);
1910 }
1911 
1912 int
1913 iris_gem_set_tiling(struct iris_bo *bo, const struct isl_surf *surf)
1914 {
1915    struct iris_bufmgr *bufmgr = bo->bufmgr;
1916 
1917    /* If we can't do map_gtt, the set/get_tiling API isn't useful. And it's
1918     * actually not supported by the kernel in those cases.
1919     */
1920    if (!bufmgr->devinfo.has_tiling_uapi)
1921       return 0;
1922 
1923    assert(iris_bufmgr_get_device_info(bo->bufmgr)->kmd_type == INTEL_KMD_TYPE_I915);
1924    return iris_i915_bo_set_tiling(bo, surf);
1925 }
1926 
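/* Wraps a dma-buf fd in an iris_bo.  If the kernel hands back a GEM handle
 * we already track, the existing BO is referenced instead of creating a
 * second one, since two BOs must never alias the same kernel object.
 */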
1927 struct iris_bo *
1928 iris_bo_import_dmabuf(struct iris_bufmgr *bufmgr, int prime_fd,
1929                       const uint64_t modifier)
1930 {
1931    uint32_t handle;
1932    struct iris_bo *bo;
1933 
1934    simple_mtx_lock(&bufmgr->lock);
1935    int ret = drmPrimeFDToHandle(bufmgr->fd, prime_fd, &handle);
1936    if (ret) {
1937       DBG("import_dmabuf: failed to obtain handle from fd: %s\n",
1938           strerror(errno));
1939       simple_mtx_unlock(&bufmgr->lock);
1940       return NULL;
1941    }
1942 
1943    /*
1944     * See if the kernel has already returned this buffer to us. Just as
1945     * for named buffers, we must not create two BOs pointing at the same
1946     * kernel object.
1947     */
1948    bo = find_and_ref_external_bo(bufmgr->handle_table, handle);
1949    if (bo)
1950       goto out;
1951 
1952    bo = bo_calloc();
1953    if (!bo)
1954       goto out;
1955 
1956    p_atomic_set(&bo->refcount, 1);
1957 
1958    /* Determine size of bo.  The fd-to-handle ioctl really should
1959     * return the size, but it doesn't.  If we have kernel 3.12 or
1960     * later, we can lseek on the prime fd to get the size.  Older
1961     * kernels will just fail, in which case we fall back to the
1962     * provided (estimated or guessed) size. */
1963    ret = lseek(prime_fd, 0, SEEK_END);
1964    if (ret != -1)
1965       bo->size = ret;
1966 
1967    bo->bufmgr = bufmgr;
1968    bo->name = "prime";
1969    bo->index = -1;
1970    bo->real.reusable = false;
1971    bo->real.imported = true;
1972    /* Xe KMD expects at least 1-way coherency for imports */
1973    bo->real.heap = IRIS_HEAP_SYSTEM_MEMORY_CACHED_COHERENT;
1974    bo->real.mmap_mode = IRIS_MMAP_NONE;
1975    if (INTEL_DEBUG(DEBUG_CAPTURE_ALL))
1976       bo->real.capture = true;
1977    bo->gem_handle = handle;
1978    bo->real.prime_fd = needs_prime_fd(bufmgr) ? dup(prime_fd) : -1;
1979 
1980    uint64_t alignment = 1;
1981 
1982    /* When an aux map will be used, there is an alignment requirement on the
1983     * main surface from the mapping granularity. Some planes of the image may
1984     * have smaller alignment requirements, but this one should work for all.
1985     */
1986    if (bufmgr->devinfo.has_aux_map && isl_drm_modifier_has_aux(modifier))
1987       alignment = intel_aux_map_get_alignment(bufmgr->aux_map_ctx);
1988 
1989    bo->address = vma_alloc(bufmgr, IRIS_MEMZONE_OTHER, bo->size, alignment);
1990    if (bo->address == 0ull)
1991       goto err_free;
1992 
1993    if (!bufmgr->kmd_backend->gem_vm_bind(bo))
1994       goto err_vm_alloc;
1995 
1996    _mesa_hash_table_insert(bufmgr->handle_table, &bo->gem_handle, bo);
1997 
1998 out:
1999    simple_mtx_unlock(&bufmgr->lock);
2000    return bo;
2001 
2002 err_vm_alloc:
2003    vma_free(bufmgr, bo->address, bo->size);
2004 err_free:
2005    bo_free(bo);
2006    simple_mtx_unlock(&bufmgr->lock);
2007    return NULL;
2008 }
2009 
2010 static void
2011 iris_bo_mark_exported_locked(struct iris_bo *bo)
2012 {
2013    struct iris_bufmgr *bufmgr = bo->bufmgr;
2014 
2015    /* We cannot export suballocated BOs. */
2016    assert(iris_bo_is_real(bo));
2017    simple_mtx_assert_locked(&bufmgr->lock);
2018 
2019    if (!iris_bo_is_external(bo))
2020       _mesa_hash_table_insert(bufmgr->handle_table, &bo->gem_handle, bo);
2021 
2022    if (!bo->real.exported) {
2023       /* If a BO is going to be used externally, it could be sent to the
2024        * display HW. So make sure our CPU mappings don't assume cache
2025        * coherency since display is outside that cache.
2026        */
2027       bo->real.exported = true;
2028       bo->real.reusable = false;
2029    }
2030 }
2031 
2032 void
2033 iris_bo_mark_exported(struct iris_bo *bo)
2034 {
2035    struct iris_bufmgr *bufmgr = bo->bufmgr;
2036 
2037    /* We cannot export suballocated BOs. */
2038    assert(iris_bo_is_real(bo));
2039 
2040    if (bo->real.exported) {
2041       assert(!bo->real.reusable);
2042       return;
2043    }
2044 
2045    simple_mtx_lock(&bufmgr->lock);
2046    iris_bo_mark_exported_locked(bo);
2047    simple_mtx_unlock(&bufmgr->lock);
2048 
2049    iris_bo_set_prime_fd(bo);
2050 }
2051 
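/* Exports @bo as a dma-buf fd (created with DRM_CLOEXEC | DRM_RDWR) and
 * marks the BO as exported so it is never returned to the reuse cache.
 */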
2052 int
2053 iris_bo_export_dmabuf(struct iris_bo *bo, int *prime_fd)
2054 {
2055    struct iris_bufmgr *bufmgr = bo->bufmgr;
2056 
2057    /* We cannot export suballocated BOs. */
2058    assert(iris_bo_is_real(bo));
2059 
2060    if (drmPrimeHandleToFD(bufmgr->fd, bo->gem_handle,
2061                           DRM_CLOEXEC | DRM_RDWR, prime_fd) != 0)
2062       return -errno;
2063 
2064    iris_bo_mark_exported(bo);
2065 
2066    return 0;
2067 }
2068 
2069 static uint32_t
2070 iris_bo_export_gem_handle(struct iris_bo *bo)
2071 {
2072    /* We cannot export suballocated BOs. */
2073    assert(iris_bo_is_real(bo));
2074 
2075    iris_bo_mark_exported(bo);
2076 
2077    return bo->gem_handle;
2078 }
2079 
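/* Returns the flink "global name" for @bo, creating one on first use.
 * Flink names are the legacy GEM mechanism for sharing buffers between
 * processes.
 */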
2080 int
2081 iris_bo_flink(struct iris_bo *bo, uint32_t *name)
2082 {
2083    struct iris_bufmgr *bufmgr = bo->bufmgr;
2084 
2085    /* We cannot export suballocated BOs. */
2086    assert(iris_bo_is_real(bo));
2087 
2088    if (!bo->real.global_name) {
2089       struct drm_gem_flink flink = { .handle = bo->gem_handle };
2090 
2091       if (intel_ioctl(bufmgr->fd, DRM_IOCTL_GEM_FLINK, &flink))
2092          return -errno;
2093 
2094       simple_mtx_lock(&bufmgr->lock);
2095       if (!bo->real.global_name) {
2096          iris_bo_mark_exported_locked(bo);
2097          bo->real.global_name = flink.name;
2098          _mesa_hash_table_insert(bufmgr->name_table, &bo->real.global_name, bo);
2099       }
2100       simple_mtx_unlock(&bufmgr->lock);
2101 
2102       iris_bo_set_prime_fd(bo);
2103    }
2104 
2105    *name = bo->real.global_name;
2106    return 0;
2107 }
2108 
2109 int
2110 iris_bo_export_gem_handle_for_device(struct iris_bo *bo, int drm_fd,
2111                                      uint32_t *out_handle)
2112 {
2113    /* We cannot export suballocated BOs. */
2114    assert(iris_bo_is_real(bo));
2115 
2116    /* Only add the new GEM handle to the list of exports if it belongs to a
2117     * different GEM device. Otherwise we might close the same buffer multiple
2118     * times.
2119     */
2120    struct iris_bufmgr *bufmgr = bo->bufmgr;
2121    int ret = os_same_file_description(drm_fd, bufmgr->fd);
2122    WARN_ONCE(ret < 0,
2123              "Kernel has no file descriptor comparison support: %s\n",
2124              strerror(errno));
2125    if (ret == 0) {
2126       *out_handle = iris_bo_export_gem_handle(bo);
2127       return 0;
2128    }
2129 
2130    struct bo_export *export = calloc(1, sizeof(*export));
2131    if (!export)
2132       return -ENOMEM;
2133 
2134    export->drm_fd = drm_fd;
2135 
2136    int dmabuf_fd = -1;
2137    int err = iris_bo_export_dmabuf(bo, &dmabuf_fd);
2138    if (err) {
2139       free(export);
2140       return err;
2141    }
2142 
2143    simple_mtx_lock(&bufmgr->lock);
2144    err = drmPrimeFDToHandle(drm_fd, dmabuf_fd, &export->gem_handle);
2145    close(dmabuf_fd);
2146    if (err) {
2147       simple_mtx_unlock(&bufmgr->lock);
2148       free(export);
2149       return err;
2150    }
2151 
2152    bool found = false;
2153    list_for_each_entry(struct bo_export, iter, &bo->real.exports, link) {
2154       if (iter->drm_fd != drm_fd)
2155          continue;
2156       /* Here we assume that for a given DRM fd, we'll always get back the
2157        * same GEM handle for a given buffer.
2158        */
2159       assert(iter->gem_handle == export->gem_handle);
2160       free(export);
2161       export = iter;
2162       found = true;
2163       break;
2164    }
2165    if (!found)
2166       list_addtail(&export->link, &bo->real.exports);
2167 
2168    simple_mtx_unlock(&bufmgr->lock);
2169 
2170    *out_handle = export->gem_handle;
2171 
2172    return 0;
2173 }
2174 
2175 static void
2176 add_bucket(struct iris_bufmgr *bufmgr, int size, enum iris_heap heap)
2177 {
2178    struct iris_bucket_cache *cache = &bufmgr->bucket_cache[heap];
2179    unsigned int i = cache->num_buckets++;
2180 
2181    list_inithead(&cache->bucket[i].head);
2182    cache->bucket[i].size = size;
2183 
2184    assert(bucket_for_size(bufmgr, size, heap, 0) == &cache->bucket[i]);
2185    assert(bucket_for_size(bufmgr, size - 2048, heap, 0) == &cache->bucket[i]);
2186    assert(bucket_for_size(bufmgr, size + 1, heap, 0) != &cache->bucket[i]);
2187 }
2188 
2189 static void
2190 init_cache_buckets(struct iris_bufmgr *bufmgr, enum iris_heap heap)
2191 {
2192    uint64_t size, cache_max_size = 64 * 1024 * 1024;
2193 
2194    /* OK, so power of two buckets was too wasteful of memory.
2195     * Give 3 other sizes between each power of two, to hopefully
2196     * cover things accurately enough.  (The alternative is
2197     * probably to just go for exact matching of sizes, and assume
2198     * that for things like composited window resize the tiled
2199     * width/height alignment and rounding of sizes to pages will
2200     * get us useful cache hit rates anyway)
2201     */
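   /* The loop below, for example, produces buckets of 16 KB, 20 KB, 24 KB
    * and 28 KB between the 16 KB and 32 KB powers of two
    * (size, size + 1/4, size + 2/4, size + 3/4).
    */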
2202    add_bucket(bufmgr, PAGE_SIZE,     heap);
2203    add_bucket(bufmgr, PAGE_SIZE * 2, heap);
2204    add_bucket(bufmgr, PAGE_SIZE * 3, heap);
2205 
2206    /* Initialize the linked lists for BO reuse cache. */
2207    for (size = 4 * PAGE_SIZE; size <= cache_max_size; size *= 2) {
2208       add_bucket(bufmgr, size, heap);
2209 
2210       add_bucket(bufmgr, size + size * 1 / 4, heap);
2211       add_bucket(bufmgr, size + size * 2 / 4, heap);
2212       add_bucket(bufmgr, size + size * 3 / 4, heap);
2213    }
2214 }
2215 
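/* Allocator callback handed to the aux-map module via aux_map_allocator:
 * allocates a fresh BO in IRIS_MEMZONE_OTHER (with capture enabled), binds
 * it, and returns it wrapped in an intel_buffer with a CPU mapping.
 */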
2216 static struct intel_buffer *
2217 intel_aux_map_buffer_alloc(void *driver_ctx, uint32_t size)
2218 {
2219    struct intel_buffer *buf = malloc(sizeof(struct intel_buffer));
2220    if (!buf)
2221       return NULL;
2222 
2223    struct iris_bufmgr *bufmgr = (struct iris_bufmgr *)driver_ctx;
2224 
2225    unsigned int page_size = getpagesize();
2226    size = MAX2(ALIGN(size, page_size), page_size);
2227 
2228    struct iris_bo *bo = alloc_fresh_bo(bufmgr, size, BO_ALLOC_CAPTURE);
2229    if (!bo) {
2230       free(buf);
2231       return NULL;
2232    }
2233 
2234    simple_mtx_lock(&bufmgr->lock);
2235 
2236    bo->address = vma_alloc(bufmgr, IRIS_MEMZONE_OTHER, bo->size, 64 * 1024);
2237    if (bo->address == 0ull)
2238       goto err_free;
2239 
2240    if (!bufmgr->kmd_backend->gem_vm_bind(bo))
2241       goto err_vm_alloc;
2242 
2243    simple_mtx_unlock(&bufmgr->lock);
2244 
2245    bo->name = "aux-map";
2246    p_atomic_set(&bo->refcount, 1);
2247    bo->index = -1;
2248    bo->real.mmap_mode = heap_to_mmap_mode(bufmgr, bo->real.heap);
2249    bo->real.prime_fd = -1;
2250 
2251    buf->driver_bo = bo;
2252    buf->gpu = bo->address;
2253    buf->gpu_end = buf->gpu + bo->size;
2254    buf->map = iris_bo_map(NULL, bo, MAP_WRITE | MAP_RAW);
2255    return buf;
2256 
2257 err_vm_alloc:
2258    vma_free(bufmgr, bo->address, bo->size);
2259 err_free:
2260    free(buf);
2261    bo_free(bo);
2262    simple_mtx_unlock(&bufmgr->lock);
2263    return NULL;
2264 }
2265 
2266 static void
2267 intel_aux_map_buffer_free(void *driver_ctx, struct intel_buffer *buffer)
2268 {
2269    iris_bo_unreference((struct iris_bo*)buffer->driver_bo);
2270    free(buffer);
2271 }
2272 
2273 static struct intel_mapped_pinned_buffer_alloc aux_map_allocator = {
2274    .alloc = intel_aux_map_buffer_alloc,
2275    .free = intel_aux_map_buffer_free,
2276 };
2277 
2278 static bool
2279 iris_bufmgr_get_meminfo(struct iris_bufmgr *bufmgr,
2280                         struct intel_device_info *devinfo)
2281 {
2282    bufmgr->sys.region = &devinfo->mem.sram.mem;
2283    bufmgr->sys.size = devinfo->mem.sram.mappable.size;
2284 
2285    /* When the resizable BAR feature is disabled,
2286     * vram.mappable.size only covers 256MB and the rest of the
2287     * VRAM size is reported in the vram.unmappable.size
2288     * variable, so the total is the sum of the two.
2289     */
2290    bufmgr->vram.region = &devinfo->mem.vram.mem;
2291    bufmgr->vram.size = devinfo->mem.vram.mappable.size +
2292                        devinfo->mem.vram.unmappable.size;
2293 
2294    return true;
2295 }
2296 
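/* Sets up the per-bufmgr GPU VM.  i915 can run without one, so a failure to
 * create it there is not fatal; Xe requires both a bind timeline and a VM.
 */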
2297 static bool
2298 iris_bufmgr_init_global_vm(struct iris_bufmgr *bufmgr)
2299 {
2300    switch (bufmgr->devinfo.kmd_type) {
2301    case INTEL_KMD_TYPE_I915:
2302       bufmgr->use_global_vm = iris_i915_init_global_vm(bufmgr, &bufmgr->global_vm_id);
2303       /* i915 doesn't require a VM, so return true even if use_global_vm is false */
2304       return true;
2305    case INTEL_KMD_TYPE_XE:
2306       if (!intel_bind_timeline_init(&bufmgr->bind_timeline, bufmgr->fd))
2307          return false;
2308 
2309       bufmgr->use_global_vm = iris_xe_init_global_vm(bufmgr, &bufmgr->global_vm_id);
2310       /* Xe requires VM */
2311       return bufmgr->use_global_vm;
2312    default:
2313       unreachable("missing");
2314       return false;
2315    }
2316 }
2317 
2318 /**
2319  * Initializes the GEM buffer manager, which uses the kernel to allocate, map,
2320  * and manage buffer objects.
2321  *
2322  * \param fd File descriptor of the opened DRM device.
2323  */
2324 static struct iris_bufmgr *
2325 iris_bufmgr_create(struct intel_device_info *devinfo, int fd, bool bo_reuse)
2326 {
2327    if (devinfo->gtt_size <= IRIS_MEMZONE_OTHER_START)
2328       return NULL;
2329 
2330    struct iris_bufmgr *bufmgr = calloc(1, sizeof(*bufmgr));
2331    if (bufmgr == NULL)
2332       return NULL;
2333 
2334    /* Handles to buffer objects belong to the device fd and are not
2335     * reference counted by the kernel.  If the same fd is used by
2336     * multiple parties (threads sharing the same screen bufmgr, or
2337     * even worse the same device fd passed to multiple libraries)
2338     * ownership of those handles is shared by those independent parties.
2339     *
2340     * Don't do this! Ensure that each library/bufmgr has its own device
2341     * fd so that its namespace does not clash with another.
2342     */
2343    bufmgr->fd = os_dupfd_cloexec(fd);
2344    if (bufmgr->fd == -1)
2345       goto error_dup;
2346 
2347    p_atomic_set(&bufmgr->refcount, 1);
2348 
2349    simple_mtx_init(&bufmgr->lock, mtx_plain);
2350    simple_mtx_init(&bufmgr->bo_deps_lock, mtx_plain);
2351 
2352    list_inithead(&bufmgr->zombie_list);
2353 
2354    bufmgr->devinfo = *devinfo;
2355    devinfo = &bufmgr->devinfo;
2356    bufmgr->bo_reuse = bo_reuse;
2357    iris_bufmgr_get_meminfo(bufmgr, devinfo);
2358    bufmgr->kmd_backend = iris_kmd_backend_get(devinfo->kmd_type);
2359 
2360    intel_common_update_device_info(bufmgr->fd, devinfo);
2361 
2362    if (!iris_bufmgr_init_global_vm(bufmgr))
2363       goto error_init_vm;
2364 
2365    STATIC_ASSERT(IRIS_MEMZONE_SHADER_START == 0ull);
2366    const uint64_t _4GB = 1ull << 32;
2367    const uint64_t _2GB = 1ul << 31;
2368 
2369    /* The STATE_BASE_ADDRESS size field can only hold 1 page shy of 4GB */
2370    const uint64_t _4GB_minus_1 = _4GB - PAGE_SIZE;
2371 
2372    const struct {
2373       uint64_t start;
2374       uint64_t size;
2375    } vma[IRIS_MEMZONE_COUNT] = {
2376       [IRIS_MEMZONE_SHADER] = {
2377          .start = PAGE_SIZE,
2378          .size  = _4GB_minus_1 - PAGE_SIZE
2379       },
2380       [IRIS_MEMZONE_BINDER] = {
2381          .start = IRIS_MEMZONE_BINDER_START + IRIS_SCRATCH_ZONE_SIZE,
2382          .size  = IRIS_BINDER_ZONE_SIZE - IRIS_SCRATCH_ZONE_SIZE
2383       },
2384       [IRIS_MEMZONE_SCRATCH] = {
2385          .start = IRIS_MEMZONE_SCRATCH_START,
2386          .size  = IRIS_SCRATCH_ZONE_SIZE
2387       },
2388       [IRIS_MEMZONE_SURFACE] = {
2389          .start = IRIS_MEMZONE_SURFACE_START,
2390          .size = _4GB_minus_1 - IRIS_BINDER_ZONE_SIZE - IRIS_SCRATCH_ZONE_SIZE
2391       },
2392       [IRIS_MEMZONE_DYNAMIC] = {
2393          .start = IRIS_MEMZONE_DYNAMIC_START + IRIS_BORDER_COLOR_POOL_SIZE,
2394 
2395          /* Wa_2209859288: the Tigerlake PRM's workarounds volume says:
2396           *
2397           *    "PSDunit is dropping MSB of the blend state pointer from SD
2398           *     FIFO [...] Limit the Blend State Pointer to < 2G"
2399           *
2400           * We restrict the dynamic state pool to 2GB so that we don't ever
2401           * get a BLEND_STATE pointer with the MSB set.  We aren't likely to
2402           * need the full 4GB for dynamic state anyway.
2403           */
2404          .size  = (devinfo->ver >= 12 ? _2GB : _4GB_minus_1)
2405                   - IRIS_BORDER_COLOR_POOL_SIZE
2406       },
2407       [IRIS_MEMZONE_OTHER] = {
2408          .start = IRIS_MEMZONE_OTHER_START,
2409 
2410          /* Leave the last 4GB out of the high vma range, so that no state
2411           * base address + size can overflow 48 bits.
2412           */
2413          .size  = (devinfo->gtt_size - _4GB) - IRIS_MEMZONE_OTHER_START,
2414       },
2415    };
2416 
2417    for (unsigned i = 0; i < IRIS_MEMZONE_COUNT; i++) {
2418       util_vma_heap_init(&bufmgr->vma_allocator[i],
2419                          vma[i].start, vma[i].size);
2420    }
2421 
2422    if (INTEL_DEBUG(DEBUG_HEAPS)) {
2423       for (unsigned i = 0; i < IRIS_MEMZONE_COUNT; i++) {
2424          fprintf(stderr, "%-11s | 0x%016" PRIx64 "-0x%016" PRIx64 "\n",
2425                  memzone_name(i), vma[i].start,
2426                  vma[i].start + vma[i].size - 1);
2427       }
2428    }
2429 
2430    bufmgr->bucket_cache = calloc(iris_get_heap_max(bufmgr),
2431                                  sizeof(*bufmgr->bucket_cache));
2432    if (!bufmgr->bucket_cache)
2433       goto error_bucket_cache;
2434    for (int h = 0; h < iris_get_heap_max(bufmgr); h++)
2435       init_cache_buckets(bufmgr, h);
2436 
2437    unsigned min_slab_order = 8;  /* 256 bytes */
2438    unsigned max_slab_order = 20; /* 1 MB (slab size = 2 MB) */
2439    unsigned num_slab_orders_per_allocator =
2440       (max_slab_order - min_slab_order) / NUM_SLAB_ALLOCATORS;
2441 
2442    /* Divide the size order range among slab managers. */
2443    for (unsigned i = 0; i < NUM_SLAB_ALLOCATORS; i++) {
2444       unsigned min_order = min_slab_order;
2445       unsigned max_order =
2446          MIN2(min_order + num_slab_orders_per_allocator, max_slab_order);
2447 
2448       if (!pb_slabs_init(&bufmgr->bo_slabs[i], min_order, max_order,
2449                          iris_get_heap_max(bufmgr), true, bufmgr,
2450                          iris_can_reclaim_slab,
2451                          iris_slab_alloc,
2452                          (void *) iris_slab_free)) {
2453          goto error_slabs_init;
2454       }
2455       min_slab_order = max_order + 1;
2456    }
2457 
2458    bufmgr->name_table =
2459       _mesa_hash_table_create(NULL, _mesa_hash_uint, _mesa_key_uint_equal);
2460    bufmgr->handle_table =
2461       _mesa_hash_table_create(NULL, _mesa_hash_uint, _mesa_key_uint_equal);
2462 
2463    if (devinfo->has_aux_map) {
2464       bufmgr->aux_map_ctx = intel_aux_map_init(bufmgr, &aux_map_allocator,
2465                                                devinfo);
2466       assert(bufmgr->aux_map_ctx);
2467    }
2468 
2469    iris_init_border_color_pool(bufmgr, &bufmgr->border_color_pool);
2470 
2471    if (intel_needs_workaround(devinfo, 14019708328)) {
2472       bufmgr->dummy_aux_bo = iris_bo_alloc(bufmgr, "dummy_aux", 4096, 4096,
2473                                            IRIS_MEMZONE_OTHER, BO_ALLOC_PLAIN);
2474          if (!bufmgr->dummy_aux_bo)
2475             goto error_dummy_aux;
2476    }
2477 
2478    return bufmgr;
2479 
2480 error_dummy_aux:
2481    iris_destroy_border_color_pool(&bufmgr->border_color_pool);
2482    intel_aux_map_finish(bufmgr->aux_map_ctx);
2483    _mesa_hash_table_destroy(bufmgr->handle_table, NULL);
2484    _mesa_hash_table_destroy(bufmgr->name_table, NULL);
2485 error_slabs_init:
2486    for (unsigned i = 0; i < NUM_SLAB_ALLOCATORS; i++) {
2487       if (!bufmgr->bo_slabs[i].groups)
2488          break;
2489 
2490       pb_slabs_deinit(&bufmgr->bo_slabs[i]);
2491    }
2492    free(bufmgr->bucket_cache);
2493 error_bucket_cache:
2494    for (unsigned i = 0; i < IRIS_MEMZONE_COUNT; i++)
2495       util_vma_heap_finish(&bufmgr->vma_allocator[i]);
2496    iris_bufmgr_destroy_global_vm(bufmgr);
2497 error_init_vm:
2498    close(bufmgr->fd);
2499 error_dup:
2500    free(bufmgr);
2501    return NULL;
2502 }
2503 
2504 static struct iris_bufmgr *
2505 iris_bufmgr_ref(struct iris_bufmgr *bufmgr)
2506 {
2507    p_atomic_inc(&bufmgr->refcount);
2508    return bufmgr;
2509 }
2510 
2511 void
2512 iris_bufmgr_unref(struct iris_bufmgr *bufmgr)
2513 {
2514    simple_mtx_lock(&global_bufmgr_list_mutex);
2515    if (p_atomic_dec_zero(&bufmgr->refcount)) {
2516       list_del(&bufmgr->link);
2517       iris_bufmgr_destroy(bufmgr);
2518    }
2519    simple_mtx_unlock(&global_bufmgr_list_mutex);
2520 }
2521 
2522 /** Returns a new unique id, to be used by screens. */
2523 int
2524 iris_bufmgr_create_screen_id(struct iris_bufmgr *bufmgr)
2525 {
2526    return p_atomic_inc_return(&bufmgr->next_screen_id) - 1;
2527 }
2528 
2529 /**
2530  * Gets an already existing GEM buffer manager or creates a new one.
2531  *
2532  * \param fd File descriptor of the opened DRM device.
2533  */
2534 struct iris_bufmgr *
2535 iris_bufmgr_get_for_fd(int fd, bool bo_reuse)
2536 {
2537    struct intel_device_info devinfo;
2538    struct stat st;
2539 
2540    if (fstat(fd, &st))
2541       return NULL;
2542 
2543    struct iris_bufmgr *bufmgr = NULL;
2544 
2545    simple_mtx_lock(&global_bufmgr_list_mutex);
2546    list_for_each_entry(struct iris_bufmgr, iter_bufmgr, &global_bufmgr_list, link) {
2547       struct stat iter_st;
2548       if (fstat(iter_bufmgr->fd, &iter_st))
2549          continue;
2550 
2551       if (st.st_rdev == iter_st.st_rdev) {
2552          assert(iter_bufmgr->bo_reuse == bo_reuse);
2553          bufmgr = iris_bufmgr_ref(iter_bufmgr);
2554          goto unlock;
2555       }
2556    }
2557 
2558    if (!intel_get_device_info_from_fd(fd, &devinfo, 8, -1))
2559       goto unlock;
2560 
2561    if (devinfo.ver < 8 || devinfo.platform == INTEL_PLATFORM_CHV)
2562       goto unlock;
2563 
2564    bufmgr = iris_bufmgr_create(&devinfo, fd, bo_reuse);
2565    if (bufmgr)
2566       list_addtail(&bufmgr->link, &global_bufmgr_list);
2567 
2568  unlock:
2569    simple_mtx_unlock(&global_bufmgr_list_mutex);
2570 
2571    return bufmgr;
2572 }
2573 
2574 int
2575 iris_bufmgr_get_fd(struct iris_bufmgr *bufmgr)
2576 {
2577    return bufmgr->fd;
2578 }
2579 
2580 void*
2581 iris_bufmgr_get_aux_map_context(struct iris_bufmgr *bufmgr)
2582 {
2583    return bufmgr->aux_map_ctx;
2584 }
2585 
2586 simple_mtx_t *
2587 iris_bufmgr_get_bo_deps_lock(struct iris_bufmgr *bufmgr)
2588 {
2589    return &bufmgr->bo_deps_lock;
2590 }
2591 
2592 struct iris_border_color_pool *
2593 iris_bufmgr_get_border_color_pool(struct iris_bufmgr *bufmgr)
2594 {
2595    return &bufmgr->border_color_pool;
2596 }
2597 
2598 uint64_t
2599 iris_bufmgr_vram_size(struct iris_bufmgr *bufmgr)
2600 {
2601    return bufmgr->vram.size;
2602 }
2603 
2604 uint64_t
2605 iris_bufmgr_sram_size(struct iris_bufmgr *bufmgr)
2606 {
2607    return bufmgr->sys.size;
2608 }
2609 
2610 const struct intel_device_info *
2611 iris_bufmgr_get_device_info(struct iris_bufmgr *bufmgr)
2612 {
2613    return &bufmgr->devinfo;
2614 }
2615 
2616 const struct iris_kmd_backend *
2617 iris_bufmgr_get_kernel_driver_backend(struct iris_bufmgr *bufmgr)
2618 {
2619    return bufmgr->kmd_backend;
2620 }
2621 
2622 uint32_t
2623 iris_bufmgr_get_global_vm_id(struct iris_bufmgr *bufmgr)
2624 {
2625    return bufmgr->global_vm_id;
2626 }
2627 
2628 bool
2629 iris_bufmgr_use_global_vm_id(struct iris_bufmgr *bufmgr)
2630 {
2631    return bufmgr->use_global_vm;
2632 }
2633 
2634 bool
2635 iris_bufmgr_compute_engine_supported(struct iris_bufmgr *bufmgr)
2636 {
2637    return bufmgr->devinfo.engine_class_supported_count[INTEL_ENGINE_CLASS_COMPUTE];
2638 }
2639 
2640 /**
2641  * Return the pat entry based on the bo heap and allocation flags.
2642  */
2643 const struct intel_device_info_pat_entry *
2644 iris_heap_to_pat_entry(const struct intel_device_info *devinfo,
2645                        enum iris_heap heap)
2646 {
2647    switch (heap) {
2648    case IRIS_HEAP_SYSTEM_MEMORY_CACHED_COHERENT:
2649       return &devinfo->pat.cached_coherent;
2650    case IRIS_HEAP_SYSTEM_MEMORY_UNCACHED:
2651       return &devinfo->pat.writecombining;
2652    case IRIS_HEAP_DEVICE_LOCAL:
2653    case IRIS_HEAP_DEVICE_LOCAL_CPU_VISIBLE_SMALL_BAR:
2654    case IRIS_HEAP_DEVICE_LOCAL_PREFERRED:
2655       return &devinfo->pat.writecombining;
2656    case IRIS_HEAP_SYSTEM_MEMORY_UNCACHED_COMPRESSED:
2657    case IRIS_HEAP_DEVICE_LOCAL_COMPRESSED:
2658       return &devinfo->pat.compressed;
2659    default:
2660       unreachable("invalid heap for platforms using PAT entries");
2661    }
2662 }
2663 
2664 struct intel_bind_timeline *
2665 iris_bufmgr_get_bind_timeline(struct iris_bufmgr *bufmgr)
2666 {
2667    return &bufmgr->bind_timeline;
2668 }
2669 
2670 uint64_t
2671 iris_bufmgr_get_dummy_aux_address(struct iris_bufmgr *bufmgr)
2672 {
2673    return bufmgr->dummy_aux_bo ? bufmgr->dummy_aux_bo->address : 0;
2674 }
2675