xref: /aosp_15_r20/external/mesa3d/src/gallium/drivers/iris/xe/iris_kmd_backend.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
/*
 * Copyright © 2023 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
#include "iris_kmd_backend.h"

#include <sys/mman.h>

#include "common/intel_debug_identifier.h"
#include "common/intel_gem.h"
#include "dev/intel_debug.h"
#include "iris/iris_bufmgr.h"
#include "iris/iris_batch.h"
#include "iris/iris_context.h"

#include "drm-uapi/xe_drm.h"

#define FILE_DEBUG_FLAG DEBUG_BUFMGR

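/* Creates a GEM buffer object through DRM_IOCTL_XE_GEM_CREATE, selecting the
 * placement regions, scanout/visible-VRAM flags and CPU caching mode based on
 * the requested heap. Protected content is rejected up front; otherwise
 * returns the new GEM handle, or 0 if the ioctl fails.
 */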
static uint32_t
xe_gem_create(struct iris_bufmgr *bufmgr,
              const struct intel_memory_class_instance **regions,
              uint16_t regions_count, uint64_t size,
              enum iris_heap heap_flags, unsigned alloc_flags)
{
   /* Xe still doesn't have support for protected content. */
   if (alloc_flags & BO_ALLOC_PROTECTED)
      return -EINVAL;

   uint32_t vm_id = iris_bufmgr_get_global_vm_id(bufmgr);
   vm_id = alloc_flags & BO_ALLOC_SHARED ? 0 : vm_id;

   uint32_t flags = 0;
   /* TODO: we might need to consider scanout for shared buffers too, as we
    * do not know what the process this is shared with will do with it.
    */
   if (alloc_flags & BO_ALLOC_SCANOUT)
      flags |= DRM_XE_GEM_CREATE_FLAG_SCANOUT;
   if (!intel_vram_all_mappable(iris_bufmgr_get_device_info(bufmgr)) &&
       (heap_flags == IRIS_HEAP_DEVICE_LOCAL_PREFERRED ||
        heap_flags == IRIS_HEAP_DEVICE_LOCAL_CPU_VISIBLE_SMALL_BAR))
      flags |= DRM_XE_GEM_CREATE_FLAG_NEEDS_VISIBLE_VRAM;

   struct drm_xe_gem_create gem_create = {
     .vm_id = vm_id,
     .size = align64(size, iris_bufmgr_get_device_info(bufmgr)->mem_alignment),
     .flags = flags,
   };
   for (uint16_t i = 0; i < regions_count; i++)
      gem_create.placement |= BITFIELD_BIT(regions[i]->instance);

   const struct intel_device_info *devinfo = iris_bufmgr_get_device_info(bufmgr);
   const struct intel_device_info_pat_entry *pat_entry;
   pat_entry = iris_heap_to_pat_entry(devinfo, heap_flags);
   switch (pat_entry->mmap) {
   case INTEL_DEVICE_INFO_MMAP_MODE_WC:
      gem_create.cpu_caching = DRM_XE_GEM_CPU_CACHING_WC;
      break;
   case INTEL_DEVICE_INFO_MMAP_MODE_WB:
      gem_create.cpu_caching = DRM_XE_GEM_CPU_CACHING_WB;
      break;
   default:
      unreachable("missing");
      gem_create.cpu_caching = DRM_XE_GEM_CPU_CACHING_WC;
   }

   if (intel_ioctl(iris_bufmgr_get_fd(bufmgr), DRM_IOCTL_XE_GEM_CREATE,
                   &gem_create))
      return 0;

   return gem_create.handle;
}

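/* Maps a buffer object into the CPU address space: queries the mmap offset
 * with DRM_IOCTL_XE_GEM_MMAP_OFFSET, then mmap()s the DRM fd at that offset.
 * Returns the CPU pointer, or NULL on failure.
 */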
static void *
xe_gem_mmap(struct iris_bufmgr *bufmgr, struct iris_bo *bo)
{
   struct drm_xe_gem_mmap_offset args = {
      .handle = bo->gem_handle,
   };
   if (intel_ioctl(iris_bufmgr_get_fd(bufmgr), DRM_IOCTL_XE_GEM_MMAP_OFFSET, &args))
      return NULL;

   void *map = mmap(NULL, bo->size, PROT_READ | PROT_WRITE, MAP_SHARED,
                    iris_bufmgr_get_fd(bufmgr), args.offset);
   return map != MAP_FAILED ? map : NULL;
}

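/* Binds or unbinds a buffer object in the global VM via DRM_IOCTL_XE_VM_BIND.
 * Userptr bos are bound with DRM_XE_VM_BIND_OP_MAP_USERPTR using their CPU
 * address, and every operation signals the bufmgr's bind timeline so that
 * later execs can wait for the binding to complete. Returns 0 on success.
 */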
static inline int
xe_gem_vm_bind_op(struct iris_bo *bo, uint32_t op)
{
   struct iris_bufmgr *bufmgr = bo->bufmgr;
   struct intel_bind_timeline *bind_timeline = iris_bufmgr_get_bind_timeline(bufmgr);
   const struct intel_device_info *devinfo = iris_bufmgr_get_device_info(bufmgr);
   uint32_t handle = op == DRM_XE_VM_BIND_OP_UNMAP ? 0 : bo->gem_handle;
   struct drm_xe_sync xe_sync = {
      .handle = intel_bind_timeline_get_syncobj(bind_timeline),
      .type = DRM_XE_SYNC_TYPE_TIMELINE_SYNCOBJ,
      .flags = DRM_XE_SYNC_FLAG_SIGNAL,
   };
   uint64_t range, obj_offset = 0;
   uint32_t flags = 0;
   int ret, fd;

   fd = iris_bufmgr_get_fd(bufmgr);

   if (iris_bo_is_imported(bo))
      range = bo->size;
   else
      range = align64(bo->size, devinfo->mem_alignment);

   if (bo->real.userptr) {
      handle = 0;
      obj_offset = (uintptr_t)bo->real.map;
      if (op == DRM_XE_VM_BIND_OP_MAP)
         op = DRM_XE_VM_BIND_OP_MAP_USERPTR;
   }

   if (bo->real.capture)
      flags |= DRM_XE_VM_BIND_FLAG_DUMPABLE;

   struct drm_xe_vm_bind args = {
      .vm_id = iris_bufmgr_get_global_vm_id(bufmgr),
      .num_syncs = 1,
      .syncs = (uintptr_t)&xe_sync,
      .num_binds = 1,
      .bind.obj = handle,
      .bind.obj_offset = obj_offset,
      .bind.range = range,
      .bind.addr = intel_48b_address(bo->address),
      .bind.op = op,
      .bind.pat_index = iris_heap_to_pat_entry(devinfo, bo->real.heap)->index,
      .bind.flags = flags,
   };

   xe_sync.timeline_value = intel_bind_timeline_bind_begin(bind_timeline);
   ret = intel_ioctl(fd, DRM_IOCTL_XE_VM_BIND, &args);
   intel_bind_timeline_bind_end(bind_timeline);

   if (ret)
      DBG("vm_bind_op: DRM_IOCTL_XE_VM_BIND failed(%i)", ret);

   return ret;
}

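/* Thin wrappers over xe_gem_vm_bind_op() for the map and unmap cases. */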
static bool
xe_gem_vm_bind(struct iris_bo *bo)
{
   return xe_gem_vm_bind_op(bo, DRM_XE_VM_BIND_OP_MAP) == 0;
}

static bool
xe_gem_vm_unbind(struct iris_bo *bo)
{
   return xe_gem_vm_bind_op(bo, DRM_XE_VM_BIND_OP_UNMAP) == 0;
}

static bool
xe_bo_madvise(struct iris_bo *bo, enum iris_madvice state)
{
   /* Madvise is only applicable if the VM was created with
    * DRM_XE_VM_CREATE_FAULT_MODE, but that is not compatible with
    * DRM_XE_VM_CREATE_SCRATCH_PAGE.
    *
    * So report the buffer as retained.
    */
   return true;
}

static int
xe_bo_set_caching(struct iris_bo *bo, bool cached)
{
   /* Xe doesn't have a caching UAPI, so this function should never be
    * called.
    */
   assert(0);
   return -1;
}

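/* Queries DRM_XE_EXEC_QUEUE_GET_PROPERTY_BAN to check whether the kernel has
 * banned this batch's exec queue; if it has (or the query itself fails), the
 * context is reported as guilty of a reset.
 */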
static enum pipe_reset_status
xe_batch_check_for_reset(struct iris_batch *batch)
{
   enum pipe_reset_status status = PIPE_NO_RESET;
   struct drm_xe_exec_queue_get_property exec_queue_get_property = {
      .exec_queue_id = batch->xe.exec_queue_id,
      .property = DRM_XE_EXEC_QUEUE_GET_PROPERTY_BAN,
   };
   int ret = intel_ioctl(iris_bufmgr_get_fd(batch->screen->bufmgr),
                         DRM_IOCTL_XE_EXEC_QUEUE_GET_PROPERTY,
                         &exec_queue_get_property);

   if (ret || exec_queue_get_property.value)
      status = PIPE_GUILTY_CONTEXT_RESET;

   return status;
}

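/* Counts how many bos in the batch's exec list are external (exported or
 * imported) and may therefore need implicit synchronization.
 */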
static uint32_t
xe_batch_submit_external_bo_count(struct iris_batch *batch)
{
   uint32_t count = 0;

   for (int i = 0; i < batch->exec_count; i++) {
      if (iris_bo_is_external(batch->exec_bos[i]))
         count++;
   }

   return count;
}

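/* Tracks the external bos of a submission that participate in implicit
 * synchronization, along with the syncobj exported from each of them.
 */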
struct iris_implicit_sync {
   struct iris_implicit_sync_entry {
      struct iris_bo *bo;
      struct iris_syncobj *iris_syncobj;
   } *entries;
   uint32_t entry_count;

   struct iris_syncobj *batch_signal_syncobj;
};

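/* Exports the current sync state of 'bo' as a syncobj, records it in 'sync'
 * and adds it to the batch as a fence to wait on before execution.
 */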
static bool
iris_implicit_sync_add_bo(struct iris_batch *batch,
                          struct iris_implicit_sync *sync,
                          struct iris_bo *bo)
{
   struct iris_syncobj *syncobj = iris_bo_export_sync_state(bo);

   if (!syncobj)
      return false;

   sync->entries[sync->entry_count].bo = bo;
   sync->entries[sync->entry_count].iris_syncobj = syncobj;
   sync->entry_count++;

   iris_batch_add_syncobj(batch, syncobj, IRIS_BATCH_FENCE_WAIT);

   return true;
}

/* Cleans up the state of 'sync'. */
static void
iris_implicit_sync_finish(struct iris_batch *batch,
                          struct iris_implicit_sync *sync)
{
   struct iris_bufmgr *bufmgr = batch->screen->bufmgr;

   for (int i = 0; i < sync->entry_count; i++)
      iris_syncobj_reference(bufmgr, &sync->entries[i].iris_syncobj, NULL);

   free(sync->entries);
   sync->entry_count = 0;
}

/* Import implicit synchronization data from the batch bos that require
 * implicit synchronization into our batch buffer, so the batch will wait for
 * these bos to be idle before starting.
 */
static int
iris_implicit_sync_import(struct iris_batch *batch,
                          struct iris_implicit_sync *sync)
{
   uint32_t len = xe_batch_submit_external_bo_count(batch);

   if (!len)
      return 0;

   sync->entries = malloc(sizeof(*sync->entries) * len);
   if (!sync->entries)
      return -ENOMEM;

   for (int i = 0; i < batch->exec_count; i++) {
      struct iris_bo *bo = batch->exec_bos[i];

      if (!iris_bo_is_real(bo) || !iris_bo_is_external(bo)) {
         assert(iris_get_backing_bo(bo)->real.prime_fd == -1);
         continue;
      }

      if (bo->real.prime_fd == -1) {
         fprintf(stderr, "Bo(%s/%i %sported) with prime_fd unset in iris_implicit_sync_import()\n",
                 bo->name, bo->gem_handle, bo->real.imported ? "im" : "ex");
         continue;
      }

      if (!iris_implicit_sync_add_bo(batch, sync, bo)) {
         iris_implicit_sync_finish(batch, sync);
         return -1;
      }
   }

   return 0;
}

/* Export implicit synchronization data from our batch buffer into the bos
 * that require implicit synchronization, so other clients relying on it can
 * do implicit synchronization with these bos, which will wait for the batch
 * buffer we just submitted to signal its syncobj.
 */
static bool
iris_implicit_sync_export(struct iris_batch *batch,
                          struct iris_implicit_sync *sync)
{
   int sync_file_fd;

   if (!iris_batch_syncobj_to_sync_file_fd(batch, &sync_file_fd))
      return false;

   for (int i = 0; i < sync->entry_count; i++)
      iris_bo_import_sync_state(sync->entries[i].bo, sync_file_fd);

   close(sync_file_fd);

   return true;
}

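/* Submits the batch to the kernel with DRM_IOCTL_XE_EXEC. The syncs array
 * carries the batch's own fences plus one entry that waits on the bind
 * timeline, so execution only starts once outstanding VM binds have
 * completed. Implicit sync state is imported from external bos before the
 * exec and exported back to them afterwards.
 */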
static int
xe_batch_submit(struct iris_batch *batch)
{
   struct iris_bufmgr *bufmgr = batch->screen->bufmgr;
   struct intel_bind_timeline *bind_timeline = iris_bufmgr_get_bind_timeline(bufmgr);
   simple_mtx_t *bo_deps_lock = iris_bufmgr_get_bo_deps_lock(bufmgr);
   struct iris_implicit_sync implicit_sync = {};
   struct drm_xe_sync *syncs = NULL;
   unsigned long sync_len;
   int ret, i;

   iris_bo_unmap(batch->bo);

   /* The decode operation may map and wait on the batch buffer, which could
    * in theory try to grab bo_deps_lock. Let's keep it safe and decode
    * outside the lock.
    */
   if (INTEL_DEBUG(DEBUG_BATCH) &&
       intel_debug_batch_in_range(batch->ice->frame))
      iris_batch_decode_batch(batch);

   simple_mtx_lock(bo_deps_lock);

   iris_batch_update_syncobjs(batch);

   ret = iris_implicit_sync_import(batch, &implicit_sync);
   if (ret)
      goto error_implicit_sync_import;

   sync_len = iris_batch_num_fences(batch) + 1 /* vm bind sync */;
   syncs = calloc(sync_len, sizeof(*syncs));
   if (!syncs) {
      ret = -ENOMEM;
      goto error_no_sync_mem;
   }

   i = 0;
   util_dynarray_foreach(&batch->exec_fences, struct iris_batch_fence, fence) {
      if (fence->flags & IRIS_BATCH_FENCE_SIGNAL)
         syncs[i].flags = DRM_XE_SYNC_FLAG_SIGNAL;

      syncs[i].handle = fence->handle;
      syncs[i].type = DRM_XE_SYNC_TYPE_SYNCOBJ;
      i++;
   }

   syncs[i].handle = intel_bind_timeline_get_syncobj(bind_timeline);
   syncs[i].type = DRM_XE_SYNC_TYPE_TIMELINE_SYNCOBJ;
   syncs[i].timeline_value = intel_bind_timeline_get_last_point(bind_timeline);

   if ((INTEL_DEBUG(DEBUG_BATCH) &&
        intel_debug_batch_in_range(batch->ice->frame)) ||
       INTEL_DEBUG(DEBUG_SUBMIT)) {
      iris_dump_fence_list(batch);
      iris_dump_bo_list(batch);
   }

   struct drm_xe_exec exec = {
      .exec_queue_id = batch->xe.exec_queue_id,
      .num_batch_buffer = 1,
      .address = batch->exec_bos[0]->address,
      .syncs = (uintptr_t)syncs,
      .num_syncs = sync_len,
   };
   if (!batch->screen->devinfo->no_hw)
      ret = intel_ioctl(iris_bufmgr_get_fd(bufmgr), DRM_IOCTL_XE_EXEC, &exec);

   if (ret) {
      ret = -errno;
      goto error_exec;
   }

   if (!iris_implicit_sync_export(batch, &implicit_sync))
      ret = -1;

error_exec:
   iris_implicit_sync_finish(batch, &implicit_sync);

   simple_mtx_unlock(bo_deps_lock);

   free(syncs);

   for (int i = 0; i < batch->exec_count; i++) {
      struct iris_bo *bo = batch->exec_bos[i];

      bo->idle = false;
      bo->index = -1;

      iris_get_backing_bo(bo)->idle = false;

      iris_bo_unreference(bo);
   }

   return ret;

error_no_sync_mem:
   iris_implicit_sync_finish(batch, &implicit_sync);
error_implicit_sync_import:
   simple_mtx_unlock(bo_deps_lock);
   return ret;
}

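/* Closes the GEM handle of a buffer object. Userptr bos have no real GEM
 * handle (see xe_gem_create_userptr()), so there is nothing to close.
 */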
static int
xe_gem_close(struct iris_bufmgr *bufmgr, struct iris_bo *bo)
{
   if (bo->real.userptr)
      return 0;

   struct drm_gem_close close = {
      .handle = bo->gem_handle,
   };
   return intel_ioctl(iris_bufmgr_get_fd(bufmgr), DRM_IOCTL_GEM_CLOSE, &close);
}

static uint32_t
xe_gem_create_userptr(struct iris_bufmgr *bufmgr, void *ptr, uint64_t size)
{
   /* We return UINT32_MAX because Xe doesn't create handles for userptrs,
    * but it needs a gem_handle different from 0 so that iris_bo_is_real()
    * returns true for userptr bos.
    * A UINT32_MAX handle will not conflict with an actual gem handle of the
    * same id, as userptr bos are not put into the slab or bo cache.
    */
   return UINT32_MAX;
}

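/* Returns the table of Xe KMD callbacks used by the generic iris code. */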
const struct iris_kmd_backend *xe_get_backend(void)
{
   static const struct iris_kmd_backend xe_backend = {
      .gem_create = xe_gem_create,
      .gem_create_userptr = xe_gem_create_userptr,
      .gem_close = xe_gem_close,
      .gem_mmap = xe_gem_mmap,
      .gem_vm_bind = xe_gem_vm_bind,
      .gem_vm_unbind = xe_gem_vm_unbind,
      .bo_madvise = xe_bo_madvise,
      .bo_set_caching = xe_bo_set_caching,
      .batch_check_for_reset = xe_batch_check_for_reset,
      .batch_submit = xe_batch_submit,
   };
   return &xe_backend;
}