/*
 * Copyright © 2023 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
#include "iris/iris_kmd_backend.h"

#include <sys/mman.h>

#include "common/intel_debug_identifier.h"
#include "common/intel_gem.h"
#include "common/i915/intel_gem.h"
#include "dev/intel_debug.h"

#include "drm-uapi/i915_drm.h"

#include "iris/iris_bufmgr.h"
#include "iris/iris_batch.h"
#include "iris/iris_context.h"

#define FILE_DEBUG_FLAG DEBUG_BUFMGR

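/**
 * Move a GEM buffer into the given read/write domains via
 * DRM_IOCTL_I915_GEM_SET_DOMAIN.
 */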
static int
i915_gem_set_domain(struct iris_bufmgr *bufmgr, uint32_t handle,
                    uint32_t read_domains, uint32_t write_domains)
{
   struct drm_i915_gem_set_domain sd = {
      .handle = handle,
      .read_domains = read_domains,
      .write_domain = write_domains,
   };
   return intel_ioctl(iris_bufmgr_get_fd(bufmgr),
                      DRM_IOCTL_I915_GEM_SET_DOMAIN, &sd);
}

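/**
 * Allocate a new buffer object.
 *
 * Uses the legacy GEM_CREATE ioctl on kernels without memory-region support,
 * and GEM_CREATE_EXT (with memory-region, protected-content, and PAT
 * extensions as needed) otherwise.  Returns the GEM handle, or 0 on failure.
 */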
static uint32_t
i915_gem_create(struct iris_bufmgr *bufmgr,
                const struct intel_memory_class_instance **regions,
                uint16_t regions_count, uint64_t size,
                enum iris_heap heap, unsigned alloc_flags)
{
   const struct intel_device_info *devinfo =
      iris_bufmgr_get_device_info(bufmgr);
   if (unlikely(!devinfo->mem.use_class_instance)) {
      struct drm_i915_gem_create create_legacy = { .size = size };

      assert(regions_count == 1 &&
             regions[0]->klass == I915_MEMORY_CLASS_SYSTEM);

      /* All new BOs we get from the kernel are zeroed, so we don't need to
       * worry about that here.
       */
      if (intel_ioctl(iris_bufmgr_get_fd(bufmgr), DRM_IOCTL_I915_GEM_CREATE,
                      &create_legacy))
         return 0;

      return create_legacy.handle;
   }

   struct drm_i915_gem_memory_class_instance i915_regions[2];
   assert(regions_count <= ARRAY_SIZE(i915_regions));
   for (uint16_t i = 0; i < regions_count; i++) {
      i915_regions[i].memory_class = regions[i]->klass;
      i915_regions[i].memory_instance = regions[i]->instance;
   }

   struct drm_i915_gem_create_ext create = {
      .size = size,
   };
   struct drm_i915_gem_create_ext_memory_regions ext_regions = {
      .base = { .name = I915_GEM_CREATE_EXT_MEMORY_REGIONS },
      .num_regions = regions_count,
      .regions = (uintptr_t)i915_regions,
   };
   intel_i915_gem_add_ext(&create.extensions,
                          I915_GEM_CREATE_EXT_MEMORY_REGIONS,
                          &ext_regions.base);

   if (iris_bufmgr_vram_size(bufmgr) > 0 &&
       !intel_vram_all_mappable(devinfo) &&
       heap == IRIS_HEAP_DEVICE_LOCAL_PREFERRED)
      /* For lmem + smem placements, the NEEDS_CPU_ACCESS flag will avoid a
       * page fault when the CPU tries to access the BO.
       * Although it's counterintuitive, we cannot set this flag for
       * IRIS_HEAP_DEVICE_LOCAL_CPU_VISIBLE_SMALL_BAR because i915 does not
       * accept that flag for lmem-only placements.
       * When lmem-only BOs are accessed by the CPU, i915 will fault and
       * automatically migrate the BO to the lmem portion that is CPU
       * accessible.
       * The CPU_VISIBLE heap is still valuable for other reasons, however
       * (e.g., it tells the functions that calculate the iris_mmap_mode
       * that the BO can be mapped).
       */
      create.flags |= I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS;

   /* Protected param */
   struct drm_i915_gem_create_ext_protected_content protected_param = {
      .flags = 0,
   };
   if (alloc_flags & BO_ALLOC_PROTECTED) {
      intel_i915_gem_add_ext(&create.extensions,
                             I915_GEM_CREATE_EXT_PROTECTED_CONTENT,
                             &protected_param.base);
   }

   /* Set PAT param */
   struct drm_i915_gem_create_ext_set_pat set_pat_param = { 0 };
   if (devinfo->has_set_pat_uapi) {
      set_pat_param.pat_index = iris_heap_to_pat_entry(devinfo, heap)->index;
      intel_i915_gem_add_ext(&create.extensions,
                             I915_GEM_CREATE_EXT_SET_PAT,
                             &set_pat_param.base);
   }

   if (intel_ioctl(iris_bufmgr_get_fd(bufmgr), DRM_IOCTL_I915_GEM_CREATE_EXT,
                   &create))
      return 0;

   if (iris_bufmgr_vram_size(bufmgr) == 0)
      /* Calling set_domain() will allocate pages for the BO outside of the
       * struct mutex lock in the kernel, which is more efficient than waiting
       * to create them during the first execbuf that uses the BO.
       */
      i915_gem_set_domain(bufmgr, create.handle, I915_GEM_DOMAIN_CPU, 0);

   return create.handle;
}

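/**
 * Hint to the kernel whether the BO's backing pages are needed
 * (I915_MADV_WILLNEED) or may be reclaimed under memory pressure
 * (I915_MADV_DONTNEED).  Returns whether the backing pages are still present.
 */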
static bool
i915_bo_madvise(struct iris_bo *bo, enum iris_madvice state)
{
   uint32_t i915_state = state == IRIS_MADVICE_WILL_NEED ?
                                  I915_MADV_WILLNEED : I915_MADV_DONTNEED;
   struct drm_i915_gem_madvise madv = {
      .handle = bo->gem_handle,
      .madv = i915_state,
      .retained = 1,
   };

   intel_ioctl(iris_bufmgr_get_fd(bo->bufmgr), DRM_IOCTL_I915_GEM_MADVISE, &madv);

   return madv.retained;
}

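/**
 * Set the kernel's caching mode for the BO (I915_CACHING_CACHED or
 * I915_CACHING_NONE).
 */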
static int
i915_bo_set_caching(struct iris_bo *bo, bool cached)
{
   struct drm_i915_gem_caching arg = {
      .handle = bo->gem_handle,
      .caching = cached ? I915_CACHING_CACHED : I915_CACHING_NONE,
   };
   return intel_ioctl(iris_bufmgr_get_fd(bo->bufmgr),
                      DRM_IOCTL_I915_GEM_SET_CACHING, &arg);
}

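/**
 * Map a BO through the GEM_MMAP_OFFSET interface: ask the kernel for a
 * fake offset into the DRM fd, then mmap() that offset.
 */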
static void *
i915_gem_mmap_offset(struct iris_bufmgr *bufmgr, struct iris_bo *bo)
{
   struct drm_i915_gem_mmap_offset mmap_arg = {
      .handle = bo->gem_handle,
   };

   if (iris_bufmgr_get_device_info(bufmgr)->has_local_mem) {
      /* On discrete memory platforms, we cannot control the mmap caching mode
       * at mmap time.  Instead, it's fixed when the object is created (this
       * is a limitation of TTM).
       *
       * On DG1, our only currently enabled discrete platform, there is no
       * control over what mode we get.  For SMEM, we always get WB because
       * it's fast (probably what we want) and when the device views SMEM
       * across PCIe, it's always snooped.  The only caching mode allowed by
       * DG1 hardware for LMEM is WC.
       */
      if (iris_heap_is_device_local(bo->real.heap))
         assert(bo->real.mmap_mode == IRIS_MMAP_WC);
      else
         assert(bo->real.mmap_mode == IRIS_MMAP_WB);

      mmap_arg.flags = I915_MMAP_OFFSET_FIXED;
   } else {
      /* Only integrated platforms get to select a mmap caching mode here */
      static const uint32_t mmap_offset_for_mode[] = {
         [IRIS_MMAP_UC]    = I915_MMAP_OFFSET_UC,
         [IRIS_MMAP_WC]    = I915_MMAP_OFFSET_WC,
         [IRIS_MMAP_WB]    = I915_MMAP_OFFSET_WB,
      };
      assert(bo->real.mmap_mode != IRIS_MMAP_NONE);
      assert(bo->real.mmap_mode < ARRAY_SIZE(mmap_offset_for_mode));
      mmap_arg.flags = mmap_offset_for_mode[bo->real.mmap_mode];
   }

   /* Get the fake offset back */
   if (intel_ioctl(iris_bufmgr_get_fd(bufmgr), DRM_IOCTL_I915_GEM_MMAP_OFFSET,
                   &mmap_arg)) {
      DBG("%s:%d: Error preparing buffer %d (%s): %s .\n",
          __FILE__, __LINE__, bo->gem_handle, bo->name, strerror(errno));
      return NULL;
   }

   /* And map it */
   void *map = mmap(0, bo->size, PROT_READ | PROT_WRITE, MAP_SHARED,
                    iris_bufmgr_get_fd(bufmgr), mmap_arg.offset);
   if (map == MAP_FAILED) {
      DBG("%s:%d: Error mapping buffer %d (%s): %s .\n",
          __FILE__, __LINE__, bo->gem_handle, bo->name, strerror(errno));
      return NULL;
   }

   return map;
}

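/**
 * Map a BO with the legacy GEM_MMAP ioctl (pre-mmap_offset kernels,
 * system memory only).
 */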
static void *
i915_gem_mmap_legacy(struct iris_bufmgr *bufmgr, struct iris_bo *bo)
{
   assert(iris_bufmgr_vram_size(bufmgr) == 0);
   assert(bo->real.mmap_mode == IRIS_MMAP_WB ||
          bo->real.mmap_mode == IRIS_MMAP_WC);

   struct drm_i915_gem_mmap mmap_arg = {
      .handle = bo->gem_handle,
      .size = bo->size,
      .flags = bo->real.mmap_mode == IRIS_MMAP_WC ? I915_MMAP_WC : 0,
   };

   if (intel_ioctl(iris_bufmgr_get_fd(bufmgr), DRM_IOCTL_I915_GEM_MMAP,
                   &mmap_arg)) {
      DBG("%s:%d: Error mapping buffer %d (%s): %s .\n",
          __FILE__, __LINE__, bo->gem_handle, bo->name, strerror(errno));
      return NULL;
   }

   return (void *)(uintptr_t) mmap_arg.addr_ptr;
}

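/**
 * Map a real BO into the CPU's address space, preferring the mmap_offset
 * path when the kernel supports it.
 */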
static void *
i915_gem_mmap(struct iris_bufmgr *bufmgr, struct iris_bo *bo)
{
   assert(iris_bo_is_real(bo));

   if (likely(iris_bufmgr_get_device_info(bufmgr)->has_mmap_offset))
      return i915_gem_mmap_offset(bufmgr, bo);
   else
      return i915_gem_mmap_legacy(bufmgr, bo);
}

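/**
 * Query the kernel's reset stats for this context and translate them into
 * a pipe_reset_status: guilty if one of our batches was executing when the
 * reset occurred, innocent if one was merely pending.
 */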
static enum pipe_reset_status
i915_batch_check_for_reset(struct iris_batch *batch)
{
   struct iris_screen *screen = batch->screen;
   enum pipe_reset_status status = PIPE_NO_RESET;
   struct drm_i915_reset_stats stats = { .ctx_id = batch->i915.ctx_id };

   if (intel_ioctl(screen->fd, DRM_IOCTL_I915_GET_RESET_STATS, &stats))
      DBG("DRM_IOCTL_I915_GET_RESET_STATS failed: %s\n", strerror(errno));

   if (stats.batch_active != 0) {
      /* A reset was observed while a batch from this hardware context was
       * executing.  Assume that this context was at fault.
       */
      status = PIPE_GUILTY_CONTEXT_RESET;
   } else if (stats.batch_pending != 0) {
      /* A reset was observed while a batch from this context was in progress,
       * but the batch was not executing.  In this case, assume that the
       * context was not at fault.
       */
      status = PIPE_INNOCENT_CONTEXT_RESET;
   }

   return status;
}

/**
 * Submit the batch to the GPU via execbuffer2.
 */
static int
i915_batch_submit(struct iris_batch *batch)
{
   struct iris_bufmgr *bufmgr = batch->screen->bufmgr;
   simple_mtx_t *bo_deps_lock = iris_bufmgr_get_bo_deps_lock(bufmgr);

   iris_bo_unmap(batch->bo);

   struct drm_i915_gem_exec_object2 *validation_list =
      malloc(batch->exec_count * sizeof(*validation_list));

   size_t sz = (batch->max_gem_handle + 1) * sizeof(int);
   int *index_for_handle = malloc(sz);
   memset(index_for_handle, -1, sz);

   unsigned validation_count = 0;
   for (int i = 0; i < batch->exec_count; i++) {
      struct iris_bo *bo = iris_get_backing_bo(batch->exec_bos[i]);
      assert(bo->gem_handle != 0);

      bool written = BITSET_TEST(batch->bos_written, i);
      int prev_index = index_for_handle[bo->gem_handle];
      if (prev_index != -1) {
         if (written)
            validation_list[prev_index].flags |= EXEC_OBJECT_WRITE;
      } else {
         uint32_t flags = EXEC_OBJECT_SUPPORTS_48B_ADDRESS | EXEC_OBJECT_PINNED;
         flags |= bo->real.capture ? EXEC_OBJECT_CAPTURE : 0;
         flags |= bo == batch->screen->workaround_bo ? EXEC_OBJECT_ASYNC : 0;
         flags |= iris_bo_is_external(bo) ? 0 : EXEC_OBJECT_ASYNC;
         flags |= written ? EXEC_OBJECT_WRITE : 0;

         index_for_handle[bo->gem_handle] = validation_count;
         validation_list[validation_count] =
            (struct drm_i915_gem_exec_object2) {
               .handle = bo->gem_handle,
               .offset = bo->address,
               .flags  = flags,
            };
         ++validation_count;
      }
   }

   free(index_for_handle);

   /* The decode operation may map and wait on the batch buffer, which could
    * in theory try to grab bo_deps_lock. Let's keep it safe and decode
    * outside the lock.
    */
   if (INTEL_DEBUG(DEBUG_BATCH) &&
       intel_debug_batch_in_range(batch->ice->frame))
      iris_batch_decode_batch(batch);

   simple_mtx_lock(bo_deps_lock);

   iris_batch_update_syncobjs(batch);

   if ((INTEL_DEBUG(DEBUG_BATCH) &&
        intel_debug_batch_in_range(batch->ice->frame)) ||
       INTEL_DEBUG(DEBUG_SUBMIT)) {
      iris_dump_fence_list(batch);
      iris_dump_bo_list(batch);
   }

   /* The requirements for using I915_EXEC_NO_RELOC are:
    *
    *   The addresses written in the objects must match the corresponding
    *   reloc.address, which in turn must match the corresponding
    *   execobject.offset.
    *
    *   Any render targets written to in the batch must be flagged with
    *   EXEC_OBJECT_WRITE.
    *
    *   To avoid stalling, execobject.offset should match the current
    *   address of that object within the active context.
    */
   struct drm_i915_gem_execbuffer2 execbuf = {
      .buffers_ptr = (uintptr_t) validation_list,
      .buffer_count = validation_count,
      .batch_start_offset = 0,
      /* This must be QWord aligned. */
      .batch_len = ALIGN(batch->primary_batch_size, 8),
      .flags = batch->i915.exec_flags |
               I915_EXEC_NO_RELOC |
               I915_EXEC_BATCH_FIRST |
               I915_EXEC_HANDLE_LUT,
      .rsvd1 = batch->i915.ctx_id, /* rsvd1 is actually the context ID */
   };

   if (iris_batch_num_fences(batch)) {
      execbuf.flags |= I915_EXEC_FENCE_ARRAY;
      execbuf.num_cliprects = iris_batch_num_fences(batch);
      execbuf.cliprects_ptr =
         (uintptr_t)util_dynarray_begin(&batch->exec_fences);
   }

   int ret = 0;
   if (!batch->screen->devinfo->no_hw) {
      do {
         ret = intel_ioctl(batch->screen->fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &execbuf);
      } while (ret && errno == ENOMEM);

      if (ret)
         ret = -errno;
   }

   simple_mtx_unlock(bo_deps_lock);

   for (int i = 0; i < batch->exec_count; i++) {
      struct iris_bo *bo = batch->exec_bos[i];

      bo->idle = false;
      bo->index = -1;

      iris_get_backing_bo(bo)->idle = false;

      iris_bo_unreference(bo);
   }

   free(validation_list);

   return ret;
}

static bool
i915_gem_vm_bind(struct iris_bo *bo)
{
   /*
    * i915 does not support VM_BIND yet. The binding operation happens at
    * submission time, when we supply the BO handle & offset in the
    * execbuffer list.
    */
   return true;
}

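/* As with i915_gem_vm_bind(), there is nothing to unbind on i915. */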
static bool
i915_gem_vm_unbind(struct iris_bo *bo)
{
   return true;
}

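/**
 * Release the BO's GEM handle with DRM_IOCTL_GEM_CLOSE.
 */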
static int
i915_gem_close(struct iris_bufmgr *bufmgr, struct iris_bo *bo)
{
   struct drm_gem_close close = {
      .handle = bo->gem_handle,
   };
   return intel_ioctl(iris_bufmgr_get_fd(bufmgr), DRM_IOCTL_GEM_CLOSE, &close);
}

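/**
 * Wrap an existing CPU allocation in a GEM handle via GEM_USERPTR.
 *
 * Without I915_USERPTR_PROBE support, validate the range with set_domain()
 * and close the handle again if that fails.  Returns 0 on failure.
 */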
static uint32_t
i915_gem_create_userptr(struct iris_bufmgr *bufmgr, void *ptr, uint64_t size)
{
   const struct intel_device_info *devinfo = iris_bufmgr_get_device_info(bufmgr);
   struct drm_i915_gem_userptr arg = {
      .user_ptr = (uintptr_t)ptr,
      .user_size = size,
      .flags = devinfo->has_userptr_probe ? I915_USERPTR_PROBE : 0,
   };
   if (intel_ioctl(iris_bufmgr_get_fd(bufmgr), DRM_IOCTL_I915_GEM_USERPTR, &arg))
      return 0;

   if (!devinfo->has_userptr_probe) {
      /* Check the buffer for validity before we try and use it in a batch */
      if (i915_gem_set_domain(bufmgr, arg.handle, I915_GEM_DOMAIN_CPU, 0)) {
         struct drm_gem_close close = {
               .handle = arg.handle,
         };
         intel_ioctl(iris_bufmgr_get_fd(bufmgr), DRM_IOCTL_GEM_CLOSE, &close);
         return 0;
      }
   }

   return arg.handle;
}

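/**
 * Return the iris_kmd_backend function table for the i915 kernel driver.
 */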
const struct iris_kmd_backend *i915_get_backend(void)
{
   static const struct iris_kmd_backend i915_backend = {
      .gem_create = i915_gem_create,
      .gem_create_userptr = i915_gem_create_userptr,
      .gem_close = i915_gem_close,
      .bo_madvise = i915_bo_madvise,
      .bo_set_caching = i915_bo_set_caching,
      .gem_mmap = i915_gem_mmap,
      .batch_check_for_reset = i915_batch_check_for_reset,
      .batch_submit = i915_batch_submit,
      .gem_vm_bind = i915_gem_vm_bind,
      .gem_vm_unbind = i915_gem_vm_unbind,
   };
   return &i915_backend;
}