/*
 * Copyright © 2023 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
#include "iris/iris_kmd_backend.h"

#include <sys/mman.h>

#include "common/intel_debug_identifier.h"
#include "common/intel_gem.h"
#include "common/i915/intel_gem.h"
#include "dev/intel_debug.h"

#include "drm-uapi/i915_drm.h"

#include "iris/iris_bufmgr.h"
#include "iris/iris_batch.h"
#include "iris/iris_context.h"

#define FILE_DEBUG_FLAG DEBUG_BUFMGR

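/* Tell the kernel which domains (e.g. CPU, GTT) the BO is about to be read
 * from or written to, so it can flush caches and synchronize with any
 * outstanding GPU work as needed.
 */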
static int
i915_gem_set_domain(struct iris_bufmgr *bufmgr, uint32_t handle,
                    uint32_t read_domains, uint32_t write_domains)
{
   struct drm_i915_gem_set_domain sd = {
      .handle = handle,
      .read_domains = read_domains,
      .write_domain = write_domains,
   };
   return intel_ioctl(iris_bufmgr_get_fd(bufmgr),
                      DRM_IOCTL_I915_GEM_SET_DOMAIN, &sd);
}

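/* Allocate a new BO.  On kernels with memory region support, this uses
 * DRM_IOCTL_I915_GEM_CREATE_EXT together with the memory-regions,
 * protected-content, and PAT extensions; otherwise it falls back to the
 * legacy DRM_IOCTL_I915_GEM_CREATE.  Returns the GEM handle, or 0 on failure.
 */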
static uint32_t
i915_gem_create(struct iris_bufmgr *bufmgr,
                const struct intel_memory_class_instance **regions,
                uint16_t regions_count, uint64_t size,
                enum iris_heap heap, unsigned alloc_flags)
{
   const struct intel_device_info *devinfo =
      iris_bufmgr_get_device_info(bufmgr);
   if (unlikely(!devinfo->mem.use_class_instance)) {
      struct drm_i915_gem_create create_legacy = { .size = size };

      assert(regions_count == 1 &&
             regions[0]->klass == I915_MEMORY_CLASS_SYSTEM);

      /* All new BOs we get from the kernel are zeroed, so we don't need to
       * worry about that here.
       */
      if (intel_ioctl(iris_bufmgr_get_fd(bufmgr), DRM_IOCTL_I915_GEM_CREATE,
                      &create_legacy))
         return 0;

      return create_legacy.handle;
   }

   struct drm_i915_gem_memory_class_instance i915_regions[2];
   assert(regions_count <= ARRAY_SIZE(i915_regions));
   for (uint16_t i = 0; i < regions_count; i++) {
      i915_regions[i].memory_class = regions[i]->klass;
      i915_regions[i].memory_instance = regions[i]->instance;
   }

   struct drm_i915_gem_create_ext create = {
      .size = size,
   };
   struct drm_i915_gem_create_ext_memory_regions ext_regions = {
      .base = { .name = I915_GEM_CREATE_EXT_MEMORY_REGIONS },
      .num_regions = regions_count,
      .regions = (uintptr_t)i915_regions,
   };
   intel_i915_gem_add_ext(&create.extensions,
                          I915_GEM_CREATE_EXT_MEMORY_REGIONS,
                          &ext_regions.base);

   if (iris_bufmgr_vram_size(bufmgr) > 0 &&
       !intel_vram_all_mappable(devinfo) &&
       heap == IRIS_HEAP_DEVICE_LOCAL_PREFERRED)
      /* For lmem + smem placements, the NEEDS_CPU_ACCESS flag avoids a page
       * fault when the CPU tries to access the BO.
       * Although it's counterintuitive, we cannot set this flag for
       * IRIS_HEAP_DEVICE_LOCAL_CPU_VISIBLE_SMALL_BAR because i915 does not
       * accept it for lmem-only placements.
       * When lmem-only BOs are accessed by the CPU, i915 faults and
       * automatically migrates the BO to the CPU-accessible portion of lmem.
       * The CPU_VISIBLE heap is still valuable for other reasons, however
       * (e.g., it tells the functions that calculate the iris_mmap_mode that
       * the BO can be mapped).
       */
      create.flags |= I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS;

   /* Protected param */
   struct drm_i915_gem_create_ext_protected_content protected_param = {
      .flags = 0,
   };
   if (alloc_flags & BO_ALLOC_PROTECTED) {
      intel_i915_gem_add_ext(&create.extensions,
                             I915_GEM_CREATE_EXT_PROTECTED_CONTENT,
                             &protected_param.base);
   }

   /* Set PAT param */
   struct drm_i915_gem_create_ext_set_pat set_pat_param = { 0 };
   if (devinfo->has_set_pat_uapi) {
      set_pat_param.pat_index = iris_heap_to_pat_entry(devinfo, heap)->index;
      intel_i915_gem_add_ext(&create.extensions,
                             I915_GEM_CREATE_EXT_SET_PAT,
                             &set_pat_param.base);
   }

   if (intel_ioctl(iris_bufmgr_get_fd(bufmgr), DRM_IOCTL_I915_GEM_CREATE_EXT,
                   &create))
      return 0;

   if (iris_bufmgr_vram_size(bufmgr) == 0)
      /* Calling set_domain() will allocate pages for the BO outside of the
       * struct mutex lock in the kernel, which is more efficient than waiting
       * to create them during the first execbuf that uses the BO.
       */
      i915_gem_set_domain(bufmgr, create.handle, I915_GEM_DOMAIN_CPU, 0);

   return create.handle;
}

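/* Advise the kernel that the BO's backing pages may be discarded under
 * memory pressure (I915_MADV_DONTNEED) or must be kept (I915_MADV_WILLNEED).
 * Returns whether the backing pages are still resident.
 */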
static bool
i915_bo_madvise(struct iris_bo *bo, enum iris_madvice state)
{
   uint32_t i915_state = state == IRIS_MADVICE_WILL_NEED ?
                                  I915_MADV_WILLNEED : I915_MADV_DONTNEED;
   struct drm_i915_gem_madvise madv = {
      .handle = bo->gem_handle,
      .madv = i915_state,
      .retained = 1,
   };

   intel_ioctl(iris_bufmgr_get_fd(bo->bufmgr), DRM_IOCTL_I915_GEM_MADVISE, &madv);

   return madv.retained;
}

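/* Switch the BO between CPU-cacheable (I915_CACHING_CACHED) and uncached
 * (I915_CACHING_NONE) modes via the SET_CACHING ioctl.
 */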
static int
i915_bo_set_caching(struct iris_bo *bo, bool cached)
{
   struct drm_i915_gem_caching arg = {
      .handle = bo->gem_handle,
      .caching = cached ? I915_CACHING_CACHED : I915_CACHING_NONE,
   };
   return intel_ioctl(iris_bufmgr_get_fd(bo->bufmgr),
                      DRM_IOCTL_I915_GEM_SET_CACHING, &arg);
}

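/* CPU-map the BO through the MMAP_OFFSET path: ask the kernel for a fake
 * offset into the DRM fd for this object, then mmap() that offset with the
 * caching mode recorded in bo->real.mmap_mode.
 */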
static void *
i915_gem_mmap_offset(struct iris_bufmgr *bufmgr, struct iris_bo *bo)
{
   struct drm_i915_gem_mmap_offset mmap_arg = {
      .handle = bo->gem_handle,
   };

   if (iris_bufmgr_get_device_info(bufmgr)->has_local_mem) {
      /* On discrete memory platforms, we cannot control the mmap caching mode
       * at mmap time.  Instead, it's fixed when the object is created (this
       * is a limitation of TTM).
       *
       * On DG1, our only currently enabled discrete platform, there is no
       * control over what mode we get.  For SMEM, we always get WB because
       * it's fast (probably what we want) and when the device views SMEM
       * across PCIe, it's always snooped.  The only caching mode allowed by
       * DG1 hardware for LMEM is WC.
       */
      if (iris_heap_is_device_local(bo->real.heap))
         assert(bo->real.mmap_mode == IRIS_MMAP_WC);
      else
         assert(bo->real.mmap_mode == IRIS_MMAP_WB);

      mmap_arg.flags = I915_MMAP_OFFSET_FIXED;
   } else {
      /* Only integrated platforms get to select a mmap caching mode here */
      static const uint32_t mmap_offset_for_mode[] = {
         [IRIS_MMAP_UC] = I915_MMAP_OFFSET_UC,
         [IRIS_MMAP_WC] = I915_MMAP_OFFSET_WC,
         [IRIS_MMAP_WB] = I915_MMAP_OFFSET_WB,
      };
      assert(bo->real.mmap_mode != IRIS_MMAP_NONE);
      assert(bo->real.mmap_mode < ARRAY_SIZE(mmap_offset_for_mode));
      mmap_arg.flags = mmap_offset_for_mode[bo->real.mmap_mode];
   }

   /* Get the fake offset back */
   if (intel_ioctl(iris_bufmgr_get_fd(bufmgr), DRM_IOCTL_I915_GEM_MMAP_OFFSET,
                   &mmap_arg)) {
      DBG("%s:%d: Error preparing buffer %d (%s): %s .\n",
          __FILE__, __LINE__, bo->gem_handle, bo->name, strerror(errno));
      return NULL;
   }

   /* And map it */
   void *map = mmap(0, bo->size, PROT_READ | PROT_WRITE, MAP_SHARED,
                    iris_bufmgr_get_fd(bufmgr), mmap_arg.offset);
   if (map == MAP_FAILED) {
      DBG("%s:%d: Error mapping buffer %d (%s): %s .\n",
          __FILE__, __LINE__, bo->gem_handle, bo->name, strerror(errno));
      return NULL;
   }

   return map;
}

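/* CPU-map the BO through the legacy MMAP ioctl, which returns a CPU pointer
 * directly.  Only used on integrated (no-VRAM) platforms without MMAP_OFFSET
 * support.
 */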
static void *
i915_gem_mmap_legacy(struct iris_bufmgr *bufmgr, struct iris_bo *bo)
{
   assert(iris_bufmgr_vram_size(bufmgr) == 0);
   assert(bo->real.mmap_mode == IRIS_MMAP_WB ||
          bo->real.mmap_mode == IRIS_MMAP_WC);

   struct drm_i915_gem_mmap mmap_arg = {
      .handle = bo->gem_handle,
      .size = bo->size,
      .flags = bo->real.mmap_mode == IRIS_MMAP_WC ? I915_MMAP_WC : 0,
   };

   if (intel_ioctl(iris_bufmgr_get_fd(bufmgr), DRM_IOCTL_I915_GEM_MMAP,
                   &mmap_arg)) {
      DBG("%s:%d: Error mapping buffer %d (%s): %s .\n",
          __FILE__, __LINE__, bo->gem_handle, bo->name, strerror(errno));
      return NULL;
   }

   return (void *)(uintptr_t) mmap_arg.addr_ptr;
}

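/* CPU-map a real BO, preferring the MMAP_OFFSET path when the kernel
 * supports it.
 */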
static void *
i915_gem_mmap(struct iris_bufmgr *bufmgr, struct iris_bo *bo)
{
   assert(iris_bo_is_real(bo));

   if (likely(iris_bufmgr_get_device_info(bufmgr)->has_mmap_offset))
      return i915_gem_mmap_offset(bufmgr, bo);
   else
      return i915_gem_mmap_legacy(bufmgr, bo);
}

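/* Query the kernel's per-context reset statistics to determine whether a GPU
 * reset occurred and, if so, whether this context was at fault.
 */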
static enum pipe_reset_status
i915_batch_check_for_reset(struct iris_batch *batch)
{
   struct iris_screen *screen = batch->screen;
   enum pipe_reset_status status = PIPE_NO_RESET;
   struct drm_i915_reset_stats stats = { .ctx_id = batch->i915.ctx_id };

   if (intel_ioctl(screen->fd, DRM_IOCTL_I915_GET_RESET_STATS, &stats))
      DBG("DRM_IOCTL_I915_GET_RESET_STATS failed: %s\n", strerror(errno));

   if (stats.batch_active != 0) {
      /* A reset was observed while a batch from this hardware context was
       * executing.  Assume that this context was at fault.
       */
      status = PIPE_GUILTY_CONTEXT_RESET;
   } else if (stats.batch_pending != 0) {
      /* A reset was observed while a batch from this context was in progress,
       * but the batch was not executing.  In this case, assume that the
       * context was not at fault.
       */
      status = PIPE_INNOCENT_CONTEXT_RESET;
   }

   return status;
}

/**
 * Submit the batch to the GPU via execbuffer2.
 */
static int
i915_batch_submit(struct iris_batch *batch)
{
   struct iris_bufmgr *bufmgr = batch->screen->bufmgr;
   simple_mtx_t *bo_deps_lock = iris_bufmgr_get_bo_deps_lock(bufmgr);

   iris_bo_unmap(batch->bo);

   struct drm_i915_gem_exec_object2 *validation_list =
      malloc(batch->exec_count * sizeof(*validation_list));

   size_t sz = (batch->max_gem_handle + 1) * sizeof(int);
   int *index_for_handle = malloc(sz);
   memset(index_for_handle, -1, sz);

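   /* Build the execbuf validation list, de-duplicating BOs that appear more
    * than once by GEM handle and merging their flags (e.g. EXEC_OBJECT_WRITE).
    */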
   unsigned validation_count = 0;
   for (int i = 0; i < batch->exec_count; i++) {
      struct iris_bo *bo = iris_get_backing_bo(batch->exec_bos[i]);
      assert(bo->gem_handle != 0);

      bool written = BITSET_TEST(batch->bos_written, i);
      int prev_index = index_for_handle[bo->gem_handle];
      if (prev_index != -1) {
         if (written)
            validation_list[prev_index].flags |= EXEC_OBJECT_WRITE;
      } else {
         uint32_t flags = EXEC_OBJECT_SUPPORTS_48B_ADDRESS | EXEC_OBJECT_PINNED;
         flags |= bo->real.capture ? EXEC_OBJECT_CAPTURE : 0;
         flags |= bo == batch->screen->workaround_bo ? EXEC_OBJECT_ASYNC : 0;
         flags |= iris_bo_is_external(bo) ? 0 : EXEC_OBJECT_ASYNC;
         flags |= written ? EXEC_OBJECT_WRITE : 0;

         index_for_handle[bo->gem_handle] = validation_count;
         validation_list[validation_count] =
            (struct drm_i915_gem_exec_object2) {
               .handle = bo->gem_handle,
               .offset = bo->address,
               .flags = flags,
            };
         ++validation_count;
      }
   }

   free(index_for_handle);

   /* The decode operation may map and wait on the batch buffer, which could
    * in theory try to grab bo_deps_lock.  Let's keep it safe and decode
    * outside the lock.
    */
   if (INTEL_DEBUG(DEBUG_BATCH) &&
       intel_debug_batch_in_range(batch->ice->frame))
      iris_batch_decode_batch(batch);

   simple_mtx_lock(bo_deps_lock);

   iris_batch_update_syncobjs(batch);

   if ((INTEL_DEBUG(DEBUG_BATCH) &&
        intel_debug_batch_in_range(batch->ice->frame)) ||
       INTEL_DEBUG(DEBUG_SUBMIT)) {
      iris_dump_fence_list(batch);
      iris_dump_bo_list(batch);
   }

   /* The requirements for using I915_EXEC_NO_RELOC are:
    *
    *   The addresses written in the objects must match the corresponding
    *   reloc.address, which in turn must match the corresponding
    *   execobject.offset.
    *
    *   Any render targets written to in the batch must be flagged with
    *   EXEC_OBJECT_WRITE.
    *
    *   To avoid stalling, execobject.offset should match the current
    *   address of that object within the active context.
    */
   struct drm_i915_gem_execbuffer2 execbuf = {
      .buffers_ptr = (uintptr_t) validation_list,
      .buffer_count = validation_count,
      .batch_start_offset = 0,
      /* This must be QWord aligned. */
      .batch_len = ALIGN(batch->primary_batch_size, 8),
      .flags = batch->i915.exec_flags |
               I915_EXEC_NO_RELOC |
               I915_EXEC_BATCH_FIRST |
               I915_EXEC_HANDLE_LUT,
      .rsvd1 = batch->i915.ctx_id, /* rsvd1 is actually the context ID */
   };

   if (iris_batch_num_fences(batch)) {
      execbuf.flags |= I915_EXEC_FENCE_ARRAY;
      execbuf.num_cliprects = iris_batch_num_fences(batch);
      execbuf.cliprects_ptr =
         (uintptr_t)util_dynarray_begin(&batch->exec_fences);
   }

   int ret = 0;
   if (!batch->screen->devinfo->no_hw) {
      do {
         ret = intel_ioctl(batch->screen->fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &execbuf);
      } while (ret && errno == ENOMEM);

      if (ret)
         ret = -errno;
   }

   simple_mtx_unlock(bo_deps_lock);

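   /* The submission is done: mark every BO busy, reset its exec-list index,
    * and drop the reference taken when it was added to the list.
    */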
   for (int i = 0; i < batch->exec_count; i++) {
      struct iris_bo *bo = batch->exec_bos[i];

      bo->idle = false;
      bo->index = -1;

      iris_get_backing_bo(bo)->idle = false;

      iris_bo_unreference(bo);
   }

   free(validation_list);

   return ret;
}

static bool
i915_gem_vm_bind(struct iris_bo *bo)
{
   /*
    * i915 does not support VM_BIND yet.  The binding operation happens at
    * submission when we supply BO handle & offset in the execbuffer list.
    */
   return true;
}

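/* Likewise a no-op: without VM_BIND, the kernel tears down a BO's GPU
 * bindings itself when the handle is closed.
 */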
static bool
i915_gem_vm_unbind(struct iris_bo *bo)
{
   return true;
}

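/* Close the GEM handle, releasing the kernel's reference to the object. */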
static int
i915_gem_close(struct iris_bufmgr *bufmgr, struct iris_bo *bo)
{
   struct drm_gem_close close = {
      .handle = bo->gem_handle,
   };
   return intel_ioctl(iris_bufmgr_get_fd(bufmgr), DRM_IOCTL_GEM_CLOSE, &close);
}

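/* Wrap an existing CPU allocation in a GEM object so the GPU can access it.
 * On kernels without I915_USERPTR_PROBE, a set_domain() call validates the
 * address range up front; on failure the handle is closed and 0 is returned.
 */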
static uint32_t
i915_gem_create_userptr(struct iris_bufmgr *bufmgr, void *ptr, uint64_t size)
{
   const struct intel_device_info *devinfo = iris_bufmgr_get_device_info(bufmgr);
   struct drm_i915_gem_userptr arg = {
      .user_ptr = (uintptr_t)ptr,
      .user_size = size,
      .flags = devinfo->has_userptr_probe ? I915_USERPTR_PROBE : 0,
   };
   if (intel_ioctl(iris_bufmgr_get_fd(bufmgr), DRM_IOCTL_I915_GEM_USERPTR, &arg))
      return 0;

   if (!devinfo->has_userptr_probe) {
      /* Check the buffer for validity before we try and use it in a batch */
      if (i915_gem_set_domain(bufmgr, arg.handle, I915_GEM_DOMAIN_CPU, 0)) {
         struct drm_gem_close close = {
            .handle = arg.handle,
         };
         intel_ioctl(iris_bufmgr_get_fd(bufmgr), DRM_IOCTL_GEM_CLOSE, &close);
         return 0;
      }
   }

   return arg.handle;
}

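/* Return the table of KMD callbacks iris uses when running on the i915
 * kernel driver.
 */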
const struct iris_kmd_backend *i915_get_backend(void)
{
   static const struct iris_kmd_backend i915_backend = {
      .gem_create = i915_gem_create,
      .gem_create_userptr = i915_gem_create_userptr,
      .gem_close = i915_gem_close,
      .bo_madvise = i915_bo_madvise,
      .bo_set_caching = i915_bo_set_caching,
      .gem_mmap = i915_gem_mmap,
      .batch_check_for_reset = i915_batch_check_for_reset,
      .batch_submit = i915_batch_submit,
      .gem_vm_bind = i915_gem_vm_bind,
      .gem_vm_unbind = i915_gem_vm_unbind,
   };
   return &i915_backend;
}