1 /*
2 * Copyright © 2023 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 * Authors:
24 * Xu, Zhengguo <[email protected]>
25 */
26
27 #ifdef HAVE_LIBGEN_H
28 #include <libgen.h>
29 #endif
30 #include <stdlib.h>
31 #include <fcntl.h>
32 #include <sys/stat.h>
33 #include <sys/mman.h>
34 #include <signal.h>
35 #include <getopt.h>
36 #include <stdlib.h>
37 #include <unistd.h>
38 #include <sys/wait.h>
39 #include <sys/types.h>
40 #include <sys/syscall.h>
41 #include <sys/utsname.h>
42 #include <termios.h>
43 #ifndef ETIME
44 #define ETIME ETIMEDOUT
45 #endif
46
47 #include <map>
48 #include <vector>
49 #include <queue>
50 #include <list>
51 #include <mutex>
52 #include <shared_mutex>
53 #include <algorithm>
54
55 #ifdef HAVE_VALGRIND
56 #include <valgrind/valgrind.h>
57 #include <valgrind/memcheck.h>
58
59 #define VG(x) x
60 #else
61 #define VG(x) do {} while (0)
62 #endif
63
64 #ifdef HAVE_CONFIG_H
65 #include "config.h"
66 #endif
67 #include "mos_bufmgr_api.h"
68 #include "mos_util_debug.h"
69 #include "intel_hwconfig_types.h"
70 #include "xf86drm.h"
71 #include "mos_vma.h"
72 #include "libdrm_lists.h"
73 #include "mos_bufmgr_xe.h"
74 #include "mos_synchronization_xe.h"
75 #include "mos_utilities.h"
76 #include "mos_bufmgr_util_debug.h"
77 #include "media_user_setting_value.h"
78 #include "linux_system_info.h"
79 #include "mos_oca_defs_specific.h"
80
81 //These two structs are used by mos_bufmgr_priv.h
82 typedef struct MOS_OCA_EXEC_LIST_INFO mos_oca_exec_list_info;
83 //struct MEDIA_SYSTEM_INFO;
84
85 #include "mos_bufmgr_priv.h"
86
87 #define PAGE_SIZE_4K (1ull << 12)
88 #define MAX(a, b) ((a) > (b) ? (a) : (b))
89
90 //mos_xe_mem_class is currently used as the index of default_alignment
91 enum mos_xe_mem_class
92 {
93 MOS_XE_MEM_CLASS_SYSMEM = 0, //For DRM_XE_MEM_REGION_CLASS_SYSMEM
94 MOS_XE_MEM_CLASS_VRAM, //For DRM_XE_MEM_REGION_CLASS_VRAM
95 MOS_XE_MEM_CLASS_MAX
96 };
97
98 struct mos_xe_context {
99 struct mos_linux_context ctx;
100
101 /**
102 * Always keep the latest available timeline index for
103 * such execution's fence-out point.
104 */
105 struct mos_xe_dep* timeline_dep;
106
107 /**
108 * The UMD's dummy exec_queue id for exec_queue ctx.
109 */
110 uint32_t dummy_exec_queue_id;
111
112 /**
113 * Indicates the ctx width.
114 */
115 uint8_t ctx_width;
116 /**
117 * Indicates the number of placements when creating the exec_queue.
118 */
119 uint8_t num_placements;
120 /**
121 * Indicates the engine class used to create the exec_queue.
122 */
123 uint16_t engine_class;
124 /**
125 * Indicates the engine capabilities of the queried exec_queue.
126 */
127 uint64_t engine_caps;
128 /**
129 * Indicates the creation flags; the current value should always be zero.
130 */
131 uint32_t flags;
132 /**
133 * Indicates whether it is a protected ctx.
134 */
135 bool is_protected;
136
137 /**
138 * Indicates the exec_queue reset count on this context.
139 * Note: this count depends on context restore; if the upper layer tries to query the
140 * reset status before context restore, this value may be incorrect.
141 */
142 uint32_t reset_count;
143 };
144
145 typedef struct mos_xe_device {
146 /**
147 * Note: by convention, hw_config[0] holds the total number of hw config entries
148 * and the hw config data starts from hw_config[1]
149 */
150 uint32_t *hw_config = nullptr;
151 struct drm_xe_query_config *config = nullptr;
152 struct drm_xe_query_engines *engines = nullptr;
153 struct drm_xe_query_mem_regions *mem_regions = nullptr;
154 struct drm_xe_query_gt_list *gt_list = nullptr;
155
156 /**
157 * Note: by convention, uc_versions[0] is for the GuC version and uc_versions[1] for the HuC version
158 */
159 struct drm_xe_query_uc_fw_version uc_versions[UC_TYPE_MAX];
160 } mos_xe_device;
161
162 typedef struct mos_xe_bufmgr_gem {
163 struct mos_bufmgr bufmgr;
164
165 atomic_t ref_count;
166
167 int fd;
168
169 std::recursive_mutex m_lock;
170
171 drmMMListHead managers;
172 drmMMListHead named;
173
174 mos_vma_heap vma_heap[MEMZONE_COUNT];
175
176 bool object_capture_disabled; // Note: unused on xe; remove it in the future.
177
178 #define MEM_PROFILER_BUFFER_SIZE 256
179 char mem_profiler_buffer[MEM_PROFILER_BUFFER_SIZE];
180 char* mem_profiler_path;
181 int mem_profiler_fd;
182
183 uint32_t gt_id;
184
185 /**
186 * This RW lock is used to avoid reading and writing the same sync obj in the KMD concurrently.
187 * Reading sync obj ioctl: exec and syncobj wait.
188 * Writing sync obj ioctl: reset sync obj, destroy sync obj and create sync obj.
189 */
190 std::shared_timed_mutex sync_obj_rw_lock;
191
192 /**
193 * Save the pair of UMD dummy exec_queue id and ctx pointer.
194 */
195 std::map<uint32_t, struct mos_xe_context*> global_ctx_info;
196
197 uint32_t vm_id;
198
199 /**
200 * Everything queried from the KMD that describes hw information.
201 */
202 struct mos_xe_device xe_device;
203
204 //Note: DON'T put these fields in xe_device
205 bool has_vram;
206 uint8_t va_bits;
207 /** bitmask of all memory regions */
208 uint64_t mem_regions_mask;
209 /** @default_alignment: safe alignment regardless region location */
210 uint32_t default_alignment[MOS_XE_MEM_CLASS_MAX] = {PAGE_SIZE_4K, PAGE_SIZE_4K};
211 //End of Note
212
213 /**
214 * Indicates whether gpu-gpu and cpu-gpu synchronization is disabled.
215 * This is mainly for debug purposes, and synchronization should always be enabled by default.
216 * It can be disabled by the env variable INTEL_SYNCHRONIZATION_DISABLE.
217 */
218 bool is_disable_synchronization;
219
220 /** indicates the exec_queue timeslice property */
221 #define EXEC_QUEUE_TIMESLICE_DEFAULT -1
222 #define EXEC_QUEUE_TIMESLICE_MAX 100000 //100ms
223 int32_t exec_queue_timeslice;
224 } mos_xe_bufmgr_gem;
225
226 typedef struct mos_xe_exec_bo {
227 /** indicates the real exec bo */
228 struct mos_linux_bo *bo;
229
230 /**
231 * Save read, write flags etc.
232 * Two flags are defined here: EXEC_OBJECT_READ_XE and EXEC_OBJECT_WRITE_XE.
233 * Whether this bo needs exec sync depends on these flags.
234 */
235 uint32_t flags;
236 } mos_xe_exec_bo;
237
238 typedef struct mos_xe_bo_gem {
239 /**
240 * Maximum size for bo name
241 */
242 #define MAX_NAME_SIZE 128
243
244 struct mos_linux_bo bo;
245
246 /**
247 * Reference count
248 */
249 atomic_t ref_count;
250 /**
251 * Map count when map bo is called
252 */
253 atomic_t map_count;
254
255 //Note7: unify gem_handle and bo.handle by deleting this one; refine mos_linux_bo.handle to type uint32_t
256 /**
257 * Bo handle allocated from drm
258 * Note: combine with bo.handle to use the same one.
259 */
260 uint32_t gem_handle;
261 /**
262 * Save the bo name; this is for debug usage.
263 * Suggest giving the bo a name when allocating it.
264 */
265 char name[MAX_NAME_SIZE];
266
267 /**
268 *
269 * List contains prime fd'd objects
270 */
271 drmMMListHead name_list;
272
273 /**
274 * Mapped address for the buffer, saved across map/unmap cycles
275 */
276 void *mem_virtual;
277
278 /**
279 * Boolean of whether this buffer was allocated with userptr
280 */
281 bool is_userptr;
282
283 /**
284 * Memory region used when creating the surface in local/system memory;
285 * This field only indicates the memory region type, not a memory region instance.
286 */
287 int mem_region;
288
289 /**
290 * We should always derive the syncobj handle from the bo handle with the 4 steps below
291 * (see the illustrative sketch after this struct): 1. get the prime_fd from bo.handle;
292 * 2. get the syncfile fd from the prime_fd;
293 * 3. get the syncobj_handle by importing the syncfile fd into a syncobj;
294 * 4. close the prime_fd and the syncfile fd.
295 *
296 * If umd wants external process to sync between them, umd should always import its batch
297 * syncobj handle into each external bo's dma sync buffer.
298 *
299 * Boolean of whether this buffer is imported from external
300 */
301 bool is_imported;
302 /**
303 * @cpu_caching: The CPU caching mode to select for this object. If
304 * mmaping the object the mode selected here will also be used.
305 *
306 * Supported values:
307 *
308 * DRM_XE_GEM_CPU_CACHING_WB: Allocate the pages with write-back
309 * caching. On iGPU this can't be used for scanout surfaces. Currently
310 * not allowed for objects placed in VRAM.
311 *
312 * DRM_XE_GEM_CPU_CACHING_WC: Allocate the pages as write-combined. This
313 * is uncached. Scanout surfaces should likely use this. All objects
314 * that can be placed in VRAM must use this.
315 */
316 uint16_t cpu_caching;
317
318 /**
319 * @pat_index: The platform defined @pat_index to use for this mapping.
320 * The index basically maps to some predefined memory attributes,
321 * including things like caching, coherency, compression etc. The exact
322 * meaning of the pat_index is platform specific. When the KMD sets up
323 * the binding the index here is encoded into the ppGTT PTE.
324 *
325 * For coherency the @pat_index needs to be at least 1way coherent when
326 * drm_xe_gem_create.cpu_caching is DRM_XE_GEM_CPU_CACHING_WB. The KMD
327 * will extract the coherency mode from the @pat_index and reject if
328 * there is a mismatch (see note below for pre-MTL platforms).
329 *
330 * Note: On pre-MTL platforms there is only a caching mode and no
331 * explicit coherency mode, but on such hardware there is always a
332 * shared-LLC (or is dgpu) so all GT memory accesses are coherent with
333 * CPU caches even with the caching mode set as uncached. It's only the
334 * display engine that is incoherent (on dgpu it must be in VRAM which
335 * is always mapped as WC on the CPU). However to keep the uapi somewhat
336 * consistent with newer platforms the KMD groups the different cache
337 * levels into the following coherency buckets on all pre-MTL platforms:
338 *
339 * ppGTT UC -> COH_NONE
340 * ppGTT WC -> COH_NONE
341 * ppGTT WT -> COH_NONE
342 * ppGTT WB -> COH_AT_LEAST_1WAY
343 *
344 * In practice UC/WC/WT should only ever be used for scanout surfaces on
345 * such platforms (or perhaps in general for dma-buf if shared with
346 * another device) since it is only the display engine that is actually
347 * incoherent. Everything else should typically use WB given that we
348 * have a shared-LLC. On MTL+ this completely changes and the HW
349 * defines the coherency mode as part of the @pat_index, where
350 * incoherent GT access is possible.
351 *
352 * Note: For userptr and externally imported dma-buf the kernel expects
353 * either 1WAY or 2WAY for the @pat_index.
354 */
355 uint16_t pat_index;
356
357 /**
358 * Boolean of whether this buffer is exported to external
359 */
360 bool is_exported;
361
362 /**
363 * For a cmd bo, it has an exec bo list which saves all exec bos in it.
364 * The upper-layer caller should always update this list before exec submission and clear the list after exec submission.
365 */
366 std::map<uintptr_t, struct mos_xe_exec_bo> exec_list;
367
368 #define INVALID_EXEC_QUEUE_ID -1
369 /**
370 * Save last dummy write exec_queue id.
371 * Init this field as INVALID_EXEC_QUEUE_ID at the beginning.
372 */
373 uint32_t last_exec_write_exec_queue;
374
375 /**
376 * Save last dummy read exec_queue id.
377 * Init this field as INVALID_EXEC_QUEUE_ID at the beginning.
378 */
379 uint32_t last_exec_read_exec_queue;
380
381 /**
382 * Read dependencies, pairs of dummy EXEC_QUEUE_ID and mos_xe_bo_dep.
383 * This map saves the read deps of this bo on all exec_queues;
384 * Exec will check the operation flags to get the dep from the map, add it into the exec sync array, and update the map after exec.
385 * Refer to exec call to get more details.
386 */
387 std::map<uint32_t, struct mos_xe_bo_dep> read_deps;
388
389 /**
390 * Write dependencies, pairs of dummy EXEC_QUEUE_ID and mos_xe_bo_dep.
391 * This map saves the write deps of this bo on all exec_queues;
392 * Exec will check the operation flags to get the dep from the map, add it into the exec sync array, and update the map after exec.
393 * Refer to exec call to get more details.
394 */
395 std::map<uint32_t, struct mos_xe_bo_dep> write_deps;
396
397 } mos_xe_bo_gem;
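/**
 * Illustrative sketch (not part of the driver) of the 4-step flow described in
 * mos_xe_bo_gem above for deriving a syncobj handle from a bo handle. It assumes
 * the libdrm syncobj helpers, <sys/ioctl.h>, and a kernel that provides
 * DMA_BUF_IOCTL_EXPORT_SYNC_FILE in <linux/dma-buf.h>; the real helpers used by
 * this backend live in mos_synchronization_xe.
 *
 *     int prime_fd = -1;
 *     // 1. export the bo handle as a prime (dma-buf) fd
 *     drmPrimeHandleToFD(fd, bo_handle, DRM_CLOEXEC | DRM_RDWR, &prime_fd);
 *     // 2. export a syncfile fd from the prime fd
 *     struct dma_buf_export_sync_file export_arg;
 *     export_arg.flags = DMA_BUF_SYNC_RW;
 *     export_arg.fd    = -1;
 *     ioctl(prime_fd, DMA_BUF_IOCTL_EXPORT_SYNC_FILE, &export_arg);
 *     // 3. import the syncfile fd into a newly created syncobj
 *     uint32_t syncobj_handle = 0;
 *     drmSyncobjCreate(fd, 0, &syncobj_handle);
 *     drmSyncobjImportSyncFile(fd, syncobj_handle, export_arg.fd);
 *     // 4. close the intermediate fds
 *     close(export_arg.fd);
 *     close(prime_fd);
 */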
398
399 struct mos_xe_external_bo_info {
400 /**
401 * syncobj handle created by umd to import external bo syncfile
402 */
403 int syncobj_handle;
404 /**
405 * prime fd export from external bo handle
406 */
407 int prime_fd;
408 };
409
410 #define MOS_UNIMPLEMENT(param) (void)(param)
411
412 static pthread_mutex_t bufmgr_list_mutex = PTHREAD_MUTEX_INITIALIZER;
413 static drmMMListHead bufmgr_list = { &bufmgr_list, &bufmgr_list };
414
415 static void mos_bo_free_xe(struct mos_linux_bo *bo);
416 static int mos_query_engines_count_xe(struct mos_bufmgr *bufmgr, unsigned int *nengine);
417 int mos_query_engines_xe(struct mos_bufmgr *bufmgr,
418 __u16 engine_class,
419 __u64 caps,
420 unsigned int *nengine,
421 void *engine_map);
422 static void mos_gem_bo_wait_rendering_xe(struct mos_linux_bo *bo);
423
424 static struct mos_xe_bufmgr_gem *
425 mos_bufmgr_gem_find(int fd)
426 {
427 struct mos_xe_bufmgr_gem *bufmgr_gem;
428
429 DRMLISTFOREACHENTRY(bufmgr_gem, &bufmgr_list, managers) {
430 if (bufmgr_gem->fd == fd) {
431 atomic_inc(&bufmgr_gem->ref_count);
432 return bufmgr_gem;
433 }
434 }
435
436 return nullptr;
437 }
438
439 #define MOS_DRM_CHK_XE_DEV(xe_dev, info, query_func, retval) \
440 MOS_DRM_CHK_NULL_RETURN_VALUE(xe_dev, retval); \
441 if (xe_dev->info == nullptr) \
442 { \
443 xe_dev->info = query_func(fd); \
444 MOS_DRM_CHK_NULL_RETURN_VALUE(xe_dev->info, retval); \
445 }
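/**
 * Usage sketch for MOS_DRM_CHK_XE_DEV (mirrors the calls later in this file):
 * it lazily fills the cached query result on the xe device and returns `retval`
 * from the enclosing function if the query fails. A local `fd` must be in scope.
 *
 *     // query the gt list once and cache it in dev->gt_list
 *     MOS_DRM_CHK_XE_DEV(dev, gt_list, __mos_query_gt_list_xe, 0)
 */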
446
447 static struct drm_xe_query_gt_list *
448 __mos_query_gt_list_xe(int fd)
449 {
450 int ret = 0;
451 struct drm_xe_query_gt_list *gt_list;
452 struct drm_xe_device_query query;
453 memclear(query);
454 query.query = DRM_XE_DEVICE_QUERY_GT_LIST;
455
456 ret = drmIoctl(fd, DRM_IOCTL_XE_DEVICE_QUERY,
457 &query);
458 if (ret || !query.size)
459 {
460 return nullptr;
461 }
462
463 gt_list = (drm_xe_query_gt_list *)calloc(1, query.size);
464 MOS_DRM_CHK_NULL_RETURN_VALUE(gt_list, nullptr);
465
466 query.data = (uintptr_t)(gt_list);
467 ret = drmIoctl(fd, DRM_IOCTL_XE_DEVICE_QUERY,
468 &query);
469 if (ret || !query.size || 0 == gt_list->num_gt)
470 {
471 MOS_XE_SAFE_FREE(gt_list);
472 return nullptr;
473 }
474
475 return gt_list;
476 }
477
478 static uint32_t __mos_query_mem_regions_instance_mask_xe(struct mos_bufmgr *bufmgr)
479 {
480 MOS_DRM_CHK_NULL_RETURN_VALUE(bufmgr, 0)
481 struct mos_xe_bufmgr_gem *bufmgr_gem = (struct mos_xe_bufmgr_gem *)bufmgr;
482 struct mos_xe_device *dev = &bufmgr_gem->xe_device;
483 int fd = bufmgr_gem->fd;
484 uint64_t __memory_regions = 0;
485
486 MOS_DRM_CHK_XE_DEV(dev, gt_list, __mos_query_gt_list_xe, 0)
487
488 struct drm_xe_query_gt_list *gt_list = dev->gt_list;
489 for (int i = 0; i < gt_list->num_gt; i++) {
490 /**
491 * Note: __memory_regions is the mem region instance mask on all tiles and gts
492 */
493 __memory_regions |= gt_list->gt_list[i].near_mem_regions |
494 gt_list->gt_list[i].far_mem_regions;
495 }
496
497 bufmgr_gem->mem_regions_mask = __memory_regions;
498
499 return __memory_regions;
500 }
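/**
 * Worked example of the instance mask (derived from how mos_bo_alloc_xe below
 * consumes it): with one sysmem instance and two VRAM instances the mask is
 * 0b111; system-memory placement then uses mask & 0x1 (0b001) and VRAM
 * placement uses mask & ~0x1 (0b110).
 */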
501
502 static struct drm_xe_query_mem_regions *
503 __mos_query_mem_regions_xe(int fd)
504 {
505 int ret = 0;
506 struct drm_xe_query_mem_regions *mem_regions;
507 struct drm_xe_device_query query;
508 memclear(query);
509 query.query = DRM_XE_DEVICE_QUERY_MEM_REGIONS;
510
511 ret = drmIoctl(fd, DRM_IOCTL_XE_DEVICE_QUERY,
512 &query);
513 if (ret || !query.size)
514 {
515 return nullptr;
516 }
517
518 mem_regions = (drm_xe_query_mem_regions *)calloc(1, query.size);
519 MOS_DRM_CHK_NULL_RETURN_VALUE(mem_regions, nullptr);
520
521 query.data = (uintptr_t)(mem_regions);
522 ret = drmIoctl(fd, DRM_IOCTL_XE_DEVICE_QUERY, &query);
523 if (ret || !query.size || 0 == mem_regions->num_mem_regions)
524 {
525 MOS_XE_SAFE_FREE(mem_regions);
526 return nullptr;
527 }
528
529 return mem_regions;
530 }
531
532 uint8_t __mos_query_vram_region_count_xe(struct mos_xe_device *dev, int fd)
533 {
534 uint8_t vram_regions = 0;
535
536 MOS_DRM_CHK_XE_DEV(dev, mem_regions, __mos_query_mem_regions_xe, 0)
537
538 struct drm_xe_query_mem_regions *mem_regions = dev->mem_regions;
539 for (int i =0; i < mem_regions->num_mem_regions; i++)
540 {
541 if (mem_regions->mem_regions[i].mem_class == DRM_XE_MEM_REGION_CLASS_VRAM)
542 {
543 vram_regions++;
544 }
545 }
546
547 return vram_regions;
548 }
549
550 int mos_force_gt_reset_xe(int fd, int gt_id)
551 {
552 char reset_string[128];
553
554 sprintf(reset_string, "cat /sys/kernel/debug/dri/0/gt%d/force_reset", gt_id);
555 return system(reset_string);
556 }
557
558 static struct drm_xe_query_config *
559 __mos_query_config_xe(int fd)
560 {
561 struct drm_xe_query_config *config;
562 struct drm_xe_device_query query;
563 int ret = 0;
564
565 memclear(query);
566 query.query = DRM_XE_DEVICE_QUERY_CONFIG;
567 ret = drmIoctl(fd, DRM_IOCTL_XE_DEVICE_QUERY, (void *)&query);
568 if (ret || !query.size)
569 {
570 return nullptr;
571 }
572
573 config = (drm_xe_query_config *) malloc(query.size);
574 if (config != nullptr)
575 {
576 memset(config, 0, query.size);
577 }
578 else
579 {
580 MOS_DRM_ASSERTMESSAGE("malloc config failed");
581 return nullptr;
582 }
583
584 query.data = (uintptr_t)config;
585 ret = drmIoctl(fd, DRM_IOCTL_XE_DEVICE_QUERY, (void *)&query);
586 if (ret || !query.size || 0 == config->num_params)
587 {
588 MOS_XE_SAFE_FREE(config);
589 return nullptr;
590 }
591
592 return config;
593 }
594
595 static int
596 __mos_get_default_alignment_xe(struct mos_bufmgr *bufmgr)
597 {
598 MOS_DRM_CHK_NULL_RETURN_VALUE(bufmgr, -EINVAL)
599 struct mos_xe_bufmgr_gem *bufmgr_gem = (struct mos_xe_bufmgr_gem *)bufmgr;
600 struct mos_xe_device *dev = &bufmgr_gem->xe_device;
601 int fd = bufmgr_gem->fd;
602 MOS_DRM_CHK_XE_DEV(dev, mem_regions, __mos_query_mem_regions_xe, -ENODEV)
603 struct drm_xe_query_mem_regions *mem_regions = dev->mem_regions;
604 uint16_t mem_class;
605
606 for (int i = 0; i < mem_regions->num_mem_regions; i++)
607 {
608 if (DRM_XE_MEM_REGION_CLASS_SYSMEM == mem_regions->mem_regions[i].mem_class)
609 {
610 mem_class = MOS_XE_MEM_CLASS_SYSMEM;
611 }
612 else if (DRM_XE_MEM_REGION_CLASS_VRAM == mem_regions->mem_regions[i].mem_class)
613 {
614 mem_class = MOS_XE_MEM_CLASS_VRAM;
615 }
616 else
617 {
618 MOS_DRM_ASSERTMESSAGE("Unsupported mem class");
619 return -EINVAL;
620 }
621
622 if (bufmgr_gem->default_alignment[mem_class] < mem_regions->mem_regions[i].min_page_size)
623 {
624 bufmgr_gem->default_alignment[mem_class] = mem_regions->mem_regions[i].min_page_size;
625 }
626 }
627
628 return 0;
629 }
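/**
 * Worked example (hypothetical device): starting from {PAGE_SIZE_4K, PAGE_SIZE_4K},
 * a device whose sysmem region reports min_page_size = 4K and whose VRAM region
 * reports min_page_size = 64K ends up with default_alignment = {4K, 64K}, since
 * each entry is only raised when a region of that class reports a larger
 * min_page_size.
 */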
630
631 /**
632 * Note: Need to add this func to bufmgr api later
633 */
634 static int
635 mos_query_uc_version_xe(struct mos_bufmgr *bufmgr, struct mos_drm_uc_version *version)
636 {
637 int ret = 0;
638 struct mos_xe_bufmgr_gem *bufmgr_gem = (struct mos_xe_bufmgr_gem *)bufmgr;
639 int fd = bufmgr_gem->fd;
640 struct mos_xe_device *dev = &bufmgr_gem->xe_device;
641
642 if (bufmgr && version && version->uc_type < UC_TYPE_MAX)
643 {
644 /**
645 * Note: query the uc version from the KMD if there is no cached data in bufmgr; otherwise use the cached data.
646 */
647 if (dev->uc_versions[version->uc_type].uc_type != version->uc_type)
648 {
649 struct drm_xe_device_query query;
650 memclear(query);
651 query.size = sizeof(struct drm_xe_query_uc_fw_version);
652 query.query = DRM_XE_DEVICE_QUERY_UC_FW_VERSION;
653 memclear(dev->uc_versions[version->uc_type]);
654 dev->uc_versions[version->uc_type].uc_type = version->uc_type;
655 query.data = (uintptr_t)&dev->uc_versions[version->uc_type];
656
657 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_XE_DEVICE_QUERY,
658 &query);
659 if (ret)
660 {
661 memclear(dev->uc_versions[version->uc_type]);
662 dev->uc_versions[version->uc_type].uc_type = UC_TYPE_INVALID;
663 MOS_DRM_ASSERTMESSAGE("Failed to query UC version, uc type: %d, errno: %d", version->uc_type, ret);
664 return ret;
665 }
666 }
667
668 version->major_version = dev->uc_versions[version->uc_type].major_ver;
669 version->minor_version = dev->uc_versions[version->uc_type].minor_ver;
670 }
671
672 return ret;
673 }
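/**
 * Usage sketch (hypothetical caller): query a firmware version through the
 * bufmgr. The enum value name UC_TYPE_GUC below is an assumption for the GuC
 * entry of the uc type enum; major_version/minor_version follow the fields
 * used above.
 *
 *     struct mos_drm_uc_version ver;
 *     memclear(ver);
 *     ver.uc_type = UC_TYPE_GUC; // assumed name of the GuC entry in the uc type enum
 *     if (0 == mos_query_uc_version_xe(bufmgr, &ver))
 *     {
 *         MOS_DRM_NORMALMESSAGE("GuC fw version %d.%d", ver.major_version, ver.minor_version);
 *     }
 */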
674
675 bool __mos_has_vram_xe(struct mos_bufmgr *bufmgr)
676 {
677 MOS_DRM_CHK_NULL_RETURN_VALUE(bufmgr, 0)
678 struct mos_xe_bufmgr_gem *bufmgr_gem = (struct mos_xe_bufmgr_gem *)bufmgr;
679 struct mos_xe_device *dev = &bufmgr_gem->xe_device;
680 int fd = bufmgr_gem->fd;
681 MOS_DRM_CHK_XE_DEV(dev, config, __mos_query_config_xe, 0)
682 struct drm_xe_query_config *config = dev->config;
683 bool has_vram = ((config->info[DRM_XE_QUERY_CONFIG_FLAGS] & DRM_XE_QUERY_CONFIG_FLAG_HAS_VRAM) > 0);
684 bufmgr_gem->has_vram = has_vram;
685 return has_vram;
686 }
687
688 uint8_t __mos_query_va_bits_xe(struct mos_bufmgr *bufmgr)
689 {
690 uint8_t va_bits = 48;
691 MOS_DRM_CHK_NULL_RETURN_VALUE(bufmgr, va_bits)
692 struct mos_xe_bufmgr_gem *bufmgr_gem = (struct mos_xe_bufmgr_gem *)bufmgr;
693 struct mos_xe_device *dev = &bufmgr_gem->xe_device;
694 int fd = bufmgr_gem->fd;
695 bufmgr_gem->va_bits = va_bits;
696 MOS_DRM_CHK_XE_DEV(dev, config, __mos_query_config_xe, va_bits)
697 struct drm_xe_query_config *config = dev->config;
698 va_bits = config->info[DRM_XE_QUERY_CONFIG_VA_BITS] & 0xff;
699 bufmgr_gem->va_bits = va_bits;
700 return va_bits;
701 }
702
703 static uint64_t
704 mos_get_platform_information_xe(struct mos_bufmgr *bufmgr)
705 {
706 MOS_DRM_CHK_NULL_RETURN_VALUE(bufmgr, 0)
707 return bufmgr->platform_information;
708 }
709
710 static void
711 mos_set_platform_information_xe(struct mos_bufmgr *bufmgr, uint64_t p)
712 {
713 if (bufmgr)
714 bufmgr->platform_information |= p;
715 }
716
717 static enum mos_memory_zone
718 __mos_bo_memzone_for_address_xe(uint64_t address)
719 {
720 if (address >= MEMZONE_PRIME_START)
721 return MEMZONE_PRIME;
722 else if (address >= MEMZONE_DEVICE_START)
723 return MEMZONE_DEVICE;
724 else
725 return MEMZONE_SYS;
726 }
727
728 static void
729 __mos_bo_vma_free_xe(struct mos_bufmgr *bufmgr,
730 uint64_t address,
731 uint64_t size)
732 {
733 CHK_CONDITION(nullptr == bufmgr, "nullptr bufmgr.\n", );
734 struct mos_xe_bufmgr_gem *bufmgr_gem = (struct mos_xe_bufmgr_gem *) bufmgr;
735
736 CHK_CONDITION(0ull == address, "invalid address.\n", );
737 enum mos_memory_zone memzone = __mos_bo_memzone_for_address_xe(address);
738 mos_vma_heap_free(&bufmgr_gem->vma_heap[memzone], address, size);
739 }
740
741 static void
742 __mos_bo_mark_mmaps_incoherent_xe(struct mos_linux_bo *bo)
743 {
744 #if HAVE_VALGRIND
745 struct mos_xe_bo_gem *bo_gem = (struct mos_xe_bo_gem *) bo;
746
747 if (bo_gem->mem_virtual)
748 VALGRIND_MAKE_MEM_NOACCESS(bo_gem->mem_virtual, bo->size);
749 #endif
750 }
751
752 static inline void
753 mos_bo_reference_xe(struct mos_linux_bo *bo)
754 {
755 struct mos_xe_bo_gem *bo_gem = (struct mos_xe_bo_gem *) bo;
756
757 atomic_inc(&bo_gem->ref_count);
758 }
759
760 drm_export void mos_bo_unreference_xe(struct mos_linux_bo *bo)
761 {
762 struct mos_xe_bo_gem *bo_gem = (struct mos_xe_bo_gem *) bo;
763
764 if (atomic_read(&bo_gem->ref_count) <= 0)
765 return;
766
767 if (atomic_dec_and_test(&bo_gem->ref_count))
768 {
769 /* release memory associated with this object */
770 /* Clear any left-over mappings */
771 if (atomic_read(&bo_gem->map_count) > 0)
772 {
773 atomic_set(&bo_gem->map_count, 0);
774 __mos_bo_mark_mmaps_incoherent_xe(bo);
775 }
776
777 DRMLISTDEL(&bo_gem->name_list);
778
779 mos_bo_free_xe(bo);
780 }
781 }
782
783 static uint32_t
784 __mos_vm_create_xe(struct mos_bufmgr *bufmgr)
785 {
786 struct mos_xe_bufmgr_gem *bufmgr_gem = (struct mos_xe_bufmgr_gem *)bufmgr;
787 struct drm_xe_vm_create vm;
788 int ret;
789
790 memclear(vm);
791 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_XE_VM_CREATE, &vm);
792 if (ret != 0)
793 {
794 MOS_DRM_ASSERTMESSAGE("DRM_IOCTL_XE_VM_CREATE failed: %s",
795 strerror(errno));
796 return INVALID_VM;
797 }
798
799 return vm.vm_id;
800 }
801
802 static void
803 __mos_vm_destroy_xe(struct mos_bufmgr *bufmgr, uint32_t vm_id)
804 {
805 struct mos_xe_bufmgr_gem *bufmgr_gem = (struct mos_xe_bufmgr_gem *)bufmgr;
806 struct drm_xe_vm_destroy vm_destroy;
807 int ret;
808
809 if (INVALID_VM == vm_id)
810 {
811 MOS_DRM_ASSERTMESSAGE("invalid vm_id");
812 return;
813 }
814
815 memclear(vm_destroy);
816 vm_destroy.vm_id = vm_id;
817 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_XE_VM_DESTROY, &vm_destroy);
818 if (ret != 0)
819 {
820 MOS_DRM_ASSERTMESSAGE("DRM_IOCTL_XE_VM_DESTROY failed: %s",
821 strerror(errno));
822 }
823 }
824
825
826 static uint32_t
827 mos_vm_create_xe(struct mos_bufmgr *bufmgr)
828 {
829 struct mos_xe_bufmgr_gem *bufmgr_gem = (struct mos_xe_bufmgr_gem *)bufmgr;
830
831 if (bufmgr_gem->vm_id != INVALID_VM)
832 {
833 return bufmgr_gem->vm_id;
834 }
835 else
836 {
837 return __mos_vm_create_xe(bufmgr);
838 }
839 }
840
841 static void
842 mos_vm_destroy_xe(struct mos_bufmgr *bufmgr, uint32_t vm_id)
843 {
844 struct mos_xe_bufmgr_gem *bufmgr_gem = (struct mos_xe_bufmgr_gem *)bufmgr;
845
846 if (vm_id != bufmgr_gem->vm_id)
847 {
848 __mos_vm_destroy_xe(bufmgr, vm_id);
849 }
850 }
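/**
 * Usage note for the two wrappers above: mos_vm_create_xe hands out the
 * bufmgr's shared vm_id when it is already valid, and mos_vm_destroy_xe only
 * destroys a VM that is not the shared one, so a typical caller simply pairs
 * the two without special-casing the shared VM:
 *
 *     uint32_t vm_id = mos_vm_create_xe(bufmgr);   // usually returns the shared vm_id
 *     // ... use vm_id ...
 *     mos_vm_destroy_xe(bufmgr, vm_id);            // no-op for the shared VM
 */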
851
852 static struct mos_linux_context *
853 mos_context_create_shared_xe(
854 struct mos_bufmgr *bufmgr,
855 mos_linux_context* ctx,
856 __u32 flags,
857 bool bContextProtected,
858 void *engine_map,
859 uint8_t ctx_width,
860 uint8_t num_placements,
861 uint32_t ctx_type)
862 {
863 MOS_UNUSED(ctx);
864 MOS_UNUSED(ctx_type);
865 MOS_UNUSED(bContextProtected);
866
867 MOS_DRM_CHK_NULL_RETURN_VALUE(bufmgr, nullptr)
868 MOS_DRM_CHK_NULL_RETURN_VALUE(engine_map, nullptr)
869
870 static uint32_t dummy_exec_queue_id = 0;
871 struct mos_xe_bufmgr_gem *bufmgr_gem = (struct mos_xe_bufmgr_gem *)bufmgr;
872 struct mos_xe_context *context = nullptr;
873 struct drm_xe_exec_queue_create create;
874 int ret;
875 uint16_t engine_class = ((struct drm_xe_engine_class_instance *)engine_map)[0].engine_class;
876
877 memclear(create);
878 create.width = ctx_width;
879 create.num_placements = num_placements;
880 create.vm_id = bufmgr_gem->vm_id;
881 create.flags = flags;
882 create.instances = (uintptr_t)engine_map;
883
884 /**
885 * Note: must use MOS_New to allocate the buffer instead of malloc since mos_xe_context
886 * contains std::vector and std::queue. Otherwise neither will be constructed.
887 */
888 context = MOS_New(mos_xe_context);
889 MOS_DRM_CHK_NULL_RETURN_VALUE(context, nullptr)
890
891 /**
892 * Set the exec_queue timeslice for render/compute only, as a WA to ensure exec sequence.
893 * Note: this is caused by a potential issue in the KMD, where an exec_queue can be preempted by plenty of WLs with the same priority.
894 */
895 if ((engine_class == DRM_XE_ENGINE_CLASS_RENDER
896 || engine_class == DRM_XE_ENGINE_CLASS_COMPUTE)
897 && (ctx_width * num_placements == 1)
898 && bufmgr_gem->exec_queue_timeslice != EXEC_QUEUE_TIMESLICE_DEFAULT)
899 {
900 struct drm_xe_ext_set_property timeslice;
901 memclear(timeslice);
902 timeslice.property = DRM_XE_EXEC_QUEUE_SET_PROPERTY_TIMESLICE;
903 /**
904 * Note: this value indicates the maximum timeslice for a WL, not the real waiting time.
905 */
906 timeslice.value = bufmgr_gem->exec_queue_timeslice;
907 timeslice.base.name = DRM_XE_EXEC_QUEUE_EXTENSION_SET_PROPERTY;
908 create.extensions = (uintptr_t)(&timeslice);
909 MOS_DRM_NORMALMESSAGE("WA: exec_queue timeslice set by engine class(%d), value(%d)",
910 engine_class, bufmgr_gem->exec_queue_timeslice);
911 }
912
913 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_XE_EXEC_QUEUE_CREATE, &create);
914
915 MOS_DRM_CHK_STATUS_MESSAGE_RETURN_VALUE_WH_OP(ret, context, MOS_Delete, nullptr,
916 "ioctl failed in DRM_IOCTL_XE_EXEC_QUEUE_CREATE, return error(%d)", ret);
917
918 context->ctx.ctx_id = create.exec_queue_id;
919 context->ctx_width = ctx_width;
920 context->num_placements = num_placements;
921 context->engine_class = ((struct drm_xe_engine_class_instance *)engine_map)[0].engine_class;
922 context->is_protected = bContextProtected;
923 context->flags = flags;
924 context->ctx.bufmgr = bufmgr;
925 context->ctx.vm_id = bufmgr_gem->vm_id;
926 context->reset_count = 0;
927 context->timeline_dep = nullptr;
928
929 bufmgr_gem->m_lock.lock();
930 context->dummy_exec_queue_id = ++dummy_exec_queue_id;
931 bufmgr_gem->global_ctx_info[context->dummy_exec_queue_id] = context;
932 bufmgr_gem->m_lock.unlock();
933 return &context->ctx;
934 }
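/**
 * Usage sketch (mirrors __mos_context_restore_xe further below, values are
 * illustrative): query the engine instances first, then create a width-1
 * exec_queue context on them.
 *
 *     unsigned int nengine = 0;
 *     mos_query_engines_count_xe(bufmgr, &nengine);
 *     struct drm_xe_engine_class_instance engines[nengine];
 *     mos_query_engines_xe(bufmgr, DRM_XE_ENGINE_CLASS_RENDER, 0, &nengine, (void *)engines);
 *     struct mos_linux_context *ctx = mos_context_create_shared_xe(bufmgr, nullptr, 0, false,
 *             (void *)engines, 1, (uint8_t)nengine, 0); // width = 1, placements = nengine
 */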
935
936 static struct mos_linux_context *
937 mos_context_create_xe(struct mos_bufmgr *bufmgr)
938 {
939 struct mos_xe_bufmgr_gem *bufmgr_gem = (struct mos_xe_bufmgr_gem *)bufmgr;
940 struct mos_xe_context *context = nullptr;
941
942 /**
943 * Note: must use MOS_New to allocate the buffer instead of malloc since mos_xe_context
944 * contains std::queue. Otherwise the queue will not be constructed.
945 */
946 context = MOS_New(mos_xe_context);
947 MOS_DRM_CHK_NULL_RETURN_VALUE(context, nullptr)
948
949 context->ctx.ctx_id = INVALID_EXEC_QUEUE_ID;
950 context->ctx_width = 0;
951 context->ctx.bufmgr = bufmgr;
952 context->ctx.vm_id = bufmgr_gem->vm_id;
953 context->reset_count = 0;
954 context->timeline_dep = nullptr;
955 context->dummy_exec_queue_id = INVALID_EXEC_QUEUE_ID;
956 return &context->ctx;
957 }
958
959 static struct mos_linux_context *
960 mos_context_create_ext_xe(
961 struct mos_bufmgr *bufmgr,
962 __u32 flags,
963 bool bContextProtected)
964 {
965 MOS_UNUSED(flags);
966 MOS_UNUSED(bContextProtected);
967
968 return mos_context_create_xe(bufmgr);
969 }
970
971 static void
972 mos_context_destroy_xe(struct mos_linux_context *ctx)
973 {
974 if (nullptr == ctx)
975 {
976 return;
977 }
978
979 struct mos_xe_bufmgr_gem *bufmgr_gem = (struct mos_xe_bufmgr_gem *)(ctx->bufmgr);
980 if (nullptr == bufmgr_gem)
981 {
982 return;
983 }
984 struct mos_xe_context *context = (struct mos_xe_context *)ctx;
985 struct drm_xe_exec_queue_destroy exec_queue_destroy;
986 int ret;
987 bufmgr_gem->m_lock.lock();
988 bufmgr_gem->sync_obj_rw_lock.lock();
989 mos_sync_destroy_timeline_dep(bufmgr_gem->fd, context->timeline_dep);
990 context->timeline_dep = nullptr;
991 bufmgr_gem->global_ctx_info.erase(context->dummy_exec_queue_id);
992 bufmgr_gem->sync_obj_rw_lock.unlock();
993 bufmgr_gem->m_lock.unlock();
994
995 if (INVALID_EXEC_QUEUE_ID == ctx->ctx_id)
996 {
997 MOS_Delete(context);
998 return;
999 }
1000
1001 memclear(exec_queue_destroy);
1002 exec_queue_destroy.exec_queue_id = ctx->ctx_id;
1003
1004 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_XE_EXEC_QUEUE_DESTROY, &exec_queue_destroy);
1005 if (ret != 0)
1006 MOS_DRM_ASSERTMESSAGE("DRM_IOCTL_XE_EXEC_QUEUE_DESTROY failed: %s", strerror(errno));
1007
1008 MOS_Delete(context);
1009 }
1010
1011 /**
1012 * Restore a banned exec_queue with a newly created one.
1013 * Note: this call is only for banned context restore; if using it
1014 * for any other purpose, you MUST pay attention to context->reset_count here.
1015 */
1016 static int
1017 __mos_context_restore_xe(struct mos_bufmgr *bufmgr,
1018 struct mos_linux_context *ctx)
1019 {
1020 MOS_DRM_CHK_NULL_RETURN_VALUE(bufmgr, -EINVAL);
1021 MOS_DRM_CHK_NULL_RETURN_VALUE(ctx, -EINVAL);
1022 if (INVALID_EXEC_QUEUE_ID == ctx->ctx_id)
1023 {
1024 MOS_DRM_ASSERTMESSAGE("Unable to restore intel context, it is not supported");
1025 return -EINVAL;
1026 }
1027 struct mos_xe_bufmgr_gem *bufmgr_gem = (struct mos_xe_bufmgr_gem *)bufmgr;
1028 struct mos_xe_context *context = (struct mos_xe_context *)ctx;
1029 int ret;
1030
1031 //query engine firstly
1032 uint32_t nengine = 0;
1033 ret = mos_query_engines_count_xe(bufmgr, &nengine);
1034 MOS_DRM_CHK_STATUS_MESSAGE_RETURN(ret,
1035 "query engine count of restore failed, return error(%d)", ret)
1036 struct drm_xe_engine_class_instance engine_map[nengine];
1037 ret = mos_query_engines_xe(bufmgr,
1038 context->engine_class,
1039 context->engine_caps,
1040 &nengine,
1041 (void*)engine_map);
1042 MOS_DRM_CHK_STATUS_MESSAGE_RETURN(ret,
1043 "query engine of restore failed, return error(%d)", ret)
1044
1045 //create new exec queue
1046 struct drm_xe_exec_queue_create create;
1047 memclear(create);
1048 create.width = context->ctx_width;
1049 create.num_placements = context->num_placements;
1050 create.vm_id = context->ctx.vm_id;
1051 create.flags = context->flags;
1052 create.instances = (uintptr_t)engine_map;
1053 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_XE_EXEC_QUEUE_CREATE, &create);
1054 MOS_DRM_CHK_STATUS_MESSAGE_RETURN(ret,
1055 "ioctl failed in DRM_IOCTL_XE_EXEC_QUEUE_CREATE of restore, return error(%d)", ret)
1056
1057 //destroy old exec_queue
1058 struct drm_xe_exec_queue_destroy exec_queue_destroy;
1059 memclear(exec_queue_destroy);
1060 exec_queue_destroy.exec_queue_id = ctx->ctx_id;
1061 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_XE_EXEC_QUEUE_DESTROY, &exec_queue_destroy);
1062 MOS_DRM_CHK_STATUS_MESSAGE_RETURN(ret,
1063 "ioctl failed in DRM_IOCTL_XE_EXEC_QUEUE_DESTROY of restore, return error(%d)", ret)
1064
1065 //restore
1066 context->ctx.ctx_id = create.exec_queue_id;
1067 context->reset_count += 1;
1068
1069 return MOS_XE_SUCCESS;
1070 }
1071
1072 /**
1073 * Get the property of the ctx
1074 *
1075 * @ctx indicates the context to query
1076 * @property indicates the property to query
1077 * @value returns the queried value for the given property
1078 */
1079 static int
1080 __mos_get_context_property_xe(struct mos_bufmgr *bufmgr,
1081 struct mos_linux_context *ctx,
1082 uint32_t property,
1083 uint64_t &value)
1084 {
1085 MOS_DRM_CHK_NULL_RETURN_VALUE(bufmgr, -EINVAL);
1086 MOS_DRM_CHK_NULL_RETURN_VALUE(ctx, -EINVAL);
1087 struct mos_xe_bufmgr_gem *bufmgr_gem = (struct mos_xe_bufmgr_gem *)bufmgr;
1088 struct drm_xe_exec_queue_get_property p;
1089 memclear(p);
1090 p.property = property;
1091 p.exec_queue_id = ctx->ctx_id;
1092
1093 int ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_XE_EXEC_QUEUE_GET_PROPERTY, &p);
1094
1095 value = p.value;
1096 return ret;
1097 }
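/**
 * Usage sketch: assuming the xe uAPI ban property
 * (DRM_XE_EXEC_QUEUE_GET_PROPERTY_BAN), a caller could check whether an
 * exec_queue was banned and restore it with the helper above:
 *
 *     uint64_t banned = 0;
 *     if (0 == __mos_get_context_property_xe(bufmgr, ctx,
 *             DRM_XE_EXEC_QUEUE_GET_PROPERTY_BAN, banned) && banned)
 *     {
 *         __mos_context_restore_xe(bufmgr, ctx);
 *     }
 */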
1098
1099 /**
1100 * Allocate a section of virtual memory for a buffer, assigning an address.
1101 */
1102 static uint64_t
1103 __mos_bo_vma_alloc_xe(struct mos_bufmgr *bufmgr,
1104 enum mos_memory_zone memzone,
1105 uint64_t size,
1106 uint64_t alignment)
1107 {
1108 CHK_CONDITION(nullptr == bufmgr, "nullptr bufmgr.\n", 0);
1109 struct mos_xe_bufmgr_gem *bufmgr_gem = (struct mos_xe_bufmgr_gem *) bufmgr;
1110 /* Force alignment to be some number of pages */
1111 alignment = ALIGN(alignment, PAGE_SIZE);
1112
1113 uint64_t addr = mos_vma_heap_alloc(&bufmgr_gem->vma_heap[memzone], size, alignment);
1114
1115 // currently only supports 48-bit range addresses
1116 CHK_CONDITION((addr >> 48ull) != 0, "invalid address, over 48bit range.\n", 0);
1117 CHK_CONDITION((addr >> (MEMZONE_SYS == memzone ? 40ull : (MEMZONE_DEVICE == memzone ? 41ull:42ull))) != 0, "invalid address, over memory zone range.\n", 0);
1118 CHK_CONDITION((addr % alignment) != 0, "invalid address, does not meet alignment requirement.\n", 0);
1119
1120 return addr;
1121 }
1122
1123 static int
1124 __mos_bo_set_offset_xe(MOS_LINUX_BO *bo)
1125 {
1126 struct mos_xe_bo_gem *bo_gem = (struct mos_xe_bo_gem *) bo;
1127 struct mos_xe_bufmgr_gem *bufmgr_gem = (struct mos_xe_bufmgr_gem *) bo->bufmgr;
1128 MOS_DRM_CHK_NULL_RETURN_VALUE(bo_gem, -EINVAL)
1129 MOS_DRM_CHK_NULL_RETURN_VALUE(bufmgr_gem, -EINVAL)
1130
1131 uint64_t offset = 0;
1132 uint64_t alignment = 0;
1133
1134 if (0 == bo->offset64)
1135 {
1136 bufmgr_gem->m_lock.lock();
1137
1138 /* On platforms where lmem only supports 64K pages, kmd requires us
1139 * to either align the va to 2M or separate the lmem objects and smem
1140 * objects into different va zones to avoid mixing lmem objects and
1141 * smem objects in the same page table. For an imported object, we don't know
1142 * if it's in lmem or smem. So, we need to align the va to 2M.
1143 */
1144 if (MEMZONE_PRIME == bo_gem->mem_region)
1145 {
1146 offset = __mos_bo_vma_alloc_xe(bo->bufmgr, (enum mos_memory_zone)bo_gem->mem_region, bo->size, PAGE_SIZE_2M);
1147 }
1148 else if (MEMZONE_DEVICE == bo_gem->mem_region)
1149 {
1150 alignment = MAX(bufmgr_gem->default_alignment[MOS_XE_MEM_CLASS_VRAM], PAGE_SIZE_64K);
1151 offset = __mos_bo_vma_alloc_xe(bo->bufmgr, (enum mos_memory_zone)bo_gem->mem_region, bo->size, PAGE_SIZE_64K);
1152 }
1153 else if (MEMZONE_SYS == bo_gem->mem_region)
1154 {
1155 alignment = MAX(bufmgr_gem->default_alignment[MOS_XE_MEM_CLASS_SYSMEM], PAGE_SIZE_64K);
1156 offset = __mos_bo_vma_alloc_xe(bo->bufmgr, (enum mos_memory_zone)bo_gem->mem_region, bo->size, PAGE_SIZE_64K);
1157 }
1158 else
1159 {
1160 MOS_DRM_ASSERTMESSAGE("Invalid mem_region:%d", bo_gem->mem_region);
1161 }
1162
1163 bo->offset64 = offset;
1164 bo->offset = offset;
1165
1166 bufmgr_gem->m_lock.unlock();
1167 }
1168
1169 return 0;
1170 }
1171
1172 static int __mos_vm_bind_xe(int fd, uint32_t vm_id, uint32_t exec_queue_id, uint32_t bo_handle,
1173 uint64_t offset, uint64_t addr, uint64_t size, uint16_t pat_index, uint32_t op, uint32_t flags,
1174 struct drm_xe_sync *sync, uint32_t num_syncs, uint64_t ext)
1175 {
1176 int ret;
1177
1178 struct drm_xe_vm_bind bind;
1179 memclear(bind);
1180 bind.extensions = ext;
1181 bind.vm_id = vm_id;
1182 bind.exec_queue_id = exec_queue_id;
1183 bind.num_binds = 1;
1184 bind.bind.obj = bo_handle;
1185 bind.bind.obj_offset = offset;
1186 bind.bind.range = size;
1187 bind.bind.pat_index = pat_index;
1188 bind.bind.addr = addr;
1189 bind.bind.op = op;
1190 bind.bind.flags = flags;
1191 bind.num_syncs = num_syncs;
1192 bind.syncs = (uintptr_t)sync;
1193
1194 ret = drmIoctl(fd, DRM_IOCTL_XE_VM_BIND, &bind);
1195 if (ret)
1196 {
1197 MOS_DRM_ASSERTMESSAGE("Failed to bind vm, vm_id:%d, exec_queue_id:%d, op:0x%x, flags:0x%x, bo_handle:%d, offset:%lx, addr:0x%lx, size:%ld, pat_index:%d, errno(%d)",
1198 vm_id, exec_queue_id, op, flags, bo_handle, offset, addr, size, pat_index, -errno);
1199 }
1200
1201 return ret;
1202 }
1203
1204 static int mos_vm_bind_sync_xe(int fd, uint32_t vm_id, uint32_t bo, uint64_t offset,
1205 uint64_t addr, uint64_t size, uint16_t pat_index, uint32_t op)
1206 {
1207 struct drm_xe_sync sync;
1208
1209 memclear(sync);
1210 sync.flags = DRM_XE_SYNC_FLAG_SIGNAL;
1211 sync.type = DRM_XE_SYNC_TYPE_SYNCOBJ;
1212 sync.handle = mos_sync_syncobj_create(fd, 0);
1213
1214 int ret = __mos_vm_bind_xe(fd, vm_id, 0, bo, offset, addr, size, pat_index,
1215 op, 0, &sync, 1, 0);
1216 if (ret)
1217 {
1218 MOS_DRM_ASSERTMESSAGE("ret:%d, error:%d", ret, -errno);
1219 mos_sync_syncobj_destroy(fd, sync.handle);
1220 return ret;
1221 }
1222
1223 ret = mos_sync_syncobj_wait_err(fd, &sync.handle, 1, INT64_MAX, 0, NULL);
1224 if (ret)
1225 {
1226 MOS_DRM_ASSERTMESSAGE("syncobj_wait error:%d", -errno);
1227 }
1228
1229 mos_sync_syncobj_destroy(fd, sync.handle);
1230
1231 return ret;
1232 }
1233
1234 static int mos_vm_bind_async_xe(int fd, uint32_t vm_id, uint32_t bo, uint64_t offset,
1235 uint64_t addr, uint64_t size, uint16_t pat_index, uint32_t op,
1236 struct drm_xe_sync *sync, uint32_t num_syncs)
1237 {
1238 return __mos_vm_bind_xe(fd, vm_id, 0, bo, offset, addr, size, pat_index,
1239 op, 0, sync, num_syncs, 0);
1240 }
1241
1242 drm_export struct mos_linux_bo *
1243 mos_bo_alloc_xe(struct mos_bufmgr *bufmgr,
1244 struct mos_drm_bo_alloc *alloc)
1245 {
1246 struct mos_xe_bufmgr_gem *bufmgr_gem = (struct mos_xe_bufmgr_gem *) bufmgr;
1247 struct mos_xe_bo_gem *bo_gem;
1248 struct drm_xe_gem_create create;
1249 uint32_t bo_align = alloc->alignment;
1250 int ret;
1251
1252 /**
1253 * Note: must use MOS_New to allocate the buffer instead of malloc since mos_xe_bo_gem
1254 * contains std::vector and std::map. Otherwise neither will be constructed.
1255 */
1256 bo_gem = MOS_New(mos_xe_bo_gem);
1257 MOS_DRM_CHK_NULL_RETURN_VALUE(bo_gem, nullptr)
1258 memclear(bo_gem->bo);
1259 bo_gem->is_exported = false;
1260 bo_gem->is_imported = false;
1261 bo_gem->is_userptr = false;
1262 bo_gem->last_exec_read_exec_queue = INVALID_EXEC_QUEUE_ID;
1263 bo_gem->last_exec_write_exec_queue = INVALID_EXEC_QUEUE_ID;
1264 atomic_set(&bo_gem->map_count, 0);
1265 bo_gem->mem_virtual = nullptr;
1266 bo_gem->mem_region = MEMZONE_SYS;
1267 bo_align = MAX(alloc->alignment, bufmgr_gem->default_alignment[MOS_XE_MEM_CLASS_SYSMEM]);
1268
1269 if (bufmgr_gem->has_vram &&
1270 (MOS_MEMPOOL_VIDEOMEMORY == alloc->ext.mem_type || MOS_MEMPOOL_DEVICEMEMORY == alloc->ext.mem_type))
1271 {
1272 bo_gem->mem_region = MEMZONE_DEVICE;
1273 bo_align = MAX(alloc->alignment, bufmgr_gem->default_alignment[MOS_XE_MEM_CLASS_VRAM]);
1274 alloc->ext.cpu_cacheable = false;
1275 }
1276
1277 memclear(create);
1278 if (MEMZONE_DEVICE == bo_gem->mem_region)
1279 {
1280 //Note: memory_region is related to gt_id for multi-tile GPUs; take gt_id into consideration in the multi-tile case
1281 create.placement = bufmgr_gem->mem_regions_mask & (~0x1);
1282 }
1283 else
1284 {
1285 create.placement = bufmgr_gem->mem_regions_mask & 0x1;
1286 }
1287
1288 //Note: We suggest vm_id=0 here as default, otherwise this bo cannot be exported as a prime fd.
1289 create.vm_id = 0;
1290 create.size = ALIGN(alloc->size, bo_align);
1291
1292 /**
1293 * Note: currently, only WB/WC are supported; UC and other cache modes are not allowed.
1294 */
1295 create.cpu_caching = alloc->ext.cpu_cacheable ? DRM_XE_GEM_CPU_CACHING_WB : DRM_XE_GEM_CPU_CACHING_WC;
1296
1297 ret = drmIoctl(bufmgr_gem->fd,
1298 DRM_IOCTL_XE_GEM_CREATE,
1299 &create);
1300 MOS_DRM_CHK_STATUS_MESSAGE_RETURN_VALUE_WH_OP(ret, bo_gem, MOS_Delete, nullptr,
1301 "ioctl failed in DRM_IOCTL_XE_GEM_CREATE, return error(%d)", ret);
1302
1303 bo_gem->gem_handle = create.handle;
1304 bo_gem->bo.handle = bo_gem->gem_handle;
1305 bo_gem->bo.size = create.size;
1306 bo_gem->bo.vm_id = INVALID_VM;
1307 bo_gem->bo.bufmgr = bufmgr;
1308 bo_gem->bo.align = bo_align;
1309 bo_gem->cpu_caching = create.cpu_caching;
1310 /**
1311 * Note: better to fall back to a default pat_index to overwrite an invalid argument. Normally it should not happen.
1312 */
1313 bo_gem->pat_index = alloc->ext.pat_index == PAT_INDEX_INVALID ? 0 : alloc->ext.pat_index;
1314
1315 if (bufmgr_gem->mem_profiler_fd != -1)
1316 {
1317 snprintf(bufmgr_gem->mem_profiler_buffer, MEM_PROFILER_BUFFER_SIZE, "GEM_CREATE, %d, %d, %lu, %d, %s\n",
1318 getpid(), bo_gem->bo.handle, bo_gem->bo.size,bo_gem->mem_region, alloc->name);
1319 ret = write(bufmgr_gem->mem_profiler_fd,
1320 bufmgr_gem->mem_profiler_buffer,
1321 strnlen(bufmgr_gem->mem_profiler_buffer, MEM_PROFILER_BUFFER_SIZE));
1322 if (-1 == ret)
1323 {
1324 MOS_DRM_ASSERTMESSAGE("Failed to write to %s: %s",
1325 bufmgr_gem->mem_profiler_path, strerror(errno));
1326 }
1327 }
1328
1329 /* drm_intel_gem_bo_free calls DRMLISTDEL() for an uninitialized
1330 list (vma_list), so better set the list head here */
1331 DRMINITLISTHEAD(&bo_gem->name_list);
1332
1333 memcpy(bo_gem->name, alloc->name, (strlen(alloc->name) + 1) > MAX_NAME_SIZE ? MAX_NAME_SIZE : (strlen(alloc->name) + 1));
1334 atomic_set(&bo_gem->ref_count, 1);
1335
1336 MOS_DRM_NORMALMESSAGE("buf %d (%s) %ldb, bo:0x%lx",
1337 bo_gem->gem_handle, alloc->name, alloc->size, (uint64_t)&bo_gem->bo);
1338
1339 __mos_bo_set_offset_xe(&bo_gem->bo);
1340
1341 ret = mos_vm_bind_sync_xe(bufmgr_gem->fd,
1342 bufmgr_gem->vm_id,
1343 bo_gem->gem_handle,
1344 0,
1345 bo_gem->bo.offset64,
1346 bo_gem->bo.size,
1347 bo_gem->pat_index,
1348 DRM_XE_VM_BIND_OP_MAP);
1349 if (ret)
1350 {
1351 MOS_DRM_ASSERTMESSAGE("mos_vm_bind_sync_xe ret: %d", ret);
1352 mos_bo_free_xe(&bo_gem->bo);
1353 return nullptr;
1354 }
1355 else
1356 {
1357 bo_gem->bo.vm_id = bufmgr_gem->vm_id;
1358 }
1359
1360 return &bo_gem->bo;
1361 }
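/**
 * Usage sketch (hypothetical values): allocate a 4 KiB cacheable buffer in
 * system memory through mos_bo_alloc_xe. Field names follow the usage in
 * mos_bo_alloc_tiled_xe below; MOS_MEMPOOL_SYSTEMMEMORY is an assumed pool
 * name for system memory.
 *
 *     struct mos_drm_bo_alloc alloc = {};
 *     alloc.name               = "example bo";
 *     alloc.size               = 4096;
 *     alloc.alignment          = 0;
 *     alloc.ext.mem_type       = MOS_MEMPOOL_SYSTEMMEMORY; // assumed enum name
 *     alloc.ext.cpu_cacheable  = true;                     // maps to DRM_XE_GEM_CPU_CACHING_WB
 *     alloc.ext.pat_index      = PAT_INDEX_INVALID;        // falls back to the default pat_index
 *     struct mos_linux_bo *bo  = mos_bo_alloc_xe(bufmgr, &alloc);
 */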
1362
1363 static unsigned long
1364 __mos_bo_tile_size_xe(struct mos_xe_bufmgr_gem *bufmgr_gem, unsigned long size,
1365 uint32_t *tiling_mode, uint32_t alignment)
1366 {
1367 unsigned long min_size, max_size;
1368 unsigned long i;
1369
1370 if (TILING_NONE == *tiling_mode)
1371 return size;
1372
1373 /* 965+ just need multiples of page size for tiling */
1374 return ROUND_UP_TO(size, alignment);
1375
1376 }
1377
1378 /*
1379 * Round a given pitch up to the minimum required for X tiling on a
1380 * given chip. We use 512 as the minimum to allow for a later tiling
1381 * change.
1382 */
1383 static unsigned long
1384 __mos_bo_tile_pitch_xe(struct mos_xe_bufmgr_gem *bufmgr_gem,
1385 unsigned long pitch, uint32_t *tiling_mode)
1386 {
1387 unsigned long tile_width;
1388 unsigned long i;
1389
1390 /* If untiled, then just align it so that we can do rendering
1391 * to it with the 3D engine.
1392 */
1393 if (TILING_NONE == *tiling_mode)
1394 return ALIGN(pitch, 64);
1395
1396 if (TILING_X == *tiling_mode)
1397 tile_width = 512;
1398 else
1399 tile_width = 128;
1400
1401 /* 965 is flexible */
1402 return ROUND_UP_TO(pitch, tile_width);
1403 }
1404
1405 static struct mos_linux_bo *
1406 mos_bo_alloc_tiled_xe(struct mos_bufmgr *bufmgr,
1407 struct mos_drm_bo_alloc_tiled *alloc_tiled)
1408 {
1409 struct mos_xe_bufmgr_gem *bufmgr_gem = (struct mos_xe_bufmgr_gem *) bufmgr;
1410 unsigned long size, stride;
1411 uint32_t tiling;
1412
1413 uint32_t alignment = bufmgr_gem->default_alignment[MOS_XE_MEM_CLASS_SYSMEM];
1414
1415 if (bufmgr_gem->has_vram &&
1416 (MOS_MEMPOOL_VIDEOMEMORY == alloc_tiled->ext.mem_type || MOS_MEMPOOL_DEVICEMEMORY == alloc_tiled->ext.mem_type))
1417 {
1418 alignment = bufmgr_gem->default_alignment[MOS_XE_MEM_CLASS_VRAM];
1419 }
1420
1421 do {
1422 unsigned long aligned_y, height_alignment;
1423
1424 tiling = alloc_tiled->ext.tiling_mode;
1425
1426 /* If we're tiled, our allocations are in 8 or 32-row blocks,
1427 * so failure to align our height means that we won't allocate
1428 * enough pages.
1429 *
1430 * If we're untiled, we still have to align to 2 rows high
1431 * because the data port accesses 2x2 blocks even if the
1432 * bottom row isn't to be rendered, so failure to align means
1433 * we could walk off the end of the GTT and fault. This is
1434 * documented on 965, and may be the case on older chipsets
1435 * too so we try to be careful.
1436 */
1437 aligned_y = alloc_tiled->y;
1438 height_alignment = 2;
1439
1440 if (TILING_X == tiling)
1441 height_alignment = 8;
1442 else if (TILING_Y == tiling)
1443 height_alignment = 32;
1444 aligned_y = ALIGN(alloc_tiled->y, height_alignment);
1445
1446 stride = alloc_tiled->x * alloc_tiled->cpp;
1447 stride = __mos_bo_tile_pitch_xe(bufmgr_gem, stride, &alloc_tiled->ext.tiling_mode);
1448 size = stride * aligned_y;
1449 size = __mos_bo_tile_size_xe(bufmgr_gem, size, &alloc_tiled->ext.tiling_mode, alignment);
1450 } while (alloc_tiled->ext.tiling_mode != tiling);
1451
1452 alloc_tiled->pitch = stride;
1453
1454 struct mos_drm_bo_alloc alloc;
1455 alloc.name = alloc_tiled->name;
1456 alloc.size = size;
1457 alloc.alignment = alignment;
1458 alloc.ext = alloc_tiled->ext;
1459
1460 return mos_bo_alloc_xe(bufmgr, &alloc);
1461 }
1462
1463 drm_export struct mos_linux_bo *
1464 mos_bo_alloc_userptr_xe(struct mos_bufmgr *bufmgr,
1465 struct mos_drm_bo_alloc_userptr *alloc_uptr)
1466 {
1467 struct mos_xe_bufmgr_gem *bufmgr_gem = (struct mos_xe_bufmgr_gem *) bufmgr;
1468 struct mos_xe_bo_gem *bo_gem;
1469 int ret;
1470
1471 /**
1472 * Note: must use MOS_New to allocate the buffer instead of malloc since mos_xe_bo_gem
1473 * contains std::vector and std::map. Otherwise neither will be constructed.
1474 */
1475 bo_gem = MOS_New(mos_xe_bo_gem);
1476 MOS_DRM_CHK_NULL_RETURN_VALUE(bo_gem, nullptr)
1477 memclear(bo_gem->bo);
1478 bo_gem->is_exported = false;
1479 bo_gem->is_imported = false;
1480 bo_gem->is_userptr = true;
1481 bo_gem->last_exec_read_exec_queue = INVALID_EXEC_QUEUE_ID;
1482 bo_gem->last_exec_write_exec_queue = INVALID_EXEC_QUEUE_ID;
1483 atomic_set(&bo_gem->map_count, 0);
1484 bo_gem->mem_virtual = alloc_uptr->addr;
1485 bo_gem->gem_handle = INVALID_HANDLE;
1486 bo_gem->bo.handle = INVALID_HANDLE;
1487 bo_gem->bo.size = alloc_uptr->size;
1488 bo_gem->pat_index = alloc_uptr->pat_index == PAT_INDEX_INVALID ? 0 : alloc_uptr->pat_index;
1489 bo_gem->bo.bufmgr = bufmgr;
1490 bo_gem->bo.vm_id = INVALID_VM;
1491 bo_gem->mem_region = MEMZONE_SYS;
1492
1493 /* Save the address provided by user */
1494 #ifdef __cplusplus
1495 bo_gem->bo.virt = alloc_uptr->addr;
1496 #else
1497 bo_gem->bo.virtual = alloc_uptr->addr;
1498 #endif
1499
1500 /* drm_intel_gem_bo_free calls DRMLISTDEL() for an uninitialized
1501 list (vma_list), so better set the list head here */
1502 DRMINITLISTHEAD(&bo_gem->name_list);
1503
1504 memcpy(bo_gem->name, alloc_uptr->name, (strlen(alloc_uptr->name) + 1) > MAX_NAME_SIZE ? MAX_NAME_SIZE : (strlen(alloc_uptr->name) + 1));
1505 atomic_set(&bo_gem->ref_count, 1);
1506
1507 __mos_bo_set_offset_xe(&bo_gem->bo);
1508
1509 ret = mos_vm_bind_sync_xe(bufmgr_gem->fd,
1510 bufmgr_gem->vm_id,
1511 0,
1512 (uint64_t)alloc_uptr->addr,
1513 bo_gem->bo.offset64,
1514 bo_gem->bo.size,
1515 bo_gem->pat_index,
1516 DRM_XE_VM_BIND_OP_MAP_USERPTR);
1517
1518 if (ret)
1519 {
1520 MOS_DRM_ASSERTMESSAGE("mos_xe_vm_bind_userptr_sync ret: %d", ret);
1521 mos_bo_free_xe(&bo_gem->bo);
1522 return nullptr;
1523 }
1524 else
1525 {
1526 bo_gem->bo.vm_id = bufmgr_gem->vm_id;
1527 }
1528
1529 MOS_DRM_NORMALMESSAGE("mos_bo_alloc_userptr_xe: buf (%s) %ldb, bo:0x%lx",
1530 alloc_uptr->name, alloc_uptr->size, (uint64_t)&bo_gem->bo);
1531
1532
1533 return &bo_gem->bo;
1534 }
1535
1536 static struct mos_linux_bo *
1537 mos_bo_create_from_prime_xe(struct mos_bufmgr *bufmgr, struct mos_drm_bo_alloc_prime *alloc_prime)
1538 {
1539 struct mos_xe_bufmgr_gem *bufmgr_gem = (struct mos_xe_bufmgr_gem *) bufmgr;
1540 int ret;
1541 uint32_t handle;
1542 struct mos_xe_bo_gem *bo_gem;
1543 int prime_fd = alloc_prime->prime_fd;
1544 int size = alloc_prime->size;
1545 uint16_t pat_index = alloc_prime->pat_index;
1546 drmMMListHead *list;
1547
1548 bufmgr_gem->m_lock.lock();
1549 ret = drmPrimeFDToHandle(bufmgr_gem->fd, prime_fd, &handle);
1550 if (ret)
1551 {
1552 MOS_DRM_ASSERTMESSAGE("create_from_prime: failed to obtain handle from fd: %s", strerror(errno));
1553 bufmgr_gem->m_lock.unlock();
1554 return nullptr;
1555 }
1556
1557 /*
1558 * See if the kernel has already returned this buffer to us. Just as
1559 * for named buffers, we must not create two bo's pointing at the same
1560 * kernel object
1561 */
1562 for (list = bufmgr_gem->named.next; list != &bufmgr_gem->named; list = list->next)
1563 {
1564 bo_gem = DRMLISTENTRY(struct mos_xe_bo_gem, list, name_list);
1565 if (bo_gem->gem_handle == handle)
1566 {
1567 mos_bo_reference_xe(&bo_gem->bo);
1568 bufmgr_gem->m_lock.unlock();
1569 return &bo_gem->bo;
1570 }
1571 }
1572
1573 bo_gem = MOS_New(mos_xe_bo_gem);
1574 if (!bo_gem)
1575 {
1576 bufmgr_gem->m_lock.unlock();
1577 return nullptr;
1578 }
1579
1580 memclear(bo_gem->bo);
1581 bo_gem->is_exported = false;
1582 bo_gem->is_imported = true;
1583 bo_gem->is_userptr = false;
1584 bo_gem->last_exec_read_exec_queue = INVALID_EXEC_QUEUE_ID;
1585 bo_gem->last_exec_write_exec_queue = INVALID_EXEC_QUEUE_ID;
1586 atomic_set(&bo_gem->map_count, 0);
1587 bo_gem->mem_virtual = nullptr;
1588
1589 /* Determine size of bo. The fd-to-handle ioctl really should
1590 * return the size, but it doesn't. If we have kernel 3.12 or
1591 * later, we can lseek on the prime fd to get the size. Older
1592 * kernels will just fail, in which case we fall back to the
1593 * provided (estimated or guessed) size. */
1594 ret = lseek(prime_fd, 0, SEEK_END);
1595 if (ret != -1)
1596 bo_gem->bo.size = ret;
1597 else
1598 bo_gem->bo.size = size;
1599
1600 bo_gem->bo.handle = handle;
1601 /*
1602 * Note: need to get the pat_index from the customer_gmminfo with at least 1-way coherency.
1603 */
1604 bo_gem->pat_index = pat_index == PAT_INDEX_INVALID ? 0 : pat_index;
1605 bo_gem->bo.bufmgr = bufmgr;
1606
1607 bo_gem->gem_handle = handle;
1608 atomic_set(&bo_gem->ref_count, 1);
1609
1610 /**
1611 * change bo_gem->name to const char*
1612 */
1613 memcpy(bo_gem->name, alloc_prime->name, sizeof("prime"));
1614 bo_gem->mem_region = MEMZONE_PRIME;
1615
1616 DRMLISTADDTAIL(&bo_gem->name_list, &bufmgr_gem->named);
1617 bufmgr_gem->m_lock.unlock();
1618
1619 __mos_bo_set_offset_xe(&bo_gem->bo);
1620
1621 ret = mos_vm_bind_sync_xe(bufmgr_gem->fd,
1622 bufmgr_gem->vm_id,
1623 bo_gem->gem_handle,
1624 0,
1625 bo_gem->bo.offset64,
1626 bo_gem->bo.size,
1627 bo_gem->pat_index,
1628 DRM_XE_VM_BIND_OP_MAP);
1629 if (ret)
1630 {
1631 MOS_DRM_ASSERTMESSAGE("mos_vm_bind_sync_xe ret: %d", ret);
1632 mos_bo_free_xe(&bo_gem->bo);
1633 return nullptr;
1634 }
1635 else
1636 {
1637 bo_gem->bo.vm_id = bufmgr_gem->vm_id;
1638 }
1639
1640 return &bo_gem->bo;
1641 }
1642
1643 static int
1644 mos_bo_export_to_prime_xe(struct mos_linux_bo *bo, int *prime_fd)
1645 {
1646 struct mos_xe_bufmgr_gem *bufmgr_gem = (struct mos_xe_bufmgr_gem *) bo->bufmgr;
1647 struct mos_xe_bo_gem *bo_gem = (struct mos_xe_bo_gem *) bo;
1648
1649 bufmgr_gem->m_lock.lock();
1650 if (DRMLISTEMPTY(&bo_gem->name_list))
1651 DRMLISTADDTAIL(&bo_gem->name_list, &bufmgr_gem->named);
1652 bufmgr_gem->m_lock.unlock();
1653
1654 mos_gem_bo_wait_rendering_xe(bo);
1655
1656 if (drmPrimeHandleToFD(bufmgr_gem->fd, bo_gem->gem_handle,
1657 DRM_CLOEXEC, prime_fd) != 0)
1658 return -errno;
1659
1660 bo_gem->is_exported = true;
1661
1662 return 0;
1663 }
1664
1665 /**
1666 * Update exec list for submission.
1667 *
1668 * @cmd_bo indicates the cmd bo for the exec submission.
1669 * @exec_bo indicates the gpu resource for the exec submission.
1670 * @write_flag indicates whether the exec bo's operation writes on GPU.
1671 */
1672 static int
1673 mos_gem_bo_update_exec_list_xe(struct mos_linux_bo *cmd_bo, struct mos_linux_bo *exec_bo, bool write_flag)
1674 {
1675 MOS_DRM_CHK_NULL_RETURN_VALUE(cmd_bo, -EINVAL)
1676 MOS_DRM_CHK_NULL_RETURN_VALUE(exec_bo, -EINVAL)
1677 struct mos_xe_bo_gem *cmd_bo_gem = (struct mos_xe_bo_gem *) cmd_bo;
1678 struct mos_xe_bufmgr_gem *bufmgr_gem = (struct mos_xe_bufmgr_gem *) cmd_bo->bufmgr;
1679 MOS_DRM_CHK_NULL_RETURN_VALUE(bufmgr_gem, -EINVAL)
1680 std::map<uintptr_t, struct mos_xe_exec_bo> &exec_list = cmd_bo_gem->exec_list;
1681
1682 if (exec_bo->handle == cmd_bo->handle)
1683 {
1684 MOS_DRM_NORMALMESSAGE("cmd bo should not be added into exec list, skip it");
1685 return MOS_XE_SUCCESS;
1686 }
1687 uintptr_t key = (uintptr_t)exec_bo;
1688 if (exec_list.count(key) > 0)
1689 {
1690 /**
1691 * This exec bo has added before, but need to update its exec flags.
1692 */
1693
1694 // For all BOs with read and write usages, we could just assign write flag to reduce read deps size.
1695 if (write_flag || (exec_list[key].flags & EXEC_OBJECT_WRITE_XE))
1696 {
1697 exec_list[key].flags = EXEC_OBJECT_WRITE_XE;
1698 }
1699 else
1700 {
1701 // For BOs only with read usage, we should assign read flag.
1702 exec_list[key].flags |= EXEC_OBJECT_READ_XE;
1703 }
1704 }
1705 else
1706 {
1707 struct mos_xe_exec_bo target;
1708 target.bo = exec_bo;
1709 target.flags = write_flag ? EXEC_OBJECT_WRITE_XE : EXEC_OBJECT_READ_XE;
1710 exec_list[key] = target;
1711 mos_bo_reference_xe(exec_bo);
1712 }
1713 return MOS_XE_SUCCESS;
1714 }
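
/**
 * Usage sketch (illustrative only, not called from this file; "cmd_bo" and
 * "surface_bo" are placeholder names): a caller registers every resource the
 * batch references before submission. Externally these entry points are
 * reached through mos_bufmgr::bo_add_softpin_target and bo_clear_relocs,
 * wired up in mos_bufmgr_gem_init_xe below.
 *
 *     mos_gem_bo_update_exec_list_xe(cmd_bo, surface_bo, false);  // read-only use
 *     mos_gem_bo_update_exec_list_xe(cmd_bo, surface_bo, true);   // a later write use promotes
 *                                                                 // the entry to EXEC_OBJECT_WRITE_XE
 *     // after submission, drop the references taken above
 *     mos_gem_bo_clear_exec_list_xe(cmd_bo, 0);
 */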
1715
1716 /**
1717 * Clear the exec bo from the list after submission.
1718 *
1719 * @cmd_bo indicates the cmd bo for the exec submission.
1720 * @start is unused.
1721 */
1722 static void
1723 mos_gem_bo_clear_exec_list_xe(struct mos_linux_bo *cmd_bo, int start)
1724 {
1725 MOS_UNUSED(start);
1726 if (cmd_bo != nullptr && cmd_bo->bufmgr != nullptr)
1727 {
1728 struct mos_xe_bufmgr_gem *bufmgr_gem = (struct mos_xe_bufmgr_gem *) cmd_bo->bufmgr;
1729 struct mos_xe_bo_gem *bo_gem = (struct mos_xe_bo_gem *) cmd_bo;
1730 std::map<uintptr_t, struct mos_xe_exec_bo> &exec_list = bo_gem->exec_list;
1731
1732 for (auto &it : exec_list) {
1733 mos_bo_unreference_xe(it.second.bo);
1734 }
1735 exec_list.clear();
1736 }
1737 }
1738
1739 /**
1740 * This dumps all pending execution timelines on such bo.
1741 */
1742 int
1743 __mos_dump_bo_wait_rendering_timeline_xe(uint32_t bo_handle,
1744 uint32_t *handles,
1745 uint64_t *points,
1746 uint32_t count,
1747 int64_t timeout_nsec,
1748 uint32_t wait_flags,
1749 uint32_t rw_flags)
1750 {
1751 #if (_DEBUG || _RELEASE_INTERNAL)
1752 if (__XE_TEST_DEBUG(XE_DEBUG_SYNCHRONIZATION))
1753 {
1754 MOS_DRM_CHK_NULL_RETURN_VALUE(handles, -EINVAL)
1755 char log_msg[MOS_MAX_MSG_BUF_SIZE] = { 0 };
1756 int offset = 0;
1757 offset += MOS_SecureStringPrint(log_msg + offset, MOS_MAX_MSG_BUF_SIZE,
1758 MOS_MAX_MSG_BUF_SIZE - offset,
1759 "\n\t\t\tdump bo wait rendering: bo handle = %d, timeout_nsec = %ld, wait_flags = %d, rw_flags = %d",
1760 bo_handle,
1761 timeout_nsec,
1762 wait_flags,
1763 rw_flags);
1764
1765 for (int i = 0; i < count; i++)
1766 {
1767 offset += MOS_SecureStringPrint(log_msg + offset, MOS_MAX_MSG_BUF_SIZE,
1768 MOS_MAX_MSG_BUF_SIZE - offset,
1769 "\n\t\t\t-syncobj handle = %d, timeline = %ld",
1770 handles[i],
1771 points[i]);
1772 }
1773
1774 offset > MOS_MAX_MSG_BUF_SIZE ?
1775 MOS_DRM_NORMALMESSAGE("incomplete dump since log msg buffer overflow %s", log_msg) : MOS_DRM_NORMALMESSAGE("%s", log_msg);
1776 }
1777 #endif
1778 return MOS_XE_SUCCESS;
1779 }
1780
1781 /**
1782 * @bo indicates the bo object that needs to wait
1783 * @timeout_nsec indicates the timeout in nanoseconds:
1784 * if timeout_nsec > 0, wait for the given time; on timeout, return -ETIME;
1785 * if timeout_nsec == 0, check the bo busy state; if busy, return -ETIME immediately;
1786 * @wait_flags indicates the wait operation; it supports wait all, wait submit, wait available or wait any;
1787 * refer to drm syncobj in drm.h for more details
1788 * @rw_flags indicates the read/write operation:
1789 * if rw_flags & EXEC_OBJECT_WRITE_XE, it means bo write; otherwise it means bo read.
1790 * @first_signaled indicates the first signaled syncobj handle in the handles array.
1791 */
1792 static int
1793 __mos_gem_bo_wait_timeline_rendering_with_flags_xe(struct mos_linux_bo *bo,
1794 int64_t timeout_nsec,
1795 uint32_t wait_flags,
1796 uint32_t rw_flags,
1797 uint32_t *first_signaled)
1798 {
1799 MOS_DRM_CHK_NULL_RETURN_VALUE(bo, -EINVAL)
1800
1801 mos_xe_bufmgr_gem *bufmgr_gem = (mos_xe_bufmgr_gem *)bo->bufmgr;
1802 MOS_DRM_CHK_NULL_RETURN_VALUE(bufmgr_gem, -EINVAL)
1803
1804 int ret = MOS_XE_SUCCESS;
1805 uint32_t count = 0;
1806 mos_xe_bo_gem *bo_gem = (mos_xe_bo_gem *)bo;
1807 std::map<uint32_t, uint64_t> timeline_data; //pair(syncobj, point)
1808 std::vector<uint32_t> handles;
1809 std::vector<uint64_t> points;
1810 std::set<uint32_t> exec_queue_ids;
1811 bufmgr_gem->m_lock.lock();
1812 bufmgr_gem->sync_obj_rw_lock.lock_shared();
1813 MOS_XE_GET_KEYS_FROM_MAP(bufmgr_gem->global_ctx_info, exec_queue_ids);
1814
1815 mos_sync_get_bo_wait_timeline_deps(exec_queue_ids,
1816 bo_gem->read_deps,
1817 bo_gem->write_deps,
1818 timeline_data,
1819 bo_gem->last_exec_write_exec_queue,
1820 rw_flags);
1821 bufmgr_gem->m_lock.unlock();
1822
1823 for (auto it : timeline_data)
1824 {
1825 handles.push_back(it.first);
1826 points.push_back(it.second);
1827 }
1828
1829 count = handles.size();
1830 if (count > 0)
1831 {
1832 ret = mos_sync_syncobj_timeline_wait(bufmgr_gem->fd,
1833 handles.data(),
1834 points.data(),
1835 count,
1836 timeout_nsec,
1837 wait_flags,
1838 first_signaled);
1839
1840 __mos_dump_bo_wait_rendering_timeline_xe(bo_gem->gem_handle,
1841 handles.data(),
1842 points.data(),
1843 count,
1844 timeout_nsec,
1845 wait_flags,
1846 rw_flags);
1847 }
1848 bufmgr_gem->sync_obj_rw_lock.unlock_shared();
1849
1850 return ret;
1851 }
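
/**
 * Parameter sketch (mirrors the wrappers below, added for clarity; "bo" is a
 * placeholder): the non-blocking probe and the blocking wait only differ in
 * timeout_nsec.
 *
 *     // non-blocking probe: returns -ETIME at once if any selected dep is unsignaled
 *     __mos_gem_bo_wait_timeline_rendering_with_flags_xe(bo, 0,
 *             DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL,
 *             EXEC_OBJECT_READ_XE | EXEC_OBJECT_WRITE_XE, nullptr);
 *
 *     // blocking wait: returns only when every selected dep has signaled
 *     __mos_gem_bo_wait_timeline_rendering_with_flags_xe(bo, INT64_MAX,
 *             DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL,
 *             EXEC_OBJECT_READ_XE | EXEC_OBJECT_WRITE_XE, nullptr);
 */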
1852
1853 /**
1854 * Check if the bo is still in busy state.
1855 *
1856 * Check if the read deps on all exec_queues and the write dep on the last write exec_queue are signaled.
1857 * If any dep is not signaled, the bo is busy and -ETIME is returned immediately.
1858 * Otherwise, move all deps on this bo from the busy queue to the free queue for reuse.
1859 */
1860 static int
1861 mos_gem_bo_busy_xe(struct mos_linux_bo *bo)
1862 {
1863 MOS_DRM_CHK_NULL_RETURN_VALUE(bo, -EINVAL);
1864 mos_xe_bufmgr_gem *bufmgr_gem = (mos_xe_bufmgr_gem *)bo->bufmgr;
1865 MOS_DRM_CHK_NULL_RETURN_VALUE(bufmgr_gem, -EINVAL)
1866
1867 int64_t timeout_nsec = 0;
1868 uint32_t wait_flags = DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL;
1869 uint32_t rw_flags = EXEC_OBJECT_READ_XE | EXEC_OBJECT_WRITE_XE;
1870
1871 int ret = __mos_gem_bo_wait_timeline_rendering_with_flags_xe(bo, timeout_nsec, wait_flags, rw_flags, nullptr);
1872
1873 if (ret)
1874 {
1875 //busy
1876 if (errno != ETIME)
1877 {
1878 MOS_DRM_ASSERTMESSAGE("bo_busy_xe ret:%d, error:%d", ret, -errno);
1879 }
1880 return true;
1881 }
1882 else if (MOS_XE_SUCCESS == ret)
1883 {
1884 //free
1885 return false;
1886 }
1887
1888 return false;
1889 }
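
/**
 * Usage sketch (illustrative only; "bo" is a placeholder name): callers use
 * the boolean result to decide whether CPU access must be deferred.
 *
 *     if (mos_gem_bo_busy_xe(bo))
 *     {
 *         // bo still has unsignaled GPU work pending, defer CPU access
 *     }
 */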
1890
1891 /**
1892 * Waits for all GPU rendering with the object to have completed.
1893 *
1894 * Wait until the read deps on all exec_queues and the write dep on the last write exec_queue are signaled,
1895 * then move all deps on this bo from the busy queue to the free queue for reuse after rendering completes.
1896 */
1897 static void
1898 mos_gem_bo_wait_rendering_xe(struct mos_linux_bo *bo)
1899 {
1900 if (bo == nullptr || bo->bufmgr == nullptr)
1901 {
1902 MOS_DRM_ASSERTMESSAGE("ptr is null pointer");
1903 return;
1904 }
1905 mos_xe_bufmgr_gem *bufmgr_gem = (mos_xe_bufmgr_gem *)bo->bufmgr;
1906
1907 int64_t timeout_nsec = INT64_MAX;
1908 uint32_t wait_flags = DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL;
1909 uint32_t rw_flags = EXEC_OBJECT_READ_XE | EXEC_OBJECT_WRITE_XE;
1910
1911 int ret = __mos_gem_bo_wait_timeline_rendering_with_flags_xe(bo, timeout_nsec, wait_flags, rw_flags, nullptr);
1912 if (ret)
1913 {
1914 MOS_DRM_ASSERTMESSAGE("bo_wait_rendering_xe ret:%d, error:%d", ret, -errno);
1915 }
1916 }
1917
1918 /**
1919 * @timeout_ns indicates the timeout for waiting, but it is not a real timeout;
1920 * it only selects between waiting for bo rendering to complete and checking the bo busy state:
1921 * if timeout_ns != 0, wait until bo rendering completes.
1922 * if timeout_ns == 0, check the bo busy state.
1923 */
1924 static int
1925 mos_gem_bo_wait_xe(struct mos_linux_bo *bo, int64_t timeout_ns)
1926 {
1927 if (timeout_ns)
1928 {
1929 mos_gem_bo_wait_rendering_xe(bo);
1930 return 0;
1931 }
1932 else
1933 {
1934 return mos_gem_bo_busy_xe(bo) ? -ETIME : 0;
1935 }
1936 return 0;
1937 }
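
/**
 * Usage sketch (illustrative only, assuming the vtable wiring done in
 * mos_bufmgr_gem_init_xe below; "bufmgr" and "bo" are placeholders): the
 * timeout value only selects the mode.
 *
 *     bufmgr->bo_wait(bo, 0);     // busy probe, returns -ETIME if still busy
 *     bufmgr->bo_wait(bo, 1);     // any non-zero value blocks until rendering completes
 */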
1938
1939 /**
1940 * Map gpu resource for CPU read or write.
1941 *
1942 * 1. if mapping for write, it should wait until the read deps on all exec_queues and the write dep on the last write exec_queue are signaled.
1943 * 2. if mapping for read, it should only wait until the write dep on the last write exec_queue is signaled.
1944 *
1945 * After bo rendering completes on the GPU, the CPU can continue its read or write operation.
1946 */
1947 static int
1948 mos_bo_map_xe(struct mos_linux_bo *bo, int write_enable)
1949 {
1950 MOS_DRM_CHK_NULL_RETURN_VALUE(bo, -EINVAL)
1951 struct mos_xe_bufmgr_gem *bufmgr_gem = (struct mos_xe_bufmgr_gem *) bo->bufmgr;
1952 MOS_DRM_CHK_NULL_RETURN_VALUE(bufmgr_gem, -EINVAL)
1953 struct mos_xe_bo_gem *bo_gem = (struct mos_xe_bo_gem *) bo;
1954 int ret;
1955
1956 int64_t timeout_nsec = INT64_MAX;
1957 uint32_t wait_flags = DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL;
1958 uint32_t rw_flags = write_enable ? EXEC_OBJECT_WRITE_XE : EXEC_OBJECT_READ_XE;
1959
1960 ret = __mos_gem_bo_wait_timeline_rendering_with_flags_xe(bo, timeout_nsec, wait_flags, rw_flags, nullptr);
1961 if (ret)
1962 {
1963 MOS_DRM_ASSERTMESSAGE("bo wait rendering error(%d ns)", -errno);
1964 }
1965
1966 if (bo_gem->is_userptr)
1967 {
1968 /* Return the same user ptr */
1969 return 0;
1970 }
1971
1972 bufmgr_gem->m_lock.lock();
1973 if (nullptr == bo_gem->mem_virtual)
1974 {
1975 struct drm_xe_gem_mmap_offset mmo;
1976 memclear(mmo);
1977 mmo.handle = bo->handle;
1978 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_XE_GEM_MMAP_OFFSET, &mmo);
1979 if (ret)
1980 {
1981 bufmgr_gem->m_lock.unlock();
1982 return ret;
1983 }
1984
1985 bo_gem->mem_virtual = drm_mmap(0, bo->size, PROT_READ | PROT_WRITE,
1986 MAP_SHARED, bufmgr_gem->fd, mmo.offset);
1987 if (MAP_FAILED == bo_gem->mem_virtual)
1988 {
1989 bo_gem->mem_virtual = nullptr;
1990 ret = -errno;
1991 MOS_DRM_ASSERTMESSAGE("Error mapping buffer %d (%s): %s .",
1992 bo_gem->gem_handle, bo_gem->name,
1993 strerror(errno));
1994 }
1995 }
1996
1997 #ifdef __cplusplus
1998 bo->virt = bo_gem->mem_virtual;
1999 #else
2000 bo->virtual = bo_gem->mem_virtual;
2001 #endif
2002
2003 atomic_inc(&bo_gem->map_count);
2004
2005 __mos_bo_mark_mmaps_incoherent_xe(bo);
2006 VG(VALGRIND_MAKE_MEM_DEFINED(bo_gem->mem_virtual, bo->size));
2007 bufmgr_gem->m_lock.unlock();
2008
2009 return 0;
2010 }
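
/**
 * Usage sketch (illustrative only; "bo" is a placeholder name): mapping for
 * write waits for all readers and the last writer, mapping for read only
 * waits for the last writer.
 *
 *     if (0 == mos_bo_map_xe(bo, 1))          // 1 == write_enable
 *     {
 *         memset(bo->virt, 0, bo->size);      // CPU fills the buffer
 *         mos_bo_unmap_xe(bo);
 *     }
 */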
2011
2012 static int
2013 mos_bo_map_wc_xe(struct mos_linux_bo *bo)
2014 {
2015 return mos_bo_map_xe(bo, false);
2016 }
2017
2018 static int mos_bo_unmap_xe(struct mos_linux_bo *bo)
2019 {
2020 struct mos_xe_bo_gem *bo_gem = (struct mos_xe_bo_gem *) bo;
2021 MOS_DRM_CHK_NULL_RETURN_VALUE(bo_gem, 0)
2022 struct mos_xe_bufmgr_gem *bufmgr_gem = (struct mos_xe_bufmgr_gem *) bo->bufmgr;
2023 MOS_DRM_CHK_NULL_RETURN_VALUE(bufmgr_gem, 0)
2024
2025 if (bo_gem->is_userptr)
2026 return 0;
2027
2028 bufmgr_gem->m_lock.lock();
2029
2030 if (atomic_dec_and_test(&bo_gem->map_count))
2031 {
2032 __mos_bo_mark_mmaps_incoherent_xe(bo);
2033 #ifdef __cplusplus
2034 bo->virt = nullptr;
2035 #else
2036 bo->virtual = nullptr;
2037 #endif
2038 }
2039 bufmgr_gem->m_lock.unlock();
2040
2041 return 0;
2042 }
2043
2044 static int
2045 mos_bo_unmap_wc_xe(struct mos_linux_bo *bo)
2046 {
2047 return mos_bo_unmap_xe(bo);
2048 }
2049
2050 /**
2051 * This aims to dump the sync info on such execution.
2052 * @syncs contains fences in from bos which have a dependency on the
2053 * current execution and a fence out in @dep from the current execution.
2054 */
2055 int __mos_dump_syncs_array_xe(struct drm_xe_sync *syncs,
2056 uint32_t count,
2057 mos_xe_dep *dep)
2058 {
2059 #if (_DEBUG || _RELEASE_INTERNAL)
2060 if (__XE_TEST_DEBUG(XE_DEBUG_SYNCHRONIZATION))
2061 {
2062 MOS_DRM_CHK_NULL_RETURN_VALUE(syncs, -EINVAL)
2063 MOS_DRM_CHK_NULL_RETURN_VALUE(dep, -EINVAL)
2064 char log_msg[MOS_MAX_MSG_BUF_SIZE] = { 0 };
2065 int offset = 0;
2066 offset += MOS_SecureStringPrint(log_msg + offset, MOS_MAX_MSG_BUF_SIZE,
2067 MOS_MAX_MSG_BUF_SIZE - offset,
2068 "\n\t\t\tdump fence out syncobj: handle = %d, timeline = %ld",
2069 dep->syncobj_handle, dep->timeline_index);
2070 if (count > 0)
2071 {
2072 offset += MOS_SecureStringPrint(log_msg + offset, MOS_MAX_MSG_BUF_SIZE,
2073 MOS_MAX_MSG_BUF_SIZE - offset,
2074 "\n\t\t\tdump exec syncs array, num sync = %d",
2075 count);
2076 }
2077 for (int i = 0; i < count; i++)
2078 {
2079 /**
2080 * Note: we assume all are timeline sync here, and change later when any other
2081 * types sync in use.
2082 */
2083 offset += MOS_SecureStringPrint(log_msg + offset, MOS_MAX_MSG_BUF_SIZE,
2084 MOS_MAX_MSG_BUF_SIZE - offset,
2085 "\n\t\t\t-syncobj_handle = %d, timeline = %ld, sync type = %d, sync flags = %d",
2086 syncs[i].handle, syncs[i].timeline_value, syncs[i].type, syncs[i].flags);
2087 }
2088 offset > MOS_MAX_MSG_BUF_SIZE ?
2089 MOS_DRM_NORMALMESSAGE("incomplete dump since log msg buffer overflow %s", log_msg) : MOS_DRM_NORMALMESSAGE("%s", log_msg);
2090 }
2091 #endif
2092 return MOS_XE_SUCCESS;
2093 }
2094
2095 /**
2096 * This dumps the timeline for each exec bo on such execution;
2097 * pairs of execed_queue_id & timeline_value will be dumped.
2098 */
2099 int
2100 __mos_dump_bo_deps_map_xe(struct mos_linux_bo **bo,
2101 int num_bo,
2102 std::vector<mos_xe_exec_bo> &exec_list,
2103 uint32_t curr_exec_queue_id,
2104 std::map<uint32_t, struct mos_xe_context*> ctx_infos)
2105 {
2106 #if (_DEBUG || _RELEASE_INTERNAL)
2107 if (__XE_TEST_DEBUG(XE_DEBUG_SYNCHRONIZATION))
2108 {
2109 MOS_DRM_CHK_NULL_RETURN_VALUE(bo, -EINVAL)
2110 uint32_t exec_list_size = exec_list.size();
2111 for (int i = 0; i < exec_list_size + num_bo; i++)
2112 {
2113 mos_xe_bo_gem *exec_bo_gem = nullptr;
2114 uint32_t exec_flags = 0;
2115 if (i < exec_list_size)
2116 {
2117 exec_bo_gem = (mos_xe_bo_gem *)exec_list[i].bo;
2118 exec_flags = exec_list[i].flags;
2119 }
2120 else
2121 {
2122 exec_bo_gem = (mos_xe_bo_gem *)bo[i - exec_list_size];
2123 exec_flags = EXEC_OBJECT_WRITE_XE; //use write flags for batch bo as default.
2124 }
2125 if (exec_bo_gem)
2126 {
2127 if (exec_bo_gem->is_imported || exec_bo_gem->is_exported)
2128 {
2129 MOS_DRM_NORMALMESSAGE("\n\t\t\tdump external bo, handle=%d, without deps map, skip dump", exec_bo_gem->bo.handle);
2130 }
2131 else
2132 {
2133 char log_msg[MOS_MAX_MSG_BUF_SIZE] = { 0 };
2134 int offset = 0;
2135 offset += MOS_SecureStringPrint(log_msg + offset, MOS_MAX_MSG_BUF_SIZE,
2136 MOS_MAX_MSG_BUF_SIZE - offset,
2137 "\n\t\t\tdump %s dep: bo handle=%d, curr_exec_queue_id=%d, curr_op_flags=%d",
2138 i >= exec_list_size ? "batch bo" : "exec bo",
2139 exec_bo_gem->bo.handle,
2140 curr_exec_queue_id,
2141 exec_flags);
2142
2143 auto it = exec_bo_gem->read_deps.begin();
2144 while (it != exec_bo_gem->read_deps.end())
2145 {
2146 if (ctx_infos.count(it->first) > 0)
2147 {
2148 offset += MOS_SecureStringPrint(log_msg + offset, MOS_MAX_MSG_BUF_SIZE,
2149 MOS_MAX_MSG_BUF_SIZE - offset,
2150 "\n\t\t\t-read deps: execed_exec_queue_id=%d, syncobj_handle=%d, timeline = %ld",
2151 it->first,
2152 it->second.dep ? it->second.dep->syncobj_handle : INVALID_HANDLE,
2153 it->second.dep ? it->second.exec_timeline_index : INVALID_HANDLE);
2154 }
2155 it++;
2156 }
2157
2158 it = exec_bo_gem->write_deps.begin();
2159 while (it != exec_bo_gem->write_deps.end())
2160 {
2161 if (ctx_infos.count(it->first) > 0)
2162 {
2163 offset += MOS_SecureStringPrint(log_msg + offset, MOS_MAX_MSG_BUF_SIZE,
2164 MOS_MAX_MSG_BUF_SIZE - offset,
2165 "\n\t\t\t-write deps: execed_exec_queue_id=%d, syncobj_handle=%d, timeline = %ld",
2166 it->first,
2167 it->second.dep ? it->second.dep->syncobj_handle : INVALID_HANDLE,
2168 it->second.dep ? it->second.exec_timeline_index : INVALID_HANDLE);
2169 }
2170 it++;
2171 }
2172 offset > MOS_MAX_MSG_BUF_SIZE ?
2173 MOS_DRM_NORMALMESSAGE("incomplete dump since log msg buffer overflow %s", log_msg) : MOS_DRM_NORMALMESSAGE("%s", log_msg);
2174 }
2175 }
2176 }
2177 }
2178 #endif
2179 return MOS_XE_SUCCESS;
2180 }
2181
2182 static int
2183 __mos_context_exec_update_syncs_xe(struct mos_xe_bufmgr_gem *bufmgr_gem,
2184 struct mos_linux_bo **bo,
2185 int num_bo,
2186 struct mos_xe_context *ctx,
2187 std::vector<mos_xe_exec_bo> &exec_list,
2188 std::vector<struct drm_xe_sync> &syncs,
2189 std::vector<struct mos_xe_external_bo_info> &external_bos)
2190 {
2191 MOS_DRM_CHK_NULL_RETURN_VALUE(ctx, -EINVAL);
2192 uint32_t curr_dummy_exec_queue_id = ctx->dummy_exec_queue_id;
2193 uint32_t exec_list_size = exec_list.size();
2194 int ret = 0;
2195 std::set<uint32_t> exec_queue_ids;
2196 MOS_DRM_CHK_NULL_RETURN_VALUE(bufmgr_gem, -EINVAL);
2197 MOS_XE_GET_KEYS_FROM_MAP(bufmgr_gem->global_ctx_info, exec_queue_ids);
2198
2199 for (int i = 0; i < exec_list_size + num_bo; i++)
2200 {
2201 mos_xe_bo_gem *exec_bo_gem = nullptr;
2202 uint32_t exec_flags = 0;
2203 if (i < exec_list_size)
2204 {
2205 //exec list bo
2206 exec_bo_gem = (mos_xe_bo_gem *)exec_list[i].bo;
2207 exec_flags = exec_list[i].flags;
2208 }
2209 else
2210 {
2211 //batch bo
2212 exec_bo_gem = (mos_xe_bo_gem *)bo[i - exec_list_size];
2213 exec_flags = EXEC_OBJECT_WRITE_XE; //use write flags for batch bo as default
2214 }
2215
2216 if (exec_bo_gem)
2217 {
2218 if (exec_flags == 0)
2219 {
2220 //Add an assert message here in case of potential thread safety issue.
2221 //Currently, exec bo's flags could only be in (0, EXEC_OBJECT_READ_XE | EXEC_OBJECT_WRITE_XE]
2222 MOS_DRM_ASSERTMESSAGE("Invalid op flags(0x0) for exec bo(handle=%d)", exec_bo_gem->bo.handle);
2223 }
2224
2225 if (exec_bo_gem->is_imported || exec_bo_gem->is_exported)
2226 {
2227 //external bo, need to export its syncobj every time.
2228 int prime_fd = INVALID_HANDLE;
2229 ret = mos_sync_update_exec_syncs_from_handle(
2230 bufmgr_gem->fd,
2231 exec_bo_gem->bo.handle,
2232 exec_flags,
2233 syncs,
2234 prime_fd);
2235 if (ret == MOS_XE_SUCCESS)
2236 {
2237 /**
2238 * Note, must import batch syncobj for each external bo
2239 * and close the syncobj created for them after exec submission.
2240 */
2241 int count = syncs.size();
2242 struct mos_xe_external_bo_info infos;
2243 memclear(infos);
2244 infos.syncobj_handle = syncs[count - 1].handle;
2245 infos.prime_fd = prime_fd;
2246 external_bos.push_back(infos);
2247 }
2248 else
2249 {
2250 //Note: continue process even failed.
2251 //This may only cause a potential synchronization issue, DON'T crash umd here.
2252 MOS_DRM_ASSERTMESSAGE("Failed to update syncobj for external bo(%d)",
2253 exec_bo_gem->bo.handle);
2254 }
2255 }
2256 else
2257 {
2258 //internal bo
2259 ret = mos_sync_update_exec_syncs_from_timeline_deps(
2260 curr_dummy_exec_queue_id,
2261 exec_bo_gem->last_exec_write_exec_queue,
2262 exec_flags,
2263 exec_queue_ids,
2264 exec_bo_gem->read_deps,
2265 exec_bo_gem->write_deps,
2266 syncs);
2267 }
2268 }
2269 }
2270 return MOS_XE_SUCCESS;
2271 }
2272
2273 static int
2274 __mos_context_exec_update_bo_deps_xe(struct mos_linux_bo **bo,
2275 int num_bo,
2276 std::vector<mos_xe_exec_bo> &exec_list,
2277 uint32_t curr_exec_queue_id,
2278 struct mos_xe_dep *dep)
2279 {
2280 uint32_t exec_list_size = exec_list.size();
2281
2282 for (int i = 0; i < exec_list_size + num_bo; i++)
2283 {
2284 mos_xe_bo_gem *exec_bo_gem = nullptr;
2285 uint32_t exec_flags = 0;
2286 if (i < exec_list_size)
2287 {
2288 //exec list bo
2289 exec_bo_gem = (mos_xe_bo_gem *)exec_list[i].bo;
2290 exec_flags = exec_list[i].flags;
2291 }
2292 else
2293 {
2294 //batch bo
2295 exec_bo_gem = (mos_xe_bo_gem *)bo[i - exec_list_size];
2296 exec_flags = EXEC_OBJECT_WRITE_XE; //use write flags for batch bo as default.
2297 }
2298 if (exec_bo_gem)
2299 {
2300 mos_sync_update_bo_deps(curr_exec_queue_id, exec_flags, dep, exec_bo_gem->read_deps, exec_bo_gem->write_deps);
2301 if (exec_flags & EXEC_OBJECT_READ_XE)
2302 {
2303 exec_bo_gem->last_exec_read_exec_queue = curr_exec_queue_id;
2304 }
2305 if (exec_flags & EXEC_OBJECT_WRITE_XE)
2306 {
2307 exec_bo_gem->last_exec_write_exec_queue = curr_exec_queue_id;
2308 }
2309 }
2310 }
2311
2312 return MOS_XE_SUCCESS;
2313 }
2314
2315 /**
2316 * @ctx indicates the guilty ctx that needs to be recovered for re-submission
2317 * @exec indicates the exec data from the previous failed submission to re-submit
2318 * @curr_exec_queue_id indicates the guilty exec_queue_id; it will be replaced by a newly created one
2319 */
2320 static int
2321 __mos_bo_context_exec_retry_xe(struct mos_bufmgr *bufmgr,
2322 struct mos_linux_context *ctx,
2323 struct drm_xe_exec &exec,
2324 uint32_t &curr_exec_queue_id)
2325 {
2326 MOS_DRM_CHK_NULL_RETURN_VALUE(bufmgr, -EINVAL);
2327 MOS_DRM_CHK_NULL_RETURN_VALUE(ctx, -EINVAL);
2328
2329 struct mos_xe_bufmgr_gem *bufmgr_gem = (struct mos_xe_bufmgr_gem *)bufmgr;
2330 int ret = MOS_XE_SUCCESS;
2331
2332 //query ctx property firstly to check if failure is caused by exec_queue ban
2333 uint64_t property_value = 0;
2334 ret = __mos_get_context_property_xe(bufmgr, ctx, DRM_XE_EXEC_QUEUE_GET_PROPERTY_BAN, property_value);
2335
2336 /**
2337 * if exec_queue is banned, queried value is 1, otherwise it is zero;
2338 * if exec failure is not caused by exec_queue ban, umd could not help recover it.
2339 */
2340 if (ret || !property_value)
2341 {
2342 MOS_DRM_ASSERTMESSAGE("Failed to restore ctx(%d) with error(%d)",
2343 curr_exec_queue_id, -EPERM);
2344 return -EPERM;
2345 }
2346
2347 ret = __mos_context_restore_xe(bufmgr, ctx);
2348
2349 if (ret == MOS_XE_SUCCESS)
2350 {
2351 curr_exec_queue_id = ctx->ctx_id;
2352 exec.exec_queue_id = curr_exec_queue_id;
2353 //try once again to submit
2354 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_XE_EXEC, &exec);
2355 if (ret)
2356 {
2357 MOS_DRM_ASSERTMESSAGE("Failed to re-submission in DRM_IOCTL_XE_EXEC(errno:%d): new exec_queue_id = %d",
2358 ret, curr_exec_queue_id);
2359 }
2360 }
2361 else
2362 {
2363 MOS_DRM_ASSERTMESSAGE("Failed to restore context with error(%d), exec_queue_id = %d",
2364 ret, curr_exec_queue_id);
2365 }
2366 return ret;
2367 }
2368
2369 /**
2370 * @bo contains batch bo only.
2371 * @num_bo indicates the batch bo num.
2372 * @ctx indicates the exec exec_queue.
2373 *
2374 * GPU<->GPU synchronization:
2375 * Exec must ensure the synchronization between GPU->GPU with the below 8 steps:
2376 * 1. Get the deps from read_deps and write_deps by checking the bo's op flags and add them into the syncs array;
2377 *    a) if flags & READ: get write_deps[last_write_exec_queue != ctx->dummy_exec_queue_id] & STATUS_DEP_BUSY only;
2378 *    b) if flags & WRITE: get read_deps[all_exec_queue exclude ctx->dummy_exec_queue_id] & STATUS_DEP_BUSY
2379 *       and write_deps[last_write_exec_queue != ctx->dummy_exec_queue_id] & STATUS_DEP_BUSY;
2380 * 2. Export a syncobj from each external bo as a dep and add it into the syncs array.
2381 * 3. Initialize a new timeline dep object for the exec queue if it doesn't have one and add it to the syncs array; otherwise add the timeline
2382 *    dep from context->timeline_dep directly, since it holds the latest available timeline point;
2383 * 4. Exec submission with batches and syncs.
2384 * 5. Update read_deps[ctx->dummy_exec_queue_id] and write_deps[ctx->dummy_exec_queue_id] with the new deps from the dep_queue;
2385 * 6. Update the timeline dep's timeline index to the latest available one for the current exec queue.
2386 * 7. Import the syncobj from the batch bo into each external bo's DMA buffer so an external process can wait on the media process on demand.
2387 * 8. Close the syncobj handle and syncobj fd for external bos to avoid leaks.
2388 * GPU->CPU(optional):
2389 * If bo->map_deps.dep exists:
2390 * get it and add it to the exec syncs array
2391 */
2392 static int
2393 mos_bo_context_exec_with_sync_xe(struct mos_linux_bo **bo, int num_bo, struct mos_linux_context *ctx,
2394 struct drm_clip_rect *cliprects, int num_cliprects, int DR4,
2395 unsigned int flags, int *fence)
2396 {
2397
2398 MOS_DRM_CHK_NULL_RETURN_VALUE(bo, -EINVAL)
2399 MOS_DRM_CHK_NULL_RETURN_VALUE(ctx, -EINVAL)
2400 if (num_bo <= 0)
2401 {
2402 MOS_DRM_ASSERTMESSAGE("invalid batch bo num(%d)", num_bo);
2403 return -EINVAL;
2404 }
2405
2406 struct mos_xe_bufmgr_gem *bufmgr_gem = (struct mos_xe_bufmgr_gem *) bo[0]->bufmgr;
2407 MOS_DRM_CHK_NULL_RETURN_VALUE(bufmgr_gem, -EINVAL)
2408
2409 uint64_t batch_addrs[num_bo];
2410
2411 std::vector<mos_xe_exec_bo> exec_list;
2412 for (int i = 0; i < num_bo; i++)
2413 {
2414 MOS_DRM_CHK_NULL_RETURN_VALUE(bo[i], -EINVAL)
2415 batch_addrs[i] = bo[i]->offset64;
2416 struct mos_xe_bo_gem *batch_bo_gem = (struct mos_xe_bo_gem *) bo[i];
2417 MOS_XE_GET_VALUES_FROM_MAP(batch_bo_gem->exec_list, exec_list);
2418 }
2419
2420 struct mos_xe_context *context = (struct mos_xe_context *) ctx;
2421 uint32_t curr_exec_queue_id = context->ctx.ctx_id;
2422 std::vector<struct mos_xe_external_bo_info> external_bos;
2423 std::vector<struct drm_xe_sync> syncs;
2424 uint64_t curr_timeline = 0;
2425 int ret = 0;
2426
2427 uint32_t exec_list_size = exec_list.size();
2428 if (exec_list_size == 0)
2429 {
2430 MOS_DRM_NORMALMESSAGE("invalid exec list count(%d)", exec_list_size);
2431 }
2432
2433 bufmgr_gem->m_lock.lock();
2434
2435 if (context->timeline_dep == nullptr)
2436 {
2437 context->timeline_dep = mos_sync_create_timeline_dep(bufmgr_gem->fd);
2438
2439 if (context->timeline_dep == nullptr)
2440 {
2441 MOS_DRM_ASSERTMESSAGE("Failed to initial context timeline dep");
2442 bufmgr_gem->m_lock.unlock();
2443 return -ENOMEM;
2444 }
2445 }
2446
2447 struct mos_xe_dep *dep = context->timeline_dep;
2448 //add the latest available timeline point(dep) into syncs as the fence out point.
2449 mos_sync_update_exec_syncs_from_timeline_dep(
2450 bufmgr_gem->fd,
2451 dep,
2452 syncs);
2453
2454 bufmgr_gem->sync_obj_rw_lock.lock_shared();
2455 //update exec syncs array by external and internal bo deps
2456 __mos_context_exec_update_syncs_xe(
2457 bufmgr_gem,
2458 bo,
2459 num_bo,
2460 context,
2461 exec_list,
2462 syncs,
2463 external_bos);
2464
2465 //exec submit
2466 uint32_t sync_count = syncs.size();
2467 struct drm_xe_sync *syncs_array = syncs.data();
2468
2469 //dump bo deps map
2470 __mos_dump_bo_deps_map_xe(bo, num_bo, exec_list, curr_exec_queue_id, bufmgr_gem->global_ctx_info);
2471 //dump fence in and fence out info
2472 __mos_dump_syncs_array_xe(syncs_array, sync_count, dep);
2473
2474 struct drm_xe_exec exec;
2475 memclear(exec);
2476 exec.extensions = 0;
2477 exec.exec_queue_id = curr_exec_queue_id;
2478 exec.num_syncs = sync_count;
2479 exec.syncs = (uintptr_t)syncs_array;
2480 /**
2481 * exec.address only accepts batch->offset64 when num bo == 1;
2482 * and it only accepts batch array when num bo > 1
2483 */
2484 exec.address = (num_bo == 1 ? (uintptr_t)batch_addrs[0] : (uintptr_t)batch_addrs);
2485 exec.num_batch_buffer = num_bo;
2486 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_XE_EXEC, &exec);
2487 if (ret)
2488 {
2489 MOS_DRM_ASSERTMESSAGE("Failed to submission in DRM_IOCTL_XE_EXEC(errno:%d): exec_queue_id = %d, num_syncs = %d, num_bo = %d",
2490 -errno, curr_exec_queue_id, sync_count, num_bo);
2491
2492 //check if it is caused by a guilty exec_queue; if so, restore the exec_queue here and re-try the exec.
2493 if (ret == -EPERM)
2494 {
2495 ret = __mos_bo_context_exec_retry_xe(&bufmgr_gem->bufmgr, ctx, exec, curr_exec_queue_id);
2496 }
2497 }
2498 curr_timeline = dep->timeline_index;
2499
2500 //update bos' read and write dep with new timeline
2501 __mos_context_exec_update_bo_deps_xe(bo, num_bo, exec_list, context->dummy_exec_queue_id, dep);
2502
2503 //Update dep with latest available timeline
2504 mos_sync_update_timeline_dep(dep);
2505
2506 bufmgr_gem->sync_obj_rw_lock.unlock_shared();
2507 bufmgr_gem->m_lock.unlock();
2508
2509 //import batch syncobj or its point for external bos and close syncobj created for external bo before.
2510 uint32_t external_bo_count = external_bos.size();
2511 int sync_file_fd = INVALID_HANDLE;
2512 int temp_syncobj = INVALID_HANDLE;
2513
2514 if (external_bo_count > 0)
2515 {
2516 temp_syncobj = mos_sync_syncobj_create(bufmgr_gem->fd, 0);
2517 if (temp_syncobj > 0)
2518 {
2519 mos_sync_syncobj_timeline_to_binary(bufmgr_gem->fd, temp_syncobj, dep->syncobj_handle, curr_timeline, 0);
2520 sync_file_fd = mos_sync_syncobj_handle_to_syncfile_fd(bufmgr_gem->fd, temp_syncobj);
2521 }
2522 }
2523 for (int i = 0; i < external_bo_count; i++)
2524 {
2525 //import syncobj for external bos
2526 if (sync_file_fd >= 0)
2527 {
2528 mos_sync_import_syncfile_to_external_bo(bufmgr_gem->fd, external_bos[i].prime_fd, sync_file_fd);
2529 }
2530 if (external_bos[i].prime_fd != INVALID_HANDLE)
2531 {
2532 close(external_bos[i].prime_fd);
2533 }
2534 mos_sync_syncobj_destroy(bufmgr_gem->fd, external_bos[i].syncobj_handle);
2535 }
2536 if (sync_file_fd >= 0)
2537 {
2538 close(sync_file_fd);
2539 }
2540 if (temp_syncobj > 0)
2541 {
2542 mos_sync_syncobj_destroy(bufmgr_gem->fd, temp_syncobj);
2543 }
2544
2545 //Note: keep exec return value for final return value.
2546 return ret;
2547 }
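
/**
 * Caller sketch (illustrative only; "batch_bo", "surface_bo", "render_target_bo",
 * "ctx" and "bufmgr" are placeholder names, and bufmgr is assumed to come from
 * mos_bufmgr_gem_init_xe below): the expected flow is to register every
 * referenced bo on the batch, submit through the vtable, then clear the exec list.
 *
 *     bufmgr->bo_add_softpin_target(batch_bo, surface_bo, false);        // read
 *     bufmgr->bo_add_softpin_target(batch_bo, render_target_bo, true);   // write
 *     struct mos_linux_bo *batches[] = { batch_bo };
 *     bufmgr->bo_context_exec3(batches, 1, ctx, nullptr, 0, 0, 0, nullptr);
 *     bufmgr->bo_clear_relocs(batch_bo, 0);
 */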
2548
2549 /**
2550 * Get the DEVICE ID for the device. This can be overridden by setting the
2551 * INTEL_DEVID_OVERRIDE environment variable to the desired ID.
2552 */
2553 static int
2554 mos_get_devid_xe(struct mos_bufmgr *bufmgr)
2555 {
2556 struct mos_xe_bufmgr_gem *bufmgr_gem = (struct mos_xe_bufmgr_gem *)bufmgr;
2557 int fd = bufmgr_gem->fd;
2558 struct mos_xe_device *dev = &bufmgr_gem->xe_device;
2559
2560 MOS_DRM_CHK_XE_DEV(dev, config, __mos_query_config_xe, 0)
2561 struct drm_xe_query_config *config = dev->config;
2562
2563 return (config->info[DRM_XE_QUERY_CONFIG_REV_AND_DEVICE_ID] & 0xffff);
2564 }
2565
2566 static struct drm_xe_query_engines *
2567 __mos_query_engines_xe(int fd)
2568 {
2569 if (fd < 0)
2570 {
2571 return nullptr;
2572 }
2573
2574 struct drm_xe_device_query query;
2575 struct drm_xe_query_engines *engines;
2576 int ret;
2577
2578 memclear(query);
2579 query.extensions = 0;
2580 query.query = DRM_XE_DEVICE_QUERY_ENGINES;
2581 query.size = 0;
2582 query.data = 0;
2583
2584 ret = drmIoctl(fd, DRM_IOCTL_XE_DEVICE_QUERY, &query);
2585 if (ret || !query.size)
2586 {
2587 MOS_DRM_ASSERTMESSAGE("ret:%d, length:%d", ret, query.size);
2588 return nullptr;
2589 }
2590
2591 engines = (drm_xe_query_engines *)calloc(1, query.size);
2592 MOS_DRM_CHK_NULL_RETURN_VALUE(engines, nullptr)
2593
2594 query.data = (uintptr_t)engines;
2595 ret = drmIoctl(fd, DRM_IOCTL_XE_DEVICE_QUERY, &query);
2596 if (ret || !query.size)
2597 {
2598 MOS_DRM_ASSERTMESSAGE("ret:%d, length:%d", ret, query.size);
2599 MOS_XE_SAFE_FREE(engines);
2600 return nullptr;
2601 }
2602
2603 return engines;
2604 }
2605
2606 static int
2607 mos_query_engines_count_xe(struct mos_bufmgr *bufmgr, unsigned int *nengine)
2608 {
2609 MOS_DRM_CHK_NULL_RETURN_VALUE(nengine, -EINVAL);
2610 struct mos_xe_bufmgr_gem *bufmgr_gem = (struct mos_xe_bufmgr_gem *)bufmgr;
2611 int fd = bufmgr_gem->fd;
2612 struct mos_xe_device *dev = &bufmgr_gem->xe_device;
2613
2614 MOS_DRM_CHK_XE_DEV(dev, engines, __mos_query_engines_xe, -ENODEV)
2615 *nengine = dev->engines->num_engines;
2616
2617 return MOS_XE_SUCCESS;
2618 }
2619
2620 int
2621 mos_query_engines_xe(struct mos_bufmgr *bufmgr,
2622 __u16 engine_class,
2623 __u64 caps,
2624 unsigned int *nengine,
2625 void *engine_map)
2626 {
2627 MOS_DRM_CHK_NULL_RETURN_VALUE(nengine, -EINVAL);
2628 MOS_DRM_CHK_NULL_RETURN_VALUE(engine_map, -EINVAL);
2629
2630 struct mos_xe_bufmgr_gem *bufmgr_gem = (struct mos_xe_bufmgr_gem *)bufmgr;
2631 struct drm_xe_engine_class_instance *ci = (struct drm_xe_engine_class_instance *)engine_map;
2632 int fd = bufmgr_gem->fd;
2633 struct mos_xe_device *dev = &bufmgr_gem->xe_device;
2634
2635 MOS_DRM_CHK_XE_DEV(dev, engines, __mos_query_engines_xe, -ENODEV)
2636 struct drm_xe_query_engines *engines = dev->engines;
2637
2638 int i, num;
2639 struct drm_xe_engine *engine;
2640 for (i = 0, num = 0; i < engines->num_engines; i++)
2641 {
2642 engine = (struct drm_xe_engine *)&engines->engines[i];
2643 if (engine_class == engine->instance.engine_class)
2644 {
2645 ci->engine_class = engine_class;
2646 ci->engine_instance = engine->instance.engine_instance;
2647 ci->gt_id = engine->instance.gt_id;
2648 ci++;
2649 num++;
2650 }
2651
2652 if (num > *nengine)
2653 {
2654 MOS_DRM_ASSERTMESSAGE("Number of engine instances out of range, %d,%d", num, *nengine);
2655 return -1;
2656 }
2657 }
2658
2659 //Note30: need to confirm if engine_instance is ordered, otherwise re-order needed.
2660
2661 *nengine = num;
2662
2663 return 0;
2664 }
2665
2666 static size_t
2667 mos_get_engine_class_size_xe()
2668 {
2669 return sizeof(struct drm_xe_engine_class_instance);
2670 }
2671
2672 static int
2673 mos_query_sysinfo_xe(struct mos_bufmgr *bufmgr, MEDIA_SYSTEM_INFO* gfx_info)
2674 {
2675 MOS_DRM_CHK_NULL_RETURN_VALUE(bufmgr, -EINVAL);
2676 MOS_DRM_CHK_NULL_RETURN_VALUE(gfx_info, -EINVAL);
2677
2678 struct mos_xe_bufmgr_gem *bufmgr_gem = (struct mos_xe_bufmgr_gem *)bufmgr;
2679 int fd = bufmgr_gem->fd;
2680 struct mos_xe_device *dev = &bufmgr_gem->xe_device;
2681 int ret;
2682
2683 MOS_DRM_CHK_XE_DEV(dev, engines, __mos_query_engines_xe, -ENODEV)
2684
2685 if (0 == gfx_info->VDBoxInfo.NumberOfVDBoxEnabled
2686 || 0 == gfx_info->VEBoxInfo.NumberOfVEBoxEnabled)
2687 {
2688 unsigned int num_vd = 0;
2689 unsigned int num_ve = 0;
2690
2691 for (unsigned int i = 0; i < dev->engines->num_engines; i++)
2692 {
2693 if (0 == gfx_info->VDBoxInfo.NumberOfVDBoxEnabled
2694 && dev->engines->engines[i].instance.engine_class == DRM_XE_ENGINE_CLASS_VIDEO_DECODE)
2695 {
2696 gfx_info->VDBoxInfo.Instances.VDBoxEnableMask |=
2697 1 << dev->engines->engines[i].instance.engine_instance;
2698 num_vd++;
2699 }
2700
2701 if (0 == gfx_info->VEBoxInfo.NumberOfVEBoxEnabled
2702 && dev->engines->engines[i].instance.engine_class == DRM_XE_ENGINE_CLASS_VIDEO_ENHANCE)
2703 {
2704 num_ve++;
2705 }
2706 }
2707
2708 if (num_vd > 0)
2709 {
2710 gfx_info->VDBoxInfo.NumberOfVDBoxEnabled = num_vd;
2711 }
2712
2713 if (num_ve > 0)
2714 {
2715 gfx_info->VEBoxInfo.NumberOfVEBoxEnabled = num_ve;
2716 }
2717 }
2718
2719 return 0;
2720 }
2721
2722 void mos_select_fixed_engine_xe(struct mos_bufmgr *bufmgr,
2723 void *engine_map,
2724 uint32_t *nengine,
2725 uint32_t fixed_instance_mask)
2726 {
2727 MOS_UNUSED(bufmgr);
2728 #if (DEBUG || _RELEASE_INTERNAL)
2729 if (fixed_instance_mask)
2730 {
2731 struct drm_xe_engine_class_instance *_engine_map = (struct drm_xe_engine_class_instance *)engine_map;
2732 auto unselect_index = 0;
2733 for (auto bit = 0; bit < *nengine; bit++)
2734 {
2735 if (((fixed_instance_mask >> bit) & 0x1) && (bit > unselect_index))
2736 {
2737 _engine_map[unselect_index].engine_class = _engine_map[bit].engine_class;
2738 _engine_map[unselect_index].engine_instance = _engine_map[bit].engine_instance;
2739 _engine_map[unselect_index].gt_id = _engine_map[bit].gt_id;
2740 _engine_map[unselect_index].pad = _engine_map[bit].pad;
2741 _engine_map[bit].engine_class = 0;
2742 _engine_map[bit].engine_instance = 0;
2743 _engine_map[bit].gt_id = 0;
2744 _engine_map[bit].pad = 0;
2745 unselect_index++;
2746 }
2747 else if (((fixed_instance_mask >> bit) & 0x1) && (bit == unselect_index))
2748 {
2749 unselect_index++;
2750 }
2751 else if (!((fixed_instance_mask >> bit) & 0x1))
2752 {
2753 _engine_map[bit].engine_class = 0;
2754 _engine_map[bit].engine_instance = 0;
2755 _engine_map[bit].gt_id = 0;
2756 _engine_map[bit].pad = 0;
2757 }
2758 }
2759 *nengine = unselect_index;
2760 }
2761 #else
2762 MOS_UNUSED(engine_map);
2763 MOS_UNUSED(nengine);
2764 MOS_UNUSED(fixed_instance_mask);
2765 #endif
2766
2767 }
2768
2769
2770 /**
2771 * Note: xe kmd doesn't support query blob before dg2.
2772 */
2773 static uint32_t *
2774 __mos_query_hw_config_xe(int fd)
2775 {
2776 struct drm_xe_device_query query;
2777 uint32_t *hw_config;
2778 int ret;
2779
2780 if (fd < 0)
2781 {
2782 return nullptr;
2783 }
2784
2785 memclear(query);
2786 query.query = DRM_XE_DEVICE_QUERY_HWCONFIG;
2787
2788 ret = drmIoctl(fd, DRM_IOCTL_XE_DEVICE_QUERY, &query);
2789 if (ret || !query.size)
2790 {
2791 MOS_DRM_ASSERTMESSAGE("ret:%d, length:%d", ret, query.size);
2792 return nullptr;
2793 }
2794
2795 hw_config = (uint32_t *)calloc(1, query.size + sizeof(uint32_t));
2796 MOS_DRM_CHK_NULL_RETURN_VALUE(hw_config, nullptr)
2797
2798 query.data = (uintptr_t)&hw_config[1];
2799 ret = drmIoctl(fd, DRM_IOCTL_XE_DEVICE_QUERY, &query);
2800 if (ret != 0 || query.size <= 0)
2801 {
2802 MOS_DRM_ASSERTMESSAGE("ret:%d, length:%d", ret, query.size);
2803 MOS_XE_SAFE_FREE(hw_config);
2804 return nullptr;
2805 }
2806
2807 hw_config[0] = query.size / sizeof(uint32_t);
2808
2809 return hw_config;
2810 }
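
/**
 * Layout note (derived from the parsing loops below, not a KMD contract
 * statement): the returned buffer stores the blob dword count in hw_config[0],
 * followed by the raw blob as (key, value_count, value...) records.
 *
 *     uint32_t *blob = &hw_config[1];
 *     uint32_t  num  = hw_config[0];
 *     for (uint32_t i = 0; i < num; i += blob[i + 1] + 2)
 *     {
 *         uint32_t key   = blob[i];       // e.g. INTEL_HWCONFIG_MAX_SLICES_SUPPORTED
 *         uint32_t count = blob[i + 1];   // number of value dwords
 *         uint32_t value = blob[i + 2];   // first value dword
 *     }
 */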
2811
2812 static int
2813 mos_query_device_blob_xe(struct mos_bufmgr *bufmgr, MEDIA_SYSTEM_INFO* gfx_info)
2814 {
2815 MOS_DRM_CHK_NULL_RETURN_VALUE(gfx_info, -EINVAL)
2816
2817 struct mos_xe_bufmgr_gem *bufmgr_gem = (struct mos_xe_bufmgr_gem *)bufmgr;
2818 int fd = bufmgr_gem->fd;
2819 struct mos_xe_device *dev = &bufmgr_gem->xe_device;
2820
2821 MOS_DRM_CHK_XE_DEV(dev, hw_config, __mos_query_hw_config_xe, -ENODEV)
2822
2823 uint32_t *hwconfig = &dev->hw_config[1];
2824 uint32_t num_config = dev->hw_config[0];
2825
2826 int i = 0;
2827 while (i < num_config) {
2828 /* Attribute ID starts with 1 */
2829 assert(hwconfig[i] > 0);
2830
2831 #if DEBUG_BLOB_QUERY
2832 MOS_DRM_NORMALMESSAGE("query blob: key=%s, value=%d", key_string[hwconfig[i]], hwconfig[i+2]);
2833 #endif
2834 if (INTEL_HWCONFIG_MAX_SLICES_SUPPORTED == hwconfig[i])
2835 {
2836 assert(hwconfig[i+1] == 1);
2837 gfx_info->SliceCount = hwconfig[i+2];
2838 gfx_info->MaxSlicesSupported = hwconfig[i+2];
2839 }
2840
2841 if ((INTEL_HWCONFIG_MAX_DUAL_SUBSLICES_SUPPORTED == hwconfig[i])
2842 || (INTEL_HWCONFIG_MAX_SUBSLICE == hwconfig[i]))
2843 {
2844 assert(hwconfig[i+1] == 1);
2845 gfx_info->SubSliceCount = hwconfig[i+2];
2846 gfx_info->MaxSubSlicesSupported = hwconfig[i+2];
2847 }
2848
2849 if ((INTEL_HWCONFIG_MAX_NUM_EU_PER_DSS == hwconfig[i])
2850 || (INTEL_HWCONFIG_MAX_EU_PER_SUBSLICE == hwconfig[i]))
2851 {
2852 assert(hwconfig[i+1] == 1);
2853 gfx_info->MaxEuPerSubSlice = hwconfig[i+2];
2854 }
2855
2856 if (INTEL_HWCONFIG_DEPRECATED_L3_CACHE_SIZE_IN_KB == hwconfig[i])
2857 {
2858 assert(hwconfig[i+1] == 1);
2859 gfx_info->L3CacheSizeInKb = hwconfig[i+2];
2860 }
2861
2862 if (INTEL_HWCONFIG_NUM_THREADS_PER_EU == hwconfig[i])
2863 {
2864 assert(hwconfig[i+1] == 1);
2865 gfx_info->NumThreadsPerEu = hwconfig[i+2];
2866 }
2867
2868 if (INTEL_HWCONFIG_MAX_VECS == hwconfig[i])
2869 {
2870 assert(hwconfig[i+1] == 1);
2871 gfx_info->MaxVECS = hwconfig[i+2];
2872 }
2873
2874 /* Advance to next key */
2875 i += hwconfig[i + 1]; // value size
2876 i += 2;// KL size
2877 }
2878
2879 return 0;
2880 }
2881
2882 static void
2883 mos_enable_reuse_xe(struct mos_bufmgr *bufmgr)
2884 {
2885 MOS_UNIMPLEMENT(bufmgr);
2886 }
2887
2888 // The function is not supported on KMD
2889 static int mos_query_hw_ip_version_xe(struct mos_bufmgr *bufmgr, __u16 engine_class, void *ip_ver_info)
2890 {
2891 MOS_UNIMPLEMENT(bufmgr);
2892 MOS_UNIMPLEMENT(engine_class);
2893 MOS_UNIMPLEMENT(ip_ver_info);
2894 return 0;
2895 }
2896
2897 static void
2898 mos_bo_free_xe(struct mos_linux_bo *bo)
2899 {
2900 struct mos_xe_bufmgr_gem *bufmgr_gem = nullptr;
2901 struct mos_xe_bo_gem *bo_gem = (struct mos_xe_bo_gem *) bo;
2902 struct drm_gem_close close_ioctl;
2903 int ret;
2904
2905 if (nullptr == bo_gem)
2906 {
2907 MOS_DRM_ASSERTMESSAGE("bo == nullptr");
2908 return;
2909 }
2910
2911 bufmgr_gem = (struct mos_xe_bufmgr_gem *) bo->bufmgr;
2912
2913 if (nullptr == bufmgr_gem)
2914 {
2915 MOS_DRM_ASSERTMESSAGE("bufmgr_gem == nullptr");
2916 return;
2917 }
2918
2919 mos_gem_bo_wait_rendering_xe(bo);
2920
2921 bufmgr_gem->m_lock.lock();
2922
2923 if (!bo_gem->is_userptr)
2924 {
2925 if (bo_gem->mem_virtual)
2926 {
2927 VG(VALGRIND_MAKE_MEM_NOACCESS(bo_gem->mem_virtual, 0));
2928 drm_munmap(bo_gem->mem_virtual, bo_gem->bo.size);
2929 bo_gem->mem_virtual = nullptr;
2930 }
2931 }
2932
2933 if (bo->vm_id != INVALID_VM)
2934 {
2935 ret = mos_vm_bind_sync_xe(bufmgr_gem->fd,
2936 bo->vm_id,
2937 0,
2938 0,
2939 bo->offset64,
2940 bo->size,
2941 bo_gem->pat_index,
2942 DRM_XE_VM_BIND_OP_UNMAP);
2943 if (ret)
2944 {
2945 MOS_DRM_ASSERTMESSAGE("mos_gem_bo_free mos_vm_unbind ret error. bo:0x%lx, vm_id:%d\r",
2946 (uint64_t)bo,
2947 bo->vm_id);
2948 }
2949 else
2950 {
2951 bo->vm_id = INVALID_VM;
2952 }
2953 }
2954
2955 if (!bo_gem->is_userptr)
2956 {
2957 /* Close this object */
2958 memclear(close_ioctl);
2959 close_ioctl.handle = bo_gem->gem_handle;
2960 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_GEM_CLOSE, &close_ioctl);
2961 if (ret != 0)
2962 {
2963 MOS_DRM_ASSERTMESSAGE("DRM_IOCTL_GEM_CLOSE %d failed (%s): %s",
2964 bo_gem->gem_handle, bo_gem->name, strerror(errno));
2965 }
2966 }
2967
2968 if (bufmgr_gem->mem_profiler_fd != -1)
2969 {
2970 snprintf(bufmgr_gem->mem_profiler_buffer, MEM_PROFILER_BUFFER_SIZE, "GEM_CLOSE, %d, %d, %lu, %d\n", getpid(), bo->handle,bo->size,bo_gem->mem_region);
2971 ret = write(bufmgr_gem->mem_profiler_fd, bufmgr_gem->mem_profiler_buffer, strnlen(bufmgr_gem->mem_profiler_buffer, MEM_PROFILER_BUFFER_SIZE));
2972 if (-1 == ret)
2973 {
2974 snprintf(bufmgr_gem->mem_profiler_buffer, MEM_PROFILER_BUFFER_SIZE, "GEM_CLOSE, %d, %d, %lu, %d\n", getpid(), bo->handle,bo->size,bo_gem->mem_region);
2975 ret = write(bufmgr_gem->mem_profiler_fd, bufmgr_gem->mem_profiler_buffer, strnlen(bufmgr_gem->mem_profiler_buffer, MEM_PROFILER_BUFFER_SIZE));
2976 if (-1 == ret)
2977 {
2978 MOS_DRM_ASSERTMESSAGE("Failed to write to %s: %s", bufmgr_gem->mem_profiler_path, strerror(errno));
2979 }
2980 }
2981 }
2982
2983 /* Return the VMA for reuse */
2984 __mos_bo_vma_free_xe(bo->bufmgr, bo->offset64, bo->size);
2985 bufmgr_gem->m_lock.unlock();
2986
2987 MOS_Delete(bo_gem);
2988 }
2989
2990 static int
2991 mos_bo_set_softpin_xe(MOS_LINUX_BO *bo)
2992 {
2993 MOS_UNIMPLEMENT(bo);
2994 return 0;
2995 }
2996
2997 static void
2998 mos_bufmgr_gem_destroy_xe(struct mos_bufmgr *bufmgr)
2999 {
3000 if (nullptr == bufmgr)
3001 return;
3002
3003 struct mos_xe_bufmgr_gem *bufmgr_gem = (struct mos_xe_bufmgr_gem *) bufmgr;
3004 struct mos_xe_device *dev = &bufmgr_gem->xe_device;
3005 int i, ret;
3006
3007 /* Release userptr bo kept hanging around for optimisation. */
3008
3009 mos_vma_heap_finish(&bufmgr_gem->vma_heap[MEMZONE_SYS]);
3010 mos_vma_heap_finish(&bufmgr_gem->vma_heap[MEMZONE_DEVICE]);
3011 mos_vma_heap_finish(&bufmgr_gem->vma_heap[MEMZONE_PRIME]);
3012
3013 if (bufmgr_gem->vm_id != INVALID_VM)
3014 {
3015 __mos_vm_destroy_xe(bufmgr, bufmgr_gem->vm_id);
3016 bufmgr_gem->vm_id = INVALID_VM;
3017 }
3018
3019 if (bufmgr_gem->mem_profiler_fd != -1)
3020 {
3021 close(bufmgr_gem->mem_profiler_fd);
3022 }
3023
3024 MOS_XE_SAFE_FREE(dev->hw_config);
3025 dev->hw_config = nullptr;
3026
3027 MOS_XE_SAFE_FREE(dev->config);
3028 dev->config = nullptr;
3029
3030 MOS_XE_SAFE_FREE(dev->engines);
3031 dev->engines = nullptr;
3032
3033 MOS_XE_SAFE_FREE(dev->mem_regions);
3034 dev->mem_regions = nullptr;
3035
3036 MOS_XE_SAFE_FREE(dev->gt_list);
3037 dev->gt_list = nullptr;
3038
3039 MOS_Delete(bufmgr_gem);
3040 }
3041
3042 static void
3043 mos_bufmgr_gem_unref_xe(struct mos_bufmgr *bufmgr)
3044 {
3045 struct mos_xe_bufmgr_gem *bufmgr_gem = (struct mos_xe_bufmgr_gem *)bufmgr;
3046
3047 if (bufmgr_gem && atomic_add_unless(&bufmgr_gem->ref_count, -1, 1))
3048 {
3049 pthread_mutex_lock(&bufmgr_list_mutex);
3050
3051 if (atomic_dec_and_test(&bufmgr_gem->ref_count))
3052 {
3053 DRMLISTDEL(&bufmgr_gem->managers);
3054 mos_bufmgr_gem_destroy_xe(bufmgr);
3055 }
3056
3057 pthread_mutex_unlock(&bufmgr_list_mutex);
3058 }
3059 }
3060
3061 static int
3062 mo_get_context_param_xe(struct mos_linux_context *ctx,
3063 uint32_t size,
3064 uint64_t param,
3065 uint64_t *value)
3066 {
3067 MOS_UNIMPLEMENT(ctx);
3068 MOS_UNIMPLEMENT(size);
3069 MOS_UNIMPLEMENT(param);
3070 MOS_UNIMPLEMENT(value);
3071 return 0;
3072 }
3073
3074 static void mos_enable_softpin_xe(struct mos_bufmgr *bufmgr, bool va1m_align)
3075 {
3076 MOS_UNIMPLEMENT(bufmgr);
3077 MOS_UNIMPLEMENT(va1m_align);
3078 }
3079
3080 static int
3081 mos_get_reset_stats_xe(struct mos_linux_context *ctx,
3082 uint32_t *reset_count,
3083 uint32_t *active,
3084 uint32_t *pending)
3085 {
3086 MOS_DRM_CHK_NULL_RETURN_VALUE(ctx, -EINVAL);
3087
3088 struct mos_xe_context *context = (struct mos_xe_context *)ctx;
3089 if (reset_count)
3090 *reset_count = context->reset_count;
3091 if (active)
3092 *active = 0;
3093 if (pending)
3094 *pending = 0;
3095 return 0;
3096 }
3097
3098 static mos_oca_exec_list_info*
3099 mos_bo_get_oca_exec_list_info_xe(struct mos_linux_bo *bo, int *count)
3100 {
3101 if (nullptr == bo || nullptr == count)
3102 {
3103 return nullptr;
3104 }
3105
3106 mos_oca_exec_list_info *info = nullptr;
3107 int counter = 0;
3108 int MAX_COUNT = 50;
3109 struct mos_xe_bufmgr_gem *bufmgr_gem = (struct mos_xe_bufmgr_gem *) bo->bufmgr;
3110 struct mos_xe_bo_gem *bo_gem = (struct mos_xe_bo_gem *)bo;
3111 int exec_list_count = bo_gem->exec_list.size();
3112
3113 if (exec_list_count == 0 || exec_list_count > MAX_COUNT)
3114 {
3115 return nullptr;
3116 }
3117
3118 info = (mos_oca_exec_list_info *)malloc((exec_list_count + 1) * sizeof(mos_oca_exec_list_info));
3119 if (!info)
3120 {
3121 MOS_DRM_ASSERTMESSAGE("malloc mos_oca_exec_list_info failed");
3122 return info;
3123 }
3124
3125 for (auto &it : bo_gem->exec_list)
3126 {
3127 /*note: set capture for each bo*/
3128 struct mos_xe_bo_gem *exec_bo_gem = (struct mos_xe_bo_gem *)it.second.bo;
3129 uint32_t exec_flags = it.second.flags;
3130 if (exec_bo_gem)
3131 {
3132 info[counter].handle = exec_bo_gem->bo.handle;
3133 info[counter].size = exec_bo_gem->bo.size;
3134 info[counter].offset64 = exec_bo_gem->bo.offset64;
3135 info[counter].flags = exec_flags;
3136 info[counter].mem_region = exec_bo_gem->mem_region;
3137 info[counter].is_batch = false;
3138 counter++;
3139 }
3140 }
3141
3142 /*note: bo is cmd bo, also need to be added*/
3143 info[counter].handle = bo->handle;
3144 info[counter].size = bo->size;
3145 info[counter].offset64 = bo->offset64;
3146 info[counter].flags = EXEC_OBJECT_WRITE_XE; // use write flags for batch bo as default.
3147 info[counter].mem_region = bo_gem->mem_region;
3148 info[counter].is_batch = true;
3149 counter++;
3150
3151 *count = counter;
3152
3153 return info;
3154 }
3155
3156 static bool
3157 mos_has_bsd2_xe(struct mos_bufmgr *bufmgr)
3158 {
3159 MOS_UNUSED(bufmgr);
3160 return true;
3161 }
3162
3163 static void
3164 mos_bo_set_object_capture_xe(struct mos_linux_bo *bo)
3165 {
3166 MOS_UNIMPLEMENT(bo);
3167 }
3168
3169 static void
3170 mos_bo_set_object_async_xe(struct mos_linux_bo *bo)
3171 {
3172 MOS_UNIMPLEMENT(bo);
3173 }
3174
3175 static int
3176 mos_get_driver_info_xe(struct mos_bufmgr *bufmgr, struct LinuxDriverInfo *drvInfo)
3177 {
3178 MOS_DRM_CHK_NULL_RETURN_VALUE(drvInfo, -EINVAL)
3179 struct mos_xe_bufmgr_gem *bufmgr_gem = (struct mos_xe_bufmgr_gem *)bufmgr;
3180 struct mos_xe_device *dev = &bufmgr_gem->xe_device;
3181 int fd = bufmgr_gem->fd;
3182
3183 uint32_t MaxEuPerSubSlice = 0;
3184 int i = 0;
3185 drvInfo->hasBsd = 1;
3186 drvInfo->hasBsd2 = 1;
3187 drvInfo->hasVebox = 1;
3188
3189 //For XE driver always has ppgtt
3190 drvInfo->hasPpgtt = 1;
3191
3192 /**
3193 * query blob
3194 * Note: xe kmd doesn't support query blob before dg2, so don't check null and return here.
3195 */
3196 if (dev->hw_config == nullptr)
3197 {
3198 dev->hw_config = __mos_query_hw_config_xe(fd);
3199 }
3200
3201 if (dev->hw_config)
3202 {
3203 uint32_t *hw_config = &dev->hw_config[1];
3204 uint32_t num_config = dev->hw_config[0];
3205
3206 while (i < num_config)
3207 {
3208 /* Attribute ID starts with 1 */
3209 assert(hw_config[i] > 0);
3210
3211 #if DEBUG_BLOB_QUERY
3212 MOS_DRM_NORMALMESSAGE("query blob: key=%s, value=%d", key_string[hw_config[i]], hw_config[i+2]);
3213 #endif
3214 if (INTEL_HWCONFIG_MAX_SLICES_SUPPORTED == hw_config[i])
3215 {
3216 assert(hw_config[i+1] == 1);
3217 drvInfo->sliceCount = hw_config[i+2];
3218 }
3219
3220 if ((INTEL_HWCONFIG_MAX_DUAL_SUBSLICES_SUPPORTED == hw_config[i])
3221 || (INTEL_HWCONFIG_MAX_SUBSLICE == hw_config[i]))
3222 {
3223 assert(hw_config[i+1] == 1);
3224 drvInfo->subSliceCount = hw_config[i+2];
3225 }
3226
3227 if ((INTEL_HWCONFIG_MAX_NUM_EU_PER_DSS == hw_config[i])
3228 || (INTEL_HWCONFIG_MAX_EU_PER_SUBSLICE == hw_config[i]))
3229 {
3230 assert(hw_config[i+1] == 1);
3231 MaxEuPerSubSlice = hw_config[i+2];
3232 }
3233
3234 /* Advance to next key */
3235 i += hw_config[i + 1]; // value size
3236 i += 2;// KL size
3237 }
3238
3239 drvInfo->euCount = drvInfo->subSliceCount * MaxEuPerSubSlice;
3240 }
3241 else
3242 {
3243 drvInfo->euCount = 96;
3244 drvInfo->subSliceCount = 6;
3245 drvInfo->sliceCount = 1;
3246 }
3247
3248 // query engines info
3249 MOS_DRM_CHK_XE_DEV(dev, engines, __mos_query_engines_xe, -ENODEV)
3250 struct drm_xe_query_engines *engines = dev->engines;
3251 int num_vd = 0;
3252 int num_ve = 0;
3253 for (i = 0; i < engines->num_engines; i++)
3254 {
3255 if (DRM_XE_ENGINE_CLASS_VIDEO_DECODE == engines->engines[i].instance.engine_class)
3256 {
3257 num_vd++;
3258 }
3259 else if (DRM_XE_ENGINE_CLASS_VIDEO_ENHANCE == engines->engines[i].instance.engine_class)
3260 {
3261 num_ve++;
3262 }
3263 }
3264
3265 if (num_vd >= 1)
3266 {
3267 drvInfo->hasBsd = 1;
3268 }
3269
3270 if (num_vd >= 2)
3271 {
3272 drvInfo->hasBsd2 = 1;
3273 }
3274
3275 if (num_ve >= 1)
3276 {
3277 drvInfo->hasVebox = 1;
3278 }
3279
3280 drvInfo->hasHuc = 1;
3281 if (1 == drvInfo->hasHuc)
3282 {
3283 drvInfo->hasProtectedHuc = 1;
3284 }
3285
3286 // query config
3287 MOS_DRM_CHK_XE_DEV(dev, config, __mos_query_config_xe, -ENODEV)
3288 struct drm_xe_query_config *config = dev->config;
3289 drvInfo->devId = config->info[DRM_XE_QUERY_CONFIG_REV_AND_DEVICE_ID] & 0xffff;
3290 drvInfo->devRev = config->info[DRM_XE_QUERY_CONFIG_REV_AND_DEVICE_ID] >> 16;
3291
3292 return MOS_XE_SUCCESS;
3293 }
3294
3295 /**
3296 * Initializes the GEM buffer manager, which uses the kernel to allocate, map,
3297 * and manage buffer objects.
3298 *
3299 * \param fd File descriptor of the opened DRM device.
3300 */
3301 struct mos_bufmgr *
3302 mos_bufmgr_gem_init_xe(int fd, int batch_size)
3303 {
3304 //Note: don't put this field in bufmgr in case bufmgr is inaccessible in some functions
3305 #if (_DEBUG || _RELEASE_INTERNAL)
3306 MOS_READ_ENV_VARIABLE(INTEL_XE_BUFMGR_DEBUG, MOS_USER_FEATURE_VALUE_TYPE_INT64, __xe_bufmgr_debug__);
3307 if (__xe_bufmgr_debug__ < 0)
3308 {
3309 __xe_bufmgr_debug__ = 0;
3310 }
3311 #endif

    struct mos_xe_bufmgr_gem *bufmgr_gem;
    int ret, tmp;
    struct mos_xe_device *dev = nullptr;

    pthread_mutex_lock(&bufmgr_list_mutex);

    bufmgr_gem = mos_bufmgr_gem_find(fd);
    if (bufmgr_gem)
        goto exit;

    bufmgr_gem = MOS_New(mos_xe_bufmgr_gem);
    if (nullptr == bufmgr_gem)
        goto exit;

    bufmgr_gem->bufmgr = {};
    bufmgr_gem->xe_device = {};
    dev = &bufmgr_gem->xe_device;

    bufmgr_gem->fd = fd;
    bufmgr_gem->vm_id = INVALID_VM;
    atomic_set(&bufmgr_gem->ref_count, 1);

    bufmgr_gem->bufmgr.vm_create = mos_vm_create_xe;
    bufmgr_gem->bufmgr.vm_destroy = mos_vm_destroy_xe;
    bufmgr_gem->bufmgr.context_create = mos_context_create_xe;
    bufmgr_gem->bufmgr.context_create_ext = mos_context_create_ext_xe;
    bufmgr_gem->bufmgr.context_create_shared = mos_context_create_shared_xe;
    bufmgr_gem->bufmgr.context_destroy = mos_context_destroy_xe;
    bufmgr_gem->bufmgr.bo_alloc = mos_bo_alloc_xe;
    bufmgr_gem->bufmgr.bo_add_softpin_target = mos_gem_bo_update_exec_list_xe;
    bufmgr_gem->bufmgr.bo_clear_relocs = mos_gem_bo_clear_exec_list_xe;
    bufmgr_gem->bufmgr.bo_alloc_userptr = mos_bo_alloc_userptr_xe;
    bufmgr_gem->bufmgr.bo_alloc_tiled = mos_bo_alloc_tiled_xe;
    bufmgr_gem->bufmgr.bo_map = mos_bo_map_xe;
    bufmgr_gem->bufmgr.bo_busy = mos_gem_bo_busy_xe;
    bufmgr_gem->bufmgr.bo_wait_rendering = mos_gem_bo_wait_rendering_xe;
    bufmgr_gem->bufmgr.bo_wait = mos_gem_bo_wait_xe;
    bufmgr_gem->bufmgr.bo_map_wc = mos_bo_map_wc_xe;
    bufmgr_gem->bufmgr.bo_unmap = mos_bo_unmap_xe;
    bufmgr_gem->bufmgr.bo_unmap_wc = mos_bo_unmap_wc_xe;
    bufmgr_gem->bufmgr.bo_create_from_prime = mos_bo_create_from_prime_xe;
    bufmgr_gem->bufmgr.bo_export_to_prime = mos_bo_export_to_prime_xe;
    bufmgr_gem->bufmgr.get_devid = mos_get_devid_xe;
    bufmgr_gem->bufmgr.query_engines_count = mos_query_engines_count_xe;
    bufmgr_gem->bufmgr.query_engines = mos_query_engines_xe;
    bufmgr_gem->bufmgr.get_engine_class_size = mos_get_engine_class_size_xe;
    bufmgr_gem->bufmgr.query_sys_engines = mos_query_sysinfo_xe;
    bufmgr_gem->bufmgr.select_fixed_engine = mos_select_fixed_engine_xe;
    bufmgr_gem->bufmgr.query_device_blob = mos_query_device_blob_xe;
    bufmgr_gem->bufmgr.get_driver_info = mos_get_driver_info_xe;
    bufmgr_gem->bufmgr.destroy = mos_bufmgr_gem_unref_xe;
    bufmgr_gem->bufmgr.query_hw_ip_version = mos_query_hw_ip_version_xe;
    bufmgr_gem->bufmgr.get_platform_information = mos_get_platform_information_xe;
    bufmgr_gem->bufmgr.set_platform_information = mos_set_platform_information_xe;
    bufmgr_gem->bufmgr.enable_reuse = mos_enable_reuse_xe;
    bufmgr_gem->bufmgr.bo_reference = mos_bo_reference_xe;
    bufmgr_gem->bufmgr.bo_unreference = mos_bo_unreference_xe;
    bufmgr_gem->bufmgr.bo_set_softpin = mos_bo_set_softpin_xe;
    bufmgr_gem->bufmgr.enable_softpin = mos_enable_softpin_xe;
    bufmgr_gem->bufmgr.get_context_param = mo_get_context_param_xe;
    bufmgr_gem->bufmgr.get_reset_stats = mos_get_reset_stats_xe;
    bufmgr_gem->bufmgr.bo_get_softpin_targets_info = mos_bo_get_oca_exec_list_info_xe;
    bufmgr_gem->bufmgr.has_bsd2 = mos_has_bsd2_xe;
    bufmgr_gem->bufmgr.set_object_capture = mos_bo_set_object_capture_xe;
    bufmgr_gem->bufmgr.set_object_async = mos_bo_set_object_async_xe;
    bufmgr_gem->bufmgr.bo_context_exec3 = mos_bo_context_exec_with_sync_xe;

    bufmgr_gem->exec_queue_timeslice = EXEC_QUEUE_TIMESLICE_DEFAULT;
    MOS_READ_ENV_VARIABLE(INTEL_ENGINE_TIMESLICE, MOS_USER_FEATURE_VALUE_TYPE_INT32, bufmgr_gem->exec_queue_timeslice);
    if (bufmgr_gem->exec_queue_timeslice <= 0
            || bufmgr_gem->exec_queue_timeslice >= EXEC_QUEUE_TIMESLICE_MAX)
    {
        bufmgr_gem->exec_queue_timeslice = EXEC_QUEUE_TIMESLICE_DEFAULT;
    }
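
/*
 * Illustrative sketch (not part of the driver): the clamp above means an
 * INTEL_ENGINE_TIMESLICE override only takes effect when it lies strictly
 * between 0 and EXEC_QUEUE_TIMESLICE_MAX; any other value falls back to
 * EXEC_QUEUE_TIMESLICE_DEFAULT. Assuming MOS_READ_ENV_VARIABLE picks the
 * value up from the process environment, a hypothetical test harness could
 * set the override before the buffer manager is created; the value "100" and
 * its units are assumptions for illustration only. Excluded from the build.
 */
#if 0
static void example_override_engine_timeslice(void)
{
    // Must be set before mos_bufmgr_gem_init_xe() reads it.
    setenv("INTEL_ENGINE_TIMESLICE", "100", 1 /* overwrite */);
}
#endif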

    bufmgr_gem->mem_profiler_fd = -1;
    bufmgr_gem->mem_profiler_path = getenv("MEDIA_MEMORY_PROFILER_LOG");
    if (bufmgr_gem->mem_profiler_path != nullptr)
    {
        if (strcmp(bufmgr_gem->mem_profiler_path, "/sys/kernel/debug/tracing/trace_marker") == 0)
        {
            ret = bufmgr_gem->mem_profiler_fd = open(bufmgr_gem->mem_profiler_path, O_WRONLY);
        }
        else
        {
            ret = bufmgr_gem->mem_profiler_fd = open(bufmgr_gem->mem_profiler_path, O_WRONLY | O_CREAT | O_TRUNC, S_IRUSR | S_IWUSR);
        }

        if (-1 == ret)
        {
            MOS_DRM_ASSERTMESSAGE("Failed to open %s: %s", bufmgr_gem->mem_profiler_path, strerror(errno));
        }
    }
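
/*
 * Illustrative sketch (not part of the driver): the memory profiler writes to
 * whatever path MEDIA_MEMORY_PROFILER_LOG points at - either the ftrace
 * trace_marker node or a regular file that is created/truncated above. A
 * hypothetical way to enable it from a test setup follows; the log file path
 * is an assumption for illustration. Excluded from the build.
 */
#if 0
static void example_enable_memory_profiler(void)
{
    // Must be exported before mos_bufmgr_gem_init_xe() runs, since the
    // profiler fd is opened during buffer manager initialization.
    setenv("MEDIA_MEMORY_PROFILER_LOG", "/tmp/media_mem_profiler.log", 1);
}
#endif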

    dev->uc_versions[UC_TYPE_GUC_SUBMISSION].uc_type = UC_TYPE_INVALID;
    dev->uc_versions[UC_TYPE_HUC].uc_type = UC_TYPE_INVALID;

    bufmgr_gem->vm_id = __mos_vm_create_xe(&bufmgr_gem->bufmgr);
    __mos_query_mem_regions_instance_mask_xe(&bufmgr_gem->bufmgr);
    __mos_has_vram_xe(&bufmgr_gem->bufmgr);
    __mos_get_default_alignment_xe(&bufmgr_gem->bufmgr);

    DRMLISTADD(&bufmgr_gem->managers, &bufmgr_list);
    DRMINITLISTHEAD(&bufmgr_gem->named);

    mos_vma_heap_init(&bufmgr_gem->vma_heap[MEMZONE_SYS], MEMZONE_SYS_START, MEMZONE_SYS_SIZE);
    mos_vma_heap_init(&bufmgr_gem->vma_heap[MEMZONE_DEVICE], MEMZONE_DEVICE_START, MEMZONE_DEVICE_SIZE);
    mos_vma_heap_init(&bufmgr_gem->vma_heap[MEMZONE_PRIME], MEMZONE_PRIME_START, MEMZONE_PRIME_SIZE);

exit:
    pthread_mutex_unlock(&bufmgr_list_mutex);

    return bufmgr_gem != nullptr ? &bufmgr_gem->bufmgr : nullptr;
}

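/*
 * Illustrative sketch (not part of the driver): a minimal caller of
 * mos_bufmgr_gem_init_xe(). The render-node path and the batch_size value are
 * assumptions for illustration; teardown is expected to go through the
 * destroy hook installed above (mos_bufmgr_gem_unref_xe), assumed here to
 * take the bufmgr pointer. Excluded from the build.
 */
#if 0
static int example_create_bufmgr(void)
{
    int fd = open("/dev/dri/renderD128", O_RDWR);
    if (fd < 0)
    {
        return -errno;
    }

    struct mos_bufmgr *bufmgr = mos_bufmgr_gem_init_xe(fd, 4096 /* illustrative batch_size */);
    if (nullptr == bufmgr)
    {
        close(fd);
        return -ENODEV;
    }

    // ... allocate, map and execute through the vtable hooks set up above ...

    bufmgr->destroy(bufmgr);
    close(fd);
    return 0;
}
#endif
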
int mos_get_dev_id_xe(int fd, uint32_t *device_id)
{
    if (fd < 0 || nullptr == device_id)
    {
        return -EINVAL;
    }
    struct drm_xe_query_config *config = __mos_query_config_xe(fd);
    MOS_DRM_CHK_NULL_RETURN_VALUE(config, -ENODEV)

    *device_id = config->info[DRM_XE_QUERY_CONFIG_REV_AND_DEVICE_ID] & 0xffff;
    MOS_XE_SAFE_FREE(config);

    return MOS_XE_SUCCESS;
}

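/*
 * Illustrative sketch (not part of the driver): querying just the device id
 * with mos_get_dev_id_xe(), without creating a buffer manager. The
 * render-node path is an assumption for illustration. Excluded from the
 * build.
 */
#if 0
static int example_query_device_id(uint32_t *device_id)
{
    int fd = open("/dev/dri/renderD128", O_RDWR);
    if (fd < 0)
    {
        return -errno;
    }

    int ret = mos_get_dev_id_xe(fd, device_id);
    close(fd);
    return ret;
}
#endif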