xref: /aosp_15_r20/external/intel-media-driver/media_softlet/linux/common/os/xe/mos_bufmgr_xe.c (revision ba62d9d3abf0e404f2022b4cd7a85e107f48596f)
1 /*
2  * Copyright © 2023 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  *
23  * Authors:
24  *      Xu, Zhengguo <[email protected]>
25  */
26 
27 #ifdef HAVE_LIBGEN_H
28 #include <libgen.h>
29 #endif
30 #include <stdlib.h>
31 #include <fcntl.h>
32 #include <sys/stat.h>
33 #include <sys/mman.h>
34 #include <signal.h>
35 #include <getopt.h>
36 #include <stdlib.h>
37 #include <unistd.h>
38 #include <sys/wait.h>
39 #include <sys/types.h>
40 #include <sys/syscall.h>
41 #include <sys/utsname.h>
42 #include <termios.h>
43 #ifndef ETIME
44 #define ETIME ETIMEDOUT
45 #endif
46 
47 #include <map>
48 #include <vector>
49 #include <queue>
50 #include <list>
51 #include <mutex>
52 #include <shared_mutex>
53 #include <algorithm>
54 
55 #ifdef HAVE_VALGRIND
56 #include <valgrind/valgrind.h>
57 #include <valgrind/memcheck.h>
58 
59 #define VG(x) x
60 #else
61 #define VG(x) do {} while (0)
62 #endif
63 
64 #ifdef HAVE_CONFIG_H
65 #include "config.h"
66 #endif
67 #include "mos_bufmgr_api.h"
68 #include "mos_util_debug.h"
69 #include "intel_hwconfig_types.h"
70 #include "xf86drm.h"
71 #include "mos_vma.h"
72 #include "libdrm_lists.h"
73 #include "mos_bufmgr_xe.h"
74 #include "mos_synchronization_xe.h"
75 #include "mos_utilities.h"
76 #include "mos_bufmgr_util_debug.h"
77 #include "media_user_setting_value.h"
78 #include "linux_system_info.h"
79 #include "mos_oca_defs_specific.h"
80 
81 //These two structs are used by mos_bufmgr_priv.h
82 typedef struct MOS_OCA_EXEC_LIST_INFO mos_oca_exec_list_info;
83 //struct MEDIA_SYSTEM_INFO;
84 
85 #include "mos_bufmgr_priv.h"
86 
87 #define PAGE_SIZE_4K                   (1ull << 12)
88 #define MAX(a, b) ((a) > (b) ? (a) : (b))
89 
90 //mos_xe_mem_class is currently used as the index into default_alignment
91 enum mos_xe_mem_class
92 {
93     MOS_XE_MEM_CLASS_SYSMEM = 0,     //For DRM_XE_MEM_REGION_CLASS_SYSMEM
94     MOS_XE_MEM_CLASS_VRAM,           //For DRM_XE_MEM_REGION_CLASS_VRAM
95     MOS_XE_MEM_CLASS_MAX
96 };
97 
98 struct mos_xe_context {
99     struct mos_linux_context ctx;
100 
101     /**
102      * Always keep the latest available timeline index for
103      * such execution's fence-out point.
104      */
105     struct mos_xe_dep* timeline_dep;
106 
107     /**
108      * The UMD's dummy exec_queue id for exec_queue ctx.
109      */
110     uint32_t dummy_exec_queue_id;
111 
112     /**
113      * Indicates the ctx width.
114      */
115     uint8_t ctx_width;
116     /**
117      * Indicates the number of placements when creating the exec_queue.
118      */
119     uint8_t num_placements;
120     /**
121      * Indicates the engine class used to create the exec_queue.
122      */
123     uint16_t engine_class;
124     /**
125      * Indicates the engine capabilities of the queried exec_queue.
126      */
127     uint64_t engine_caps;
128     /**
129      * Indicates the creation flags; the current value should always be zero.
130      */
131     uint32_t flags;
132     /**
133      * Indicates whether this is a protected ctx.
134      */
135     bool is_protected;
136 
137     /**
138      * Indicates the exec_queue reset count on this context.
139      * Note: this count depends on context restore; if the upper layer queries the
140      * reset status before context restore, this value may be incorrect.
141      */
142     uint32_t reset_count;
143 };
144 
145 typedef struct mos_xe_device {
146     /**
147      * Note: by convention, hw_config[0] holds the total count of hw config data,
148      * and the data itself starts at hw_config[1] (see the sketch after this struct).
149      */
150     uint32_t *hw_config = nullptr;
151     struct drm_xe_query_config *config = nullptr;
152     struct drm_xe_query_engines *engines = nullptr;
153     struct drm_xe_query_mem_regions *mem_regions = nullptr;
154     struct drm_xe_query_gt_list *gt_list = nullptr;
155 
156     /**
157      * Note: by convention, uc_versions[0] holds the GuC version and uc_versions[1] the HuC version.
158      */
159     struct drm_xe_query_uc_fw_version uc_versions[UC_TYPE_MAX];
160 } mos_xe_device;
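/*
 * Editorial sketch (not part of the driver): walking hw_config under the
 * convention noted above. It assumes the blob follows the KLV layout implied
 * by intel_hwconfig_types.h (attribute id, value dword count, values...);
 * treat that layout and the helper name as assumptions.
 */
static inline void __example_walk_hw_config(const uint32_t *hw_config)
{
    if (nullptr == hw_config)
        return;
    uint32_t num_dwords = hw_config[0];   // total count stored in hw_config[0]
    const uint32_t *data = &hw_config[1]; // data starts from hw_config[1]
    uint32_t i = 0;
    while (i + 1 < num_dwords)
    {
        uint32_t attribute  = data[i];     // e.g. an INTEL_HWCONFIG_* id
        uint32_t num_values = data[i + 1];
        if (i + 2 + num_values > num_dwords)
            break;                         // malformed blob, stop walking
        (void)attribute;                   // a real consumer would switch on it
        // values occupy data[i + 2] .. data[i + 1 + num_values]
        i += 2 + num_values;
    }
}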
161 
162 typedef struct mos_xe_bufmgr_gem {
163     struct mos_bufmgr bufmgr;
164 
165     atomic_t ref_count;
166 
167     int fd;
168 
169     std::recursive_mutex m_lock;
170 
171     drmMMListHead managers;
172     drmMMListHead named;
173 
174     mos_vma_heap vma_heap[MEMZONE_COUNT];
175 
176     bool object_capture_disabled; // Note: unused on xe; remove it in the future.
177 
178     #define MEM_PROFILER_BUFFER_SIZE 256
179     char mem_profiler_buffer[MEM_PROFILER_BUFFER_SIZE];
180     char* mem_profiler_path;
181     int mem_profiler_fd;
182 
183     uint32_t gt_id;
184 
185     /**
186      * This RW lock is used to avoid concurrent reads and writes of the same sync obj in the KMD.
187      * Reading sync obj ioctls: exec and syncobj wait.
188      * Writing sync obj ioctls: reset sync obj, destroy sync obj and create sync obj.
189      */
190     std::shared_timed_mutex sync_obj_rw_lock;
191 
192     /**
193      * Save the pair of UMD dummy exec_queue id and ctx pointer.
194      */
195     std::map<uint32_t, struct mos_xe_context*> global_ctx_info;
196 
197     uint32_t vm_id;
198 
199     /**
200      * Everything queried from the KMD that describes hw information.
201      */
202     struct mos_xe_device xe_device;
203 
204     //Note: DON'T put these fields in xe_device
205     bool     has_vram;
206     uint8_t  va_bits;
207     /** bitmask of all memory regions */
208     uint64_t mem_regions_mask;
209     /** @default_alignment: safe alignment regardless of region location */
210     uint32_t default_alignment[MOS_XE_MEM_CLASS_MAX] = {PAGE_SIZE_4K, PAGE_SIZE_4K};
211     //End of Note
212 
213     /**
214      * Indicates whether gpu-gpu and cpu-gpu synchronization is disabled.
215      * This is mainly for debug purposes; synchronization should always be enabled by default.
216      * It can be disabled via the env variable INTEL_SYNCHRONIZATION_DISABLE (see the sketch after this struct).
217      */
218     bool is_disable_synchronization;
219 
220     /** indicates the exec_queue timeslice property */
221 #define EXEC_QUEUE_TIMESLICE_DEFAULT    -1
222 #define EXEC_QUEUE_TIMESLICE_MAX        100000 //100ms
223     int32_t exec_queue_timeslice;
224 } mos_xe_bufmgr_gem;
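/*
 * Editorial sketch: how is_disable_synchronization above could be derived
 * from the INTEL_SYNCHRONIZATION_DISABLE environment variable. The real
 * driver reads user settings through its own utilities; the helper name
 * and the plain getenv() path here are illustrative assumptions only.
 */
static inline bool __example_is_sync_disabled(void)
{
    const char *env = getenv("INTEL_SYNCHRONIZATION_DISABLE");
    // Treat any non-zero numeric value as "disable synchronization".
    return (env != nullptr) && (atoi(env) != 0);
}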
225 
226 typedef struct mos_xe_exec_bo {
227     /** indicates the real exec bo */
228     struct mos_linux_bo *bo;
229 
230     /**
231      * Save read, write flags etc.
232      * Two flags defined here: EXEC_OBJECT_READ_XE and EXEC_OBJECT_WRITE_XE.
233      * Whether this bo needs exec sync depends on these flags.
234      */
235     uint32_t flags;
236 } mos_xe_exec_bo;
237 
238 typedef struct mos_xe_bo_gem {
239     /**
240      * Maximum size for bo name
241      */
242 #define MAX_NAME_SIZE 128
243 
244     struct mos_linux_bo bo;
245 
246     /**
247      * Reference count
248      */
249     atomic_t ref_count;
250     /**
251      * Map count, incremented when bo map is called
252      */
253     atomic_t map_count;
254 
255     //Note7: unify gem_handle and bo.handle by deleting this one; refine mos_linux_bo.handle to the type uint32_t
256     /**
257      * Bo handle allocated from drm.
258      * Note: combine with bo.handle to use the same one.
259      */
260     uint32_t gem_handle;
261     /**
262      * Saves the bo name; this is for debug usage.
263      * It is suggested to give the bo a name when allocating it.
264      */
265     char name[MAX_NAME_SIZE];
266 
267     /**
268      *
269      * List contains prime fd'd objects
270      */
271     drmMMListHead name_list;
272 
273     /**
274      * Mapped address for the buffer, saved across map/unmap cycles
275      */
276     void *mem_virtual;
277 
278     /**
279      * Boolean of whether this buffer was allocated with userptr
280      */
281     bool is_userptr;
282 
283     /**
284      * Memory region used when creating surfaces in local/system memory.
285      * This field only indicates the memory region type, not a memory region instance.
286      */
287     int mem_region;
288 
289     /**
290      * We should always get the syncobj handle from the bo handle via the 4 steps below each time:
291      * 1. export the prime_fd from bo.handle
292      * 2. export the syncfile fd from the prime_fd
293      * 3. import the syncobj_handle from the syncfile
294      * 4. close the prime_fd and syncfile fd (see the sketch after struct mos_xe_external_bo_info).
295      *
296      * If the UMD wants external processes to sync with it, the UMD should always import its batch
297      * syncobj handle into each external bo's dma sync buffer.
298      *
299      * Boolean of whether this buffer is imported from external
300      */
301     bool is_imported;
302     /**
303      * @cpu_caching: The CPU caching mode to select for this object. If
304      * mmaping the object the mode selected here will also be used.
305      *
306      * Supported values:
307      *
308      * DRM_XE_GEM_CPU_CACHING_WB: Allocate the pages with write-back
309      * caching.  On iGPU this can't be used for scanout surfaces. Currently
310      * not allowed for objects placed in VRAM.
311      *
312      * DRM_XE_GEM_CPU_CACHING_WC: Allocate the pages as write-combined. This
313      * is uncached. Scanout surfaces should likely use this. All objects
314      * that can be placed in VRAM must use this.
315      */
316     uint16_t cpu_caching;
317 
318     /**
319      * @pat_index: The platform defined @pat_index to use for this mapping.
320      * The index basically maps to some predefined memory attributes,
321      * including things like caching, coherency, compression etc.  The exact
322      * meaning of the pat_index is platform specific. When the KMD sets up
323      * the binding the index here is encoded into the ppGTT PTE.
324      *
325      * For coherency the @pat_index needs to be at least 1way coherent when
326      * drm_xe_gem_create.cpu_caching is DRM_XE_GEM_CPU_CACHING_WB. The KMD
327      * will extract the coherency mode from the @pat_index and reject if
328      * there is a mismatch (see note below for pre-MTL platforms).
329      *
330      * Note: On pre-MTL platforms there is only a caching mode and no
331      * explicit coherency mode, but on such hardware there is always a
332      * shared-LLC (or is dgpu) so all GT memory accesses are coherent with
333      * CPU caches even with the caching mode set as uncached.  It's only the
334      * display engine that is incoherent (on dgpu it must be in VRAM which
335      * is always mapped as WC on the CPU). However to keep the uapi somewhat
336      * consistent with newer platforms the KMD groups the different cache
337      * levels into the following coherency buckets on all pre-MTL platforms:
338      *
339      *  ppGTT UC -> COH_NONE
340      *  ppGTT WC -> COH_NONE
341      *  ppGTT WT -> COH_NONE
342      *  ppGTT WB -> COH_AT_LEAST_1WAY
343      *
344      * In practice UC/WC/WT should only ever used for scanout surfaces on
345      * such platforms (or perhaps in general for dma-buf if shared with
346      * another device) since it is only the display engine that is actually
347      * incoherent.  Everything else should typically use WB given that we
348      * have a shared-LLC.  On MTL+ this completely changes and the HW
349      * defines the coherency mode as part of the @pat_index, where
350      * incoherent GT access is possible.
351      *
352      * Note: For userptr and externally imported dma-buf the kernel expects
353      * either 1WAY or 2WAY for the @pat_index.
354      */
355     uint16_t pat_index;
356 
357     /**
358      * Boolean of whether this buffer is exported to external
359      */
360     bool is_exported;
361 
362     /**
363      * A cmd bo has an exec bo list which saves all exec bos in it.
364      * The upper-layer caller should always update this list before exec submission and clear it afterwards.
365      */
366     std::map<uintptr_t, struct mos_xe_exec_bo> exec_list;
367 
368 #define INVALID_EXEC_QUEUE_ID    -1
369     /**
370      * Save last dummy write exec_queue id.
371      * Init this field as INVALID_EXEC_QUEUE_ID at begining.
372      */
373     uint32_t last_exec_write_exec_queue;
374 
375     /**
376      * Save last dummy read exec_queue id.
377      * Init this field as INVALID_EXEC_QUEUE_ID at begining.
378      */
379     uint32_t last_exec_read_exec_queue;
380 
381     /**
382      * Read dependents: pairs of dummy EXEC_QUEUE_ID and mos_xe_bo_dep.
383      * This map saves the read deps of this bo on all exec_queues;
384      * exec checks the operation flags to pick the dep from this map, adds it to the exec sync array and updates the map after exec.
385      * Refer to the exec call for more details.
386      */
387     std::map<uint32_t, struct mos_xe_bo_dep> read_deps;
388 
389     /**
390      * Write dependents: pairs of dummy EXEC_QUEUE_ID and mos_xe_bo_dep.
391      * This map saves the write deps of this bo on all exec_queues;
392      * exec checks the operation flags to pick the dep from this map, adds it to the exec sync array and updates the map after exec.
393      * Refer to the exec call for more details.
394      */
395     std::map<uint32_t, struct mos_xe_bo_dep> write_deps;
396 
397 } mos_xe_bo_gem;
398 
399 struct mos_xe_external_bo_info {
400     /**
401      * syncobj handle created by the UMD to import the external bo's syncfile
402      */
403     int syncobj_handle;
404     /**
405      * prime fd exported from the external bo handle
406      */
407     int prime_fd;
408 };
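/*
 * Editorial sketch of the 4-step derivation described in mos_xe_bo_gem above,
 * expressed with libdrm and dma-buf primitives. Assumptions: this helper is
 * illustrative only, it needs <sys/ioctl.h> and <linux/dma-buf.h>
 * (DMA_BUF_IOCTL_EXPORT_SYNC_FILE, kernel 6.0+), and error handling is
 * abbreviated. Returns a syncobj handle, or 0 on failure.
 */
static inline uint32_t __example_syncobj_from_bo_handle(int fd, uint32_t bo_handle)
{
    int prime_fd = -1;
    uint32_t syncobj_handle = 0;
    struct dma_buf_export_sync_file export_sync;

    /* step 1: export a prime fd from bo.handle */
    if (drmPrimeHandleToFD(fd, bo_handle, DRM_CLOEXEC | DRM_RDWR, &prime_fd))
        return 0;

    /* step 2: export a syncfile fd from the prime (dma-buf) fd */
    memclear(export_sync);
    export_sync.flags = DMA_BUF_SYNC_READ;
    if (ioctl(prime_fd, DMA_BUF_IOCTL_EXPORT_SYNC_FILE, &export_sync))
    {
        close(prime_fd);
        return 0;
    }

    /* step 3: import the syncfile into a freshly created syncobj */
    if (0 == drmSyncobjCreate(fd, 0, &syncobj_handle))
        drmSyncobjImportSyncFile(fd, syncobj_handle, export_sync.fd);

    /* step 4: close the prime fd and the syncfile fd */
    close(export_sync.fd);
    close(prime_fd);
    return syncobj_handle;
}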
409 
410 #define MOS_UNIMPLEMENT(param)    (void)(param)
411 
412 static pthread_mutex_t bufmgr_list_mutex = PTHREAD_MUTEX_INITIALIZER;
413 static drmMMListHead bufmgr_list = { &bufmgr_list, &bufmgr_list };
414 
415 static void mos_bo_free_xe(struct mos_linux_bo *bo);
416 static int mos_query_engines_count_xe(struct mos_bufmgr *bufmgr, unsigned int *nengine);
417 int mos_query_engines_xe(struct mos_bufmgr *bufmgr,
418                       __u16 engine_class,
419                       __u64 caps,
420                       unsigned int *nengine,
421                       void *engine_map);
422 static void mos_gem_bo_wait_rendering_xe(struct mos_linux_bo *bo);
423 
424 static struct mos_xe_bufmgr_gem *
425 mos_bufmgr_gem_find(int fd)
426 {
427     struct mos_xe_bufmgr_gem *bufmgr_gem;
428 
429     DRMLISTFOREACHENTRY(bufmgr_gem, &bufmgr_list, managers) {
430         if (bufmgr_gem->fd == fd) {
431             atomic_inc(&bufmgr_gem->ref_count);
432             return bufmgr_gem;
433         }
434     }
435 
436     return nullptr;
437 }
438 
439 #define MOS_DRM_CHK_XE_DEV(xe_dev, info, query_func, retval)                 \
440     MOS_DRM_CHK_NULL_RETURN_VALUE(xe_dev, retval);                           \
441     if (xe_dev->info == nullptr)                                             \
442     {                                                                        \
443         xe_dev->info = query_func(fd);                                       \
444         MOS_DRM_CHK_NULL_RETURN_VALUE(xe_dev->info, retval);                 \
445     }
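/*
 * Usage note (editorial): with a local `fd` in scope,
 *     MOS_DRM_CHK_XE_DEV(dev, config, __mos_query_config_xe, nullptr)
 * lazily populates dev->config through __mos_query_config_xe(fd) on first use
 * and makes the enclosing function return `retval` on any failure. Note the
 * macro relies on a variable literally named `fd` at the call site.
 */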
446 
447 static struct drm_xe_query_gt_list *
448 __mos_query_gt_list_xe(int fd)
449 {
450     int ret = 0;
451     struct drm_xe_query_gt_list *gt_list;
452     struct drm_xe_device_query query;
453     memclear(query);
454     query.query = DRM_XE_DEVICE_QUERY_GT_LIST;
455 
456     ret = drmIoctl(fd, DRM_IOCTL_XE_DEVICE_QUERY,
457                 &query);
458     if (ret || !query.size)
459     {
460         return nullptr;
461     }
462 
463     gt_list = (drm_xe_query_gt_list *)calloc(1, query.size);
464     MOS_DRM_CHK_NULL_RETURN_VALUE(gt_list, nullptr);
465 
466     query.data = (uintptr_t)(gt_list);
467     ret = drmIoctl(fd, DRM_IOCTL_XE_DEVICE_QUERY,
468                 &query);
469     if (ret || !query.size || 0 == gt_list->num_gt)
470     {
471         MOS_XE_SAFE_FREE(gt_list);
472         return nullptr;
473     }
474 
475     return gt_list;
476 }
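/*
 * Editorial sketch: the two-pass DRM_IOCTL_XE_DEVICE_QUERY pattern used by
 * __mos_query_gt_list_xe() above and by the query helpers below. The first
 * ioctl (data == 0) asks the KMD for the payload size, the second fills the
 * buffer. `which` is a DRM_XE_DEVICE_QUERY_* id; the caller frees the result.
 * The helper name is hypothetical.
 */
static inline void *__example_xe_device_query(int fd, uint32_t which)
{
    struct drm_xe_device_query query;
    memclear(query);
    query.query = which;

    // pass 1: size probe
    if (drmIoctl(fd, DRM_IOCTL_XE_DEVICE_QUERY, &query) || !query.size)
        return nullptr;

    void *data = calloc(1, query.size);
    if (nullptr == data)
        return nullptr;

    // pass 2: fetch the payload
    query.data = (uintptr_t)data;
    if (drmIoctl(fd, DRM_IOCTL_XE_DEVICE_QUERY, &query))
    {
        free(data);
        return nullptr;
    }
    return data;
}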
477 
478 static uint32_t __mos_query_mem_regions_instance_mask_xe(struct mos_bufmgr *bufmgr)
479 {
480     MOS_DRM_CHK_NULL_RETURN_VALUE(bufmgr, 0)
481     struct mos_xe_bufmgr_gem *bufmgr_gem = (struct mos_xe_bufmgr_gem *)bufmgr;
482     struct mos_xe_device *dev = &bufmgr_gem->xe_device;
483     int fd = bufmgr_gem->fd;
484     uint64_t __memory_regions = 0;
485 
486     MOS_DRM_CHK_XE_DEV(dev, gt_list, __mos_query_gt_list_xe, 0)
487 
488     struct drm_xe_query_gt_list *gt_list = dev->gt_list;
489     for (int i = 0; i < gt_list->num_gt; i++) {
490         /**
491          * Note: __memory_regions is the mem region instance mask on all tiles and gts
492          */
493         __memory_regions |= gt_list->gt_list[i].near_mem_regions |
494             gt_list->gt_list[i].far_mem_regions;
495     }
496 
497     bufmgr_gem->mem_regions_mask = __memory_regions;
498 
499     return __memory_regions;
500 }
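/*
 * Example (editorial, hypothetical numbers): on a device where GT0 reports
 * near_mem_regions = 0x1 (sysmem) and far_mem_regions = 0x4, and GT1 reports
 * 0x2 and 0x4, the accumulated mask is 0x7: bit 0 for sysmem plus bits 1-2
 * for two VRAM instances. mos_bo_alloc_xe() later splits this mask into
 * sysmem (bit 0) and VRAM (all bits except bit 0) placements.
 */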
501 
502 static struct drm_xe_query_mem_regions *
503 __mos_query_mem_regions_xe(int fd)
504 {
505     int ret = 0;
506     struct drm_xe_query_mem_regions *mem_regions;
507     struct drm_xe_device_query query;
508     memclear(query);
509     query.query = DRM_XE_DEVICE_QUERY_MEM_REGIONS;
510 
511     ret = drmIoctl(fd, DRM_IOCTL_XE_DEVICE_QUERY,
512                 &query);
513     if (ret || !query.size)
514     {
515         return nullptr;
516     }
517 
518     mem_regions = (drm_xe_query_mem_regions *)calloc(1, query.size);
519     MOS_DRM_CHK_NULL_RETURN_VALUE(mem_regions, nullptr);
520 
521     query.data = (uintptr_t)(mem_regions);
522     ret = drmIoctl(fd, DRM_IOCTL_XE_DEVICE_QUERY, &query);
523     if (ret || !query.size || 0 == mem_regions->num_mem_regions)
524     {
525         MOS_XE_SAFE_FREE(mem_regions);
526         return nullptr;
527     }
528 
529     return mem_regions;
530 }
531 
532 uint8_t __mos_query_vram_region_count_xe(struct mos_xe_device *dev, int fd)
533 {
534     uint8_t vram_regions = 0;
535 
536     MOS_DRM_CHK_XE_DEV(dev, mem_regions, __mos_query_mem_regions_xe, 0)
537 
538     struct drm_xe_query_mem_regions *mem_regions = dev->mem_regions;
539     for (int i = 0; i < mem_regions->num_mem_regions; i++)
540     {
541         if (mem_regions->mem_regions[i].mem_class == DRM_XE_MEM_REGION_CLASS_VRAM)
542         {
543             vram_regions++;
544         }
545     }
546 
547     return vram_regions;
548 }
549 
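/*
 * Note (editorial): on xe, reading the per-GT force_reset debugfs node is
 * what triggers the reset, hence the `cat` below. The fd argument is unused
 * and the DRI card index is hard-coded to 0 here.
 */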
550 int mos_force_gt_reset_xe(int fd, int gt_id)
551 {
552     char reset_string[128];
553 
554     sprintf(reset_string, "cat /sys/kernel/debug/dri/0/gt%d/force_reset", gt_id);
555     return system(reset_string);
556 }
557 
558 static struct drm_xe_query_config *
559 __mos_query_config_xe(int fd)
560 {
561     struct drm_xe_query_config *config;
562     struct drm_xe_device_query query;
563     int ret = 0;
564 
565     memclear(query);
566     query.query = DRM_XE_DEVICE_QUERY_CONFIG;
567     ret = drmIoctl(fd, DRM_IOCTL_XE_DEVICE_QUERY, (void *)&query);
568     if (ret || !query.size)
569     {
570         return nullptr;
571     }
572 
573     config = (drm_xe_query_config *) malloc(query.size);
574     if (config != nullptr)
575     {
576         memset(config, 0, query.size);
577     }
578     else
579     {
580         MOS_DRM_ASSERTMESSAGE("malloc config failed");
581         return nullptr;
582     }
583 
584     query.data = (uintptr_t)config;
585     ret = drmIoctl(fd, DRM_IOCTL_XE_DEVICE_QUERY, (void *)&query);
586     if (ret || !query.size || 0 == config->num_params)
587     {
588         MOS_XE_SAFE_FREE(config);
589         return nullptr;
590     }
591 
592     return config;
593 }
594 
595 static int
596 __mos_get_default_alignment_xe(struct mos_bufmgr *bufmgr)
597 {
598     MOS_DRM_CHK_NULL_RETURN_VALUE(bufmgr, -EINVAL)
599     struct mos_xe_bufmgr_gem *bufmgr_gem = (struct mos_xe_bufmgr_gem *)bufmgr;
600     struct mos_xe_device *dev = &bufmgr_gem->xe_device;
601     int fd = bufmgr_gem->fd;
602     MOS_DRM_CHK_XE_DEV(dev, mem_regions, __mos_query_mem_regions_xe, -ENODEV)
603     struct drm_xe_query_mem_regions *mem_regions = dev->mem_regions;
604     uint16_t mem_class;
605 
606     for (int i = 0; i < mem_regions->num_mem_regions; i++)
607     {
608         if (DRM_XE_MEM_REGION_CLASS_SYSMEM == mem_regions->mem_regions[i].mem_class)
609         {
610             mem_class = MOS_XE_MEM_CLASS_SYSMEM;
611         }
612         else if (DRM_XE_MEM_REGION_CLASS_VRAM == mem_regions->mem_regions[i].mem_class)
613         {
614             mem_class = MOS_XE_MEM_CLASS_VRAM;
615         }
616         else
617         {
618             MOS_DRM_ASSERTMESSAGE("Unsupported mem class");
619             return -EINVAL;
620         }
621 
622         if (bufmgr_gem->default_alignment[mem_class] < mem_regions->mem_regions[i].min_page_size)
623         {
624             bufmgr_gem->default_alignment[mem_class] = mem_regions->mem_regions[i].min_page_size;
625         }
626     }
627 
628     return 0;
629 }
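/*
 * Example (editorial): if the KMD reports min_page_size 4K for sysmem and
 * 64K for VRAM, default_alignment ends up as {4K, 64K}; mos_bo_alloc_xe()
 * then rounds allocation sizes and VA alignments up accordingly.
 */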
630 
631 /**
632  * Note: Need to add this func to bufmgr api later
633  */
634 static int
635 mos_query_uc_version_xe(struct mos_bufmgr *bufmgr, struct mos_drm_uc_version *version)
636 {
637     int ret = 0;
638     struct mos_xe_bufmgr_gem *bufmgr_gem = (struct mos_xe_bufmgr_gem *)bufmgr;
639     int fd = bufmgr_gem->fd;
640     struct mos_xe_device *dev = &bufmgr_gem->xe_device;
641 
642     if (bufmgr && version && version->uc_type < UC_TYPE_MAX)
643     {
644         /**
645          * Note: query the uc version from the KMD if there is no cached data in bufmgr; otherwise use the cached data.
646          */
647         if (dev->uc_versions[version->uc_type].uc_type != version->uc_type)
648         {
649             struct drm_xe_device_query query;
650             memclear(query);
651             query.size = sizeof(struct drm_xe_query_uc_fw_version);
652             query.query = DRM_XE_DEVICE_QUERY_UC_FW_VERSION;
653             memclear(dev->uc_versions[version->uc_type]);
654             dev->uc_versions[version->uc_type].uc_type = version->uc_type;
655             query.data = (uintptr_t)&dev->uc_versions[version->uc_type];
656 
657             ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_XE_DEVICE_QUERY,
658                         &query);
659             if (ret)
660             {
661                 memclear(dev->uc_versions[version->uc_type]);
662                 dev->uc_versions[version->uc_type].uc_type = UC_TYPE_INVALID;
663                 MOS_DRM_ASSERTMESSAGE("Failed to query UC version, uc type: %d, errno: %d", version->uc_type, ret);
664                 return ret;
665             }
666         }
667 
668         version->major_version = dev->uc_versions[version->uc_type].major_ver;
669         version->minor_version = dev->uc_versions[version->uc_type].minor_ver;
670     }
671 
672     return ret;
673 }
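/*
 * Usage sketch (editorial): querying the GuC version through the cached path
 * above. The uc type id is assumed from the mos_bufmgr API headers.
 *
 *     struct mos_drm_uc_version ver;
 *     memclear(ver);
 *     ver.uc_type = UC_TYPE_GUC_SUBMISSION; // assumed enum value
 *     if (0 == mos_query_uc_version_xe(bufmgr, &ver))
 *     {
 *         // use ver.major_version / ver.minor_version
 *     }
 */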
674 
675 bool __mos_has_vram_xe(struct mos_bufmgr *bufmgr)
676 {
677     MOS_DRM_CHK_NULL_RETURN_VALUE(bufmgr, 0)
678     struct mos_xe_bufmgr_gem *bufmgr_gem = (struct mos_xe_bufmgr_gem *)bufmgr;
679     struct mos_xe_device *dev = &bufmgr_gem->xe_device;
680     int fd = bufmgr_gem->fd;
681     MOS_DRM_CHK_XE_DEV(dev, config, __mos_query_config_xe, 0)
682     struct drm_xe_query_config *config = dev->config;
683     bool has_vram = ((config->info[DRM_XE_QUERY_CONFIG_FLAGS] & DRM_XE_QUERY_CONFIG_FLAG_HAS_VRAM) > 0);
684     bufmgr_gem->has_vram = has_vram;
685     return has_vram;
686 }
687 
688 uint8_t __mos_query_va_bits_xe(struct mos_bufmgr *bufmgr)
689 {
690     uint8_t va_bits = 48;
691     MOS_DRM_CHK_NULL_RETURN_VALUE(bufmgr, va_bits)
692     struct mos_xe_bufmgr_gem *bufmgr_gem = (struct mos_xe_bufmgr_gem *)bufmgr;
693     struct mos_xe_device *dev = &bufmgr_gem->xe_device;
694     int fd = bufmgr_gem->fd;
695     bufmgr_gem->va_bits = va_bits;
696     MOS_DRM_CHK_XE_DEV(dev, config, __mos_query_config_xe, va_bits)
697     struct drm_xe_query_config *config = dev->config;
698     va_bits = config->info[DRM_XE_QUERY_CONFIG_VA_BITS] & 0xff;
699     bufmgr_gem->va_bits = va_bits;
700     return va_bits;
701 }
702 
703 static uint64_t
704 mos_get_platform_information_xe(struct mos_bufmgr *bufmgr)
705 {
706     MOS_DRM_CHK_NULL_RETURN_VALUE(bufmgr, 0)
707     return bufmgr->platform_information;
708 }
709 
710 static void
711 mos_set_platform_information_xe(struct mos_bufmgr *bufmgr, uint64_t p)
712 {
713     if (bufmgr)
714         bufmgr->platform_information |= p;
715 }
716 
717 static enum mos_memory_zone
718 __mos_bo_memzone_for_address_xe(uint64_t address)
719 {
720     if (address >= MEMZONE_PRIME_START)
721         return MEMZONE_PRIME;
722     else if (address >= MEMZONE_DEVICE_START)
723         return MEMZONE_DEVICE;
724     else
725         return MEMZONE_SYS;
726 }
727 
728 static void
729 __mos_bo_vma_free_xe(struct mos_bufmgr *bufmgr,
730          uint64_t address,
731          uint64_t size)
732 {
733     CHK_CONDITION(nullptr == bufmgr, "nullptr bufmgr.\n", );
734     struct mos_xe_bufmgr_gem *bufmgr_gem = (struct mos_xe_bufmgr_gem *) bufmgr;
735 
736     CHK_CONDITION(0ull == address, "invalid address.\n", );
737     enum mos_memory_zone memzone = __mos_bo_memzone_for_address_xe(address);
738     mos_vma_heap_free(&bufmgr_gem->vma_heap[memzone], address, size);
739 }
740 
741 static void
742 __mos_bo_mark_mmaps_incoherent_xe(struct mos_linux_bo *bo)
743 {
744 #if HAVE_VALGRIND
745     struct mos_xe_bo_gem *bo_gem = (struct mos_xe_bo_gem *) bo;
746 
747     if (bo_gem->mem_virtual)
748         VALGRIND_MAKE_MEM_NOACCESS(bo_gem->mem_virtual, bo->size);
749 #endif
750 }
751 
752 static inline void
753 mos_bo_reference_xe(struct mos_linux_bo *bo)
754 {
755     struct mos_xe_bo_gem *bo_gem = (struct mos_xe_bo_gem *) bo;
756 
757     atomic_inc(&bo_gem->ref_count);
758 }
759 
760 drm_export void mos_bo_unreference_xe(struct mos_linux_bo *bo)
761 {
762     struct mos_xe_bo_gem *bo_gem = (struct mos_xe_bo_gem *) bo;
763 
764     if (atomic_read(&bo_gem->ref_count) <= 0)
765         return;
766 
767     if (atomic_dec_and_test(&bo_gem->ref_count))
768     {
769         /* release memory associated with this object */
770         /* Clear any left-over mappings */
771         if (atomic_read(&bo_gem->map_count) > 0)
772         {
773             atomic_set(&bo_gem->map_count, 0);
774             __mos_bo_mark_mmaps_incoherent_xe(bo);
775         }
776 
777         DRMLISTDEL(&bo_gem->name_list);
778 
779         mos_bo_free_xe(bo);
780     }
781 }
782 
783 static uint32_t
784 __mos_vm_create_xe(struct mos_bufmgr *bufmgr)
785 {
786     struct mos_xe_bufmgr_gem *bufmgr_gem = (struct mos_xe_bufmgr_gem *)bufmgr;
787     struct drm_xe_vm_create vm;
788     int ret;
789 
790     memclear(vm);
791     ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_XE_VM_CREATE, &vm);
792     if (ret != 0)
793     {
794         MOS_DRM_ASSERTMESSAGE("DRM_IOCTL_XE_VM_CREATE failed: %s",
795             strerror(errno));
796         return INVALID_VM;
797     }
798 
799     return vm.vm_id;
800 }
801 
802 static void
803 __mos_vm_destroy_xe(struct mos_bufmgr *bufmgr, uint32_t vm_id)
804 {
805     struct mos_xe_bufmgr_gem *bufmgr_gem = (struct mos_xe_bufmgr_gem *)bufmgr;
806     struct drm_xe_vm_destroy vm_destroy;
807     int ret;
808 
809     if (INVALID_VM == vm_id)
810     {
811         MOS_DRM_ASSERTMESSAGE("invalid vm_id");
812         return;
813     }
814 
815     memclear(vm_destroy);
816     vm_destroy.vm_id = vm_id;
817     ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_XE_VM_DESTROY, &vm_destroy);
818     if (ret != 0)
819     {
820         MOS_DRM_ASSERTMESSAGE("DRM_IOCTL_XE_VM_DESTROY failed: %s",
821             strerror(errno));
822     }
823 }
824 
825 
826 static uint32_t
827 mos_vm_create_xe(struct mos_bufmgr *bufmgr)
828 {
829     struct mos_xe_bufmgr_gem *bufmgr_gem = (struct mos_xe_bufmgr_gem *)bufmgr;
830 
831     if (bufmgr_gem->vm_id != INVALID_VM)
832     {
833         return bufmgr_gem->vm_id;
834     }
835     else
836     {
837         return __mos_vm_create_xe(bufmgr);
838     }
839 }
840 
841 static void
842 mos_vm_destroy_xe(struct mos_bufmgr *bufmgr, uint32_t vm_id)
843 {
844     struct mos_xe_bufmgr_gem *bufmgr_gem = (struct mos_xe_bufmgr_gem *)bufmgr;
845 
846     if (vm_id != bufmgr_gem->vm_id)
847     {
848         __mos_vm_destroy_xe(bufmgr, vm_id);
849     }
850 }
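/*
 * Design note (editorial): mos_vm_create_xe() hands out the bufmgr's shared
 * vm_id when one exists, and mos_vm_destroy_xe() deliberately skips that
 * shared VM; only ad hoc VMs from __mos_vm_create_xe() are destroyed here.
 * The shared VM is expected to be released on bufmgr teardown instead.
 */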
851 
852 static struct mos_linux_context *
853 mos_context_create_shared_xe(
854                             struct mos_bufmgr *bufmgr,
855                             mos_linux_context* ctx,
856                             __u32 flags,
857                             bool bContextProtected,
858                             void *engine_map,
859                             uint8_t ctx_width,
860                             uint8_t num_placements,
861                             uint32_t ctx_type)
862 {
863     MOS_UNUSED(ctx);
864     MOS_UNUSED(ctx_type);
865     MOS_UNUSED(bContextProtected);
866 
867     MOS_DRM_CHK_NULL_RETURN_VALUE(bufmgr, nullptr)
868     MOS_DRM_CHK_NULL_RETURN_VALUE(engine_map, nullptr)
869 
870     static uint32_t dummy_exec_queue_id = 0;
871     struct mos_xe_bufmgr_gem *bufmgr_gem = (struct mos_xe_bufmgr_gem *)bufmgr;
872     struct mos_xe_context *context = nullptr;
873     struct drm_xe_exec_queue_create create;
874     int ret;
875     uint16_t engine_class = ((struct drm_xe_engine_class_instance *)engine_map)[0].engine_class;
876 
877     memclear(create);
878     create.width = ctx_width;
879     create.num_placements = num_placements;
880     create.vm_id = bufmgr_gem->vm_id;
881     create.flags = flags;
882     create.instances = (uintptr_t)engine_map;
883 
884     /**
885      * Note: must use MOS_New to allocate buffer instead of malloc since mos_xe_context
886      * contains std::vector and std::queue; otherwise neither would be constructed.
887      */
888     context = MOS_New(mos_xe_context);
889     MOS_DRM_CHK_NULL_RETURN_VALUE(context, nullptr)
890 
891     /**
892      * Set the exec_queue timeslice for render/compute only, as a WA to ensure exec ordering.
893      * Note: this works around a potential KMD issue where an exec_queue is preempted by many workloads of the same priority.
894      */
895     if ((engine_class == DRM_XE_ENGINE_CLASS_RENDER
896                 || engine_class == DRM_XE_ENGINE_CLASS_COMPUTE)
897                 && (ctx_width * num_placements == 1)
898                 && bufmgr_gem->exec_queue_timeslice != EXEC_QUEUE_TIMESLICE_DEFAULT)
899     {
900         struct drm_xe_ext_set_property timeslice;
901         memclear(timeslice);
902         timeslice.property = DRM_XE_EXEC_QUEUE_SET_PROPERTY_TIMESLICE;
903         /**
904          * Note: this value is the maximum timeslice for a workload, not the real waiting time.
905          */
906         timeslice.value = bufmgr_gem->exec_queue_timeslice;
907         timeslice.base.name = DRM_XE_EXEC_QUEUE_EXTENSION_SET_PROPERTY;
908         create.extensions = (uintptr_t)(&timeslice);
909         MOS_DRM_NORMALMESSAGE("WA: exec_queue timeslice set by engine class(%d), value(%d)",
910                     engine_class, bufmgr_gem->exec_queue_timeslice);
911     }
912 
913     ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_XE_EXEC_QUEUE_CREATE, &create);
914 
915     MOS_DRM_CHK_STATUS_MESSAGE_RETURN_VALUE_WH_OP(ret, context, MOS_Delete, nullptr,
916                 "ioctl failed in DRM_IOCTL_XE_EXEC_QUEUE_CREATE, return error(%d)", ret);
917 
918     context->ctx.ctx_id = create.exec_queue_id;
919     context->ctx_width = ctx_width;
920     context->num_placements = num_placements;
921     context->engine_class = ((struct drm_xe_engine_class_instance *)engine_map)[0].engine_class;
922     context->is_protected = bContextProtected;
923     context->flags = flags;
924     context->ctx.bufmgr = bufmgr;
925     context->ctx.vm_id = bufmgr_gem->vm_id;
926     context->reset_count = 0;
927     context->timeline_dep = nullptr;
928 
929     bufmgr_gem->m_lock.lock();
930     context->dummy_exec_queue_id = ++dummy_exec_queue_id;
931     bufmgr_gem->global_ctx_info[context->dummy_exec_queue_id] = context;
932     bufmgr_gem->m_lock.unlock();
933     return &context->ctx;
934 }
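/*
 * Usage sketch (editorial): creating a 1-wide exec_queue on the render
 * engines reported by the KMD, using only helpers defined in this file.
 *
 *     unsigned int nengine = 0;
 *     mos_query_engines_count_xe(bufmgr, &nengine);
 *     struct drm_xe_engine_class_instance map[nengine];
 *     mos_query_engines_xe(bufmgr, DRM_XE_ENGINE_CLASS_RENDER, 0, &nengine, map);
 *     struct mos_linux_context *ctx = mos_context_create_shared_xe(
 *                 bufmgr, nullptr, 0, false, map, 1, 1, 0);
 */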
935 
936 static struct mos_linux_context *
937 mos_context_create_xe(struct mos_bufmgr *bufmgr)
938 {
939     struct mos_xe_bufmgr_gem *bufmgr_gem = (struct mos_xe_bufmgr_gem *)bufmgr;
940     struct mos_xe_context *context = nullptr;
941 
942     /**
943      * Note: must use MOS_New to allocate buffer instead of malloc since mos_xe_context
944      * contains std::queue; otherwise the queue would not be constructed.
945      */
946     context = MOS_New(mos_xe_context);
947     MOS_DRM_CHK_NULL_RETURN_VALUE(context, nullptr)
948 
949     context->ctx.ctx_id = INVALID_EXEC_QUEUE_ID;
950     context->ctx_width = 0;
951     context->ctx.bufmgr = bufmgr;
952     context->ctx.vm_id = bufmgr_gem->vm_id;
953     context->reset_count = 0;
954     context->timeline_dep = nullptr;
955     context->dummy_exec_queue_id = INVALID_EXEC_QUEUE_ID;
956     return &context->ctx;
957 }
958 
959 static struct mos_linux_context *
960 mos_context_create_ext_xe(
961                             struct mos_bufmgr *bufmgr,
962                             __u32 flags,
963                             bool bContextProtected)
964 {
965     MOS_UNUSED(flags);
966     MOS_UNUSED(bContextProtected);
967 
968     return mos_context_create_xe(bufmgr);
969 }
970 
971 static void
972 mos_context_destroy_xe(struct mos_linux_context *ctx)
973 {
974     if (nullptr == ctx)
975     {
976         return;
977     }
978 
979     struct mos_xe_bufmgr_gem *bufmgr_gem = (struct mos_xe_bufmgr_gem *)(ctx->bufmgr);
980     if (nullptr == bufmgr_gem)
981     {
982         return;
983     }
984     struct mos_xe_context *context = (struct mos_xe_context *)ctx;
985     struct drm_xe_exec_queue_destroy exec_queue_destroy;
986     int ret;
987     bufmgr_gem->m_lock.lock();
988     bufmgr_gem->sync_obj_rw_lock.lock();
989     mos_sync_destroy_timeline_dep(bufmgr_gem->fd, context->timeline_dep);
990     context->timeline_dep = nullptr;
991     bufmgr_gem->global_ctx_info.erase(context->dummy_exec_queue_id);
992     bufmgr_gem->sync_obj_rw_lock.unlock();
993     bufmgr_gem->m_lock.unlock();
994 
995     if (INVALID_EXEC_QUEUE_ID == ctx->ctx_id)
996     {
997         MOS_Delete(context);
998         return;
999     }
1000 
1001     memclear(exec_queue_destroy);
1002     exec_queue_destroy.exec_queue_id = ctx->ctx_id;
1003 
1004     ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_XE_EXEC_QUEUE_DESTROY, &exec_queue_destroy);
1005     if (ret != 0)
1006         MOS_DRM_ASSERTMESSAGE("DRM_IOCTL_XE_EXEC_QUEUE_DESTROY failed: %s", strerror(errno));
1007 
1008     MOS_Delete(context);
1009 }
1010 
1011 /**
1012  * Restore a banned exec_queue with a newly created one.
1013  * Note: this call is only for banned-context restore; if using it
1014  * for any other purpose, you MUST pay attention to context->reset_count here.
1015  */
1016 static int
1017 __mos_context_restore_xe(struct mos_bufmgr *bufmgr,
1018             struct mos_linux_context *ctx)
1019 {
1020     MOS_DRM_CHK_NULL_RETURN_VALUE(bufmgr, -EINVAL);
1021     MOS_DRM_CHK_NULL_RETURN_VALUE(ctx, -EINVAL);
1022     if (INVALID_EXEC_QUEUE_ID == ctx->ctx_id)
1023     {
1024         MOS_DRM_ASSERTMESSAGE("Unable to restore intel context, it is not supported");
1025         return -EINVAL;
1026     }
1027     struct mos_xe_bufmgr_gem *bufmgr_gem = (struct mos_xe_bufmgr_gem *)bufmgr;
1028     struct mos_xe_context *context = (struct mos_xe_context *)ctx;
1029     int ret;
1030 
1031     //query the engine count first
1032     uint32_t nengine = 0;
1033     ret = mos_query_engines_count_xe(bufmgr, &nengine);
1034     MOS_DRM_CHK_STATUS_MESSAGE_RETURN(ret,
1035                 "query engine count of restore failed, return error(%d)", ret)
1036     struct drm_xe_engine_class_instance engine_map[nengine];
1037     ret = mos_query_engines_xe(bufmgr,
1038                 context->engine_class,
1039                 context->engine_caps,
1040                 &nengine,
1041                 (void*)engine_map);
1042     MOS_DRM_CHK_STATUS_MESSAGE_RETURN(ret,
1043                 "query engine of restore failed, return error(%d)", ret)
1044 
1045     //create new exec queue
1046     struct drm_xe_exec_queue_create create;
1047     memclear(create);
1048     create.width = context->ctx_width;
1049     create.num_placements = context->num_placements;
1050     create.vm_id = context->ctx.vm_id;
1051     create.flags = context->flags;
1052     create.instances = (uintptr_t)engine_map;
1053     ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_XE_EXEC_QUEUE_CREATE, &create);
1054     MOS_DRM_CHK_STATUS_MESSAGE_RETURN(ret,
1055                 "ioctl failed in DRM_IOCTL_XE_EXEC_QUEUE_CREATE of restore, return error(%d)", ret)
1056 
1057     //destroy old exec_queue
1058     struct drm_xe_exec_queue_destroy exec_queue_destroy;
1059     memclear(exec_queue_destroy);
1060     exec_queue_destroy.exec_queue_id = ctx->ctx_id;
1061     ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_XE_EXEC_QUEUE_DESTROY, &exec_queue_destroy);
1062     MOS_DRM_CHK_STATUS_MESSAGE_RETURN(ret,
1063                 "ioctl failed in DRM_IOCTL_XE_EXEC_QUEUE_DESTROY of restore, return error(%d)", ret)
1064 
1065     //restore
1066     context->ctx.ctx_id = create.exec_queue_id;
1067     context->reset_count += 1;
1068 
1069     return MOS_XE_SUCCESS;
1070 }
1071 
1072 /**
1073  * Get the property of the ctx
1074  *
1075  * @ctx the context to query
1076  * @property the property to query
1077  * @value receives the queried value for the given property
1078  */
1079 static int
1080 __mos_get_context_property_xe(struct mos_bufmgr *bufmgr,
1081             struct mos_linux_context *ctx,
1082             uint32_t property,
1083             uint64_t &value)
1084 {
1085     MOS_DRM_CHK_NULL_RETURN_VALUE(bufmgr, -EINVAL);
1086     MOS_DRM_CHK_NULL_RETURN_VALUE(ctx, -EINVAL);
1087     struct mos_xe_bufmgr_gem *bufmgr_gem = (struct mos_xe_bufmgr_gem *)bufmgr;
1088     struct drm_xe_exec_queue_get_property p;
1089     memclear(p);
1090     p.property = property;
1091     p.exec_queue_id = ctx->ctx_id;
1092 
1093     int ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_XE_EXEC_QUEUE_GET_PROPERTY, &p);
1094 
1095     value = p.value;
1096     return ret;
1097 }
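/*
 * Usage sketch (editorial): checking whether an exec_queue was banned, then
 * restoring it with the helper above.
 *
 *     uint64_t banned = 0;
 *     __mos_get_context_property_xe(bufmgr, ctx,
 *                 DRM_XE_EXEC_QUEUE_GET_PROPERTY_BAN, banned);
 *     if (banned)
 *         __mos_context_restore_xe(bufmgr, ctx);
 */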
1098 
1099 /**
1100  * Allocate a section of virtual memory for a buffer, assigning an address.
1101  */
1102 static uint64_t
1103 __mos_bo_vma_alloc_xe(struct mos_bufmgr *bufmgr,
1104           enum mos_memory_zone memzone,
1105           uint64_t size,
1106           uint64_t alignment)
1107 {
1108     CHK_CONDITION(nullptr == bufmgr, "nullptr bufmgr.\n", 0);
1109     struct mos_xe_bufmgr_gem *bufmgr_gem = (struct mos_xe_bufmgr_gem *) bufmgr;
1110     /* Force alignment to be some number of pages */
1111     alignment = ALIGN(alignment, PAGE_SIZE);
1112 
1113     uint64_t addr = mos_vma_heap_alloc(&bufmgr_gem->vma_heap[memzone], size, alignment);
1114 
1115     // currently only 48-bit range addresses are supported
1116     CHK_CONDITION((addr >> 48ull) != 0, "invalid address, over 48bit range.\n", 0);
1117     CHK_CONDITION((addr >> (MEMZONE_SYS == memzone ? 40ull : (MEMZONE_DEVICE == memzone  ? 41ull:42ull))) != 0, "invalid address, over memory zone range.\n", 0);
1118     CHK_CONDITION((addr % alignment) != 0, "invalid address, does not meet alignment requirement.\n", 0);
1119 
1120     return addr;
1121 }
1122 
1123 static int
1124 __mos_bo_set_offset_xe(MOS_LINUX_BO *bo)
1125 {
1126     struct mos_xe_bo_gem *bo_gem = (struct mos_xe_bo_gem *) bo;
1127     struct mos_xe_bufmgr_gem *bufmgr_gem = (struct mos_xe_bufmgr_gem *) bo->bufmgr;
1128     MOS_DRM_CHK_NULL_RETURN_VALUE(bo_gem, -EINVAL)
1129     MOS_DRM_CHK_NULL_RETURN_VALUE(bufmgr_gem, -EINVAL)
1130 
1131     uint64_t offset = 0;
1132     uint64_t alignment = 0;
1133 
1134     if (0 == bo->offset64)
1135     {
1136         bufmgr_gem->m_lock.lock();
1137 
1138         /* On platforms where lmem only supports 64K pages, kmd requires us
1139          * to either align the va to 2M or separate the lmem objects and smem
1140          * objects into different va zones to avoid mixing lmem and smem
1141          * objects in the same page table. For an imported object, we don't know
1142          * if it's in lmem or smem. So, we need to align the va to 2M.
1143          */
1144         if (MEMZONE_PRIME == bo_gem->mem_region)
1145         {
1146             offset = __mos_bo_vma_alloc_xe(bo->bufmgr, (enum mos_memory_zone)bo_gem->mem_region, bo->size, PAGE_SIZE_2M);
1147         }
1148         else if (MEMZONE_DEVICE == bo_gem->mem_region)
1149         {
1150             alignment = MAX(bufmgr_gem->default_alignment[MOS_XE_MEM_CLASS_VRAM], PAGE_SIZE_64K);
1151             offset = __mos_bo_vma_alloc_xe(bo->bufmgr, (enum mos_memory_zone)bo_gem->mem_region, bo->size, alignment);
1152         }
1153         else if (MEMZONE_SYS == bo_gem->mem_region)
1154         {
1155             alignment = MAX(bufmgr_gem->default_alignment[MOS_XE_MEM_CLASS_SYSMEM], PAGE_SIZE_64K);
1156             offset = __mos_bo_vma_alloc_xe(bo->bufmgr, (enum mos_memory_zone)bo_gem->mem_region, bo->size, alignment);
1157         }
1158         else
1159         {
1160             MOS_DRM_ASSERTMESSAGE("Invalid mem_region:%d", bo_gem->mem_region);
1161         }
1162 
1163         bo->offset64 = offset;
1164         bo->offset = offset;
1165 
1166         bufmgr_gem->m_lock.unlock();
1167     }
1168 
1169     return 0;
1170 }
1171 
1172 static int __mos_vm_bind_xe(int fd, uint32_t vm_id, uint32_t exec_queue_id, uint32_t bo_handle,
1173           uint64_t offset, uint64_t addr, uint64_t size, uint16_t pat_index, uint32_t op, uint32_t flags,
1174           struct drm_xe_sync *sync, uint32_t num_syncs, uint64_t ext)
1175 {
1176     int ret;
1177 
1178     struct drm_xe_vm_bind bind;
1179     memclear(bind);
1180     bind.extensions = ext;
1181     bind.vm_id = vm_id;
1182     bind.exec_queue_id = exec_queue_id;
1183     bind.num_binds = 1;
1184     bind.bind.obj = bo_handle;
1185     bind.bind.obj_offset = offset;
1186     bind.bind.range = size;
1187     bind.bind.pat_index = pat_index;
1188     bind.bind.addr = addr;
1189     bind.bind.op = op;
1190     bind.bind.flags = flags;
1191     bind.num_syncs = num_syncs;
1192     bind.syncs = (uintptr_t)sync;
1193 
1194     ret = drmIoctl(fd, DRM_IOCTL_XE_VM_BIND, &bind);
1195     if (ret)
1196     {
1197         MOS_DRM_ASSERTMESSAGE("Failed to bind vm, vm_id:%d, exec_queue_id:%d, op:0x%x, flags:0x%x, bo_handle:%d, offset:%lx, addr:0x%lx, size:%ld, pat_index:%d, errno(%d)",
1198             vm_id, exec_queue_id, op, flags, bo_handle, offset, addr, size, pat_index, -errno);
1199     }
1200 
1201     return ret;
1202 }
1203 
1204 static int mos_vm_bind_sync_xe(int fd, uint32_t vm_id, uint32_t bo, uint64_t offset,
1205         uint64_t addr, uint64_t size, uint16_t pat_index, uint32_t op)
1206 {
1207     struct drm_xe_sync sync;
1208 
1209     memclear(sync);
1210     sync.flags = DRM_XE_SYNC_FLAG_SIGNAL;
1211     sync.type = DRM_XE_SYNC_TYPE_SYNCOBJ;
1212     sync.handle = mos_sync_syncobj_create(fd, 0);
1213 
1214     int ret = __mos_vm_bind_xe(fd, vm_id, 0, bo, offset, addr, size, pat_index,
1215                 op, 0, &sync, 1, 0);
1216     if (ret)
1217     {
1218         MOS_DRM_ASSERTMESSAGE("ret:%d, error:%d", ret, -errno);
1219         mos_sync_syncobj_destroy(fd, sync.handle);
1220         return ret;
1221     }
1222 
1223     ret = mos_sync_syncobj_wait_err(fd, &sync.handle, 1, INT64_MAX, 0, NULL);
1224     if (ret)
1225     {
1226         MOS_DRM_ASSERTMESSAGE("syncobj_wait error:%d", -errno);
1227     }
1228 
1229     mos_sync_syncobj_destroy(fd, sync.handle);
1230 
1231     return ret;
1232 }
1233 
1234 static int mos_vm_bind_async_xe(int fd, uint32_t vm_id, uint32_t bo, uint64_t offset,
1235         uint64_t addr, uint64_t size, uint16_t pat_index, uint32_t op,
1236         struct drm_xe_sync *sync, uint32_t num_syncs)
1237 {
1238     return __mos_vm_bind_xe(fd, vm_id, 0, bo, offset, addr, size, pat_index,
1239                 op, 0, sync, num_syncs, 0);
1240 }
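/*
 * Design note (editorial): mos_vm_bind_sync_xe() makes the bind synchronous
 * by creating a temporary syncobj, passing it as the signal point and
 * blocking on it before returning; mos_vm_bind_async_xe() forwards the
 * caller's sync array instead and returns immediately, leaving completion
 * tracking to the caller.
 */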
1241 
1242 drm_export struct mos_linux_bo *
1243 mos_bo_alloc_xe(struct mos_bufmgr *bufmgr,
1244                struct mos_drm_bo_alloc *alloc)
1245 {
1246     struct mos_xe_bufmgr_gem *bufmgr_gem = (struct mos_xe_bufmgr_gem *) bufmgr;
1247     struct mos_xe_bo_gem *bo_gem;
1248     struct drm_xe_gem_create create;
1249     uint32_t bo_align = alloc->alignment;
1250     int ret;
1251 
1252     /**
1253      * Note: must use MOS_New to allocate buffer instead of malloc since mos_xe_bo_gem
1254      * contains std::vector and std::map. Otherwise both will have no instance.
1255      */
1256     bo_gem = MOS_New(mos_xe_bo_gem);
1257     MOS_DRM_CHK_NULL_RETURN_VALUE(bo_gem, nullptr)
1258     memclear(bo_gem->bo);
1259     bo_gem->is_exported = false;
1260     bo_gem->is_imported = false;
1261     bo_gem->is_userptr = false;
1262     bo_gem->last_exec_read_exec_queue = INVALID_EXEC_QUEUE_ID;
1263     bo_gem->last_exec_write_exec_queue = INVALID_EXEC_QUEUE_ID;
1264     atomic_set(&bo_gem->map_count, 0);
1265     bo_gem->mem_virtual = nullptr;
1266     bo_gem->mem_region = MEMZONE_SYS;
1267     bo_align = MAX(alloc->alignment, bufmgr_gem->default_alignment[MOS_XE_MEM_CLASS_SYSMEM]);
1268 
1269     if (bufmgr_gem->has_vram &&
1270             (MOS_MEMPOOL_VIDEOMEMORY == alloc->ext.mem_type || MOS_MEMPOOL_DEVICEMEMORY == alloc->ext.mem_type))
1271     {
1272         bo_gem->mem_region = MEMZONE_DEVICE;
1273         bo_align = MAX(alloc->alignment, bufmgr_gem->default_alignment[MOS_XE_MEM_CLASS_VRAM]);
1274         alloc->ext.cpu_cacheable = false;
1275     }
1276 
1277     memclear(create);
1278     if (MEMZONE_DEVICE == bo_gem->mem_region)
1279     {
1280         //Note: memory_region is related to gt_id on multi-tile GPUs; take gt_id into consideration for the multi-tile case
1281         create.placement = bufmgr_gem->mem_regions_mask & (~0x1);
1282     }
1283     else
1284     {
1285         create.placement = bufmgr_gem->mem_regions_mask & 0x1;
1286     }
1287 
1288     //Note: We suggest vm_id=0 here as the default; otherwise this bo cannot be exported as a prime fd.
1289     create.vm_id = 0;
1290     create.size = ALIGN(alloc->size, bo_align);
1291 
1292     /**
1293      * Note: currently only WB/WC are supported, while UC and other cache modes are not allowed.
1294      */
1295     create.cpu_caching = alloc->ext.cpu_cacheable ? DRM_XE_GEM_CPU_CACHING_WB : DRM_XE_GEM_CPU_CACHING_WC;
1296 
1297     ret = drmIoctl(bufmgr_gem->fd,
1298         DRM_IOCTL_XE_GEM_CREATE,
1299         &create);
1300     MOS_DRM_CHK_STATUS_MESSAGE_RETURN_VALUE_WH_OP(ret, bo_gem, MOS_Delete, nullptr,
1301                 "ioctl failed in DRM_IOCTL_XE_GEM_CREATE, return error(%d)", ret);
1302 
1303     bo_gem->gem_handle = create.handle;
1304     bo_gem->bo.handle = bo_gem->gem_handle;
1305     bo_gem->bo.size    = create.size;
1306     bo_gem->bo.vm_id = INVALID_VM;
1307     bo_gem->bo.bufmgr = bufmgr;
1308     bo_gem->bo.align = bo_align;
1309     bo_gem->cpu_caching = create.cpu_caching;
1310     /**
1311      * Note: better to fall back to a default pat_index to overwrite an invalid argument; normally this should not happen.
1312      */
1313     bo_gem->pat_index = alloc->ext.pat_index == PAT_INDEX_INVALID ? 0 : alloc->ext.pat_index;
1314 
1315     if (bufmgr_gem->mem_profiler_fd != -1)
1316     {
1317         snprintf(bufmgr_gem->mem_profiler_buffer, MEM_PROFILER_BUFFER_SIZE, "GEM_CREATE, %d, %d, %lu, %d, %s\n",
1318                     getpid(), bo_gem->bo.handle, bo_gem->bo.size, bo_gem->mem_region, alloc->name);
1319         ret = write(bufmgr_gem->mem_profiler_fd,
1320                     bufmgr_gem->mem_profiler_buffer,
1321                     strnlen(bufmgr_gem->mem_profiler_buffer, MEM_PROFILER_BUFFER_SIZE));
1322         if (-1 == ret)
1323         {
1324             MOS_DRM_ASSERTMESSAGE("Failed to write to %s: %s",
1325                         bufmgr_gem->mem_profiler_path, strerror(errno));
1326         }
1327     }
1328 
1329     /* drm_intel_gem_bo_free calls DRMLISTDEL() for an uninitialized
1330        list (vma_list), so better set the list head here */
1331     DRMINITLISTHEAD(&bo_gem->name_list);
1332 
1333     memcpy(bo_gem->name, alloc->name, (strlen(alloc->name) + 1) > MAX_NAME_SIZE ? MAX_NAME_SIZE : (strlen(alloc->name) + 1));
1334     atomic_set(&bo_gem->ref_count, 1);
1335 
1336     MOS_DRM_NORMALMESSAGE("buf %d (%s) %ldb, bo:0x%lx",
1337         bo_gem->gem_handle, alloc->name, alloc->size, (uint64_t)&bo_gem->bo);
1338 
1339     __mos_bo_set_offset_xe(&bo_gem->bo);
1340 
1341     ret = mos_vm_bind_sync_xe(bufmgr_gem->fd,
1342                     bufmgr_gem->vm_id,
1343                     bo_gem->gem_handle,
1344                     0,
1345                     bo_gem->bo.offset64,
1346                     bo_gem->bo.size,
1347                     bo_gem->pat_index,
1348                     DRM_XE_VM_BIND_OP_MAP);
1349     if (ret)
1350     {
1351         MOS_DRM_ASSERTMESSAGE("mos_vm_bind_sync_xe ret: %d", ret);
1352         mos_bo_free_xe(&bo_gem->bo);
1353         return nullptr;
1354     }
1355     else
1356     {
1357         bo_gem->bo.vm_id = bufmgr_gem->vm_id;
1358     }
1359 
1360     return &bo_gem->bo;
1361 }
1362 
1363 static unsigned long
1364 __mos_bo_tile_size_xe(struct mos_xe_bufmgr_gem *bufmgr_gem, unsigned long size,
1365                uint32_t *tiling_mode, uint32_t alignment)
1366 {
1367     unsigned long min_size, max_size;
1368     unsigned long i;
1369 
1370     if (TILING_NONE == *tiling_mode)
1371         return size;
1372 
1373     /* 965+ just need multiples of page size for tiling */
1374     return ROUND_UP_TO(size, alignment);
1375 
1376 }
1377 
1378 /*
1379  * Round a given pitch up to the minimum required for X tiling on a
1380  * given chip.  We use 512 as the minimum to allow for a later tiling
1381  * change.
1382  */
1383 static unsigned long
1384 __mos_bo_tile_pitch_xe(struct mos_xe_bufmgr_gem *bufmgr_gem,
1385                 unsigned long pitch, uint32_t *tiling_mode)
1386 {
1387     unsigned long tile_width;
1388     unsigned long i;
1389 
1390     /* If untiled, then just align it so that we can do rendering
1391      * to it with the 3D engine.
1392      */
1393     if (TILING_NONE == *tiling_mode)
1394         return ALIGN(pitch, 64);
1395 
1396     if (TILING_X == *tiling_mode)
1397         tile_width = 512;
1398     else
1399         tile_width = 128;
1400 
1401     /* 965 is flexible */
1402     return ROUND_UP_TO(pitch, tile_width);
1403 }
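/*
 * Worked example (editorial): a 1920x1080 surface at 4 bytes per pixel has
 * an initial pitch of 1920 * 4 = 7680 bytes. Untiled, ALIGN(7680, 64) keeps
 * 7680; TILING_X rounds to a 512-byte multiple (7680 = 15 * 512 already);
 * TILING_Y rounds to a 128-byte multiple. mos_bo_alloc_tiled_xe() below then
 * aligns the height (8 rows for X, 32 for Y) before computing the bo size.
 */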
1404 
1405 static struct mos_linux_bo *
1406 mos_bo_alloc_tiled_xe(struct mos_bufmgr *bufmgr,
1407                  struct mos_drm_bo_alloc_tiled *alloc_tiled)
1408 {
1409     struct mos_xe_bufmgr_gem *bufmgr_gem = (struct mos_xe_bufmgr_gem *) bufmgr;
1410     unsigned long size, stride;
1411     uint32_t tiling;
1412 
1413     uint32_t alignment = bufmgr_gem->default_alignment[MOS_XE_MEM_CLASS_SYSMEM];
1414 
1415     if (bufmgr_gem->has_vram &&
1416        (MOS_MEMPOOL_VIDEOMEMORY == alloc_tiled->ext.mem_type   || MOS_MEMPOOL_DEVICEMEMORY == alloc_tiled->ext.mem_type))
1417     {
1418         alignment = bufmgr_gem->default_alignment[MOS_XE_MEM_CLASS_VRAM];
1419     }
1420 
1421     do {
1422         unsigned long aligned_y, height_alignment;
1423 
1424         tiling = alloc_tiled->ext.tiling_mode;
1425 
1426         /* If we're tiled, our allocations are in 8 or 32-row blocks,
1427          * so failure to align our height means that we won't allocate
1428          * enough pages.
1429          *
1430          * If we're untiled, we still have to align to 2 rows high
1431          * because the data port accesses 2x2 blocks even if the
1432          * bottom row isn't to be rendered, so failure to align means
1433          * we could walk off the end of the GTT and fault.  This is
1434          * documented on 965, and may be the case on older chipsets
1435          * too so we try to be careful.
1436          */
1437         aligned_y = alloc_tiled->y;
1438         height_alignment = 2;
1439 
1440         if (TILING_X == tiling)
1441             height_alignment = 8;
1442         else if (TILING_Y == tiling)
1443             height_alignment = 32;
1444         aligned_y = ALIGN(alloc_tiled->y, height_alignment);
1445 
1446         stride = alloc_tiled->x * alloc_tiled->cpp;
1447         stride = __mos_bo_tile_pitch_xe(bufmgr_gem, stride, &alloc_tiled->ext.tiling_mode);
1448         size = stride * aligned_y;
1449         size = __mos_bo_tile_size_xe(bufmgr_gem, size, &alloc_tiled->ext.tiling_mode, alignment);
1450     } while (alloc_tiled->ext.tiling_mode != tiling);
1451 
1452     alloc_tiled->pitch = stride;
1453 
1454     struct mos_drm_bo_alloc alloc;
1455     alloc.name = alloc_tiled->name;
1456     alloc.size = size;
1457     alloc.alignment = alignment;
1458     alloc.ext = alloc_tiled->ext;
1459 
1460     return mos_bo_alloc_xe(bufmgr, &alloc);
1461 }
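
/**
 * Usage sketch (editorial, not part of the driver): a minimal, hypothetical
 * caller of mos_bo_alloc_tiled_xe. Field names follow struct
 * mos_drm_bo_alloc_tiled as used above; the surface dimensions are
 * illustrative assumptions, and the remaining ext fields stay zero-initialized.
 */
#if 0
static struct mos_linux_bo *
example_alloc_tiled_surface(struct mos_bufmgr *bufmgr)
{
    struct mos_drm_bo_alloc_tiled alloc_tiled = {};
    alloc_tiled.name = "example-surface";
    alloc_tiled.x = 1920;                     // width in pixels (assumed)
    alloc_tiled.y = 1088;                     // height in rows (assumed)
    alloc_tiled.cpp = 1;                      // bytes per pixel (assumed)
    alloc_tiled.ext.tiling_mode = TILING_Y;   // height gets aligned to 32 rows above
    // On success, alloc_tiled.pitch holds the tile-aligned stride.
    return mos_bo_alloc_tiled_xe(bufmgr, &alloc_tiled);
}
#endif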
1462 
1463 drm_export struct mos_linux_bo *
1464 mos_bo_alloc_userptr_xe(struct mos_bufmgr *bufmgr,
1465                 struct mos_drm_bo_alloc_userptr *alloc_uptr)
1466 {
1467     struct mos_xe_bufmgr_gem *bufmgr_gem = (struct mos_xe_bufmgr_gem *) bufmgr;
1468     struct mos_xe_bo_gem *bo_gem;
1469     int ret;
1470 
1471     /**
1472      * Note: must use MOS_New to allocate buffer instead of malloc since mos_xe_bo_gem
1473      * contains std::vector and std::map; with malloc they would never be constructed.
1474      */
1475     bo_gem = MOS_New(mos_xe_bo_gem);
1476     MOS_DRM_CHK_NULL_RETURN_VALUE(bo_gem, nullptr)
1477     memclear(bo_gem->bo);
1478     bo_gem->is_exported = false;
1479     bo_gem->is_imported = false;
1480     bo_gem->is_userptr = true;
1481     bo_gem->last_exec_read_exec_queue = INVALID_EXEC_QUEUE_ID;
1482     bo_gem->last_exec_write_exec_queue = INVALID_EXEC_QUEUE_ID;
1483     atomic_set(&bo_gem->map_count, 0);
1484     bo_gem->mem_virtual = alloc_uptr->addr;
1485     bo_gem->gem_handle = INVALID_HANDLE;
1486     bo_gem->bo.handle = INVALID_HANDLE;
1487     bo_gem->bo.size    = alloc_uptr->size;
1488     bo_gem->pat_index = alloc_uptr->pat_index == PAT_INDEX_INVALID ? 0 : alloc_uptr->pat_index;
1489     bo_gem->bo.bufmgr = bufmgr;
1490     bo_gem->bo.vm_id = INVALID_VM;
1491     bo_gem->mem_region = MEMZONE_SYS;
1492 
1493     /* Save the address provided by user */
1494 #ifdef __cplusplus
1495     bo_gem->bo.virt   = alloc_uptr->addr;
1496 #else
1497     bo_gem->bo.virtual   = alloc_uptr->addr;
1498 #endif
1499 
1500     /* drm_intel_gem_bo_free calls DRMLISTDEL() for an uninitialized
1501        list (vma_list), so better set the list head here */
1502     DRMINITLISTHEAD(&bo_gem->name_list);
1503 
1504     memcpy(bo_gem->name, alloc_uptr->name, (strlen(alloc_uptr->name) + 1) > MAX_NAME_SIZE ? MAX_NAME_SIZE : (strlen(alloc_uptr->name) + 1));
1505     atomic_set(&bo_gem->ref_count, 1);
1506 
1507     __mos_bo_set_offset_xe(&bo_gem->bo);
1508 
1509     ret = mos_vm_bind_sync_xe(bufmgr_gem->fd,
1510                 bufmgr_gem->vm_id,
1511                 0,
1512                 (uint64_t)alloc_uptr->addr,
1513                 bo_gem->bo.offset64,
1514                 bo_gem->bo.size,
1515                 bo_gem->pat_index,
1516                 DRM_XE_VM_BIND_OP_MAP_USERPTR);
1517 
1518     if (ret)
1519     {
1520         MOS_DRM_ASSERTMESSAGE("mos_xe_vm_bind_userptr_sync ret: %d", ret);
1521         mos_bo_free_xe(&bo_gem->bo);
1522         return nullptr;
1523     }
1524     else
1525     {
1526         bo_gem->bo.vm_id = bufmgr_gem->vm_id;
1527     }
1528 
1529     MOS_DRM_NORMALMESSAGE("mos_bo_alloc_userptr_xe: buf (%s) %ldb, bo:0x%lx",
1530         alloc_uptr->name, alloc_uptr->size, (uint64_t)&bo_gem->bo);
1531 
1532 
1533     return &bo_gem->bo;
1534 }
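
/**
 * Usage sketch (editorial): wrapping a page-aligned user allocation as a
 * userptr bo through mos_bo_alloc_userptr_xe above. The alignment handling is
 * an assumption of the example; ownership of addr stays with the caller.
 */
#if 0
static struct mos_linux_bo *
example_wrap_user_memory(struct mos_bufmgr *bufmgr, size_t size)
{
    void *addr = nullptr;
    size = ALIGN(size, PAGE_SIZE_4K);
    if (posix_memalign(&addr, PAGE_SIZE_4K, size) != 0)
        return nullptr;

    struct mos_drm_bo_alloc_userptr alloc_uptr = {};
    alloc_uptr.name = "example-userptr";
    alloc_uptr.addr = addr;
    alloc_uptr.size = size;
    alloc_uptr.pat_index = PAT_INDEX_INVALID;  // falls back to pat_index 0 above
    // Binds addr into the ppGTT with DRM_XE_VM_BIND_OP_MAP_USERPTR.
    return mos_bo_alloc_userptr_xe(bufmgr, &alloc_uptr);
}
#endif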
1535 
1536 static struct mos_linux_bo *
1537 mos_bo_create_from_prime_xe(struct mos_bufmgr *bufmgr, struct mos_drm_bo_alloc_prime *alloc_prime)
1538 {
1539     struct mos_xe_bufmgr_gem *bufmgr_gem = (struct mos_xe_bufmgr_gem *) bufmgr;
1540     int ret;
1541     uint32_t handle;
1542     struct mos_xe_bo_gem *bo_gem;
1543     int prime_fd = alloc_prime->prime_fd;
1544     int size = alloc_prime->size;
1545     uint16_t pat_index = alloc_prime->pat_index;
1546     drmMMListHead *list;
1547 
1548     bufmgr_gem->m_lock.lock();
1549     ret = drmPrimeFDToHandle(bufmgr_gem->fd, prime_fd, &handle);
1550     if (ret)
1551     {
1552         MOS_DRM_ASSERTMESSAGE("create_from_prime: failed to obtain handle from fd: %s", strerror(errno));
1553         bufmgr_gem->m_lock.unlock();
1554         return nullptr;
1555     }
1556 
1557     /*
1558      * See if the kernel has already returned this buffer to us. Just as
1559      * for named buffers, we must not create two bo's pointing at the same
1560      * kernel object
1561      */
1562     for (list = bufmgr_gem->named.next; list != &bufmgr_gem->named; list = list->next)
1563     {
1564         bo_gem = DRMLISTENTRY(struct mos_xe_bo_gem, list, name_list);
1565         if (bo_gem->gem_handle == handle)
1566         {
1567             mos_bo_reference_xe(&bo_gem->bo);
1568             bufmgr_gem->m_lock.unlock();
1569             return &bo_gem->bo;
1570         }
1571     }
1572 
1573     bo_gem = MOS_New(mos_xe_bo_gem);
1574     if (!bo_gem)
1575     {
1576         bufmgr_gem->m_lock.unlock();
1577         return nullptr;
1578     }
1579 
1580     memclear(bo_gem->bo);
1581     bo_gem->is_exported = false;
1582     bo_gem->is_imported = true;
1583     bo_gem->is_userptr = false;
1584     bo_gem->last_exec_read_exec_queue = INVALID_EXEC_QUEUE_ID;
1585     bo_gem->last_exec_write_exec_queue = INVALID_EXEC_QUEUE_ID;
1586     atomic_set(&bo_gem->map_count, 0);
1587     bo_gem->mem_virtual = nullptr;
1588 
1589     /* Determine size of bo.  The fd-to-handle ioctl really should
1590      * return the size, but it doesn't.  If we have kernel 3.12 or
1591      * later, we can lseek on the prime fd to get the size.  Older
1592      * kernels will just fail, in which case we fall back to the
1593      * provided (estimated or guessed) size. */
1594     ret = lseek(prime_fd, 0, SEEK_END);
1595     if (ret != -1)
1596         bo_gem->bo.size = ret;
1597     else
1598         bo_gem->bo.size = size;
1599 
1600     bo_gem->bo.handle = handle;
1601     /*
1602      * Note: need to get the pat_index from the customer_gmminfo with at least 1-way coherency.
1603      */
1604     bo_gem->pat_index = pat_index == PAT_INDEX_INVALID ? 0 : pat_index;
1605     bo_gem->bo.bufmgr = bufmgr;
1606 
1607     bo_gem->gem_handle = handle;
1608     atomic_set(&bo_gem->ref_count, 1);
1609 
1610     /**
1611      * change bo_gem->name to const char*
1612      */
1613     memcpy(bo_gem->name, alloc_prime->name, sizeof("prime"));
1614     bo_gem->mem_region = MEMZONE_PRIME;
1615 
1616     DRMLISTADDTAIL(&bo_gem->name_list, &bufmgr_gem->named);
1617     bufmgr_gem->m_lock.unlock();
1618 
1619     __mos_bo_set_offset_xe(&bo_gem->bo);
1620 
1621     ret = mos_vm_bind_sync_xe(bufmgr_gem->fd,
1622                 bufmgr_gem->vm_id,
1623                 bo_gem->gem_handle,
1624                 0,
1625                 bo_gem->bo.offset64,
1626                 bo_gem->bo.size,
1627                 bo_gem->pat_index,
1628                 DRM_XE_VM_BIND_OP_MAP);
1629     if (ret)
1630     {
1631         MOS_DRM_ASSERTMESSAGE("mos_vm_bind_sync_xe ret: %d", ret);
1632         mos_bo_free_xe(&bo_gem->bo);
1633         return nullptr;
1634     }
1635     else
1636     {
1637         bo_gem->bo.vm_id = bufmgr_gem->vm_id;
1638     }
1639 
1640     return &bo_gem->bo;
1641 }
1642 
1643 static int
1644 mos_bo_export_to_prime_xe(struct mos_linux_bo *bo, int *prime_fd)
1645 {
1646     struct mos_xe_bufmgr_gem *bufmgr_gem = (struct mos_xe_bufmgr_gem *) bo->bufmgr;
1647     struct mos_xe_bo_gem *bo_gem = (struct mos_xe_bo_gem *) bo;
1648 
1649     bufmgr_gem->m_lock.lock();
1650     if (DRMLISTEMPTY(&bo_gem->name_list))
1651         DRMLISTADDTAIL(&bo_gem->name_list, &bufmgr_gem->named);
1652     bufmgr_gem->m_lock.unlock();
1653 
1654     mos_gem_bo_wait_rendering_xe(bo);
1655 
1656     if (drmPrimeHandleToFD(bufmgr_gem->fd, bo_gem->gem_handle,
1657                    DRM_CLOEXEC, prime_fd) != 0)
1658         return -errno;
1659 
1660     bo_gem->is_exported = true;
1661 
1662     return 0;
1663 }
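
/**
 * Usage sketch (editorial): a dma-buf export/import round trip built from the
 * two helpers above. In practice the fd would cross a process boundary; within
 * one process the re-import hits the named-list fast path above instead.
 */
#if 0
static struct mos_linux_bo *
example_share_bo(struct mos_bufmgr *bufmgr, struct mos_linux_bo *bo)
{
    int prime_fd = -1;
    if (mos_bo_export_to_prime_xe(bo, &prime_fd) != 0)
        return nullptr;

    struct mos_drm_bo_alloc_prime alloc_prime = {};
    alloc_prime.name = "prime";
    alloc_prime.prime_fd = prime_fd;
    alloc_prime.size = bo->size;              // fallback if lseek(SEEK_END) fails
    alloc_prime.pat_index = PAT_INDEX_INVALID;
    struct mos_linux_bo *imported = mos_bo_create_from_prime_xe(bufmgr, &alloc_prime);
    close(prime_fd);                          // the GEM handle keeps its own reference
    return imported;
}
#endif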
1664 
1665 /**
1666  * Update exec list for submission.
1667  *
1668  * @cmd_bo points to the cmd bo for the exec submission.
1669  * @exec_bo points to the gpu resource used by the exec submission.
1670  * @write_flag indicates whether the GPU writes to the exec bo.
1671  */
1672 static int
1673 mos_gem_bo_update_exec_list_xe(struct mos_linux_bo *cmd_bo, struct mos_linux_bo *exec_bo, bool write_flag)
1674 {
1675     MOS_DRM_CHK_NULL_RETURN_VALUE(cmd_bo, -EINVAL)
1676     MOS_DRM_CHK_NULL_RETURN_VALUE(exec_bo, -EINVAL)
1677     struct mos_xe_bo_gem *cmd_bo_gem = (struct mos_xe_bo_gem *) cmd_bo;
1678     struct mos_xe_bufmgr_gem *bufmgr_gem = (struct mos_xe_bufmgr_gem *) cmd_bo->bufmgr;
1679     MOS_DRM_CHK_NULL_RETURN_VALUE(bufmgr_gem, -EINVAL)
1680     std::map<uintptr_t, struct mos_xe_exec_bo> &exec_list = cmd_bo_gem->exec_list;
1681 
1682     if (exec_bo->handle == cmd_bo->handle)
1683     {
1684         MOS_DRM_NORMALMESSAGE("cmd bo should not add into exec list, skip it");
1685         return MOS_XE_SUCCESS;
1686     }
1687     uintptr_t key = (uintptr_t)exec_bo;
1688     if (exec_list.count(key) > 0)
1689     {
1690         /**
1691          * This exec bo has been added before, but its exec flags need updating.
1692          */
1693 
1694         // For all BOs with read and write usages, we could just assign write flag to reduce read deps size.
1695         if (write_flag || (exec_list[key].flags & EXEC_OBJECT_WRITE_XE))
1696         {
1697             exec_list[key].flags = EXEC_OBJECT_WRITE_XE;
1698         }
1699         else
1700         {
1701             // For BOs only with read usage, we should assign read flag.
1702             exec_list[key].flags |= EXEC_OBJECT_READ_XE;
1703         }
1704     }
1705     else
1706     {
1707         struct mos_xe_exec_bo target;
1708         target.bo = exec_bo;
1709         target.flags = write_flag ? EXEC_OBJECT_WRITE_XE : EXEC_OBJECT_READ_XE;
1710         exec_list[key] = target;
1711         mos_bo_reference_xe(exec_bo);
1712     }
1713     return MOS_XE_SUCCESS;
1714 }
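
/**
 * Usage sketch (editorial): declaring the resources a batch touches before
 * submission. The read/write flags recorded here feed the dependency handling
 * in __mos_context_exec_update_syncs_xe() further below.
 */
#if 0
static void
example_build_exec_list(struct mos_linux_bo *cmd_bo,
            struct mos_linux_bo *src_bo,
            struct mos_linux_bo *dst_bo)
{
    mos_gem_bo_update_exec_list_xe(cmd_bo, src_bo, false); // GPU reads src_bo
    mos_gem_bo_update_exec_list_xe(cmd_bo, dst_bo, true);  // GPU writes dst_bo
    // ...submit cmd_bo...
    mos_gem_bo_clear_exec_list_xe(cmd_bo, 0);              // drop the references taken above
}
#endif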
1715 
1716 /**
1717  * Clear the exec bo from the list after submission.
1718  *
1719  * @cmd_bo points to the cmd bo for the exec submission.
1720  * @start is unused.
1721  */
1722 static void
1723 mos_gem_bo_clear_exec_list_xe(struct mos_linux_bo *cmd_bo, int start)
1724 {
1725     MOS_UNUSED(start);
1726     if (cmd_bo != nullptr && cmd_bo->bufmgr != nullptr)
1727     {
1728         struct mos_xe_bufmgr_gem *bufmgr_gem = (struct mos_xe_bufmgr_gem *) cmd_bo->bufmgr;
1729         struct mos_xe_bo_gem *bo_gem = (struct mos_xe_bo_gem *) cmd_bo;
1730         std::map<uintptr_t, struct mos_xe_exec_bo> &exec_list = bo_gem->exec_list;
1731 
1732         for (auto &it : exec_list) {
1733             mos_bo_unreference_xe(it.second.bo);
1734         }
1735         exec_list.clear();
1736     }
1737 }
1738 
1739 /**
1740  * Dump all pending execution timeline points on the given bo.
1741  */
1742 int
1743 __mos_dump_bo_wait_rendering_timeline_xe(uint32_t bo_handle,
1744             uint32_t *handles,
1745             uint64_t *points,
1746             uint32_t count,
1747             int64_t timeout_nsec,
1748             uint32_t wait_flags,
1749             uint32_t rw_flags)
1750 {
1751 #if (_DEBUG || _RELEASE_INTERNAL)
1752     if (__XE_TEST_DEBUG(XE_DEBUG_SYNCHRONIZATION))
1753     {
1754         MOS_DRM_CHK_NULL_RETURN_VALUE(handles, -EINVAL)
1755         char log_msg[MOS_MAX_MSG_BUF_SIZE] = { 0 };
1756         int offset = 0;
1757         offset += MOS_SecureStringPrint(log_msg + offset, MOS_MAX_MSG_BUF_SIZE,
1758                             MOS_MAX_MSG_BUF_SIZE - offset,
1759                             "\n\t\t\tdump bo wait rendering: bo handle = %d, timeout_nsec = %ld, wait_flags = %d, rw_flags = %d",
1760                             bo_handle,
1761                             timeout_nsec,
1762                             wait_flags,
1763                             rw_flags);
1764 
1765         for (int i = 0; i < count; i++)
1766         {
1767             offset += MOS_SecureStringPrint(log_msg + offset, MOS_MAX_MSG_BUF_SIZE,
1768                             MOS_MAX_MSG_BUF_SIZE - offset,
1769                             "\n\t\t\t-syncobj handle = %d, timeline = %ld",
1770                             handles[i],
1771                             points[i]);
1772         }
1773 
1774         offset > MOS_MAX_MSG_BUF_SIZE ?
1775             MOS_DRM_NORMALMESSAGE("incomplete dump since log msg buffer overflow %s", log_msg) : MOS_DRM_NORMALMESSAGE("%s", log_msg);
1776     }
1777 #endif
1778     return MOS_XE_SUCCESS;
1779 }
1780 
1781 /**
1782  * @bo points to the bo object to wait on.
1783  * @timeout_nsec is the timeout in nanoseconds:
1784  *     if timeout_nsec > 0, wait for the given time; on timeout, return -ETIME;
1785  *     if timeout_nsec == 0, check bo busy state; if busy, return -ETIME immediately.
1786  * @wait_flags selects the wait operation: wait all, wait submit, wait available or wait any;
1787  *     refer to the drm syncobj details in drm.h.
1788  * @rw_flags indicates the read/write operation:
1789  *     if rw_flags & EXEC_OBJECT_WRITE_XE, the bo is written; otherwise it is read.
1790  * @first_signaled receives the first signaled syncobj handle in the handles array.
1792 static int
1793 __mos_gem_bo_wait_timeline_rendering_with_flags_xe(struct mos_linux_bo *bo,
1794             int64_t timeout_nsec,
1795             uint32_t wait_flags,
1796             uint32_t rw_flags,
1797             uint32_t *first_signaled)
1798 {
1799     MOS_DRM_CHK_NULL_RETURN_VALUE(bo, -EINVAL)
1800 
1801     mos_xe_bufmgr_gem *bufmgr_gem = (mos_xe_bufmgr_gem *)bo->bufmgr;
1802     MOS_DRM_CHK_NULL_RETURN_VALUE(bufmgr_gem, -EINVAL)
1803 
1804     int ret = MOS_XE_SUCCESS;
1805     uint32_t count = 0;
1806     mos_xe_bo_gem *bo_gem = (mos_xe_bo_gem *)bo;
1807     std::map<uint32_t, uint64_t> timeline_data; //pair(syncobj, point)
1808     std::vector<uint32_t> handles;
1809     std::vector<uint64_t> points;
1810     std::set<uint32_t> exec_queue_ids;
1811     bufmgr_gem->m_lock.lock();
1812     bufmgr_gem->sync_obj_rw_lock.lock_shared();
1813     MOS_XE_GET_KEYS_FROM_MAP(bufmgr_gem->global_ctx_info, exec_queue_ids);
1814 
1815     mos_sync_get_bo_wait_timeline_deps(exec_queue_ids,
1816                 bo_gem->read_deps,
1817                 bo_gem->write_deps,
1818                 timeline_data,
1819                 bo_gem->last_exec_write_exec_queue,
1820                 rw_flags);
1821     bufmgr_gem->m_lock.unlock();
1822 
1823     for (auto it : timeline_data)
1824     {
1825         handles.push_back(it.first);
1826         points.push_back(it.second);
1827     }
1828 
1829     count = handles.size();
1830     if (count > 0)
1831     {
1832         ret = mos_sync_syncobj_timeline_wait(bufmgr_gem->fd,
1833                         handles.data(),
1834                         points.data(),
1835                         count,
1836                         timeout_nsec,
1837                         wait_flags,
1838                         first_signaled);
1839 
1840         __mos_dump_bo_wait_rendering_timeline_xe(bo_gem->gem_handle,
1841                         handles.data(),
1842                         points.data(),
1843                         count,
1844                         timeout_nsec,
1845                         wait_flags,
1846                         rw_flags);
1847     }
1848     bufmgr_gem->sync_obj_rw_lock.unlock_shared();
1849 
1850     return ret;
1851 }
1852 
1853 /**
1854  * Check if the bo is still in busy state.
1855  *
1856  * Check if the read deps on all exec_queues and the write dep on the last write exec_queue are signaled.
1857  * If any dep is not signaled, the bo is busy and -ETIME is returned immediately.
1858  * Otherwise, move all deps on this bo from the busy queue to the free queue for reuse.
1859  */
1860 static int
1861 mos_gem_bo_busy_xe(struct mos_linux_bo *bo)
1862 {
1863     MOS_DRM_CHK_NULL_RETURN_VALUE(bo, -EINVAL);
1864     mos_xe_bufmgr_gem *bufmgr_gem = (mos_xe_bufmgr_gem *)bo->bufmgr;
1865     MOS_DRM_CHK_NULL_RETURN_VALUE(bufmgr_gem, -EINVAL)
1866 
1867     int64_t timeout_nsec = 0;
1868     uint32_t wait_flags = DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL;
1869     uint32_t rw_flags = EXEC_OBJECT_READ_XE | EXEC_OBJECT_WRITE_XE;
1870 
1871     int ret =  __mos_gem_bo_wait_timeline_rendering_with_flags_xe(bo, timeout_nsec, wait_flags, rw_flags, nullptr);
1872 
1873     if (ret)
1874     {
1875         //busy
1876         if (errno != ETIME)
1877         {
1878             MOS_DRM_ASSERTMESSAGE("bo_busy_xe ret:%d, error:%d", ret, -errno);
1879         }
1880         return true;
1881     }
1882     else if (MOS_XE_SUCCESS == ret)
1883     {
1884         //free
1885         return false;
1886     }
1887 
1888     return false;
1889 }
1890 
1891 /**
1892  * Waits for all GPU rendering with the object to have completed.
1893  *
1894  * Wait until the read deps on all exec_queues and the write dep on the last write exec_queue are signaled.
1895  * Then move all deps on this bo from the busy queue to the free queue for reuse after rendering completes.
1896  */
1897 static void
1898 mos_gem_bo_wait_rendering_xe(struct mos_linux_bo *bo)
1899 {
1900     if (bo == nullptr || bo->bufmgr == nullptr)
1901     {
1902         MOS_DRM_ASSERTMESSAGE("ptr is null pointer");
1903         return;
1904     }
1905     mos_xe_bufmgr_gem *bufmgr_gem = (mos_xe_bufmgr_gem *)bo->bufmgr;
1906 
1907     int64_t timeout_nsec = INT64_MAX;
1908     uint32_t wait_flags = DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL;
1909     uint32_t rw_flags = EXEC_OBJECT_READ_XE | EXEC_OBJECT_WRITE_XE;
1910 
1911     int ret =  __mos_gem_bo_wait_timeline_rendering_with_flags_xe(bo, timeout_nsec, wait_flags, rw_flags, nullptr);
1912     if (ret)
1913     {
1914         MOS_DRM_ASSERTMESSAGE("bo_wait_rendering_xe ret:%d, error:%d", ret, -errno);
1915     }
1916 }
1917 
1918 /**
1919  * @timeout_ns nominally gives a wait timeout, but it is only a selector;
1920  *     it chooses between waiting for rendering completion and checking busy state:
1921  *     if timeout_ns != 0, wait until bo rendering has completed;
1922  *     if timeout_ns == 0, check bo busy state.
1923  */
1924 static int
1925 mos_gem_bo_wait_xe(struct mos_linux_bo *bo, int64_t timeout_ns)
1926 {
1927     if (timeout_ns)
1928     {
1929         mos_gem_bo_wait_rendering_xe(bo);
1930         return 0;
1931     }
1932     else
1933     {
1934         return mos_gem_bo_busy_xe(bo) ? -ETIME : 0;
1935     }
1936     return 0;
1937 }
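
/**
 * Usage sketch (editorial): the two behaviors of mos_gem_bo_wait_xe. A zero
 * timeout degenerates to a busy check; any non-zero value blocks until
 * rendering completes (the magnitude itself is ignored above).
 */
#if 0
static void
example_wait_semantics(struct mos_linux_bo *bo)
{
    if (-ETIME == mos_gem_bo_wait_xe(bo, 0))
    {
        // bo is still busy; either do other work, or block until idle:
        mos_gem_bo_wait_xe(bo, 1);
    }
}
#endif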
1938 
1939 /**
1940  * Map gpu resource for CPU read or write.
1941  *
1942  * 1. if map for write, it should wait read dep on all exec_queue and write dep on last write exec_queue signaled.
1943  * 2. if map for read, it should only wait write dep on last write exec_queue signaled.
1944  *
1945  * After bo rendering completed on GPU, then CPU could continue its read or write operation.
1946  */
1947 static int
1948 mos_bo_map_xe(struct mos_linux_bo *bo, int write_enable)
1949 {
1950     MOS_DRM_CHK_NULL_RETURN_VALUE(bo, -EINVAL)
1951     struct mos_xe_bufmgr_gem *bufmgr_gem = (struct mos_xe_bufmgr_gem *) bo->bufmgr;
1952     MOS_DRM_CHK_NULL_RETURN_VALUE(bufmgr_gem, -EINVAL)
1953     struct mos_xe_bo_gem *bo_gem = (struct mos_xe_bo_gem *) bo;
1954     int ret;
1955 
1956     int64_t timeout_nsec = INT64_MAX;
1957     uint32_t wait_flags = DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL;
1958     uint32_t rw_flags = write_enable ? EXEC_OBJECT_WRITE_XE : EXEC_OBJECT_READ_XE;
1959 
1960     ret =  __mos_gem_bo_wait_timeline_rendering_with_flags_xe(bo, timeout_nsec, wait_flags, rw_flags, nullptr);
1961     if (ret)
1962     {
1963         MOS_DRM_ASSERTMESSAGE("bo wait rendering error(%d)", -errno);
1964     }
1965 
1966     if (bo_gem->is_userptr)
1967     {
1968         /* Return the same user ptr */
1969         return 0;
1970     }
1971 
1972     bufmgr_gem->m_lock.lock();
1973     if (nullptr == bo_gem->mem_virtual)
1974     {
1975         struct drm_xe_gem_mmap_offset mmo;
1976         memclear(mmo);
1977         mmo.handle = bo->handle;
1978         ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_XE_GEM_MMAP_OFFSET, &mmo);
1979         if (ret)
1980         {
1981             bufmgr_gem->m_lock.unlock();
1982             return ret;
1983         }
1984 
1985         bo_gem->mem_virtual = drm_mmap(0, bo->size, PROT_READ | PROT_WRITE,
1986             MAP_SHARED, bufmgr_gem->fd, mmo.offset);
1987         if (MAP_FAILED == bo_gem->mem_virtual)
1988         {
1989             bo_gem->mem_virtual = nullptr;
1990             ret = -errno;
1991             MOS_DRM_ASSERTMESSAGE("Error mapping buffer %d (%s): %s .",
1992                 bo_gem->gem_handle, bo_gem->name,
1993                 strerror(errno));
1994         }
1995     }
1996 
1997 #ifdef __cplusplus
1998     bo->virt = bo_gem->mem_virtual;
1999 #else
2000     bo->virtual = bo_gem->mem_virtual;
2001 #endif
2002 
2003     atomic_inc(&bo_gem->map_count);
2004 
2005     __mos_bo_mark_mmaps_incoherent_xe(bo);
2006     VG(VALGRIND_MAKE_MEM_DEFINED(bo_gem->mem_virtual, bo->size));
2007     bufmgr_gem->m_lock.unlock();
2008 
2009     return 0;
2010 }
2011 
2012 static int
2013 mos_bo_map_wc_xe(struct mos_linux_bo *bo)
2014 {
2015     return mos_bo_map_xe(bo, false);
2016 }
2017 
2018 static int mos_bo_unmap_xe(struct mos_linux_bo *bo)
2019 {
2020     struct mos_xe_bo_gem *bo_gem = (struct mos_xe_bo_gem *) bo;
2021     MOS_DRM_CHK_NULL_RETURN_VALUE(bo_gem, 0)
2022     struct mos_xe_bufmgr_gem *bufmgr_gem = (struct mos_xe_bufmgr_gem *) bo->bufmgr;
2023     MOS_DRM_CHK_NULL_RETURN_VALUE(bufmgr_gem, 0)
2024 
2025     if (bo_gem->is_userptr)
2026         return 0;
2027 
2028     bufmgr_gem->m_lock.lock();
2029 
2030     if (atomic_dec_and_test(&bo_gem->map_count))
2031     {
2032        __mos_bo_mark_mmaps_incoherent_xe(bo);
2033 #ifdef __cplusplus
2034         bo->virt = nullptr;
2035 #else
2036         bo->virtual = nullptr;
2037 #endif
2038     }
2039     bufmgr_gem->m_lock.unlock();
2040 
2041     return 0;
2042 }
2043 
2044 static int
2045 mos_bo_unmap_wc_xe(struct mos_linux_bo *bo)
2046 {
2047     return mos_bo_unmap_xe(bo);
2048 }
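
/**
 * Usage sketch (editorial): CPU access through the map/unmap pair above.
 * Mapping for write first waits on both read and write deps; mapping for read
 * waits only on the last write dep.
 */
#if 0
static int
example_cpu_fill(struct mos_linux_bo *bo, uint8_t value)
{
    int ret = mos_bo_map_xe(bo, 1 /* write_enable */);
    if (ret)
        return ret;
#ifdef __cplusplus
    memset(bo->virt, value, bo->size);
#else
    memset(bo->virtual, value, bo->size);
#endif
    return mos_bo_unmap_xe(bo);
}
#endif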
2049 
2050 /**
2051  * Dump the sync info for the given execution.
2052  * @syncs contains fence-in points from bos that the current
2053  * execution depends on; @dep carries its fence-out point.
2054  */
2055 int __mos_dump_syncs_array_xe(struct drm_xe_sync *syncs,
2056             uint32_t count,
2057             mos_xe_dep *dep)
2058 {
2059 #if (_DEBUG || _RELEASE_INTERNAL)
2060     if (__XE_TEST_DEBUG(XE_DEBUG_SYNCHRONIZATION))
2061     {
2062         MOS_DRM_CHK_NULL_RETURN_VALUE(syncs, -EINVAL)
2063         MOS_DRM_CHK_NULL_RETURN_VALUE(dep, -EINVAL)
2064         char log_msg[MOS_MAX_MSG_BUF_SIZE] = { 0 };
2065         int offset = 0;
2066         offset += MOS_SecureStringPrint(log_msg + offset, MOS_MAX_MSG_BUF_SIZE,
2067                     MOS_MAX_MSG_BUF_SIZE - offset,
2068                     "\n\t\t\tdump fence out syncobj: handle = %d, timeline = %ld",
2069                     dep->syncobj_handle, dep->timeline_index);
2070         if (count > 0)
2071         {
2072             offset += MOS_SecureStringPrint(log_msg + offset, MOS_MAX_MSG_BUF_SIZE,
2073                     MOS_MAX_MSG_BUF_SIZE - offset,
2074                     "\n\t\t\tdump exec syncs array, num sync = %d",
2075                     count);
2076         }
2077         for (int i = 0; i < count; i++)
2078         {
2079             /**
2080              * Note: we assume all are timeline sync here, and change later when any other
2081              * types sync in use.
2082              */
2083             offset += MOS_SecureStringPrint(log_msg + offset, MOS_MAX_MSG_BUF_SIZE,
2084                     MOS_MAX_MSG_BUF_SIZE - offset,
2085                     "\n\t\t\t-syncobj_handle = %d, timeline = %ld, sync type = %d, sync flags = %d",
2086                     syncs[i].handle, syncs[i].timeline_value, syncs[i].type, syncs[i].flags);
2087         }
2088         offset > MOS_MAX_MSG_BUF_SIZE ?
2089             MOS_DRM_NORMALMESSAGE("incomplete dump since log msg buffer overflow %s", log_msg) : MOS_DRM_NORMALMESSAGE("%s", log_msg);
2090     }
2091 #endif
2092     return MOS_XE_SUCCESS;
2093 }
2094 
2095 /**
2096  * This is to dump timeline for each exec bo on such execution,
2097  * pair of execed_queue_id & timeline_value will be dumped.
2098  */
2099 int
2100 __mos_dump_bo_deps_map_xe(struct mos_linux_bo **bo,
2101             int num_bo,
2102             std::vector<mos_xe_exec_bo> &exec_list,
2103             uint32_t curr_exec_queue_id,
2104             std::map<uint32_t, struct mos_xe_context*> ctx_infos)
2105 {
2106 #if (_DEBUG || _RELEASE_INTERNAL)
2107     if (__XE_TEST_DEBUG(XE_DEBUG_SYNCHRONIZATION))
2108     {
2109         MOS_DRM_CHK_NULL_RETURN_VALUE(bo, -EINVAL)
2110         uint32_t exec_list_size = exec_list.size();
2111         for (int i = 0; i < exec_list_size + num_bo; i++)
2112         {
2113             mos_xe_bo_gem *exec_bo_gem = nullptr;
2114             uint32_t exec_flags = 0;
2115             if (i < exec_list_size)
2116             {
2117                 exec_bo_gem = (mos_xe_bo_gem *)exec_list[i].bo;
2118                 exec_flags = exec_list[i].flags;
2119             }
2120             else
2121             {
2122                 exec_bo_gem = (mos_xe_bo_gem *)bo[i - exec_list_size];
2123                 exec_flags = EXEC_OBJECT_WRITE_XE; //use write flags for batch bo as default.
2124             }
2125             if (exec_bo_gem)
2126             {
2127                 if (exec_bo_gem->is_imported || exec_bo_gem->is_exported)
2128                 {
2129                     MOS_DRM_NORMALMESSAGE("\n\t\t\tdump external bo, handle=%d, without deps map, skip dump", exec_bo_gem->bo.handle);
2130                 }
2131                 else
2132                 {
2133                     char log_msg[MOS_MAX_MSG_BUF_SIZE] = { 0 };
2134                     int offset = 0;
2135                     offset += MOS_SecureStringPrint(log_msg + offset, MOS_MAX_MSG_BUF_SIZE,
2136                                     MOS_MAX_MSG_BUF_SIZE - offset,
2137                                     "\n\t\t\tdump %s dep: bo handle=%d, curr_exec_queue_id=%d, curr_op_flags=%d",
2138                                     i >= exec_list_size ? "batch bo" : "exec bo",
2139                                     exec_bo_gem->bo.handle,
2140                                     curr_exec_queue_id,
2141                                     exec_flags);
2142 
2143                     auto it =  exec_bo_gem->read_deps.begin();
2144                     while (it != exec_bo_gem->read_deps.end())
2145                     {
2146                         if (ctx_infos.count(it->first) > 0)
2147                         {
2148                             offset += MOS_SecureStringPrint(log_msg + offset, MOS_MAX_MSG_BUF_SIZE,
2149                                             MOS_MAX_MSG_BUF_SIZE - offset,
2150                                             "\n\t\t\t-read deps: execed_exec_queue_id=%d, syncobj_handle=%d, timeline = %ld",
2151                                             it->first,
2152                                             it->second.dep ? it->second.dep->syncobj_handle : INVALID_HANDLE,
2153                                             it->second.dep ? it->second.exec_timeline_index : INVALID_HANDLE);
2154                         }
2155                         it++;
2156                     }
2157 
2158                     it = exec_bo_gem->write_deps.begin();
2159                     while (it != exec_bo_gem->write_deps.end())
2160                     {
2161                         if (ctx_infos.count(it->first) > 0)
2162                         {
2163                             offset += MOS_SecureStringPrint(log_msg + offset, MOS_MAX_MSG_BUF_SIZE,
2164                                             MOS_MAX_MSG_BUF_SIZE - offset,
2165                                             "\n\t\t\t-write deps: execed_exec_queue_id=%d, syncobj_handle=%d, timeline = %ld",
2166                                             it->first,
2167                                             it->second.dep ? it->second.dep->syncobj_handle : INVALID_HANDLE,
2168                                             it->second.dep ? it->second.exec_timeline_index : INVALID_HANDLE);
2169                         }
2170                         it++;
2171                     }
2172                     offset > MOS_MAX_MSG_BUF_SIZE ?
2173                         MOS_DRM_NORMALMESSAGE("incomplete dump since log msg buffer overflow %s", log_msg) : MOS_DRM_NORMALMESSAGE("%s", log_msg);
2174                 }
2175             }
2176         }
2177     }
2178 #endif
2179     return MOS_XE_SUCCESS;
2180 }
2181 
2182 static int
2183 __mos_context_exec_update_syncs_xe(struct mos_xe_bufmgr_gem *bufmgr_gem,
2184             struct mos_linux_bo **bo,
2185             int num_bo,
2186             struct mos_xe_context *ctx,
2187             std::vector<mos_xe_exec_bo> &exec_list,
2188             std::vector<struct drm_xe_sync> &syncs,
2189             std::vector<struct mos_xe_external_bo_info> &external_bos)
2190 {
2191     MOS_DRM_CHK_NULL_RETURN_VALUE(ctx, -EINVAL);
2192     uint32_t curr_dummy_exec_queue_id = ctx->dummy_exec_queue_id;
2193     uint32_t exec_list_size = exec_list.size();
2194     int ret = 0;
2195     std::set<uint32_t> exec_queue_ids;
2196     MOS_DRM_CHK_NULL_RETURN_VALUE(bufmgr_gem, -EINVAL);
2197     MOS_XE_GET_KEYS_FROM_MAP(bufmgr_gem->global_ctx_info, exec_queue_ids);
2198 
2199     for (int i = 0; i < exec_list_size + num_bo; i++)
2200     {
2201         mos_xe_bo_gem *exec_bo_gem = nullptr;
2202         uint32_t exec_flags = 0;
2203         if (i < exec_list_size)
2204         {
2205             //exec list bo
2206             exec_bo_gem = (mos_xe_bo_gem *)exec_list[i].bo;
2207             exec_flags = exec_list[i].flags;
2208         }
2209         else
2210         {
2211             //batch bo
2212             exec_bo_gem = (mos_xe_bo_gem *)bo[i - exec_list_size];
2213             exec_flags = EXEC_OBJECT_WRITE_XE; //use write flags for batch bo as default
2214         }
2215 
2216         if (exec_bo_gem)
2217         {
2218             if (exec_flags == 0)
2219             {
2220                 //Add an assert message here in case of potential thread safety issue.
2221                 //Currently, exec bo's flags could only be in (0, EXEC_OBJECT_READ_XE | EXEC_OBJECT_WRITE_XE]
2222                 MOS_DRM_ASSERTMESSAGE("Invalid op flags(0x0) for exec bo(handle=%d)", exec_bo_gem->bo.handle);
2223             }
2224 
2225             if (exec_bo_gem->is_imported || exec_bo_gem->is_exported)
2226             {
2227                 //external bo, need to export its syncobj every time.
2228                 int prime_fd = INVALID_HANDLE;
2229                 ret = mos_sync_update_exec_syncs_from_handle(
2230                             bufmgr_gem->fd,
2231                             exec_bo_gem->bo.handle,
2232                             exec_flags,
2233                             syncs,
2234                             prime_fd);
2235                 if (ret == MOS_XE_SUCCESS)
2236                 {
2237                     /**
2238                      * Note, must import batch syncobj for each external bo
2239                      * and close the syncobj created for them after exec submission.
2240                     */
2241                     int count = syncs.size();
2242                     struct mos_xe_external_bo_info infos;
2243                     memclear(infos);
2244                     infos.syncobj_handle = syncs[count - 1].handle;
2245                     infos.prime_fd = prime_fd;
2246                     external_bos.push_back(infos);
2247                 }
2248                 else
2249                 {
2250                     //Note: continue processing even on failure.
2251                     //This may only cause a potential synchronization issue; DON'T crash umd here.
2252                     MOS_DRM_ASSERTMESSAGE("Failed to update syncobj for external bo(%d)",
2253                                 exec_bo_gem->bo.handle);
2254                 }
2255             }
2256             else
2257             {
2258                 //internal bo
2259                 ret = mos_sync_update_exec_syncs_from_timeline_deps(
2260                             curr_dummy_exec_queue_id,
2261                             exec_bo_gem->last_exec_write_exec_queue,
2262                             exec_flags,
2263                             exec_queue_ids,
2264                             exec_bo_gem->read_deps,
2265                             exec_bo_gem->write_deps,
2266                             syncs);
2267             }
2268         }
2269     }
2270     return MOS_XE_SUCCESS;
2271 }
2272 
2273 static int
2274 __mos_context_exec_update_bo_deps_xe(struct mos_linux_bo **bo,
2275             int num_bo,
2276             std::vector<mos_xe_exec_bo> &exec_list,
2277             uint32_t curr_exec_queue_id,
2278             struct mos_xe_dep *dep)
2279 {
2280     uint32_t exec_list_size = exec_list.size();
2281 
2282     for (int i = 0; i < exec_list_size + num_bo; i++)
2283     {
2284         mos_xe_bo_gem *exec_bo_gem = nullptr;
2285         uint32_t exec_flags = 0;
2286         if (i < exec_list_size)
2287         {
2288             //exec list bo
2289             exec_bo_gem = (mos_xe_bo_gem *)exec_list[i].bo;
2290             exec_flags = exec_list[i].flags;
2291         }
2292         else
2293         {
2294             //batch bo
2295             exec_bo_gem = (mos_xe_bo_gem *)bo[i - exec_list_size];
2296             exec_flags = EXEC_OBJECT_WRITE_XE; //use write flags for batch bo as default.
2297         }
2298         if (exec_bo_gem)
2299         {
2300             mos_sync_update_bo_deps(curr_exec_queue_id, exec_flags, dep, exec_bo_gem->read_deps, exec_bo_gem->write_deps);
2301             if (exec_flags & EXEC_OBJECT_READ_XE)
2302             {
2303                 exec_bo_gem->last_exec_read_exec_queue = curr_exec_queue_id;
2304             }
2305             if (exec_flags & EXEC_OBJECT_WRITE_XE)
2306             {
2307                 exec_bo_gem->last_exec_write_exec_queue = curr_exec_queue_id;
2308             }
2309         }
2310     }
2311 
2312     return MOS_XE_SUCCESS;
2313 }
2314 
2315 /**
2316  * @ctx points to the guilty ctx that needs to be recovered for re-submission
2317  * @exec points to the exec data of the previously failed submission to re-submit
2318  * @curr_exec_queue_id holds the guilty exec_queue_id; it will be replaced by a newly created one
2319  */
2320 static int
2321 __mos_bo_context_exec_retry_xe(struct mos_bufmgr *bufmgr,
2322             struct mos_linux_context *ctx,
2323             struct drm_xe_exec &exec,
2324             uint32_t &curr_exec_queue_id)
2325 {
2326     MOS_DRM_CHK_NULL_RETURN_VALUE(bufmgr, -EINVAL);
2327     MOS_DRM_CHK_NULL_RETURN_VALUE(ctx, -EINVAL);
2328 
2329     struct mos_xe_bufmgr_gem *bufmgr_gem = (struct mos_xe_bufmgr_gem *)bufmgr;
2330     int ret = MOS_XE_SUCCESS;
2331 
2332     //query ctx property first to check if the failure is caused by an exec_queue ban
2333     uint64_t property_value = 0;
2334     ret = __mos_get_context_property_xe(bufmgr, ctx, DRM_XE_EXEC_QUEUE_GET_PROPERTY_BAN, property_value);
2335 
2336     /**
2337      * if exec_queue is banned, queried value is 1, otherwise it is zero;
2338      * if exec failure is not caused by exec_queue ban, umd could not help recover it.
2339      */
2340     if (ret || !property_value)
2341     {
2342         MOS_DRM_ASSERTMESSAGE("Failed to restore ctx(%d) with error(%d)",
2343                     curr_exec_queue_id, -EPERM);
2344         return -EPERM;
2345     }
2346 
2347     ret = __mos_context_restore_xe(bufmgr, ctx);
2348 
2349     if (ret == MOS_XE_SUCCESS)
2350     {
2351         curr_exec_queue_id = ctx->ctx_id;
2352         exec.exec_queue_id = curr_exec_queue_id;
2353         //try once again to submit
2354         ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_XE_EXEC, &exec);
2355         if (ret)
2356         {
2357             MOS_DRM_ASSERTMESSAGE("Failed to re-submit in DRM_IOCTL_XE_EXEC(errno:%d): new exec_queue_id = %d",
2358                         ret, curr_exec_queue_id);
2359         }
2360     }
2361     else
2362     {
2363         MOS_DRM_ASSERTMESSAGE("Failed to restore context with error(%d), exec_queue_id = %d",
2364                     ret, curr_exec_queue_id);
2365     }
2366     return ret;
2367 }
2368 
2369 /**
2370  * @bo contains batch bos only.
2371  * @num_bo is the number of batch bos.
2372  * @ctx points to the exec exec_queue.
2373  *
2374  * GPU<->GPU synchronization:
2375  * Exec must ensure GPU->GPU synchronization with the below 8 steps:
2376  * 1. Get the deps from read_deps and write_deps by checking the bo's op flags and add them into the syncs array;
2377  *     a) if flags & READ: get write_deps[last_write_exec_queue != ctx->dummy_exec_queue_id] & STATUS_DEP_BUSY only;
2378  *     b) if flags & WRITE: get read_deps[all_exec_queue exclude ctx->dummy_exec_queue_id] & STATUS_DEP_BUSY
2379  *        and write_deps[last_write_exec_queue != ctx->dummy_exec_queue_id] & STATUS_DEP_BUSY;
2380  *  2. Export a syncobj from each external bo as a dep and add it into the syncs array.
2381  *  3. Initialize a new timeline dep object for the exec queue if it doesn't have one and add it to the syncs array; otherwise add the timeline
2382  *     dep from context->timeline_dep directly, since it holds the latest available timeline point;
2383  *  4. Exec submission with batches and syncs.
2384  *  5. Update read_deps[ctx->dummy_exec_queue_id] and write_deps[ctx->dummy_exec_queue_id] with the new deps from the dep_queue;
2385  *  6. Update the timeline dep's timeline index to the latest available one for the current exec queue.
2386  *  7. Import the batch bo's syncobj into each external bo's DMA buffer so external processes can wait on the media process on demand.
2387  *  8. Close the syncobj handle and syncobj fd for each external bo to avoid leaks.
2388  * GPU->CPU(optional):
2389  *     If bo->map_deps.dep exists:
2390  *         get it and add it to the exec syncs array
2391  */
2392 static int
2393 mos_bo_context_exec_with_sync_xe(struct mos_linux_bo **bo, int num_bo, struct mos_linux_context *ctx,
2394                                struct drm_clip_rect *cliprects, int num_cliprects, int DR4,
2395                                unsigned int flags, int *fence)
2396 {
2397 
2398     MOS_DRM_CHK_NULL_RETURN_VALUE(bo, -EINVAL)
2399     MOS_DRM_CHK_NULL_RETURN_VALUE(ctx, -EINVAL)
2400     if (num_bo <= 0)
2401     {
2402         MOS_DRM_ASSERTMESSAGE("invalid batch bo num(%d)", num_bo);
2403         return -EINVAL;
2404     }
2405 
2406     struct mos_xe_bufmgr_gem *bufmgr_gem = (struct mos_xe_bufmgr_gem *) bo[0]->bufmgr;
2407     MOS_DRM_CHK_NULL_RETURN_VALUE(bufmgr_gem, -EINVAL)
2408 
2409     uint64_t batch_addrs[num_bo];
2410 
2411     std::vector<mos_xe_exec_bo> exec_list;
2412     for (int i = 0; i < num_bo; i++)
2413     {
2414         MOS_DRM_CHK_NULL_RETURN_VALUE(bo[i], -EINVAL)
2415         batch_addrs[i] = bo[i]->offset64;
2416         struct mos_xe_bo_gem *batch_bo_gem = (struct mos_xe_bo_gem *) bo[i];
2417         MOS_XE_GET_VALUES_FROM_MAP(batch_bo_gem->exec_list, exec_list);
2418     }
2419 
2420     struct mos_xe_context *context = (struct mos_xe_context *) ctx;
2421     uint32_t curr_exec_queue_id = context->ctx.ctx_id;
2422     std::vector<struct mos_xe_external_bo_info> external_bos;
2423     std::vector<struct drm_xe_sync> syncs;
2424     uint64_t curr_timeline = 0;
2425     int ret = 0;
2426 
2427     uint32_t exec_list_size = exec_list.size();
2428     if (exec_list_size == 0)
2429     {
2430         MOS_DRM_NORMALMESSAGE("invalid exec list count(%d)", exec_list_size);
2431     }
2432 
2433     bufmgr_gem->m_lock.lock();
2434 
2435     if (context->timeline_dep == nullptr)
2436     {
2437         context->timeline_dep = mos_sync_create_timeline_dep(bufmgr_gem->fd);
2438 
2439         if (context->timeline_dep == nullptr)
2440         {
2441             MOS_DRM_ASSERTMESSAGE("Failed to initialize context timeline dep");
2442             bufmgr_gem->m_lock.unlock();
2443             return -ENOMEM;
2444         }
2445     }
2446 
2447     struct mos_xe_dep *dep = context->timeline_dep;
2448     //add the latest available timeline point(dep) into syncs as the fence out point.
2449     mos_sync_update_exec_syncs_from_timeline_dep(
2450                           bufmgr_gem->fd,
2451                           dep,
2452                           syncs);
2453 
2454     bufmgr_gem->sync_obj_rw_lock.lock_shared();
2455     //update exec syncs array with external and internal bo deps
2456     __mos_context_exec_update_syncs_xe(
2457                 bufmgr_gem,
2458                 bo,
2459                 num_bo,
2460                 context,
2461                 exec_list,
2462                 syncs,
2463                 external_bos);
2464 
2465     //exec submit
2466     uint32_t sync_count = syncs.size();
2467     struct drm_xe_sync *syncs_array = syncs.data();
2468 
2469     //dump bo deps map
2470     __mos_dump_bo_deps_map_xe(bo, num_bo, exec_list, curr_exec_queue_id, bufmgr_gem->global_ctx_info);
2471     //dump fence in and fence out info
2472     __mos_dump_syncs_array_xe(syncs_array, sync_count, dep);
2473 
2474     struct drm_xe_exec exec;
2475     memclear(exec);
2476     exec.extensions = 0;
2477     exec.exec_queue_id = curr_exec_queue_id;
2478     exec.num_syncs = sync_count;
2479     exec.syncs = (uintptr_t)syncs_array;
2480     /**
2481      * exec.address only accepts batch->offset64 when num bo == 1;
2482      * and it only accepts batch array when num bo > 1
2483     */
2484     exec.address = (num_bo == 1 ? (uintptr_t)batch_addrs[0] : (uintptr_t)batch_addrs);
2485     exec.num_batch_buffer = num_bo;
2486     ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_XE_EXEC, &exec);
2487     if (ret)
2488     {
2489         MOS_DRM_ASSERTMESSAGE("Failed to submit in DRM_IOCTL_XE_EXEC(errno:%d): exec_queue_id = %d, num_syncs = %d, num_bo = %d",
2490                     -errno, curr_exec_queue_id, sync_count, num_bo);
2491 
2492         //check if it is caused by a guilty exec_queue_id; if so, restore the exec queue here and re-try the exec.
2493         if (ret == -EPERM)
2494         {
2495             ret = __mos_bo_context_exec_retry_xe(&bufmgr_gem->bufmgr, ctx, exec, curr_exec_queue_id);
2496         }
2497     }
2498     curr_timeline = dep->timeline_index;
2499 
2500     //update bos' read and write dep with new timeline
2501     __mos_context_exec_update_bo_deps_xe(bo, num_bo, exec_list, context->dummy_exec_queue_id, dep);
2502 
2503     //Update dep with latest available timeline
2504     mos_sync_update_timeline_dep(dep);
2505 
2506     bufmgr_gem->sync_obj_rw_lock.unlock_shared();
2507     bufmgr_gem->m_lock.unlock();
2508 
2509     //import batch syncobj or its point for external bos and close syncobj created for external bo before.
2510     uint32_t external_bo_count = external_bos.size();
2511     int sync_file_fd = INVALID_HANDLE;
2512     int temp_syncobj = INVALID_HANDLE;
2513 
2514     if (external_bo_count > 0)
2515     {
2516         temp_syncobj = mos_sync_syncobj_create(bufmgr_gem->fd, 0);
2517         if (temp_syncobj > 0)
2518         {
2519             mos_sync_syncobj_timeline_to_binary(bufmgr_gem->fd, temp_syncobj, dep->syncobj_handle, curr_timeline, 0);
2520             sync_file_fd = mos_sync_syncobj_handle_to_syncfile_fd(bufmgr_gem->fd, temp_syncobj);
2521         }
2522     }
2523     for (int i = 0; i < external_bo_count; i++)
2524     {
2525         //import syncobj for external bos
2526         if (sync_file_fd >= 0)
2527         {
2528             mos_sync_import_syncfile_to_external_bo(bufmgr_gem->fd, external_bos[i].prime_fd, sync_file_fd);
2529         }
2530         if (external_bos[i].prime_fd != INVALID_HANDLE)
2531         {
2532             close(external_bos[i].prime_fd);
2533         }
2534         mos_sync_syncobj_destroy(bufmgr_gem->fd, external_bos[i].syncobj_handle);
2535     }
2536     if (sync_file_fd >= 0)
2537     {
2538         close(sync_file_fd);
2539     }
2540     if (temp_syncobj > 0)
2541     {
2542         mos_sync_syncobj_destroy(bufmgr_gem->fd, temp_syncobj);
2543     }
2544 
2545     //Note: keep exec return value for final return value.
2546     return ret;
2547 }
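
/**
 * Usage sketch (editorial): the end-to-end submission flow built from the
 * helpers above. Error handling is elided; batch_bo is assumed to already
 * contain a valid batch.
 */
#if 0
static int
example_submit(struct mos_linux_bo *batch_bo,
            struct mos_linux_bo *dst_bo,
            struct mos_linux_context *ctx)
{
    // 1. record the resources the batch touches
    mos_gem_bo_update_exec_list_xe(batch_bo, dst_bo, true);
    // 2. submit; fences and deps are assembled internally per the 8 steps above
    int ret = mos_bo_context_exec_with_sync_xe(&batch_bo, 1, ctx,
                nullptr, 0, 0, 0, nullptr);
    // 3. release the exec-list references taken in step 1
    mos_gem_bo_clear_exec_list_xe(batch_bo, 0);
    return ret;
}
#endif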
2548 
2549 /**
2550  * Get the DEVICE ID for the device.  This can be overridden by setting the
2551  * INTEL_DEVID_OVERRIDE environment variable to the desired ID.
2552  */
2553 static int
2554 mos_get_devid_xe(struct mos_bufmgr *bufmgr)
2555 {
2556     struct mos_xe_bufmgr_gem *bufmgr_gem = (struct mos_xe_bufmgr_gem *)bufmgr;
2557     int fd = bufmgr_gem->fd;
2558     struct mos_xe_device *dev = &bufmgr_gem->xe_device;
2559 
2560     MOS_DRM_CHK_XE_DEV(dev, config, __mos_query_config_xe, 0)
2561     struct drm_xe_query_config *config = dev->config;
2562 
2563     return (config->info[DRM_XE_QUERY_CONFIG_REV_AND_DEVICE_ID] & 0xffff);
2564 }
2565 
2566 static struct drm_xe_query_engines *
2567 __mos_query_engines_xe(int fd)
2568 {
2569     if (fd < 0)
2570     {
2571         return nullptr;
2572     }
2573 
2574     struct drm_xe_device_query query;
2575     struct drm_xe_query_engines *engines;
2576     int ret;
2577 
2578     memclear(query);
2579     query.extensions = 0;
2580     query.query = DRM_XE_DEVICE_QUERY_ENGINES;
2581     query.size = 0;
2582     query.data = 0;
2583 
2584     ret = drmIoctl(fd, DRM_IOCTL_XE_DEVICE_QUERY, &query);
2585     if (ret || !query.size)
2586     {
2587         MOS_DRM_ASSERTMESSAGE("ret:%d, length:%d", ret, query.size);
2588         return nullptr;
2589     }
2590 
2591     engines = (drm_xe_query_engines *)calloc(1, query.size);
2592     MOS_DRM_CHK_NULL_RETURN_VALUE(engines, nullptr)
2593 
2594     query.data = (uintptr_t)engines;
2595     ret = drmIoctl(fd, DRM_IOCTL_XE_DEVICE_QUERY, &query);
2596     if (ret || !query.size)
2597     {
2598         MOS_DRM_ASSERTMESSAGE("ret:%d, length:%d", ret, query.size);
2599         MOS_XE_SAFE_FREE(engines);
2600         return nullptr;
2601     }
2602 
2603     return engines;
2604 }
2605 
2606 static int
2607 mos_query_engines_count_xe(struct mos_bufmgr *bufmgr, unsigned int *nengine)
2608 {
2609     MOS_DRM_CHK_NULL_RETURN_VALUE(nengine, -EINVAL);
2610     struct mos_xe_bufmgr_gem *bufmgr_gem = (struct mos_xe_bufmgr_gem *)bufmgr;
2611     int fd = bufmgr_gem->fd;
2612     struct mos_xe_device *dev = &bufmgr_gem->xe_device;
2613 
2614     MOS_DRM_CHK_XE_DEV(dev, engines, __mos_query_engines_xe, -ENODEV)
2615     *nengine = dev->engines->num_engines;
2616 
2617     return MOS_XE_SUCCESS;
2618 }
2619 
2620 int
2621 mos_query_engines_xe(struct mos_bufmgr *bufmgr,
2622                       __u16 engine_class,
2623                       __u64 caps,
2624                       unsigned int *nengine,
2625                       void *engine_map)
2626 {
2627     MOS_DRM_CHK_NULL_RETURN_VALUE(nengine, -EINVAL);
2628     MOS_DRM_CHK_NULL_RETURN_VALUE(engine_map, -EINVAL);
2629 
2630     struct mos_xe_bufmgr_gem *bufmgr_gem = (struct mos_xe_bufmgr_gem *)bufmgr;
2631     struct drm_xe_engine_class_instance *ci = (struct drm_xe_engine_class_instance *)engine_map;
2632     int fd = bufmgr_gem->fd;
2633     struct mos_xe_device *dev = &bufmgr_gem->xe_device;
2634 
2635     MOS_DRM_CHK_XE_DEV(dev, engines, __mos_query_engines_xe, -ENODEV)
2636     struct drm_xe_query_engines *engines = dev->engines;
2637 
2638     int i, num;
2639     struct drm_xe_engine *engine;
2640     for (i = 0, num = 0; i < engines->num_engines; i++)
2641     {
2642         engine = (struct drm_xe_engine *)&engines->engines[i];
2643         if (engine_class == engine->instance.engine_class)
2644         {
2645             ci->engine_class = engine_class;
2646             ci->engine_instance = engine->instance.engine_instance;
2647             ci->gt_id = engine->instance.gt_id;
2648             ci++;
2649             num++;
2650         }
2651 
2652         if (num > *nengine)
2653         {
2654             MOS_DRM_ASSERTMESSAGE("Number of engine instances out of range, %d,%d", num, *nengine);
2655             return -1;
2656         }
2657     }
2658 
2659     //Note30: need to confirm if engine_instance is ordered, otherwise re-order needed.
2660 
2661     *nengine = num;
2662 
2663     return 0;
2664 }
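
/**
 * Usage sketch (editorial): the count-then-fill pattern for engine
 * enumeration. Sizing the map with mos_get_engine_class_size_xe() keeps the
 * caller independent of struct drm_xe_engine_class_instance.
 */
#if 0
static int
example_enum_decode_engines(struct mos_bufmgr *bufmgr)
{
    unsigned int nengine = 0;
    if (mos_query_engines_count_xe(bufmgr, &nengine) != MOS_XE_SUCCESS || 0 == nengine)
        return -ENODEV;

    void *engine_map = calloc(nengine, mos_get_engine_class_size_xe());
    if (nullptr == engine_map)
        return -ENOMEM;

    // nengine is in/out: capacity on input, matching instance count on output.
    int ret = mos_query_engines_xe(bufmgr, DRM_XE_ENGINE_CLASS_VIDEO_DECODE,
                0, &nengine, engine_map);
    free(engine_map);
    return ret;
}
#endif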
2665 
2666 static size_t
2667 mos_get_engine_class_size_xe()
2668 {
2669     return sizeof(struct drm_xe_engine_class_instance);
2670 }
2671 
2672 static int
2673 mos_query_sysinfo_xe(struct mos_bufmgr *bufmgr, MEDIA_SYSTEM_INFO* gfx_info)
2674 {
2675     MOS_DRM_CHK_NULL_RETURN_VALUE(bufmgr, -EINVAL);
2676     MOS_DRM_CHK_NULL_RETURN_VALUE(gfx_info, -EINVAL);
2677 
2678     struct mos_xe_bufmgr_gem *bufmgr_gem = (struct mos_xe_bufmgr_gem *)bufmgr;
2679     int fd = bufmgr_gem->fd;
2680     struct mos_xe_device *dev = &bufmgr_gem->xe_device;
2681     int ret;
2682 
2683     MOS_DRM_CHK_XE_DEV(dev, engines, __mos_query_engines_xe, -ENODEV)
2684 
2685     if (0 == gfx_info->VDBoxInfo.NumberOfVDBoxEnabled
2686                 || 0 == gfx_info->VEBoxInfo.NumberOfVEBoxEnabled)
2687     {
2688         unsigned int num_vd = 0;
2689         unsigned int num_ve = 0;
2690 
2691         for (unsigned int i = 0; i < dev->engines->num_engines; i++)
2692         {
2693             if (0 == gfx_info->VDBoxInfo.NumberOfVDBoxEnabled
2694                         && dev->engines->engines[i].instance.engine_class == DRM_XE_ENGINE_CLASS_VIDEO_DECODE)
2695             {
2696                 gfx_info->VDBoxInfo.Instances.VDBoxEnableMask |=
2697                     1 << dev->engines->engines[i].instance.engine_instance;
2698                 num_vd++;
2699             }
2700 
2701             if (0 == gfx_info->VEBoxInfo.NumberOfVEBoxEnabled
2702                         && dev->engines->engines[i].instance.engine_class == DRM_XE_ENGINE_CLASS_VIDEO_ENHANCE)
2703             {
2704                 num_ve++;
2705             }
2706         }
2707 
2708         if (num_vd > 0)
2709         {
2710             gfx_info->VDBoxInfo.NumberOfVDBoxEnabled = num_vd;
2711         }
2712 
2713         if (num_ve > 0)
2714         {
2715             gfx_info->VEBoxInfo.NumberOfVEBoxEnabled = num_ve;
2716         }
2717     }
2718 
2719     return 0;
2720 }
2721 
2722 void mos_select_fixed_engine_xe(struct mos_bufmgr *bufmgr,
2723             void *engine_map,
2724             uint32_t *nengine,
2725             uint32_t fixed_instance_mask)
2726 {
2727     MOS_UNUSED(bufmgr);
2728 #if (_DEBUG || _RELEASE_INTERNAL)
2729     if (fixed_instance_mask)
2730     {
2731         struct drm_xe_engine_class_instance *_engine_map = (struct drm_xe_engine_class_instance *)engine_map;
2732         auto unselect_index = 0;
2733         for (auto bit = 0; bit < *nengine; bit++)
2734         {
2735             if (((fixed_instance_mask >> bit) & 0x1) && (bit > unselect_index))
2736             {
2737                 _engine_map[unselect_index].engine_class = _engine_map[bit].engine_class;
2738                 _engine_map[unselect_index].engine_instance = _engine_map[bit].engine_instance;
2739                 _engine_map[unselect_index].gt_id = _engine_map[bit].gt_id;
2740                 _engine_map[unselect_index].pad = _engine_map[bit].pad;
2741                 _engine_map[bit].engine_class = 0;
2742                 _engine_map[bit].engine_instance = 0;
2743                 _engine_map[bit].gt_id = 0;
2744                 _engine_map[bit].pad = 0;
2745                 unselect_index++;
2746             }
2747             else if (((fixed_instance_mask >> bit) & 0x1) && (bit == unselect_index))
2748             {
2749                 unselect_index++;
2750             }
2751             else if (!((fixed_instance_mask >> bit) & 0x1))
2752             {
2753                 _engine_map[bit].engine_class = 0;
2754                 _engine_map[bit].engine_instance = 0;
2755                 _engine_map[bit].gt_id = 0;
2756                 _engine_map[bit].pad = 0;
2757             }
2758         }
2759         *nengine = unselect_index;
2760     }
2761 #else
2762     MOS_UNUSED(engine_map);
2763     MOS_UNUSED(nengine);
2764     MOS_UNUSED(fixed_instance_mask);
2765 #endif
2766 
2767 }
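
/**
 * Worked example (editorial): with *nengine == 4 and fixed_instance_mask ==
 * 0x5 (0b0101), instances 0 and 2 are kept, instance 2 is compacted into
 * slot 1, slots 2..3 are zeroed, and *nengine becomes 2. The selection only
 * takes effect in debug/release-internal builds per the guard above.
 */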
2768 
2769 
2770 /**
2771  * Note: xe kmd doesn't support query blob before dg2.
2772  */
2773 static uint32_t *
2774 __mos_query_hw_config_xe(int fd)
2775 {
2776     struct drm_xe_device_query query;
2777     uint32_t *hw_config;
2778     int ret;
2779 
2780     if (fd < 0)
2781     {
2782         return nullptr;
2783     }
2784 
2785     memclear(query);
2786     query.query = DRM_XE_DEVICE_QUERY_HWCONFIG;
2787 
2788     ret = drmIoctl(fd, DRM_IOCTL_XE_DEVICE_QUERY, &query);
2789     if (ret || !query.size)
2790     {
2791         MOS_DRM_ASSERTMESSAGE("ret:%d, length:%d", ret, query.size);
2792         return nullptr;
2793     }
2794 
2795     hw_config = (uint32_t *)calloc(1, query.size + sizeof(uint32_t));
2796     MOS_DRM_CHK_NULL_RETURN_VALUE(hw_config, nullptr)
2797 
2798     query.data = (uintptr_t)&hw_config[1];
2799     ret = drmIoctl(fd, DRM_IOCTL_XE_DEVICE_QUERY, &query);
2800     if (ret != 0 || query.size <= 0)
2801     {
2802         MOS_DRM_ASSERTMESSAGE("ret:%d, length:%d", ret, query.size);
2803         MOS_XE_SAFE_FREE(hw_config);
2804         return nullptr;
2805     }
2806 
2807     hw_config[0] = query.size / sizeof(uint32_t);
2808 
2809     return hw_config;
2810 }
2811 
2812 static int
2813 mos_query_device_blob_xe(struct mos_bufmgr *bufmgr, MEDIA_SYSTEM_INFO* gfx_info)
2814 {
2815     MOS_DRM_CHK_NULL_RETURN_VALUE(gfx_info, -EINVAL)
2816 
2817     struct mos_xe_bufmgr_gem *bufmgr_gem = (struct mos_xe_bufmgr_gem *)bufmgr;
2818     int fd = bufmgr_gem->fd;
2819     struct mos_xe_device *dev = &bufmgr_gem->xe_device;
2820 
2821     MOS_DRM_CHK_XE_DEV(dev, hw_config, __mos_query_hw_config_xe, -ENODEV)
2822 
2823     uint32_t *hwconfig = &dev->hw_config[1];
2824     uint32_t num_config = dev->hw_config[0];
2825 
2826     int i = 0;
2827     while (i < num_config) {
2828         /* Attribute ID starts with 1 */
2829         assert(hwconfig[i] > 0);
2830 
2831     #if DEBUG_BLOB_QUERY
2832         MOS_DRM_NORMALMESSAGE("query blob: key=%s, value=%d", key_string[hwconfig[i]], hwconfig[i+2]);
2833     #endif
2834         if (INTEL_HWCONFIG_MAX_SLICES_SUPPORTED == hwconfig[i])
2835         {
2836             assert(hwconfig[i+1] == 1);
2837             gfx_info->SliceCount = hwconfig[i+2];
2838             gfx_info->MaxSlicesSupported = hwconfig[i+2];
2839         }
2840 
2841         if ((INTEL_HWCONFIG_MAX_DUAL_SUBSLICES_SUPPORTED == hwconfig[i])
2842             || (INTEL_HWCONFIG_MAX_SUBSLICE == hwconfig[i]))
2843         {
2844             assert(hwconfig[i+1] == 1);
2845             gfx_info->SubSliceCount = hwconfig[i+2];
2846             gfx_info->MaxSubSlicesSupported = hwconfig[i+2];
2847         }
2848 
2849         if ((INTEL_HWCONFIG_MAX_NUM_EU_PER_DSS == hwconfig[i])
2850             || (INTEL_HWCONFIG_MAX_EU_PER_SUBSLICE == hwconfig[i]))
2851         {
2852             assert(hwconfig[i+1] == 1);
2853             gfx_info->MaxEuPerSubSlice = hwconfig[i+2];
2854         }
2855 
2856         if (INTEL_HWCONFIG_DEPRECATED_L3_CACHE_SIZE_IN_KB == hwconfig[i])
2857         {
2858             assert(hwconfig[i+1] == 1);
2859             gfx_info->L3CacheSizeInKb = hwconfig[i+2];
2860         }
2861 
2862         if (INTEL_HWCONFIG_NUM_THREADS_PER_EU == hwconfig[i])
2863         {
2864             assert(hwconfig[i+1] == 1);
2865             gfx_info->NumThreadsPerEu = hwconfig[i+2];
2866         }
2867 
2868         if (INTEL_HWCONFIG_MAX_VECS == hwconfig[i])
2869         {
2870             assert(hwconfig[i+1] == 1);
2871             gfx_info->MaxVECS = hwconfig[i+2];
2872         }
2873 
2874         /* Advance to next key */
2875         i += hwconfig[i + 1];  // value size in dwords
2876         i += 2;                // key + length fields
2877     }
2878 
2879     return 0;
2880 }
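/*
 * Hedged usage sketch for the query above (assumes a valid bufmgr obtained
 * from mos_bufmgr_gem_init_xe(); only the fields assigned above are
 * meaningful on return):
 *
 *     MEDIA_SYSTEM_INFO gfx_info = {};
 *     if (0 == mos_query_device_blob_xe(bufmgr, &gfx_info))
 *     {
 *         // gfx_info.SliceCount, gfx_info.SubSliceCount,
 *         // gfx_info.MaxEuPerSubSlice, ... are now populated
 *     }
 */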
2881 
2882 static void
2883 mos_enable_reuse_xe(struct mos_bufmgr *bufmgr)
2884 {
2885     MOS_UNIMPLEMENT(bufmgr);
2886 }
2887 
2888 // This function is not supported by the xe KMD
2889 static int mos_query_hw_ip_version_xe(struct mos_bufmgr *bufmgr, __u16 engine_class, void *ip_ver_info)
2890 {
2891     MOS_UNIMPLEMENT(bufmgr);
2892     MOS_UNIMPLEMENT(engine_class);
2893     MOS_UNIMPLEMENT(ip_ver_info);
2894     return 0;
2895 }
2896 
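/*
 * Frees a BO. The teardown order below matters: wait for pending GPU work,
 * drop any CPU mapping, synchronously unbind the BO from its VM, close the
 * GEM handle (both the mapping and the close are skipped for userptr BOs,
 * which have no GEM backing), then return the VMA range to the heap for
 * reuse.
 */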
2897 static void
2898 mos_bo_free_xe(struct mos_linux_bo *bo)
2899 {
2900     struct mos_xe_bufmgr_gem *bufmgr_gem = nullptr;
2901     struct mos_xe_bo_gem *bo_gem = (struct mos_xe_bo_gem *) bo;
2902     struct drm_gem_close close_ioctl;
2903     int ret;
2904 
2905     if (nullptr == bo_gem)
2906     {
2907         MOS_DRM_ASSERTMESSAGE("bo == nullptr");
2908         return;
2909     }
2910 
2911     bufmgr_gem = (struct mos_xe_bufmgr_gem *) bo->bufmgr;
2912 
2913     if (nullptr == bufmgr_gem)
2914     {
2915         MOS_DRM_ASSERTMESSAGE("bufmgr_gem == nullptr");
2916         return;
2917     }
2918 
2919     mos_gem_bo_wait_rendering_xe(bo);
2920 
2921     bufmgr_gem->m_lock.lock();
2922 
2923     if (!bo_gem->is_userptr)
2924     {
2925         if (bo_gem->mem_virtual)
2926         {
2927             VG(VALGRIND_MAKE_MEM_NOACCESS(bo_gem->mem_virtual, 0));
2928             drm_munmap(bo_gem->mem_virtual, bo_gem->bo.size);
2929             bo_gem->mem_virtual = nullptr;
2930         }
2931     }
2932 
2933     if (bo->vm_id != INVALID_VM)
2934     {
2935         ret = mos_vm_bind_sync_xe(bufmgr_gem->fd,
2936                     bo->vm_id,
2937                     0,
2938                     0,
2939                     bo->offset64,
2940                     bo->size,
2941                     bo_gem->pat_index,
2942                     DRM_XE_VM_BIND_OP_UNMAP);
2943         if (ret)
2944         {
2945             MOS_DRM_ASSERTMESSAGE("mos_bo_free_xe: vm unbind failed. bo:0x%lx, vm_id:%d",
2946                     (uint64_t)bo,
2947                     bo->vm_id);
2948         }
2949         else
2950         {
2951             bo->vm_id = INVALID_VM;
2952         }
2953     }
2954 
2955     if (!bo_gem->is_userptr)
2956     {
2957         /* Close this object */
2958         memclear(close_ioctl);
2959         close_ioctl.handle = bo_gem->gem_handle;
2960         ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_GEM_CLOSE, &close_ioctl);
2961         if (ret != 0)
2962         {
2963             MOS_DRM_ASSERTMESSAGE("DRM_IOCTL_GEM_CLOSE %d failed (%s): %s",
2964                 bo_gem->gem_handle, bo_gem->name, strerror(errno));
2965         }
2966     }
2967 
2968     if (bufmgr_gem->mem_profiler_fd != -1)
2969     {
2970         snprintf(bufmgr_gem->mem_profiler_buffer, MEM_PROFILER_BUFFER_SIZE, "GEM_CLOSE, %d, %d, %lu, %d\n", getpid(), bo->handle, bo->size, bo_gem->mem_region);
2971         ret = write(bufmgr_gem->mem_profiler_fd, bufmgr_gem->mem_profiler_buffer, strnlen(bufmgr_gem->mem_profiler_buffer, MEM_PROFILER_BUFFER_SIZE));
2972         if (-1 == ret)
2973         {
2974             snprintf(bufmgr_gem->mem_profiler_buffer, MEM_PROFILER_BUFFER_SIZE, "GEM_CLOSE, %d, %d, %lu, %d\n", getpid(), bo->handle, bo->size, bo_gem->mem_region);
2975             ret = write(bufmgr_gem->mem_profiler_fd, bufmgr_gem->mem_profiler_buffer, strnlen(bufmgr_gem->mem_profiler_buffer, MEM_PROFILER_BUFFER_SIZE));
2976             if (-1 == ret)
2977             {
2978                 MOS_DRM_ASSERTMESSAGE("Failed to write to %s: %s", bufmgr_gem->mem_profiler_path, strerror(errno));
2979             }
2980         }
2981     }
2982 
2983     /* Return the VMA for reuse */
2984     __mos_bo_vma_free_xe(bo->bufmgr, bo->offset64, bo->size);
2985     bufmgr_gem->m_lock.unlock();
2986 
2987     MOS_Delete(bo_gem);
2988 }
2989 
2990 static int
2991 mos_bo_set_softpin_xe(MOS_LINUX_BO *bo)
2992 {
2993     MOS_UNIMPLEMENT(bo);
2994     return 0;
2995 }
2996 
2997 static void
2998 mos_bufmgr_gem_destroy_xe(struct mos_bufmgr *bufmgr)
2999 {
3000     if (nullptr == bufmgr)
3001         return;
3002 
3003     struct mos_xe_bufmgr_gem *bufmgr_gem = (struct mos_xe_bufmgr_gem *) bufmgr;
3004     struct mos_xe_device *dev = &bufmgr_gem->xe_device;
3005     int i, ret;
3006 
3007     /* Release userptr bo kept hanging around for optimisation. */
3008 
3009     mos_vma_heap_finish(&bufmgr_gem->vma_heap[MEMZONE_SYS]);
3010     mos_vma_heap_finish(&bufmgr_gem->vma_heap[MEMZONE_DEVICE]);
3011     mos_vma_heap_finish(&bufmgr_gem->vma_heap[MEMZONE_PRIME]);
3012 
3013     if (bufmgr_gem->vm_id != INVALID_VM)
3014     {
3015         __mos_vm_destroy_xe(bufmgr, bufmgr_gem->vm_id);
3016         bufmgr_gem->vm_id = INVALID_VM;
3017     }
3018 
3019     if (bufmgr_gem->mem_profiler_fd != -1)
3020     {
3021         close(bufmgr_gem->mem_profiler_fd);
3022     }
3023 
3024     MOS_XE_SAFE_FREE(dev->hw_config);
3025     dev->hw_config = nullptr;
3026 
3027     MOS_XE_SAFE_FREE(dev->config);
3028     dev->config = nullptr;
3029 
3030     MOS_XE_SAFE_FREE(dev->engines);
3031     dev->engines = nullptr;
3032 
3033     MOS_XE_SAFE_FREE(dev->mem_regions);
3034     dev->mem_regions = nullptr;
3035 
3036     MOS_XE_SAFE_FREE(dev->gt_list);
3037     dev->gt_list = nullptr;
3038 
3039     MOS_Delete(bufmgr_gem);
3040 }
3041 
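/*
 * Drops one reference; the last reference destroys the manager. Note the
 * libdrm-style atomic_add_unless() below appears to return true only when
 * the counter already equals the "unless" value (1 here), i.e. when no
 * decrement was performed; the final decrement to zero then happens under
 * bufmgr_list_mutex via atomic_dec_and_test(), keeping destruction and the
 * bufmgr list consistent.
 */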
3042 static void
3043 mos_bufmgr_gem_unref_xe(struct mos_bufmgr *bufmgr)
3044 {
3045     struct mos_xe_bufmgr_gem *bufmgr_gem = (struct mos_xe_bufmgr_gem *)bufmgr;
3046 
3047     if (bufmgr_gem && atomic_add_unless(&bufmgr_gem->ref_count, -1, 1))
3048     {
3049         pthread_mutex_lock(&bufmgr_list_mutex);
3050 
3051         if (atomic_dec_and_test(&bufmgr_gem->ref_count))
3052         {
3053             DRMLISTDEL(&bufmgr_gem->managers);
3054             mos_bufmgr_gem_destroy_xe(bufmgr);
3055         }
3056 
3057         pthread_mutex_unlock(&bufmgr_list_mutex);
3058     }
3059 }
3060 
3061 static int
3062 mo_get_context_param_xe(struct mos_linux_context *ctx,
3063                 uint32_t size,
3064                 uint64_t param,
3065                 uint64_t *value)
3066 {
3067     MOS_UNIMPLEMENT(ctx);
3068     MOS_UNIMPLEMENT(size);
3069     MOS_UNIMPLEMENT(param);
3070     MOS_UNIMPLEMENT(value);
3071     return 0;
3072 }
3073 
3074 static void mos_enable_softpin_xe(struct mos_bufmgr *bufmgr, bool va1m_align)
3075 {
3076     MOS_UNIMPLEMENT(bufmgr);
3077     MOS_UNIMPLEMENT(va1m_align);
3078 }
3079 
3080 static int
3081 mos_get_reset_stats_xe(struct mos_linux_context *ctx,
3082               uint32_t *reset_count,
3083               uint32_t *active,
3084               uint32_t *pending)
3085 {
3086     MOS_DRM_CHK_NULL_RETURN_VALUE(ctx, -EINVAL);
3087 
3088     struct mos_xe_context *context = (struct mos_xe_context *)ctx;
3089     if (reset_count)
3090         *reset_count = context->reset_count;
3091     if (active)
3092         *active = 0;
3093     if (pending)
3094         *pending = 0;
3095     return 0;
3096 }
3097 
3098 static mos_oca_exec_list_info*
3099 mos_bo_get_oca_exec_list_info_xe(struct mos_linux_bo *bo, int *count)
3100 {
3101     if (nullptr == bo || nullptr == count)
3102     {
3103         return nullptr;
3104     }
3105 
3106     mos_oca_exec_list_info *info = nullptr;
3107     int counter = 0;
3108     int MAX_COUNT = 50;
3109     struct mos_xe_bufmgr_gem *bufmgr_gem = (struct mos_xe_bufmgr_gem *) bo->bufmgr;
3110     struct mos_xe_bo_gem *bo_gem = (struct mos_xe_bo_gem *)bo;
3111     int exec_list_count = bo_gem->exec_list.size();
3112 
3113     if (exec_list_count == 0 || exec_list_count > MAX_COUNT)
3114     {
3115         return nullptr;
3116     }
3117 
3118     info = (mos_oca_exec_list_info *)malloc((exec_list_count + 1) * sizeof(mos_oca_exec_list_info));
3119     if (!info)
3120     {
3121         MOS_DRM_ASSERTMESSAGE("malloc mos_oca_exec_list_info failed");
3122         return info;
3123     }
3124 
3125     for (auto &it : bo_gem->exec_list)
3126     {
3127         /*note: set capture for each bo*/
3128         struct mos_xe_bo_gem *exec_bo_gem = (struct mos_xe_bo_gem *)it.second.bo;
3129         uint32_t exec_flags = it.second.flags;
3130         if (exec_bo_gem)
3131         {
3132             info[counter].handle   = exec_bo_gem->bo.handle;
3133             info[counter].size     = exec_bo_gem->bo.size;
3134             info[counter].offset64 = exec_bo_gem->bo.offset64;
3135             info[counter].flags    = exec_flags;
3136             info[counter].mem_region = exec_bo_gem->mem_region;
3137             info[counter].is_batch = false;
3138             counter++;
3139         }
3140     }
3141 
3142     /*note: bo is cmd bo, also need to be added*/
3143     info[counter].handle   = bo->handle;
3144     info[counter].size     = bo->size;
3145     info[counter].offset64 = bo->offset64;
3146     info[counter].flags    = EXEC_OBJECT_WRITE_XE; // use the write flag for the batch bo by default.
3147     info[counter].mem_region = bo_gem->mem_region;
3148     info[counter].is_batch = true;
3149     counter++;
3150 
3151     *count = counter;
3152 
3153     return info;
3154 }
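/*
 * Hedged usage sketch ("batch_bo" is a hypothetical batch buffer; the array
 * is malloc'ed above, so the caller owns it and must free() it):
 *
 *     int count = 0;
 *     mos_oca_exec_list_info *info =
 *             mos_bo_get_oca_exec_list_info_xe(batch_bo, &count);
 *     if (info)
 *     {
 *         for (int j = 0; j < count; j++)
 *         {
 *             // info[j].handle, info[j].offset64, info[j].size, and
 *             // info[j].is_batch (true only for the trailing cmd bo entry)
 *         }
 *         free(info);
 *     }
 */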
3155 
3156 static bool
3157 mos_has_bsd2_xe(struct mos_bufmgr *bufmgr)
3158 {
3159     MOS_UNUSED(bufmgr);
3160     return true;
3161 }
3162 
3163 static void
3164 mos_bo_set_object_capture_xe(struct mos_linux_bo *bo)
3165 {
3166     MOS_UNIMPLEMENT(bo);
3167 }
3168 
3169 static void
3170 mos_bo_set_object_async_xe(struct mos_linux_bo *bo)
3171 {
3172     MOS_UNIMPLEMENT(bo);
3173 }
3174 
3175 static int
3176 mos_get_driver_info_xe(struct mos_bufmgr *bufmgr, struct LinuxDriverInfo *drvInfo)
3177 {
3178     MOS_DRM_CHK_NULL_RETURN_VALUE(drvInfo, -EINVAL)
3179     struct mos_xe_bufmgr_gem *bufmgr_gem = (struct mos_xe_bufmgr_gem *)bufmgr;
3180     struct mos_xe_device *dev = &bufmgr_gem->xe_device;
3181     int fd = bufmgr_gem->fd;
3182 
3183     uint32_t MaxEuPerSubSlice = 0;
3184     int i = 0;
3185     drvInfo->hasBsd = 1;
3186     drvInfo->hasBsd2 = 1;
3187     drvInfo->hasVebox = 1;
3188 
3189     // The XE driver always has PPGTT
3190     drvInfo->hasPpgtt = 1;
3191 
3192     /**
3193      * query blob
3194      * Note: the xe KMD doesn't support the hwconfig blob query before DG2, so don't treat a null result as an error here.
3195      */
3196     if (dev->hw_config == nullptr)
3197     {
3198         dev->hw_config = __mos_query_hw_config_xe(fd);
3199     }
3200 
3201     if (dev->hw_config)
3202     {
3203         uint32_t *hw_config = &dev->hw_config[1];
3204         uint32_t num_config = dev->hw_config[0];
3205 
3206         while (i < num_config)
3207         {
3208             /* Attribute IDs start at 1 */
3209             assert(hw_config[i] > 0);
3210 
3211 #if DEBUG_BLOB_QUERY
3212             MOS_DRM_NORMALMESSAGE("query blob: key=%s, value=%d", key_string[hw_config[i]], hw_config[i+2]);
3213 #endif
3214             if (INTEL_HWCONFIG_MAX_SLICES_SUPPORTED == hw_config[i])
3215             {
3216                 assert(hw_config[i+1] == 1);
3217                 drvInfo->sliceCount = hw_config[i+2];
3218             }
3219 
3220             if ((INTEL_HWCONFIG_MAX_DUAL_SUBSLICES_SUPPORTED == hw_config[i])
3221                 || (INTEL_HWCONFIG_MAX_SUBSLICE == hw_config[i]))
3222             {
3223                 assert(hw_config[i+1] == 1);
3224                 drvInfo->subSliceCount = hw_config[i+2];
3225             }
3226 
3227             if ((INTEL_HWCONFIG_MAX_NUM_EU_PER_DSS == hw_config[i])
3228                 || (INTEL_HWCONFIG_MAX_EU_PER_SUBSLICE == hw_config[i]))
3229             {
3230                 assert(hw_config[i+1] == 1);
3231                 MaxEuPerSubSlice = hw_config[i+2];
3232             }
3233 
3234             /* Advance to next key */
3235             i += hw_config[i + 1];  // value size in dwords
3236             i += 2;                 // key + length fields
3237         }
3238 
3239         drvInfo->euCount = drvInfo->subSliceCount * MaxEuPerSubSlice;
3240     }
3241     else
3242     {
3243         drvInfo->euCount = 96;
3244         drvInfo->subSliceCount = 6;
3245         drvInfo->sliceCount = 1;
3246     }
3247 
3248     // query engines info
3249     MOS_DRM_CHK_XE_DEV(dev, engines, __mos_query_engines_xe, -ENODEV)
3250     struct drm_xe_query_engines *engines = dev->engines;
3251     int num_vd = 0;
3252     int num_ve = 0;
3253     for (i = 0; i < engines->num_engines; i++)
3254     {
3255         if (DRM_XE_ENGINE_CLASS_VIDEO_DECODE == engines->engines[i].instance.engine_class)
3256         {
3257             num_vd++;
3258         }
3259         else if (DRM_XE_ENGINE_CLASS_VIDEO_ENHANCE == engines->engines[i].instance.engine_class)
3260         {
3261             num_ve++;
3262         }
3263     }
3264 
3265     if (num_vd >= 1)
3266     {
3267         drvInfo->hasBsd = 1;
3268     }
3269 
3270     if (num_vd >= 2)
3271     {
3272         drvInfo->hasBsd2 = 1;
3273     }
3274 
3275     if (num_ve >= 1)
3276     {
3277         drvInfo->hasVebox = 1;
3278     }
3279 
3280     drvInfo->hasHuc = 1;
3281     if (1 == drvInfo->hasHuc)
3282     {
3283         drvInfo->hasProtectedHuc = 1;
3284     }
3285 
3286     // query config
3287     MOS_DRM_CHK_XE_DEV(dev, config, __mos_query_config_xe, -ENODEV)
3288     struct drm_xe_query_config *config = dev->config;
3289     drvInfo->devId = config->info[DRM_XE_QUERY_CONFIG_REV_AND_DEVICE_ID] & 0xffff;
3290     drvInfo->devRev = config->info[DRM_XE_QUERY_CONFIG_REV_AND_DEVICE_ID] >> 16;
3291 
3292     return MOS_XE_SUCCESS;
3293 }
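/*
 * Hedged usage sketch (bufmgr from mos_bufmgr_gem_init_xe(); on platforms
 * without the hwconfig blob, the EU/subslice/slice fields fall back to the
 * defaults hard-coded above):
 *
 *     struct LinuxDriverInfo drv_info = {};
 *     if (MOS_XE_SUCCESS == mos_get_driver_info_xe(bufmgr, &drv_info))
 *     {
 *         // drv_info.devId, drv_info.devRev, drv_info.euCount, ... are set
 *     }
 */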
3294 
3295 /**
3296  * Initializes the GEM buffer manager, which uses the kernel to allocate, map,
3297  * and manage buffer objects.
3298  *
3299  * \param fd File descriptor of the opened DRM device.
3300  */
3301 struct mos_bufmgr *
3302 mos_bufmgr_gem_init_xe(int fd, int batch_size)
3303 {
3304     // Note: don't put this field in bufmgr, since bufmgr may be inaccessible in some functions
3305 #if (_DEBUG || _RELEASE_INTERNAL)
3306     MOS_READ_ENV_VARIABLE(INTEL_XE_BUFMGR_DEBUG, MOS_USER_FEATURE_VALUE_TYPE_INT64, __xe_bufmgr_debug__);
3307     if (__xe_bufmgr_debug__ < 0)
3308     {
3309         __xe_bufmgr_debug__ = 0;
3310     }
3311 #endif
3312 
3313     struct mos_xe_bufmgr_gem *bufmgr_gem;
3314     int ret, tmp;
3315     struct mos_xe_device *dev = nullptr;
3316 
3317     pthread_mutex_lock(&bufmgr_list_mutex);
3318 
3319     bufmgr_gem = mos_bufmgr_gem_find(fd);
3320     if (bufmgr_gem)
3321         goto exit;
3322 
3323     bufmgr_gem = MOS_New(mos_xe_bufmgr_gem);
3324     if (nullptr == bufmgr_gem)
3325         goto exit;
3326 
3327     bufmgr_gem->bufmgr = {};
3328     bufmgr_gem->xe_device = {};
3329     dev = &bufmgr_gem->xe_device;
3330 
3331     bufmgr_gem->fd = fd;
3332     bufmgr_gem->vm_id = INVALID_VM;
3333     atomic_set(&bufmgr_gem->ref_count, 1);
3334 
3335     bufmgr_gem->bufmgr.vm_create = mos_vm_create_xe;
3336     bufmgr_gem->bufmgr.vm_destroy = mos_vm_destroy_xe;
3337     bufmgr_gem->bufmgr.context_create = mos_context_create_xe;
3338     bufmgr_gem->bufmgr.context_create_ext = mos_context_create_ext_xe;
3339     bufmgr_gem->bufmgr.context_create_shared = mos_context_create_shared_xe;
3340     bufmgr_gem->bufmgr.context_destroy = mos_context_destroy_xe;
3341     bufmgr_gem->bufmgr.bo_alloc = mos_bo_alloc_xe;
3342     bufmgr_gem->bufmgr.bo_add_softpin_target = mos_gem_bo_update_exec_list_xe;
3343     bufmgr_gem->bufmgr.bo_clear_relocs = mos_gem_bo_clear_exec_list_xe;
3344     bufmgr_gem->bufmgr.bo_alloc_userptr = mos_bo_alloc_userptr_xe;
3345     bufmgr_gem->bufmgr.bo_alloc_tiled = mos_bo_alloc_tiled_xe;
3346     bufmgr_gem->bufmgr.bo_map = mos_bo_map_xe;
3347     bufmgr_gem->bufmgr.bo_busy = mos_gem_bo_busy_xe;
3348     bufmgr_gem->bufmgr.bo_wait_rendering = mos_gem_bo_wait_rendering_xe;
3349     bufmgr_gem->bufmgr.bo_wait = mos_gem_bo_wait_xe;
3350     bufmgr_gem->bufmgr.bo_map_wc = mos_bo_map_wc_xe;
3351     bufmgr_gem->bufmgr.bo_unmap = mos_bo_unmap_xe;
3352     bufmgr_gem->bufmgr.bo_unmap_wc = mos_bo_unmap_wc_xe;
3353     bufmgr_gem->bufmgr.bo_create_from_prime = mos_bo_create_from_prime_xe;
3354     bufmgr_gem->bufmgr.bo_export_to_prime = mos_bo_export_to_prime_xe;
3355     bufmgr_gem->bufmgr.get_devid = mos_get_devid_xe;
3356     bufmgr_gem->bufmgr.query_engines_count = mos_query_engines_count_xe;
3357     bufmgr_gem->bufmgr.query_engines = mos_query_engines_xe;
3358     bufmgr_gem->bufmgr.get_engine_class_size = mos_get_engine_class_size_xe;
3359     bufmgr_gem->bufmgr.query_sys_engines = mos_query_sysinfo_xe;
3360     bufmgr_gem->bufmgr.select_fixed_engine = mos_select_fixed_engine_xe;
3361     bufmgr_gem->bufmgr.query_device_blob = mos_query_device_blob_xe;
3362     bufmgr_gem->bufmgr.get_driver_info = mos_get_driver_info_xe;
3363     bufmgr_gem->bufmgr.destroy = mos_bufmgr_gem_unref_xe;
3364     bufmgr_gem->bufmgr.query_hw_ip_version = mos_query_hw_ip_version_xe;
3365     bufmgr_gem->bufmgr.get_platform_information = mos_get_platform_information_xe;
3366     bufmgr_gem->bufmgr.set_platform_information = mos_set_platform_information_xe;
3367     bufmgr_gem->bufmgr.enable_reuse = mos_enable_reuse_xe;
3368     bufmgr_gem->bufmgr.bo_reference = mos_bo_reference_xe;
3369     bufmgr_gem->bufmgr.bo_unreference = mos_bo_unreference_xe;
3370     bufmgr_gem->bufmgr.bo_set_softpin = mos_bo_set_softpin_xe;
3371     bufmgr_gem->bufmgr.enable_softpin = mos_enable_softpin_xe;
3372     bufmgr_gem->bufmgr.get_context_param = mo_get_context_param_xe;
3373     bufmgr_gem->bufmgr.get_reset_stats = mos_get_reset_stats_xe;
3374     bufmgr_gem->bufmgr.bo_get_softpin_targets_info = mos_bo_get_oca_exec_list_info_xe;
3375     bufmgr_gem->bufmgr.has_bsd2 = mos_has_bsd2_xe;
3376     bufmgr_gem->bufmgr.set_object_capture = mos_bo_set_object_capture_xe;
3377     bufmgr_gem->bufmgr.set_object_async = mos_bo_set_object_async_xe;
3378     bufmgr_gem->bufmgr.bo_context_exec3 = mos_bo_context_exec_with_sync_xe;
3379 
3380     bufmgr_gem->exec_queue_timeslice = EXEC_QUEUE_TIMESLICE_DEFAULT;
3381     MOS_READ_ENV_VARIABLE(INTEL_ENGINE_TIMESLICE, MOS_USER_FEATURE_VALUE_TYPE_INT32, bufmgr_gem->exec_queue_timeslice);
3382     if (bufmgr_gem->exec_queue_timeslice <= 0
3383             || bufmgr_gem->exec_queue_timeslice >= EXEC_QUEUE_TIMESLICE_MAX)
3384     {
3385         bufmgr_gem->exec_queue_timeslice = EXEC_QUEUE_TIMESLICE_DEFAULT;
3386     }
3387 
3388     bufmgr_gem->mem_profiler_fd = -1;
3389     bufmgr_gem->mem_profiler_path = getenv("MEDIA_MEMORY_PROFILER_LOG");
3390     if (bufmgr_gem->mem_profiler_path != nullptr)
3391     {
3392         if (strcmp(bufmgr_gem->mem_profiler_path, "/sys/kernel/debug/tracing/trace_marker") == 0)
3393         {
3394             ret = bufmgr_gem->mem_profiler_fd = open(bufmgr_gem->mem_profiler_path, O_WRONLY);
3395         }
3396         else
3397         {
3398             ret = bufmgr_gem->mem_profiler_fd = open(bufmgr_gem->mem_profiler_path, O_WRONLY | O_CREAT | O_TRUNC, S_IRUSR | S_IWUSR);
3399         }
3400 
3401         if (-1 == ret)
3402         {
3403             MOS_DRM_ASSERTMESSAGE("Failed to open %s: %s", bufmgr_gem->mem_profiler_path, strerror(errno));
3404         }
3405     }
3406 
3407     dev->uc_versions[UC_TYPE_GUC_SUBMISSION].uc_type = UC_TYPE_INVALID;
3408     dev->uc_versions[UC_TYPE_HUC].uc_type = UC_TYPE_INVALID;
3409 
3410     bufmgr_gem->vm_id = __mos_vm_create_xe(&bufmgr_gem->bufmgr);
3411     __mos_query_mem_regions_instance_mask_xe(&bufmgr_gem->bufmgr);
3412     __mos_has_vram_xe(&bufmgr_gem->bufmgr);
3413     __mos_get_default_alignment_xe(&bufmgr_gem->bufmgr);
3414 
3415     DRMLISTADD(&bufmgr_gem->managers, &bufmgr_list);
3416     DRMINITLISTHEAD(&bufmgr_gem->named);
3417 
3418     mos_vma_heap_init(&bufmgr_gem->vma_heap[MEMZONE_SYS], MEMZONE_SYS_START, MEMZONE_SYS_SIZE);
3419     mos_vma_heap_init(&bufmgr_gem->vma_heap[MEMZONE_DEVICE], MEMZONE_DEVICE_START, MEMZONE_DEVICE_SIZE);
3420     mos_vma_heap_init(&bufmgr_gem->vma_heap[MEMZONE_PRIME], MEMZONE_PRIME_START, MEMZONE_PRIME_SIZE);
3421 
3422 exit:
3423     pthread_mutex_unlock(&bufmgr_list_mutex);
3424 
3425     return bufmgr_gem != nullptr ? &bufmgr_gem->bufmgr : nullptr;
3426 }
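/*
 * Hedged usage sketch (assumes a render node such as "/dev/dri/renderD128";
 * batch_size is accepted for interface compatibility and is not otherwise
 * used in the initialization above; error handling elided):
 *
 *     int fd = open("/dev/dri/renderD128", O_RDWR | O_CLOEXEC);
 *     struct mos_bufmgr *bufmgr = mos_bufmgr_gem_init_xe(fd, 0);
 *     if (bufmgr)
 *     {
 *         // ...allocate, map and execute BOs through the vtable set above...
 *         bufmgr->destroy(bufmgr);    // mos_bufmgr_gem_unref_xe
 *     }
 *     close(fd);
 */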
3427 
3428 int mos_get_dev_id_xe(int fd, uint32_t *device_id)
3429 {
3430     if (fd < 0 || nullptr == device_id)
3431     {
3432         return -EINVAL;
3433     }
3434     struct drm_xe_query_config *config = __mos_query_config_xe(fd);
3435     MOS_DRM_CHK_NULL_RETURN_VALUE(config, -ENODEV)
3436 
3437     *device_id = config->info[DRM_XE_QUERY_CONFIG_REV_AND_DEVICE_ID] & 0xffff;
3438     MOS_XE_SAFE_FREE(config);
3439 
3440     return MOS_XE_SUCCESS;
3441 }
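/*
 * Hedged usage sketch (fd is an open xe DRM device file descriptor):
 *
 *     uint32_t devid = 0;
 *     if (MOS_XE_SUCCESS == mos_get_dev_id_xe(fd, &devid))
 *     {
 *         // devid holds the 16-bit PCI device id
 *     }
 */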
3442