/*
 * Copyright © 2009 Corbin Simpson
 * Copyright © 2015 Advanced Micro Devices, Inc.
 *
 * SPDX-License-Identifier: MIT
 */
7
8 #ifndef AMDGPU_WINSYS_H
9 #define AMDGPU_WINSYS_H
10
11 #include "pipebuffer/pb_cache.h"
12 #include "pipebuffer/pb_slab.h"
13 #include "winsys/radeon_winsys.h"
14 #include "util/simple_mtx.h"
15 #include "util/u_queue.h"
16 #include <amdgpu.h>
17
18 struct amdgpu_cs;
19
/* DRM file descriptors, file descriptions and buffer sharing.
 *
 * amdgpu_device_initialize() creates one amdgpu_device_handle for one
 * GPU. It does this by getting the sysfs path (e.g. /dev/dri/cardxx) for the
 * fd. It uses the sysfs path to return the amdgpu_device_handle if one was
 * already created, or to create a new one.
 *
 * Thus amdgpu_device_handle's fd will be from the first time the GPU
 * was initialized by amdgpu_device_initialize().
 *
 * KMS/GEM buffer handles are specific to a DRM file description, i.e. the
 * same handle value may refer to different underlying BOs in different
 * DRM file descriptions even for the same GPU. The
 * https://en.wikipedia.org/wiki/File:File_table_and_inode_table.svg diagram shows
 * the file descriptors and their relation to file descriptions in the file table.
 *
 * Two fds are considered different if they were obtained with separate open()
 * calls. Fds that are duplicates (via dup() or fcntl F_DUPFD) of one open
 * fd all compare as equal with the os_same_file_description() function,
 * which uses the kcmp system call.
 *
 * amdgpu_screen_winsys's fd tracks the file description which was
 * given to amdgpu_winsys_create(). This is the fd used by the application
 * using the driver and may be used in other ioctls (e.g. drmModeAddFB).
 *
 * amdgpu_winsys's fd is the file description used to initialize the
 * device handle in libdrm_amdgpu.
 *
 * The 2 fds can be different, even in systems with a single GPU, e.g. if
 * radv is initialized before radeonsi.
 *
 * This fd tracking is useful for buffer sharing. As an example, if an app
 * wants to use drmModeAddFB it'll need a KMS handle valid for its
 * fd (== amdgpu_screen_winsys::fd). If both fds are identical, there's
 * nothing to do: bo->u.real.kms_handle can be used directly
 * (see amdgpu_bo_get_handle). If they're different, the BO has to be exported
 * from the device fd as a dma-buf, then imported to the app fd to get the
 * KMS handle of the buffer for that app fd.
 *
 * Examples:
 * 1) OpenGL, then VAAPI:
 * OpenGL | VAAPI (same device, != file description)
 * -----------------------------------│-----------------------------------------
 * fd = 5 (/dev/dri/renderD128) │fd = 9 (/dev/dri/renderD128')
 * │ │ │
 * device_handle = 0xffff0250 │ device_handle = 0xffff0250 (fd=5, re-used)
 * │ │ │
 * amdgpu_screen_winsys = 0xffff0120 │amdgpu_winsys = 0xffff0470 ◄─────────────┐
 * │ ├─ fd = dup(5) = 6 │ │ └─ sws_list = 0xffff0120 │
 * │ └─ aws = 0xffff0470 ◄──┐ │ │ 0xffff0640 ◄───┐ │
 * │ │ │amdgpu_screen_winsys = 0xffff0640 ──────┘ │
 * amdgpu_winsys = 0xffff0470 ───┘ │ └─ fd = dup(9) = 10 │
 * │ ├─ dev = 0xffff0250 │ │
 * │ ├─ sws_list = 0xffff0120 │ │
 * │ └─ fd = 6 │ │
 * dev_tab(0xffff0250) = 0xffff0470 ──│──────────────────────────────────────────┘
 *
 * 2) Vulkan (fd=5) then OpenGL (same device, != file description):
 * -----------------------------
 * fd = 9 (/dev/dri/renderD128)
 * │
 * device_handle = 0xffff0250 (fd=5, re-used)
 * │
 * amdgpu_screen_winsys = 0xffff0740
 * │ ├─ fd = dup(9) = 10
 * │ └─ aws = 0xffff0940 ◄───┐
 * amdgpu_winsys = 0xffff0940 ────────┘
 * │ ├─ dev = 0xffff0250
 * │ ├─ sws_list = 0xffff0740
 * │ └─ fd = 5
 * dev_tab(0xffff0250) = 0xffff0940
 */
92
93 /* One struct amdgpu_screen_winsys is created in amdgpu_winsys_create() for one
94 * fd. For fd's that are same (read above description for same if condition),
95 * already created amdgpu_screen_winsys will be returned.
96 */
97 struct amdgpu_screen_winsys {
98 struct radeon_winsys base;
99 struct amdgpu_winsys *aws;
100 /* See comment above */
101 int fd;
102 struct pipe_reference reference;
103 struct amdgpu_screen_winsys *next;
104
105 /* Maps a BO to its KMS handle valid for this DRM file descriptor
106 * Protected by amdgpu_winsys::sws_list_lock
107 */
108 struct hash_table *kms_handles;
109 };
110
/* Maximum this number of IBs can be busy per queue. When submitting a new IB and the oldest IB
 * ("AMDGPU_FENCE_RING_SIZE" IBs ago) is still busy, the CS thread will wait for it and will
 * also block all queues from submitting new IBs.
 */
#define AMDGPU_FENCE_RING_SIZE 32

/* The maximum number of queues that can be present. */
#define AMDGPU_MAX_QUEUES 6

/* This can use any integer type because the logic handles integer wraparounds robustly, but
 * uint8_t wraps around so quickly that some BOs might never become idle because we don't
 * remove idle fences from BOs, so they become "busy" again after a queue sequence number wraps
 * around and they may stay "busy" in pb_cache long enough that we run out of memory.
 */
typedef uint16_t uint_seq_no;
126
127 struct amdgpu_queue {
128 /* Ring buffer of fences.
129 *
130 * We only remember a certain number of the most recent fences per queue. When we add a new
131 * fence, we wait for the oldest one, which implies that all older fences not present
132 * in the ring are idle. This way we don't have to keep track of a million fence references
133 * for a million BOs.
134 *
135 * We only support 1 queue per IP. If an IP has multiple queues, we always add a fence
136 * dependency on the previous fence to make it behave like there is only 1 queue.
137 *
138 * amdgpu_winsys_bo doesn't have a list of fences. It only remembers the last sequence number
139 * for every queue where it was used. We then use the BO's sequence number to look up a fence
140 * in this ring.
141 */
142 struct pipe_fence_handle *fences[AMDGPU_FENCE_RING_SIZE];
143
144 /* The sequence number of the latest fence.
145 *
146 * This sequence number is global per queue per device, shared by all contexts, and generated
147 * by the winsys, not the kernel.
148 *
149 * The latest fence is: fences[latest_seq_no % AMDGPU_FENCE_RING_SIZE]
150 * The oldest fence is: fences([latest_seq_no + 1) % AMDGPU_FENCE_RING_SIZE]
151 * The oldest sequence number in the ring: latest_seq_no - AMDGPU_FENCE_RING_SIZE + 1
152 *
153 * The sequence number is in the ring if:
154 * latest_seq_no - buffer_seq_no < AMDGPU_FENCE_RING_SIZE
155 * If the sequence number is not in the ring, it's idle.
156 *
157 * Integer wraparounds of the sequence number behave as follows:
158 *
159 * The comparison above gives the correct answer if buffer_seq_no isn't older than UINT*_MAX.
160 * If it's older than UINT*_MAX but not older than UINT*_MAX + AMDGPU_FENCE_RING_SIZE, we
161 * incorrectly pick and wait for one of the fences in the ring. That's only a problem when
162 * the type is so small (uint8_t) that seq_no wraps around very frequently, causing BOs to
163 * never become idle in certain very unlucky scenarios and running out of memory.
164 */
165 uint_seq_no latest_seq_no;
166
167 /* The last context using this queue. */
168 struct amdgpu_ctx *last_ctx;
169 };
170
171 /* This is part of every BO. */
172 struct amdgpu_seq_no_fences {
173 /* A fence sequence number per queue. This number is used to look up the fence from
174 * struct amdgpu_queue.
175 *
176 * This sequence number is global per queue per device, shared by all contexts, and generated
177 * by the winsys, not the kernel.
178 */
179 uint_seq_no seq_no[AMDGPU_MAX_QUEUES];
180
181 /* The mask of queues where seq_no[i] is valid. */
182 uint8_t valid_fence_mask;
183 };
184
185 /* valid_fence_mask should have 1 bit for each queue. */
186 static_assert(sizeof(((struct amdgpu_seq_no_fences*)NULL)->valid_fence_mask) * 8 >= AMDGPU_MAX_QUEUES, "");
187
188 /* One struct amdgpu_winsys is created for one gpu in amdgpu_winsys_create(). */
189 struct amdgpu_winsys {
190 struct pipe_reference reference;
191 /* See comment above */
192 int fd;
193
194 /* Protected by bo_fence_lock. */
195 struct amdgpu_queue queues[AMDGPU_MAX_QUEUES];
196
197 struct pb_cache bo_cache;
198 struct pb_slabs bo_slabs; /* Slab allocator. */
199
200 amdgpu_device_handle dev;
201
202 simple_mtx_t bo_fence_lock;
203
204 int num_cs; /* The number of command streams created. */
205 uint32_t surf_index_color;
206 uint32_t surf_index_fmask;
207 uint32_t next_bo_unique_id;
208 uint64_t allocated_vram;
209 uint64_t allocated_gtt;
210 uint64_t mapped_vram;
211 uint64_t mapped_gtt;
212 uint64_t slab_wasted_vram;
213 uint64_t slab_wasted_gtt;
214 uint64_t buffer_wait_time; /* time spent in buffer_wait in ns */
215 uint64_t num_gfx_IBs;
216 uint64_t num_sdma_IBs;
217 uint64_t num_mapped_buffers;
218 uint64_t gfx_bo_list_counter;
219 uint64_t gfx_ib_size_counter;
220
221 struct radeon_info info;
222
223 /* multithreaded IB submission */
224 struct util_queue cs_queue;
225
226 struct ac_addrlib *addrlib;
227
228 bool check_vm;
229 bool noop_cs;
230 bool reserve_vmid;
231 bool zero_all_vram_allocs;
232 #if MESA_DEBUG
233 bool debug_all_bos;
234
235 /* List of all allocated buffers */
236 simple_mtx_t global_bo_list_lock;
237 struct list_head global_bo_list;
238 unsigned num_buffers;
239 #endif
240
241 /* Single-linked list of all structs amdgpu_screen_winsys referencing this
242 * struct amdgpu_winsys
243 */
244 simple_mtx_t sws_list_lock;
245 struct amdgpu_screen_winsys *sws_list;
246
247 /* For returning the same amdgpu_winsys_bo instance for exported
248 * and re-imported buffers. */
249 struct hash_table *bo_export_table;
250 simple_mtx_t bo_export_table_lock;
251
252 /* Since most winsys functions require struct radeon_winsys *, dummy_sws.base is used
253 * for invoking them because sws_list can be NULL.
254 */
255 struct amdgpu_screen_winsys dummy_sws;
256 };
257
/* Downcast the generic winsys pointer to the amdgpu screen winsys.
 * Valid because amdgpu_screen_winsys embeds radeon_winsys as its first member.
 */
static inline struct amdgpu_screen_winsys *
amdgpu_screen_winsys(struct radeon_winsys *base)
{
   return (struct amdgpu_screen_winsys*)base;
}
263
264 static inline struct amdgpu_winsys *
amdgpu_winsys(struct radeon_winsys * base)265 amdgpu_winsys(struct radeon_winsys *base)
266 {
267 return amdgpu_screen_winsys(base)->aws;
268 }
269
270 void amdgpu_surface_init_functions(struct amdgpu_screen_winsys *sws);
271
272 #endif
273