xref: /aosp_15_r20/external/igt-gpu-tools/tests/amdgpu/amd_prime.c (revision d83cc019efdc2edc6c4b16e9034a3ceb8d35d77c)
/*
 * Copyright © 2016 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "igt.h"
#include "igt_vgem.h"

#include <amdgpu.h>
#include <amdgpu_drm.h>

#include <sys/poll.h>

#define GFX_COMPUTE_NOP  0xffff1000
#define SDMA_NOP  0x0

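/*
 * Allocate a buffer object in the requested heap, map it into the GPU
 * virtual address space and into the CPU's, and hand back the BO handle,
 * CPU pointer, GPU address and VA-range handle. Any failure unwinds the
 * partially constructed state and returns the libdrm error code.
 */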
static int
amdgpu_bo_alloc_and_map(amdgpu_device_handle dev, unsigned size,
			unsigned alignment, unsigned heap, uint64_t flags,
			amdgpu_bo_handle *bo, void **cpu, uint64_t *mc_address,
			amdgpu_va_handle *va_handle)
{
	struct amdgpu_bo_alloc_request request = {
		.alloc_size = size,
		.phys_alignment = alignment,
		.preferred_heap = heap,
		.flags = flags,
	};
	amdgpu_bo_handle buf_handle;
	amdgpu_va_handle handle;
	uint64_t vmc_addr;
	int r;

	r = amdgpu_bo_alloc(dev, &request, &buf_handle);
	if (r)
		return r;

	r = amdgpu_va_range_alloc(dev,
				  amdgpu_gpu_va_range_general,
				  size, alignment, 0, &vmc_addr,
				  &handle, 0);
	if (r)
		goto error_va_alloc;

	r = amdgpu_bo_va_op(buf_handle, 0, size, vmc_addr, 0, AMDGPU_VA_OP_MAP);
	if (r)
		goto error_va_map;

	r = amdgpu_bo_cpu_map(buf_handle, cpu);
	if (r)
		goto error_cpu_map;

	*bo = buf_handle;
	*mc_address = vmc_addr;
	*va_handle = handle;

	return 0;

error_cpu_map:
	amdgpu_bo_cpu_unmap(buf_handle);

error_va_map:
	amdgpu_bo_va_op(buf_handle, 0, size, vmc_addr, 0, AMDGPU_VA_OP_UNMAP);

error_va_alloc:
	amdgpu_bo_free(buf_handle);
	return r;
}

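/*
 * Tear down everything amdgpu_bo_alloc_and_map() set up: drop the CPU
 * mapping, unmap the GPU VA, release the VA range and free the BO.
 */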
static void
amdgpu_bo_unmap_and_free(amdgpu_bo_handle bo, amdgpu_va_handle va_handle,
			 uint64_t mc_addr, uint64_t size)
{
	amdgpu_bo_cpu_unmap(bo);
	amdgpu_bo_va_op(bo, 0, size, mc_addr, 0, AMDGPU_VA_OP_UNMAP);
	amdgpu_va_range_free(va_handle);
	amdgpu_bo_free(bo);
}

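/* Block (infinite timeout) until seqno has signalled on the given context, IP and ring. */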
static void amdgpu_cs_sync(amdgpu_context_handle context,
			   unsigned int ip_type,
			   int ring,
			   unsigned int seqno)
{
	struct amdgpu_cs_fence fence = {
		.context = context,
		.ip_type = ip_type,
		.ring = ring,
		.fence = seqno,
	};
	uint32_t expired;
	int err;

	err = amdgpu_cs_query_fence_status(&fence,
					   AMDGPU_TIMEOUT_INFINITE,
					   0, &expired);
	igt_assert_eq(err, 0);
}

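/*
 * A cork is a 1x1 vgem buffer with an unsignalled write-fence attached.
 * Importing it into another driver lets us queue work behind a fence that
 * only unplug() can signal, keeping that work blocked for as long as we like.
 */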
struct cork {
	int device;
	uint32_t fence;
	union {
		uint32_t handle;
		amdgpu_bo_handle amd_handle;
	};
};

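/* Build a cork and import it into fd as a GEM handle. */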
static void plug(int fd, struct cork *c)
{
	struct vgem_bo bo;
	int dmabuf;

	c->device = drm_open_driver(DRIVER_VGEM);

	bo.width = bo.height = 1;
	bo.bpp = 4;
	vgem_create(c->device, &bo);
	c->fence = vgem_fence_attach(c->device, &bo, VGEM_FENCE_WRITE);

	dmabuf = prime_handle_to_fd(c->device, bo.handle);
	c->handle = prime_fd_to_handle(fd, dmabuf);
	close(dmabuf);
}

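/* Build a cork and import it into the amdgpu device as a BO handle. */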
static void amd_plug(amdgpu_device_handle device, struct cork *c)
{
	struct amdgpu_bo_import_result import;
	struct vgem_bo bo;
	int dmabuf;

	c->device = drm_open_driver(DRIVER_VGEM);

	bo.width = bo.height = 1;
	bo.bpp = 4;
	vgem_create(c->device, &bo);
	c->fence = vgem_fence_attach(c->device, &bo, VGEM_FENCE_WRITE);

	dmabuf = prime_handle_to_fd(c->device, bo.handle);
	amdgpu_bo_import(device, amdgpu_bo_handle_type_dma_buf_fd,
			 dmabuf, &import);
	close(dmabuf);

	c->amd_handle = import.buf_handle;
}

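/* Signal the cork's fence, releasing everything queued up behind it. */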
static void unplug(struct cork *c)
{
	vgem_fence_signal(c->device, c->fence);
	close(c->device);
}

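/*
 * Fill every i915 engine with batches queued behind a cork, then submit an
 * amdgpu IB whose BO list includes the i915 batch buffer imported over
 * dma-buf, exercising implicit fencing across the prime boundary: once the
 * cork is released, both submissions must complete for the test to pass.
 */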
static void i915_to_amd(int i915, int amd, amdgpu_device_handle device)
{
	const uint32_t bbe = MI_BATCH_BUFFER_END;
	struct drm_i915_gem_exec_object2 obj[2];
	struct drm_i915_gem_execbuffer2 execbuf;
	unsigned int engines[16], engine;
	unsigned int nengine;
	unsigned long count;
	struct cork c;

	nengine = 0;
	for_each_physical_engine(i915, engine)
		engines[nengine++] = engine;
	igt_require(nengine);

	memset(obj, 0, sizeof(obj));
	obj[1].handle = gem_create(i915, 4096);
	gem_write(i915, obj[1].handle, 0, &bbe, sizeof(bbe));

	memset(&execbuf, 0, sizeof(execbuf));
	execbuf.buffers_ptr = to_user_pointer(obj);
	execbuf.buffer_count = 2;

	plug(i915, &c);
	obj[0].handle = c.handle;

	count = 0;
	igt_until_timeout(5) {
		execbuf.rsvd1 = gem_context_create(i915);

		for (unsigned n = 0; n < nengine; n++) {
			execbuf.flags = engines[n];
			gem_execbuf(i915, &execbuf);
		}

		gem_context_destroy(i915, execbuf.rsvd1);
		count++;

		if (!gem_uses_full_ppgtt(i915))
			break;
	}

	igt_info("Reservation width = %ldx%d\n", count, nengine);

	{
		const int ring = 0;
		const unsigned int ip_type = AMDGPU_HW_IP_GFX;
		struct amdgpu_bo_import_result import;
		amdgpu_bo_handle ib_result_handle;
		void *ib_result_cpu;
		uint64_t ib_result_mc_address;
		struct amdgpu_cs_request ibs_request;
		struct amdgpu_cs_ib_info ib_info;
		uint32_t *ptr;
		int i, r, dmabuf;
		amdgpu_bo_list_handle bo_list;
		amdgpu_va_handle va_handle;
		amdgpu_context_handle context;

		r = amdgpu_cs_ctx_create(device, &context);
		igt_assert_eq(r, 0);

		dmabuf = prime_handle_to_fd(i915, obj[1].handle);
		r = amdgpu_bo_import(device, amdgpu_bo_handle_type_dma_buf_fd,
				     dmabuf, &import);
		close(dmabuf);

		r = amdgpu_bo_alloc_and_map(device, 4096, 4096,
					    AMDGPU_GEM_DOMAIN_GTT, 0,
					    &ib_result_handle, &ib_result_cpu,
					    &ib_result_mc_address, &va_handle);
		igt_assert_eq(r, 0);

		ptr = ib_result_cpu;
		for (i = 0; i < 16; ++i)
			ptr[i] = GFX_COMPUTE_NOP;

		r = amdgpu_bo_list_create(device, 2,
					  (amdgpu_bo_handle[]) {
					  import.buf_handle,
					  ib_result_handle
					  },
					  NULL, &bo_list);
		igt_assert_eq(r, 0);

		memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info));
		ib_info.ib_mc_address = ib_result_mc_address;
		ib_info.size = 16;

		memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request));
		ibs_request.ip_type = ip_type;
		ibs_request.ring = ring;
		ibs_request.number_of_ibs = 1;
		ibs_request.ibs = &ib_info;
		ibs_request.resources = bo_list;

		r = amdgpu_cs_submit(context, 0, &ibs_request, 1);
		igt_assert_eq(r, 0);

		unplug(&c);

		amdgpu_cs_sync(context, ip_type, ring,
			       ibs_request.seq_no);

		r = amdgpu_bo_list_destroy(bo_list);
		igt_assert_eq(r, 0);

		amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
					 ib_result_mc_address, 4096);

		amdgpu_cs_ctx_free(context);
	}

	gem_sync(i915, obj[1].handle);
	gem_close(i915, obj[1].handle);
}

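/*
 * The other direction: queue as many amdgpu submissions as possible behind a
 * corked vgem buffer, export the IB buffer to i915 over dma-buf and submit an
 * i915 batch that writes it. i915 must report the batch busy until the cork
 * is released and the amdgpu fences have drained.
 */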
static void amd_to_i915(int i915, int amd, amdgpu_device_handle device)
{
	const uint32_t bbe = MI_BATCH_BUFFER_END;
	struct drm_i915_gem_exec_object2 obj[2];
	struct drm_i915_gem_execbuffer2 execbuf;
	const int ring = 0;
	const unsigned int ip_type = AMDGPU_HW_IP_GFX;
	amdgpu_bo_handle ib_result_handle;
	void *ib_result_cpu;
	uint64_t ib_result_mc_address;
	struct amdgpu_cs_request ibs_request;
	struct amdgpu_cs_ib_info ib_info;
	uint32_t *ptr;
	amdgpu_context_handle *contexts;
	int i, r, dmabuf;
	amdgpu_bo_list_handle bo_list;
	amdgpu_va_handle va_handle;
	unsigned long count, size;
	struct cork c;

	memset(obj, 0, sizeof(obj));
	obj[1].handle = gem_create(i915, 4096);
	gem_write(i915, obj[1].handle, 0, &bbe, sizeof(bbe));

	memset(&execbuf, 0, sizeof(execbuf));
	execbuf.buffers_ptr = to_user_pointer(obj);
	execbuf.buffer_count = 2;

	r = amdgpu_bo_alloc_and_map(device, 4096, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &ib_result_handle, &ib_result_cpu,
				    &ib_result_mc_address, &va_handle);
	igt_assert_eq(r, 0);

	ptr = ib_result_cpu;
	for (i = 0; i < 16; ++i)
		ptr[i] = GFX_COMPUTE_NOP;

	amd_plug(device, &c);

	r = amdgpu_bo_list_create(device, 2,
				  (amdgpu_bo_handle[]) {
				  c.amd_handle,
				  ib_result_handle
				  },
				  NULL, &bo_list);
	igt_assert_eq(r, 0);

	memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info));
	ib_info.ib_mc_address = ib_result_mc_address;
	ib_info.size = 16;

	memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request));
	ibs_request.ip_type = ip_type;
	ibs_request.ring = ring;
	ibs_request.number_of_ibs = 1;
	ibs_request.ibs = &ib_info;
	ibs_request.resources = bo_list;

	count = 0;
	size = 64 << 10;
	contexts = malloc(size * sizeof(*contexts));
	igt_until_timeout(2) { /* must all complete within vgem timeout (10s) */
		if (count == size) {
			size *= 2;
			contexts = realloc(contexts, size * sizeof(*contexts));
		}

		if (amdgpu_cs_ctx_create(device, &contexts[count]))
			break;

		r = amdgpu_cs_submit(contexts[count], 0, &ibs_request, 1);
		igt_assert_eq(r, 0);

		count++;
	}

	igt_info("Reservation width = %ld\n", count);
	igt_require(count);

	amdgpu_bo_export(ib_result_handle,
			 amdgpu_bo_handle_type_dma_buf_fd,
			 (uint32_t *)&dmabuf);
	igt_assert_eq(poll(&(struct pollfd){dmabuf, POLLOUT}, 1, 0), 0);
	obj[0].handle = prime_fd_to_handle(i915, dmabuf);
	obj[0].flags = EXEC_OBJECT_WRITE;
	close(dmabuf);

	gem_execbuf(i915, &execbuf);
	igt_assert(gem_bo_busy(i915, obj[1].handle));

	unplug(&c);

	gem_sync(i915, obj[1].handle);
	gem_close(i915, obj[1].handle);

	while (count--)
		amdgpu_cs_ctx_free(contexts[count]);
	free(contexts);

	r = amdgpu_bo_list_destroy(bo_list);
	igt_assert_eq(r, 0);

	amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
				 ib_result_mc_address, 4096);
}

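/*
 * Import an amdgpu BO into i915 without keeping the amdgpu handle alive,
 * populate its pages on the i915 side, then force the shrinker to evict
 * them, poking the i915-shrinker -> amdgpu dma-buf path.
 */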
static void shrink(int i915, int amd, amdgpu_device_handle device)
{
	struct amdgpu_bo_alloc_request request = {
		.alloc_size = 1024 * 1024 * 4,
		.phys_alignment = 4096,
		.preferred_heap = AMDGPU_GEM_DOMAIN_GTT,
	};
	amdgpu_bo_handle bo;
	uint32_t handle;
	int dmabuf;

	igt_assert_eq(amdgpu_bo_alloc(device, &request, &bo), 0);
	amdgpu_bo_export(bo,
			 amdgpu_bo_handle_type_dma_buf_fd,
			 (uint32_t *)&dmabuf);
	amdgpu_bo_free(bo);

	handle = prime_fd_to_handle(i915, dmabuf);
	close(dmabuf);

	/* Populate the i915_bo->pages. */
	gem_set_domain(i915, handle, I915_GEM_DOMAIN_GTT, 0);

	/* Now evict them, establishing the link from i915:shrinker to amd. */
	igt_drop_caches_set(i915, DROP_SHRINK_ALL);

	gem_close(i915, handle);
}

igt_main
{
	amdgpu_device_handle device;
	int i915 = -1, amd = -1;

	igt_skip_on_simulation();

	igt_fixture {
		uint32_t major, minor;
		int err;

		i915 = drm_open_driver(DRIVER_INTEL);
		igt_require_gem(i915);
		igt_require(gem_has_exec_fence(i915));

		amd = drm_open_driver(DRIVER_AMDGPU);
		err = amdgpu_device_initialize(amd, &major, &minor, &device);
		igt_require(err == 0);
	}

	igt_subtest("i915-to-amd") {
		gem_require_contexts(i915);
		i915_to_amd(i915, amd, device);
	}

	igt_subtest("amd-to-i915")
		amd_to_i915(i915, amd, device);

	igt_subtest("shrink")
		shrink(i915, amd, device);

	igt_fixture {
		amdgpu_device_deinitialize(device);
		close(amd);
		close(i915);
	}
}