xref: /aosp_15_r20/external/igt-gpu-tools/tests/amdgpu/amd_basic.c (revision d83cc019efdc2edc6c4b16e9034a3ceb8d35d77c)
1 /*
2  * Copyright 2014 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Based on libdrm/tests/amdgpu/basic_tests.c
23  */
24 
25 #include "config.h"
26 
27 #include <stdio.h>
28 #include <stdlib.h>
29 #include <unistd.h>
30 #ifdef HAVE_ALLOCA_H
31 # include <alloca.h>
32 #endif
33 
34 #include "drmtest.h"
35 
36 #include <amdgpu.h>
37 #include <amdgpu_drm.h>
38 
39 static amdgpu_device_handle device;
40 
41 static void amdgpu_command_submission_write_linear_helper(unsigned ip_type);
42 static void amdgpu_command_submission_const_fill_helper(unsigned ip_type);
43 static void amdgpu_command_submission_copy_linear_helper(unsigned ip_type);
44 
45 #define BUFFER_SIZE (8 * 1024)
46 #define SDMA_PKT_HEADER_op_offset 0
47 #define SDMA_PKT_HEADER_op_mask   0x000000FF
48 #define SDMA_PKT_HEADER_op_shift  0
49 #define SDMA_PKT_HEADER_OP(x) (((x) & SDMA_PKT_HEADER_op_mask) << SDMA_PKT_HEADER_op_shift)
50 #define SDMA_OPCODE_CONSTANT_FILL  11
51 #       define SDMA_CONSTANT_FILL_EXTRA_SIZE(x)           ((x) << 14)
52 	/* 0 = byte fill
53 	 * 2 = DW fill
54 	 */
55 #define SDMA_PACKET(op, sub_op, e)	((((e) & 0xFFFF) << 16) |	\
56 					(((sub_op) & 0xFF) << 8) |	\
57 					(((op) & 0xFF) << 0))
58 #define	SDMA_OPCODE_WRITE				  2
59 #       define SDMA_WRITE_SUB_OPCODE_LINEAR               0
60 #       define SDMA_WRITE_SUB_OPCODE_TILED                1
61 
62 #define	SDMA_OPCODE_COPY				  1
63 #       define SDMA_COPY_SUB_OPCODE_LINEAR                0
64 
65 #define GFX_COMPUTE_NOP  0xffff1000
66 #define SDMA_NOP  0x0
67 
68 /* PM4 */
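/* A PM4 (CP) packet header encodes the packet type in bits [31:30] and a
 * dword count field in bits [29:16]; type-0 packets carry a register
 * offset and type-3 packets an opcode in the low bits, as built by the
 * PACKET0()/PACKET3() macros below.
 */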
69 #define	PACKET_TYPE0	0
70 #define	PACKET_TYPE1	1
71 #define	PACKET_TYPE2	2
72 #define	PACKET_TYPE3	3
73 
74 #define CP_PACKET_GET_TYPE(h) (((h) >> 30) & 3)
75 #define CP_PACKET_GET_COUNT(h) (((h) >> 16) & 0x3FFF)
76 #define CP_PACKET0_GET_REG(h) ((h) & 0xFFFF)
77 #define CP_PACKET3_GET_OPCODE(h) (((h) >> 8) & 0xFF)
78 #define PACKET0(reg, n)	((PACKET_TYPE0 << 30) |				\
79 			 ((reg) & 0xFFFF) |			\
80 			 ((n) & 0x3FFF) << 16)
81 #define CP_PACKET2			0x80000000
82 #define		PACKET2_PAD_SHIFT		0
83 #define		PACKET2_PAD_MASK		(0x3fffffff << 0)
84 
85 #define PACKET2(v)	(CP_PACKET2 | REG_SET(PACKET2_PAD, (v)))
86 
87 #define PACKET3(op, n)	((PACKET_TYPE3 << 30) |				\
88 			 (((op) & 0xFF) << 8) |				\
89 			 ((n) & 0x3FFF) << 16)
90 
91 /* Packet 3 types */
92 #define	PACKET3_NOP					0x10
93 
94 #define	PACKET3_WRITE_DATA				0x37
95 #define		WRITE_DATA_DST_SEL(x)                   ((x) << 8)
96 		/* 0 - register
97 		 * 1 - memory (sync - via GRBM)
98 		 * 2 - gl2
99 		 * 3 - gds
100 		 * 4 - reserved
101 		 * 5 - memory (async - direct)
102 		 */
103 #define		WR_ONE_ADDR                             (1 << 16)
104 #define		WR_CONFIRM                              (1 << 20)
105 #define		WRITE_DATA_CACHE_POLICY(x)              ((x) << 25)
106 		/* 0 - LRU
107 		 * 1 - Stream
108 		 */
109 #define		WRITE_DATA_ENGINE_SEL(x)                ((x) << 30)
110 		/* 0 - me
111 		 * 1 - pfp
112 		 * 2 - ce
113 		 */
114 
115 #define	PACKET3_DMA_DATA				0x50
116 /* 1. header
117  * 2. CONTROL
118  * 3. SRC_ADDR_LO or DATA [31:0]
119  * 4. SRC_ADDR_HI [31:0]
120  * 5. DST_ADDR_LO [31:0]
121  * 6. DST_ADDR_HI [7:0]
122  * 7. COMMAND [30:21] | BYTE_COUNT [20:0]
123  */
124 /* CONTROL */
125 #              define PACKET3_DMA_DATA_ENGINE(x)     ((x) << 0)
126 		/* 0 - ME
127 		 * 1 - PFP
128 		 */
129 #              define PACKET3_DMA_DATA_SRC_CACHE_POLICY(x) ((x) << 13)
130 		/* 0 - LRU
131 		 * 1 - Stream
132 		 * 2 - Bypass
133 		 */
134 #              define PACKET3_DMA_DATA_SRC_VOLATILE (1 << 15)
135 #              define PACKET3_DMA_DATA_DST_SEL(x)  ((x) << 20)
136 		/* 0 - DST_ADDR using DAS
137 		 * 1 - GDS
138 		 * 3 - DST_ADDR using L2
139 		 */
140 #              define PACKET3_DMA_DATA_DST_CACHE_POLICY(x) ((x) << 25)
141 		/* 0 - LRU
142 		 * 1 - Stream
143 		 * 2 - Bypass
144 		 */
145 #              define PACKET3_DMA_DATA_DST_VOLATILE (1 << 27)
146 #              define PACKET3_DMA_DATA_SRC_SEL(x)  ((x) << 29)
147 		/* 0 - SRC_ADDR using SAS
148 		 * 1 - GDS
149 		 * 2 - DATA
150 		 * 3 - SRC_ADDR using L2
151 		 */
152 #              define PACKET3_DMA_DATA_CP_SYNC     (1 << 31)
153 /* COMMAND */
154 #              define PACKET3_DMA_DATA_DIS_WC      (1 << 21)
155 #              define PACKET3_DMA_DATA_CMD_SRC_SWAP(x) ((x) << 22)
156 		/* 0 - none
157 		 * 1 - 8 in 16
158 		 * 2 - 8 in 32
159 		 * 3 - 8 in 64
160 		 */
161 #              define PACKET3_DMA_DATA_CMD_DST_SWAP(x) ((x) << 24)
162 		/* 0 - none
163 		 * 1 - 8 in 16
164 		 * 2 - 8 in 32
165 		 * 3 - 8 in 64
166 		 */
167 #              define PACKET3_DMA_DATA_CMD_SAS     (1 << 26)
168 		/* 0 - memory
169 		 * 1 - register
170 		 */
171 #              define PACKET3_DMA_DATA_CMD_DAS     (1 << 27)
172 		/* 0 - memory
173 		 * 1 - register
174 		 */
175 #              define PACKET3_DMA_DATA_CMD_SAIC    (1 << 28)
176 #              define PACKET3_DMA_DATA_CMD_DAIC    (1 << 29)
177 #              define PACKET3_DMA_DATA_CMD_RAW_WAIT  (1 << 30)
178 
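/*
 * Basic query sanity check: fetch the GPU info and the VCE firmware
 * version and expect both queries to succeed.
 */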
179 static void amdgpu_query_info_test(void)
180 {
181 	struct amdgpu_gpu_info gpu_info = {};
182 	uint32_t version, feature;
183 	int r;
184 
185 	r = amdgpu_query_gpu_info(device, &gpu_info);
186 	igt_assert_eq(r, 0);
187 
188 	r = amdgpu_query_firmware_version(device, AMDGPU_INFO_FW_VCE, 0,
189 					  0, &version, &feature);
190 	igt_assert_eq(r, 0);
191 }
192 
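/*
 * Allocate a BO in the requested heap, reserve a GPU VA range of matching
 * size/alignment and map the BO there; returns the BO handle and reports
 * the GPU address and VA handle through vmc_addr/va_handle.
 */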
193 static amdgpu_bo_handle gpu_mem_alloc(amdgpu_device_handle device_handle,
194 				      uint64_t size,
195 				      uint64_t alignment,
196 				      uint32_t type,
197 				      uint64_t flags,
198 				      uint64_t *vmc_addr,
199 				      amdgpu_va_handle *va_handle)
200 {
201 	struct amdgpu_bo_alloc_request req = {
202 		.alloc_size = size,
203 		.phys_alignment = alignment,
204 		.preferred_heap = type,
205 		.flags = flags,
206 	};
207 	amdgpu_bo_handle buf_handle;
208 	int r;
209 
210 	r = amdgpu_bo_alloc(device_handle, &req, &buf_handle);
211 	igt_assert_eq(r, 0);
212 
213 	r = amdgpu_va_range_alloc(device_handle,
214 				  amdgpu_gpu_va_range_general,
215 				  size, alignment, 0, vmc_addr,
216 				  va_handle, 0);
217 	igt_assert_eq(r, 0);
218 
219 	r = amdgpu_bo_va_op(buf_handle, 0, size, *vmc_addr, 0, AMDGPU_VA_OP_MAP);
220 	igt_assert_eq(r, 0);
221 
222 	return buf_handle;
223 }
224 
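/* Unmap the BO from its GPU VA range, free the range and free the BO. */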
225 static void gpu_mem_free(amdgpu_bo_handle bo,
226 			 amdgpu_va_handle va_handle,
227 			 uint64_t vmc_addr,
228 			 uint64_t size)
229 {
230 	int r;
231 
232 	r = amdgpu_bo_va_op(bo, 0, size, vmc_addr, 0, AMDGPU_VA_OP_UNMAP);
233 	igt_assert_eq(r, 0);
234 
235 	r = amdgpu_va_range_free(va_handle);
236 	igt_assert_eq(r, 0);
237 
238 	r = amdgpu_bo_free(bo);
239 	igt_assert_eq(r, 0);
240 }
241 
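/*
 * Allocate a BO, map it into the GPU VA space and CPU-map it. On success
 * returns 0 and fills bo, cpu, mc_address and va_handle; on failure the
 * partially created state is torn down and the error is returned.
 */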
242 static int
243 amdgpu_bo_alloc_and_map(amdgpu_device_handle dev, unsigned size,
244 			unsigned alignment, unsigned heap, uint64_t flags,
245 			amdgpu_bo_handle *bo, void **cpu, uint64_t *mc_address,
246 			amdgpu_va_handle *va_handle)
247 {
248 	struct amdgpu_bo_alloc_request request = {
249 		.alloc_size = size,
250 		.phys_alignment = alignment,
251 		.preferred_heap = heap,
252 		.flags = flags,
253 	};
254 	amdgpu_bo_handle buf_handle;
255 	amdgpu_va_handle handle;
256 	uint64_t vmc_addr;
257 	int r;
258 
259 	r = amdgpu_bo_alloc(dev, &request, &buf_handle);
260 	if (r)
261 		return r;
262 
263 	r = amdgpu_va_range_alloc(dev,
264 				  amdgpu_gpu_va_range_general,
265 				  size, alignment, 0, &vmc_addr,
266 				  &handle, 0);
267 	if (r)
268 		goto error_va_alloc;
269 
270 	r = amdgpu_bo_va_op(buf_handle, 0, size, vmc_addr, 0, AMDGPU_VA_OP_MAP);
271 	if (r)
272 		goto error_va_map;
273 
274 	r = amdgpu_bo_cpu_map(buf_handle, cpu);
275 	if (r)
276 		goto error_cpu_map;
277 
278 	*bo = buf_handle;
279 	*mc_address = vmc_addr;
280 	*va_handle = handle;
281 
282 	return 0;
283 
284 error_cpu_map:
285 	amdgpu_bo_cpu_unmap(buf_handle);
286 
287 error_va_map:
288 	amdgpu_bo_va_op(buf_handle, 0, size, vmc_addr, 0, AMDGPU_VA_OP_UNMAP);
289 
290 error_va_alloc:
291 	amdgpu_bo_free(buf_handle);
292 	return r;
293 }
294 
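/*
 * Counterpart of amdgpu_bo_alloc_and_map(): drop the CPU mapping, the GPU
 * VA mapping and the VA reservation, then free the BO.
 */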
295 static void
296 amdgpu_bo_unmap_and_free(amdgpu_bo_handle bo, amdgpu_va_handle va_handle,
297 			 uint64_t mc_addr, uint64_t size)
298 {
299 	amdgpu_bo_cpu_unmap(bo);
300 	amdgpu_bo_va_op(bo, 0, size, mc_addr, 0, AMDGPU_VA_OP_UNMAP);
301 	amdgpu_va_range_free(va_handle);
302 	amdgpu_bo_free(bo);
303 }
304 
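/* Build a BO list from one or two buffers; bo2 may be NULL. */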
305 static int
306 amdgpu_get_bo_list(amdgpu_device_handle dev, amdgpu_bo_handle bo1,
307 		   amdgpu_bo_handle bo2, amdgpu_bo_list_handle *list)
308 {
309 	amdgpu_bo_handle resources[] = {bo1, bo2};
310 
311 	return amdgpu_bo_list_create(dev, bo2 ? 2 : 1, resources, NULL, list);
312 }
313 
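/*
 * Exercise the basic allocation paths: CPU-visible VRAM, invisible VRAM,
 * cacheable GTT and write-combined (USWC) GTT, freeing each BO again.
 */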
314 static void amdgpu_memory_alloc(void)
315 {
316 	amdgpu_bo_handle bo;
317 	amdgpu_va_handle va_handle;
318 	uint64_t bo_mc;
319 
320 	/* Test visible VRAM */
321 	bo = gpu_mem_alloc(device,
322 			   4096, 4096,
323 			   AMDGPU_GEM_DOMAIN_VRAM,
324 			   AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
325 			   &bo_mc, &va_handle);
326 
327 	gpu_mem_free(bo, va_handle, bo_mc, 4096);
328 
329 	/* Test invisible VRAM */
330 	bo = gpu_mem_alloc(device,
331 			   4096, 4096,
332 			   AMDGPU_GEM_DOMAIN_VRAM,
333 			   AMDGPU_GEM_CREATE_NO_CPU_ACCESS,
334 			   &bo_mc, &va_handle);
335 
336 	gpu_mem_free(bo, va_handle, bo_mc, 4096);
337 
338 	/* Test GART Cacheable */
339 	bo = gpu_mem_alloc(device,
340 			   4096, 4096,
341 			   AMDGPU_GEM_DOMAIN_GTT,
342 			   0, &bo_mc, &va_handle);
343 
344 	gpu_mem_free(bo, va_handle, bo_mc, 4096);
345 
346 	/* Test GART USWC */
347 	bo = gpu_mem_alloc(device,
348 			   4096, 4096,
349 			   AMDGPU_GEM_DOMAIN_GTT,
350 			   AMDGPU_GEM_CREATE_CPU_GTT_USWC,
351 			   &bo_mc, &va_handle);
352 
353 	gpu_mem_free(bo, va_handle, bo_mc, 4096);
354 }
355 
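/*
 * Submit a single GFX request containing two IBs in separate BOs: a
 * constant-engine (CE) IB that programs the CE/DE counters and a DE IB
 * that waits on the CE counter, then wait for the resulting fence.
 */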
356 static void amdgpu_command_submission_gfx_separate_ibs(void)
357 {
358 	amdgpu_context_handle context_handle;
359 	amdgpu_bo_handle ib_result_handle, ib_result_ce_handle;
360 	void *ib_result_cpu, *ib_result_ce_cpu;
361 	uint64_t ib_result_mc_address, ib_result_ce_mc_address;
362 	struct amdgpu_cs_request ibs_request = {0};
363 	struct amdgpu_cs_ib_info ib_info[2];
364 	struct amdgpu_cs_fence fence_status = {0};
365 	uint32_t *ptr;
366 	uint32_t expired;
367 	amdgpu_bo_list_handle bo_list;
368 	amdgpu_va_handle va_handle, va_handle_ce;
369 	int r;
370 
371 	r = amdgpu_cs_ctx_create(device, &context_handle);
372 	igt_assert_eq(r, 0);
373 
374 	r = amdgpu_bo_alloc_and_map(device, 4096, 4096,
375 				    AMDGPU_GEM_DOMAIN_GTT, 0,
376 				    &ib_result_handle, &ib_result_cpu,
377 				    &ib_result_mc_address, &va_handle);
378 	igt_assert_eq(r, 0);
379 
380 	r = amdgpu_bo_alloc_and_map(device, 4096, 4096,
381 				    AMDGPU_GEM_DOMAIN_GTT, 0,
382 				    &ib_result_ce_handle, &ib_result_ce_cpu,
383 				    &ib_result_ce_mc_address, &va_handle_ce);
384 	igt_assert_eq(r, 0);
385 
386 	r = amdgpu_get_bo_list(device, ib_result_handle,
387 			       ib_result_ce_handle, &bo_list);
388 	igt_assert_eq(r, 0);
389 
390 	memset(ib_info, 0, 2 * sizeof(struct amdgpu_cs_ib_info));
391 
392 	/* IT_SET_CE_DE_COUNTERS */
393 	ptr = ib_result_ce_cpu;
394 	ptr[0] = 0xc0008900;
395 	ptr[1] = 0;
396 	ptr[2] = 0xc0008400;
397 	ptr[3] = 1;
398 	ib_info[0].ib_mc_address = ib_result_ce_mc_address;
399 	ib_info[0].size = 4;
400 	ib_info[0].flags = AMDGPU_IB_FLAG_CE;
401 
402 	/* IT_WAIT_ON_CE_COUNTER */
403 	ptr = ib_result_cpu;
404 	ptr[0] = 0xc0008600;
405 	ptr[1] = 0x00000001;
406 	ib_info[1].ib_mc_address = ib_result_mc_address;
407 	ib_info[1].size = 2;
408 
409 	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
410 	ibs_request.number_of_ibs = 2;
411 	ibs_request.ibs = ib_info;
412 	ibs_request.resources = bo_list;
413 	ibs_request.fence_info.handle = NULL;
414 
415 	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
416 
417 	igt_assert_eq(r, 0);
418 
419 	fence_status.context = context_handle;
420 	fence_status.ip_type = AMDGPU_HW_IP_GFX;
421 	fence_status.ip_instance = 0;
422 	fence_status.fence = ibs_request.seq_no;
423 
424 	r = amdgpu_cs_query_fence_status(&fence_status,
425 					 AMDGPU_TIMEOUT_INFINITE,
426 					 0, &expired);
427 	igt_assert_eq(r, 0);
428 
429 	amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
430 				 ib_result_mc_address, 4096);
431 	amdgpu_bo_unmap_and_free(ib_result_ce_handle, va_handle_ce,
432 				 ib_result_ce_mc_address, 4096);
433 
434 	r = amdgpu_bo_list_destroy(bo_list);
435 	igt_assert_eq(r, 0);
436 
437 	r = amdgpu_cs_ctx_free(context_handle);
438 	igt_assert_eq(r, 0);
439 }
440 
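/*
 * Same CE/DE counter submission as above, but both IBs live in one shared
 * BO, with the second IB starting 16 bytes into the buffer.
 */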
441 static void amdgpu_command_submission_gfx_shared_ib(void)
442 {
443 	amdgpu_context_handle context_handle;
444 	amdgpu_bo_handle ib_result_handle;
445 	void *ib_result_cpu;
446 	uint64_t ib_result_mc_address;
447 	struct amdgpu_cs_request ibs_request = {0};
448 	struct amdgpu_cs_ib_info ib_info[2];
449 	struct amdgpu_cs_fence fence_status = {0};
450 	uint32_t *ptr;
451 	uint32_t expired;
452 	amdgpu_bo_list_handle bo_list;
453 	amdgpu_va_handle va_handle;
454 	int r;
455 
456 	r = amdgpu_cs_ctx_create(device, &context_handle);
457 	igt_assert_eq(r, 0);
458 
459 	r = amdgpu_bo_alloc_and_map(device, 4096, 4096,
460 				    AMDGPU_GEM_DOMAIN_GTT, 0,
461 				    &ib_result_handle, &ib_result_cpu,
462 				    &ib_result_mc_address, &va_handle);
463 	igt_assert_eq(r, 0);
464 
465 	r = amdgpu_get_bo_list(device, ib_result_handle, NULL,
466 			       &bo_list);
467 	igt_assert_eq(r, 0);
468 
469 	memset(ib_info, 0, 2 * sizeof(struct amdgpu_cs_ib_info));
470 
471 	/* IT_SET_CE_DE_COUNTERS */
472 	ptr = ib_result_cpu;
473 	ptr[0] = 0xc0008900;
474 	ptr[1] = 0;
475 	ptr[2] = 0xc0008400;
476 	ptr[3] = 1;
477 	ib_info[0].ib_mc_address = ib_result_mc_address;
478 	ib_info[0].size = 4;
479 	ib_info[0].flags = AMDGPU_IB_FLAG_CE;
480 
481 	ptr = (uint32_t *)ib_result_cpu + 4;
482 	ptr[0] = 0xc0008600;
483 	ptr[1] = 0x00000001;
484 	ib_info[1].ib_mc_address = ib_result_mc_address + 16;
485 	ib_info[1].size = 2;
486 
487 	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
488 	ibs_request.number_of_ibs = 2;
489 	ibs_request.ibs = ib_info;
490 	ibs_request.resources = bo_list;
491 	ibs_request.fence_info.handle = NULL;
492 
493 	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
494 
495 	igt_assert_eq(r, 0);
496 
497 	fence_status.context = context_handle;
498 	fence_status.ip_type = AMDGPU_HW_IP_GFX;
499 	fence_status.ip_instance = 0;
500 	fence_status.fence = ibs_request.seq_no;
501 
502 	r = amdgpu_cs_query_fence_status(&fence_status,
503 					 AMDGPU_TIMEOUT_INFINITE,
504 					 0, &expired);
505 	igt_assert_eq(r, 0);
506 
507 	amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
508 				 ib_result_mc_address, 4096);
509 
510 	r = amdgpu_bo_list_destroy(bo_list);
511 	igt_assert_eq(r, 0);
512 
513 	r = amdgpu_cs_ctx_free(context_handle);
514 	igt_assert_eq(r, 0);
515 }
516 
517 static void amdgpu_command_submission_gfx_cp_write_data(void)
518 {
519 	amdgpu_command_submission_write_linear_helper(AMDGPU_HW_IP_GFX);
520 }
521 
522 static void amdgpu_command_submission_gfx_cp_const_fill(void)
523 {
524 	amdgpu_command_submission_const_fill_helper(AMDGPU_HW_IP_GFX);
525 }
526 
527 static void amdgpu_command_submission_gfx_cp_copy_data(void)
528 {
529 	amdgpu_command_submission_copy_linear_helper(AMDGPU_HW_IP_GFX);
530 }
531 
532 static void amdgpu_command_submission_gfx(void)
533 {
534 	/* write data using the CP */
535 	amdgpu_command_submission_gfx_cp_write_data();
536 	/* const fill using the CP */
537 	amdgpu_command_submission_gfx_cp_const_fill();
538 	/* copy data using the CP */
539 	amdgpu_command_submission_gfx_cp_copy_data();
540 	/* separate IB buffers for multi-IB submission */
541 	amdgpu_command_submission_gfx_separate_ibs();
542 	/* shared IB buffer for multi-IB submission */
543 	amdgpu_command_submission_gfx_shared_ib();
544 }
545 
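/*
 * Semaphore test: part one signals the semaphore from an SDMA submission
 * and waits on it from GFX within the same context; part two signals and
 * waits on GFX across two different contexts. Each dependent submission
 * must retire within the 500 ms fence timeout.
 */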
546 static void amdgpu_semaphore_test(void)
547 {
548 	amdgpu_context_handle context_handle[2];
549 	amdgpu_semaphore_handle sem;
550 	amdgpu_bo_handle ib_result_handle[2];
551 	void *ib_result_cpu[2];
552 	uint64_t ib_result_mc_address[2];
553 	struct amdgpu_cs_request ibs_request[2] = {};
554 	struct amdgpu_cs_ib_info ib_info[2] = {};
555 	struct amdgpu_cs_fence fence_status = {};
556 	uint32_t *ptr;
557 	uint32_t expired;
558 	amdgpu_bo_list_handle bo_list[2];
559 	amdgpu_va_handle va_handle[2];
560 	int r, i;
561 
562 	r = amdgpu_cs_create_semaphore(&sem);
563 	igt_assert_eq(r, 0);
564 	for (i = 0; i < 2; i++) {
565 		r = amdgpu_cs_ctx_create(device, &context_handle[i]);
566 		igt_assert_eq(r, 0);
567 
568 		r = amdgpu_bo_alloc_and_map(device, 4096, 4096,
569 					    AMDGPU_GEM_DOMAIN_GTT, 0,
570 					    &ib_result_handle[i], &ib_result_cpu[i],
571 					    &ib_result_mc_address[i], &va_handle[i]);
572 		igt_assert_eq(r, 0);
573 
574 		r = amdgpu_get_bo_list(device, ib_result_handle[i],
575 				       NULL, &bo_list[i]);
576 		igt_assert_eq(r, 0);
577 	}
578 
579 	/* 1. same context different engine */
580 	ptr = ib_result_cpu[0];
581 	ptr[0] = SDMA_NOP;
582 	ib_info[0].ib_mc_address = ib_result_mc_address[0];
583 	ib_info[0].size = 1;
584 
585 	ibs_request[0].ip_type = AMDGPU_HW_IP_DMA;
586 	ibs_request[0].number_of_ibs = 1;
587 	ibs_request[0].ibs = &ib_info[0];
588 	ibs_request[0].resources = bo_list[0];
589 	ibs_request[0].fence_info.handle = NULL;
590 	r = amdgpu_cs_submit(context_handle[0], 0, &ibs_request[0], 1);
591 	igt_assert_eq(r, 0);
592 	r = amdgpu_cs_signal_semaphore(context_handle[0], AMDGPU_HW_IP_DMA, 0, 0, sem);
593 	igt_assert_eq(r, 0);
594 
595 	r = amdgpu_cs_wait_semaphore(context_handle[0], AMDGPU_HW_IP_GFX, 0, 0, sem);
596 	igt_assert_eq(r, 0);
597 	ptr = ib_result_cpu[1];
598 	ptr[0] = GFX_COMPUTE_NOP;
599 	ib_info[1].ib_mc_address = ib_result_mc_address[1];
600 	ib_info[1].size = 1;
601 
602 	ibs_request[1].ip_type = AMDGPU_HW_IP_GFX;
603 	ibs_request[1].number_of_ibs = 1;
604 	ibs_request[1].ibs = &ib_info[1];
605 	ibs_request[1].resources = bo_list[1];
606 	ibs_request[1].fence_info.handle = NULL;
607 
608 	r = amdgpu_cs_submit(context_handle[0], 0, &ibs_request[1], 1);
609 	igt_assert_eq(r, 0);
610 
611 	fence_status.context = context_handle[0];
612 	fence_status.ip_type = AMDGPU_HW_IP_GFX;
613 	fence_status.ip_instance = 0;
614 	fence_status.fence = ibs_request[1].seq_no;
615 	r = amdgpu_cs_query_fence_status(&fence_status,
616 					 500000000, 0, &expired);
617 	igt_assert_eq(r, 0);
618 	igt_assert_eq(expired, true);
619 
620 	/* 2. same engine different context */
621 	ptr = ib_result_cpu[0];
622 	ptr[0] = GFX_COMPUTE_NOP;
623 	ib_info[0].ib_mc_address = ib_result_mc_address[0];
624 	ib_info[0].size = 1;
625 
626 	ibs_request[0].ip_type = AMDGPU_HW_IP_GFX;
627 	ibs_request[0].number_of_ibs = 1;
628 	ibs_request[0].ibs = &ib_info[0];
629 	ibs_request[0].resources = bo_list[0];
630 	ibs_request[0].fence_info.handle = NULL;
631 	r = amdgpu_cs_submit(context_handle[0], 0, &ibs_request[0], 1);
632 	igt_assert_eq(r, 0);
633 	r = amdgpu_cs_signal_semaphore(context_handle[0], AMDGPU_HW_IP_GFX, 0, 0, sem);
634 	igt_assert_eq(r, 0);
635 
636 	r = amdgpu_cs_wait_semaphore(context_handle[1], AMDGPU_HW_IP_GFX, 0, 0, sem);
637 	igt_assert_eq(r, 0);
638 	ptr = ib_result_cpu[1];
639 	ptr[0] = GFX_COMPUTE_NOP;
640 	ib_info[1].ib_mc_address = ib_result_mc_address[1];
641 	ib_info[1].size = 1;
642 
643 	ibs_request[1].ip_type = AMDGPU_HW_IP_GFX;
644 	ibs_request[1].number_of_ibs = 1;
645 	ibs_request[1].ibs = &ib_info[1];
646 	ibs_request[1].resources = bo_list[1];
647 	ibs_request[1].fence_info.handle = NULL;
648 	r = amdgpu_cs_submit(context_handle[1], 0, &ibs_request[1], 1);
649 
650 	igt_assert_eq(r, 0);
651 
652 	fence_status.context = context_handle[1];
653 	fence_status.ip_type = AMDGPU_HW_IP_GFX;
654 	fence_status.ip_instance = 0;
655 	fence_status.fence = ibs_request[1].seq_no;
656 	r = amdgpu_cs_query_fence_status(&fence_status,
657 					 500000000, 0, &expired);
658 	igt_assert_eq(r, 0);
659 	igt_assert_eq(expired, true);
660 	for (i = 0; i < 2; i++) {
661 		amdgpu_bo_unmap_and_free(ib_result_handle[i], va_handle[i],
662 					 ib_result_mc_address[i], 4096);
663 
664 		r = amdgpu_bo_list_destroy(bo_list[i]);
665 		igt_assert_eq(r, 0);
666 
667 		r = amdgpu_cs_ctx_free(context_handle[i]);
668 		igt_assert_eq(r, 0);
669 	}
670 
671 	r = amdgpu_cs_destroy_semaphore(sem);
672 	igt_assert_eq(r, 0);
673 }
674 
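/*
 * Submit a 16-dword NOP IB to each of compute rings 0-7 and wait for the
 * corresponding fence on every ring.
 */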
675 static void amdgpu_command_submission_compute_nop(void)
676 {
677 	amdgpu_context_handle context_handle;
678 	amdgpu_bo_handle ib_result_handle;
679 	void *ib_result_cpu;
680 	uint64_t ib_result_mc_address;
681 	struct amdgpu_cs_request ibs_request;
682 	struct amdgpu_cs_ib_info ib_info;
683 	struct amdgpu_cs_fence fence_status;
684 	uint32_t *ptr;
685 	uint32_t expired;
686 	int i, r, instance;
687 	amdgpu_bo_list_handle bo_list;
688 	amdgpu_va_handle va_handle;
689 
690 	r = amdgpu_cs_ctx_create(device, &context_handle);
691 	igt_assert_eq(r, 0);
692 
693 	for (instance = 0; instance < 8; instance++) {
694 		r = amdgpu_bo_alloc_and_map(device, 4096, 4096,
695 					    AMDGPU_GEM_DOMAIN_GTT, 0,
696 					    &ib_result_handle, &ib_result_cpu,
697 					    &ib_result_mc_address, &va_handle);
698 		igt_assert_eq(r, 0);
699 
700 		r = amdgpu_get_bo_list(device, ib_result_handle, NULL,
701 				       &bo_list);
702 		igt_assert_eq(r, 0);
703 
704 		ptr = ib_result_cpu;
705 		for (i = 0; i < 16; ++i)
706 			ptr[i] = 0xffff1000;
707 
708 		memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info));
709 		ib_info.ib_mc_address = ib_result_mc_address;
710 		ib_info.size = 16;
711 
712 		memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request));
713 		ibs_request.ip_type = AMDGPU_HW_IP_COMPUTE;
714 		ibs_request.ring = instance;
715 		ibs_request.number_of_ibs = 1;
716 		ibs_request.ibs = &ib_info;
717 		ibs_request.resources = bo_list;
718 		ibs_request.fence_info.handle = NULL;
719 
720 		memset(&fence_status, 0, sizeof(struct amdgpu_cs_fence));
721 		r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
722 		igt_assert_eq(r, 0);
723 
724 		fence_status.context = context_handle;
725 		fence_status.ip_type = AMDGPU_HW_IP_COMPUTE;
726 		fence_status.ip_instance = 0;
727 		fence_status.ring = instance;
728 		fence_status.fence = ibs_request.seq_no;
729 
730 		r = amdgpu_cs_query_fence_status(&fence_status,
731 						 AMDGPU_TIMEOUT_INFINITE,
732 						 0, &expired);
733 		igt_assert_eq(r, 0);
734 
735 		r = amdgpu_bo_list_destroy(bo_list);
736 		igt_assert_eq(r, 0);
737 
738 		amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
739 					 ib_result_mc_address, 4096);
740 	}
741 
742 	r = amdgpu_cs_ctx_free(context_handle);
743 	igt_assert_eq(r, 0);
744 }
745 
746 static void amdgpu_command_submission_compute_cp_write_data(void)
747 {
748 	amdgpu_command_submission_write_linear_helper(AMDGPU_HW_IP_COMPUTE);
749 }
750 
751 static void amdgpu_command_submission_compute_cp_const_fill(void)
752 {
753 	amdgpu_command_submission_const_fill_helper(AMDGPU_HW_IP_COMPUTE);
754 }
755 
756 static void amdgpu_command_submission_compute_cp_copy_data(void)
757 {
758 	amdgpu_command_submission_copy_linear_helper(AMDGPU_HW_IP_COMPUTE);
759 }
760 
761 static void amdgpu_command_submission_compute(void)
762 {
763 	/* write data using the CP */
764 	amdgpu_command_submission_compute_cp_write_data();
765 	/* const fill using the CP */
766 	amdgpu_command_submission_compute_cp_const_fill();
767 	/* copy data using the CP */
768 	amdgpu_command_submission_compute_cp_copy_data();
769 	/* nop test */
770 	amdgpu_command_submission_compute_nop();
771 }
772 
773 /*
774  * The caller must create and release pm4_src, resources, ib_info and
775  * ibs_request. This submits the command stream described in ibs_request
776  * and waits until the IB has completed.
777  */
778 static void amdgpu_test_exec_cs_helper(amdgpu_context_handle context_handle,
779 				       unsigned ip_type,
780 				       int instance, int pm4_dw, uint32_t *pm4_src,
781 				       int res_cnt, amdgpu_bo_handle *resources,
782 				       struct amdgpu_cs_ib_info *ib_info,
783 				       struct amdgpu_cs_request *ibs_request)
784 {
785 	int r;
786 	uint32_t expired;
787 	uint32_t *ring_ptr;
788 	amdgpu_bo_handle ib_result_handle;
789 	void *ib_result_cpu;
790 	uint64_t ib_result_mc_address;
791 	struct amdgpu_cs_fence fence_status = {0};
792 	amdgpu_bo_handle *all_res = alloca(sizeof(resources[0]) * (res_cnt + 1));
793 	amdgpu_va_handle va_handle;
794 
795 	/* prepare CS */
796 	igt_assert(pm4_dw <= 1024);
797 
798 	/* allocate IB */
799 	r = amdgpu_bo_alloc_and_map(device, 4096, 4096,
800 				    AMDGPU_GEM_DOMAIN_GTT, 0,
801 				    &ib_result_handle, &ib_result_cpu,
802 				    &ib_result_mc_address, &va_handle);
803 	igt_assert_eq(r, 0);
804 
805 	/* copy PM4 packet to ring from caller */
806 	ring_ptr = ib_result_cpu;
807 	memcpy(ring_ptr, pm4_src, pm4_dw * sizeof(*pm4_src));
808 
809 	ib_info->ib_mc_address = ib_result_mc_address;
810 	ib_info->size = pm4_dw;
811 
812 	ibs_request->ip_type = ip_type;
813 	ibs_request->ring = instance;
814 	ibs_request->number_of_ibs = 1;
815 	ibs_request->ibs = ib_info;
816 	ibs_request->fence_info.handle = NULL;
817 
818 	memcpy(all_res, resources, sizeof(resources[0]) * res_cnt);
819 	all_res[res_cnt] = ib_result_handle;
820 
821 	r = amdgpu_bo_list_create(device, res_cnt+1, all_res,
822 				  NULL, &ibs_request->resources);
823 	igt_assert_eq(r, 0);
824 
825 	/* submit CS */
826 	r = amdgpu_cs_submit(context_handle, 0, ibs_request, 1);
827 	igt_assert_eq(r, 0);
828 
829 	r = amdgpu_bo_list_destroy(ibs_request->resources);
830 	igt_assert_eq(r, 0);
831 
832 	fence_status.ip_type = ip_type;
833 	fence_status.ip_instance = 0;
834 	fence_status.ring = ibs_request->ring;
835 	fence_status.context = context_handle;
836 	fence_status.fence = ibs_request->seq_no;
837 
838 	/* wait for IB accomplished */
839 	r = amdgpu_cs_query_fence_status(&fence_status,
840 					 AMDGPU_TIMEOUT_INFINITE,
841 					 0, &expired);
842 	igt_assert_eq(r, 0);
843 	igt_assert_eq(expired, true);
844 
845 	amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
846 				 ib_result_mc_address, 4096);
847 }
848 
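/*
 * Write-linear test: emit either an SDMA WRITE_LINEAR packet or a PM4
 * WRITE_DATA packet that stores 128 dwords of 0xdeadbeaf into a GTT BO,
 * execute it via amdgpu_test_exec_cs_helper() and verify the contents,
 * once with a cached and once with a USWC (write-combined) mapping.
 */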
849 static void amdgpu_command_submission_write_linear_helper(unsigned ip_type)
850 {
851 	const int sdma_write_length = 128;
852 	const int pm4_dw = 256;
853 	amdgpu_context_handle context_handle;
854 	amdgpu_bo_handle bo;
855 	amdgpu_bo_handle *resources;
856 	uint32_t *pm4;
857 	struct amdgpu_cs_ib_info *ib_info;
858 	struct amdgpu_cs_request *ibs_request;
859 	struct amdgpu_gpu_info gpu_info = {0};
860 	uint64_t bo_mc;
861 	volatile uint32_t *bo_cpu;
862 	int i, j, r, loop;
863 	uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
864 	amdgpu_va_handle va_handle;
865 
866 	r = amdgpu_query_gpu_info(device, &gpu_info);
867 	igt_assert_eq(r, 0);
868 
869 	pm4 = calloc(pm4_dw, sizeof(*pm4));
870 	igt_assert(pm4);
871 
872 	ib_info = calloc(1, sizeof(*ib_info));
873 	igt_assert(ib_info);
874 
875 	ibs_request = calloc(1, sizeof(*ibs_request));
876 	igt_assert(ibs_request);
877 
878 	r = amdgpu_cs_ctx_create(device, &context_handle);
879 	igt_assert_eq(r, 0);
880 
881 	/* prepare resource */
882 	resources = calloc(1, sizeof(amdgpu_bo_handle));
883 	igt_assert(resources);
884 
885 	loop = 0;
886 	while(loop < 2) {
887 		/* allocate UC bo for sDMA use */
888 		r = amdgpu_bo_alloc_and_map(device,
889 					    sdma_write_length * sizeof(uint32_t),
890 					    4096, AMDGPU_GEM_DOMAIN_GTT,
891 					    gtt_flags[loop], &bo, (void**)&bo_cpu,
892 					    &bo_mc, &va_handle);
893 		igt_assert_eq(r, 0);
894 
895 		/* clear bo */
896 		memset((void*)bo_cpu, 0, sdma_write_length * sizeof(uint32_t));
897 
898 
899 		resources[0] = bo;
900 
901 		/* fill the PM4 stream: test DMA write-linear */
902 		i = j = 0;
903 		if (ip_type == AMDGPU_HW_IP_DMA) {
904 			pm4[i++] = SDMA_PACKET(SDMA_OPCODE_WRITE,
905 					       SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
906 			pm4[i++] = 0xffffffff & bo_mc;
907 			pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
908 			if (gpu_info.family_id >= AMDGPU_FAMILY_AI)
909 				pm4[i++] = sdma_write_length - 1;
910 			else
911 				pm4[i++] = sdma_write_length;
912 			while(j++ < sdma_write_length)
913 				pm4[i++] = 0xdeadbeaf;
914 		} else if ((ip_type == AMDGPU_HW_IP_GFX) ||
915 			   (ip_type == AMDGPU_HW_IP_COMPUTE)) {
916 			pm4[i++] = PACKET3(PACKET3_WRITE_DATA, 2 + sdma_write_length);
917 			pm4[i++] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
918 			pm4[i++] = 0xfffffffc & bo_mc;
919 			pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
920 			while(j++ < sdma_write_length)
921 				pm4[i++] = 0xdeadbeaf;
922 		}
923 
924 		amdgpu_test_exec_cs_helper(context_handle,
925 					   ip_type, 0,
926 					   i, pm4,
927 					   1, resources,
928 					   ib_info, ibs_request);
929 
930 		/* verify that the written data matches the expected pattern */
931 		i = 0;
932 		while(i < sdma_write_length) {
933 			igt_assert_eq(bo_cpu[i++], 0xdeadbeaf);
934 		}
935 
936 		amdgpu_bo_unmap_and_free(bo, va_handle, bo_mc,
937 					 sdma_write_length * sizeof(uint32_t));
938 		loop++;
939 	}
940 	/* clean resources */
941 	free(resources);
942 	free(ibs_request);
943 	free(ib_info);
944 	free(pm4);
945 
946 	/* end of test */
947 	r = amdgpu_cs_ctx_free(context_handle);
948 	igt_assert_eq(r, 0);
949 }
950 
951 static void amdgpu_command_submission_sdma_write_linear(void)
952 {
953 	amdgpu_command_submission_write_linear_helper(AMDGPU_HW_IP_DMA);
954 }
955 
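/*
 * Constant-fill test: fill a 1 MiB GTT BO with 0xdeadbeaf using either an
 * SDMA CONSTANT_FILL packet (dword fill) or a PM4 DMA_DATA packet with the
 * fill value embedded in the packet, then verify every dword, once with a
 * cached and once with a USWC mapping.
 */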
956 static void amdgpu_command_submission_const_fill_helper(unsigned ip_type)
957 {
958 	const int sdma_write_length = 1024 * 1024;
959 	const int pm4_dw = 256;
960 	amdgpu_context_handle context_handle;
961 	amdgpu_bo_handle bo;
962 	amdgpu_bo_handle *resources;
963 	uint32_t *pm4;
964 	struct amdgpu_cs_ib_info *ib_info;
965 	struct amdgpu_cs_request *ibs_request;
966 	struct amdgpu_gpu_info gpu_info = {0};
967 	uint64_t bo_mc;
968 	volatile uint32_t *bo_cpu;
969 	int i, j, r, loop;
970 	uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
971 	amdgpu_va_handle va_handle;
972 
973 	r = amdgpu_query_gpu_info(device, &gpu_info);
974 	igt_assert_eq(r, 0);
975 
976 	pm4 = calloc(pm4_dw, sizeof(*pm4));
977 	igt_assert(pm4);
978 
979 	ib_info = calloc(1, sizeof(*ib_info));
980 	igt_assert(ib_info);
981 
982 	ibs_request = calloc(1, sizeof(*ibs_request));
983 	igt_assert(ibs_request);
984 
985 	r = amdgpu_cs_ctx_create(device, &context_handle);
986 	igt_assert_eq(r, 0);
987 
988 	/* prepare resource */
989 	resources = calloc(1, sizeof(amdgpu_bo_handle));
990 	igt_assert(resources);
991 
992 	loop = 0;
993 	while(loop < 2) {
994 		/* allocate UC bo for sDMA use */
995 		r = amdgpu_bo_alloc_and_map(device,
996 					    sdma_write_length, 4096,
997 					    AMDGPU_GEM_DOMAIN_GTT,
998 					    gtt_flags[loop], &bo, (void**)&bo_cpu,
999 					    &bo_mc, &va_handle);
1000 		igt_assert_eq(r, 0);
1001 
1002 		/* clear bo */
1003 		memset((void*)bo_cpu, 0, sdma_write_length);
1004 
1005 		resources[0] = bo;
1006 
1007 		/* fill the PM4 stream: test DMA constant fill */
1008 		i = j = 0;
1009 		if (ip_type == AMDGPU_HW_IP_DMA) {
1010 			pm4[i++] = SDMA_PACKET(SDMA_OPCODE_CONSTANT_FILL, 0,
1011 					       SDMA_CONSTANT_FILL_EXTRA_SIZE(2));
1012 			pm4[i++] = 0xffffffff & bo_mc;
1013 			pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
1014 			pm4[i++] = 0xdeadbeaf;
1015 			if (gpu_info.family_id >= AMDGPU_FAMILY_AI)
1016 				pm4[i++] = sdma_write_length - 1;
1017 			else
1018 				pm4[i++] = sdma_write_length;
1019 		} else if ((ip_type == AMDGPU_HW_IP_GFX) ||
1020 			   (ip_type == AMDGPU_HW_IP_COMPUTE)) {
1021 			pm4[i++] = PACKET3(PACKET3_DMA_DATA, 5);
1022 			pm4[i++] = PACKET3_DMA_DATA_ENGINE(0) |
1023 				PACKET3_DMA_DATA_DST_SEL(0) |
1024 				PACKET3_DMA_DATA_SRC_SEL(2) |
1025 				PACKET3_DMA_DATA_CP_SYNC;
1026 			pm4[i++] = 0xdeadbeaf;
1027 			pm4[i++] = 0;
1028 			pm4[i++] = 0xfffffffc & bo_mc;
1029 			pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
1030 			pm4[i++] = sdma_write_length;
1031 		}
1032 
1033 		amdgpu_test_exec_cs_helper(context_handle,
1034 					   ip_type, 0,
1035 					   i, pm4,
1036 					   1, resources,
1037 					   ib_info, ibs_request);
1038 
1039 		/* verify that the filled data matches the expected pattern */
1040 		i = 0;
1041 		while(i < (sdma_write_length / 4)) {
1042 			igt_assert_eq(bo_cpu[i++], 0xdeadbeaf);
1043 		}
1044 
1045 		amdgpu_bo_unmap_and_free(bo, va_handle, bo_mc,
1046 					 sdma_write_length);
1047 		loop++;
1048 	}
1049 	/* clean resources */
1050 	free(resources);
1051 	free(ibs_request);
1052 	free(ib_info);
1053 	free(pm4);
1054 
1055 	/* end of test */
1056 	r = amdgpu_cs_ctx_free(context_handle);
1057 	igt_assert_eq(r, 0);
1058 }
1059 
1060 static void amdgpu_command_submission_sdma_const_fill(void)
1061 {
1062 	amdgpu_command_submission_const_fill_helper(AMDGPU_HW_IP_DMA);
1063 }
1064 
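/*
 * Copy-linear test: fill a source GTT BO with 0xaa, copy it to a second BO
 * with either an SDMA COPY_LINEAR packet or a PM4 DMA_DATA packet, and
 * verify the destination, covering cached and USWC mappings for both the
 * source and the destination buffer.
 */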
1065 static void amdgpu_command_submission_copy_linear_helper(unsigned ip_type)
1066 {
1067 	const int sdma_write_length = 1024;
1068 	const int pm4_dw = 256;
1069 	amdgpu_context_handle context_handle;
1070 	amdgpu_bo_handle bo1, bo2;
1071 	amdgpu_bo_handle *resources;
1072 	uint32_t *pm4;
1073 	struct amdgpu_cs_ib_info *ib_info;
1074 	struct amdgpu_cs_request *ibs_request;
1075 	struct amdgpu_gpu_info gpu_info = {0};
1076 	uint64_t bo1_mc, bo2_mc;
1077 	volatile unsigned char *bo1_cpu, *bo2_cpu;
1078 	int i, j, r, loop1, loop2;
1079 	uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
1080 	amdgpu_va_handle bo1_va_handle, bo2_va_handle;
1081 
1082 	r = amdgpu_query_gpu_info(device, &gpu_info);
1083 	igt_assert_eq(r, 0);
1084 
1085 	pm4 = calloc(pm4_dw, sizeof(*pm4));
1086 	igt_assert(pm4);
1087 
1088 	ib_info = calloc(1, sizeof(*ib_info));
1089 	igt_assert(ib_info);
1090 
1091 	ibs_request = calloc(1, sizeof(*ibs_request));
1092 	igt_assert(ibs_request);
1093 
1094 	r = amdgpu_cs_ctx_create(device, &context_handle);
1095 	igt_assert_eq(r, 0);
1096 
1097 	/* prepare resource */
1098 	resources = calloc(2, sizeof(amdgpu_bo_handle));
1099 	igt_assert(resources);
1100 
1101 	loop1 = 0;
1102 	/* run all four cached/USWC mapping combinations for source and destination */
1103 	while(loop1 < 2) {
1104 		for (loop2 = 0; loop2 < 2; loop2++) {
1105 			/* allocate UC bo1 for sDMA use */
1106 			r = amdgpu_bo_alloc_and_map(device,
1107 						    sdma_write_length, 4096,
1108 						    AMDGPU_GEM_DOMAIN_GTT,
1109 						    gtt_flags[loop1], &bo1,
1110 						    (void**)&bo1_cpu, &bo1_mc,
1111 						    &bo1_va_handle);
1112 			igt_assert_eq(r, 0);
1113 
1114 			/* set bo1 */
1115 			memset((void*)bo1_cpu, 0xaa, sdma_write_length);
1116 
1117 			/* allocate UC bo2 for sDMA use */
1118 			r = amdgpu_bo_alloc_and_map(device,
1119 						    sdma_write_length, 4096,
1120 						    AMDGPU_GEM_DOMAIN_GTT,
1121 						    gtt_flags[loop2], &bo2,
1122 						    (void**)&bo2_cpu, &bo2_mc,
1123 						    &bo2_va_handle);
1124 			igt_assert_eq(r, 0);
1125 
1126 			/* clear bo2 */
1127 			memset((void*)bo2_cpu, 0, sdma_write_length);
1128 
1129 			resources[0] = bo1;
1130 			resources[1] = bo2;
1131 
1132 			/* fill the PM4 stream: test DMA copy linear */
1133 			i = j = 0;
1134 			if (ip_type == AMDGPU_HW_IP_DMA) {
1135 				pm4[i++] = SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0);
1136 				if (gpu_info.family_id >= AMDGPU_FAMILY_AI)
1137 					pm4[i++] = sdma_write_length - 1;
1138 				else
1139 					pm4[i++] = sdma_write_length;
1140 				pm4[i++] = 0;
1141 				pm4[i++] = 0xffffffff & bo1_mc;
1142 				pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
1143 				pm4[i++] = 0xffffffff & bo2_mc;
1144 				pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
1145 			} else if ((ip_type == AMDGPU_HW_IP_GFX) ||
1146 				   (ip_type == AMDGPU_HW_IP_COMPUTE)) {
1147 				pm4[i++] = PACKET3(PACKET3_DMA_DATA, 5);
1148 				pm4[i++] = PACKET3_DMA_DATA_ENGINE(0) |
1149 					PACKET3_DMA_DATA_DST_SEL(0) |
1150 					PACKET3_DMA_DATA_SRC_SEL(0) |
1151 					PACKET3_DMA_DATA_CP_SYNC;
1152 				pm4[i++] = 0xfffffffc & bo1_mc;
1153 				pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
1154 				pm4[i++] = 0xfffffffc & bo2_mc;
1155 				pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
1156 				pm4[i++] = sdma_write_length;
1157 			}
1158 
1159 			amdgpu_test_exec_cs_helper(context_handle,
1160 						   ip_type, 0,
1161 						   i, pm4,
1162 						   2, resources,
1163 						   ib_info, ibs_request);
1164 
1165 			/* verify that the copied data matches the expected pattern */
1166 			i = 0;
1167 			while(i < sdma_write_length) {
1168 				igt_assert_eq(bo2_cpu[i++], 0xaa);
1169 			}
1170 
1171 			amdgpu_bo_unmap_and_free(bo1, bo1_va_handle, bo1_mc,
1172 						 sdma_write_length);
1173 			amdgpu_bo_unmap_and_free(bo2, bo2_va_handle, bo2_mc,
1174 						 sdma_write_length);
1176 		}
1177 		loop1++;
1178 	}
1179 	/* clean resources */
1180 	free(resources);
1181 	free(ibs_request);
1182 	free(ib_info);
1183 	free(pm4);
1184 
1185 	/* end of test */
1186 	r = amdgpu_cs_ctx_free(context_handle);
1187 	igt_assert_eq(r, 0);
1188 }
1189 
1190 static void amdgpu_command_submission_sdma_copy_linear(void)
1191 {
1192 	amdgpu_command_submission_copy_linear_helper(AMDGPU_HW_IP_DMA);
1193 }
1194 
1195 static void amdgpu_command_submission_sdma(void)
1196 {
1197 	amdgpu_command_submission_sdma_write_linear();
1198 	amdgpu_command_submission_sdma_const_fill();
1199 	amdgpu_command_submission_sdma_copy_linear();
1200 }
1201 
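/*
 * Submit the CE/DE counter IB pair twice in a single amdgpu_cs_submit()
 * call, then wait on both fences with amdgpu_cs_wait_fences(), either for
 * all of them or for the first one to signal, depending on wait_all.
 */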
1202 static void amdgpu_command_submission_multi_fence_wait_all(bool wait_all)
1203 {
1204 	amdgpu_context_handle context_handle;
1205 	amdgpu_bo_handle ib_result_handle, ib_result_ce_handle;
1206 	void *ib_result_cpu, *ib_result_ce_cpu;
1207 	uint64_t ib_result_mc_address, ib_result_ce_mc_address;
1208 	struct amdgpu_cs_request ibs_request[2] = {};
1209 	struct amdgpu_cs_ib_info ib_info[2];
1210 	struct amdgpu_cs_fence fence_status[2] = {};
1211 	uint32_t *ptr;
1212 	uint32_t expired;
1213 	amdgpu_bo_list_handle bo_list;
1214 	amdgpu_va_handle va_handle, va_handle_ce;
1215 	int r;
1216 	int i, ib_cs_num = 2;
1217 
1218 	r = amdgpu_cs_ctx_create(device, &context_handle);
1219 	igt_assert_eq(r, 0);
1220 
1221 	r = amdgpu_bo_alloc_and_map(device, 4096, 4096,
1222 				    AMDGPU_GEM_DOMAIN_GTT, 0,
1223 				    &ib_result_handle, &ib_result_cpu,
1224 				    &ib_result_mc_address, &va_handle);
1225 	igt_assert_eq(r, 0);
1226 
1227 	r = amdgpu_bo_alloc_and_map(device, 4096, 4096,
1228 				    AMDGPU_GEM_DOMAIN_GTT, 0,
1229 				    &ib_result_ce_handle, &ib_result_ce_cpu,
1230 				    &ib_result_ce_mc_address, &va_handle_ce);
1231 	igt_assert_eq(r, 0);
1232 
1233 	r = amdgpu_get_bo_list(device, ib_result_handle,
1234 			       ib_result_ce_handle, &bo_list);
1235 	igt_assert_eq(r, 0);
1236 
1237 	memset(ib_info, 0, 2 * sizeof(struct amdgpu_cs_ib_info));
1238 
1239 	/* IT_SET_CE_DE_COUNTERS */
1240 	ptr = ib_result_ce_cpu;
1241 	ptr[0] = 0xc0008900;
1242 	ptr[1] = 0;
1243 	ptr[2] = 0xc0008400;
1244 	ptr[3] = 1;
1245 	ib_info[0].ib_mc_address = ib_result_ce_mc_address;
1246 	ib_info[0].size = 4;
1247 	ib_info[0].flags = AMDGPU_IB_FLAG_CE;
1248 
1249 	/* IT_WAIT_ON_CE_COUNTER */
1250 	ptr = ib_result_cpu;
1251 	ptr[0] = 0xc0008600;
1252 	ptr[1] = 0x00000001;
1253 	ib_info[1].ib_mc_address = ib_result_mc_address;
1254 	ib_info[1].size = 2;
1255 
1256 	for (i = 0; i < ib_cs_num; i++) {
1257 		ibs_request[i].ip_type = AMDGPU_HW_IP_GFX;
1258 		ibs_request[i].number_of_ibs = 2;
1259 		ibs_request[i].ibs = ib_info;
1260 		ibs_request[i].resources = bo_list;
1261 		ibs_request[i].fence_info.handle = NULL;
1262 	}
1263 
1264 	r = amdgpu_cs_submit(context_handle, 0, ibs_request, ib_cs_num);
1265 
1266 	igt_assert_eq(r, 0);
1267 
1268 	for (i = 0; i < ib_cs_num; i++) {
1269 		fence_status[i].context = context_handle;
1270 		fence_status[i].ip_type = AMDGPU_HW_IP_GFX;
1271 		fence_status[i].fence = ibs_request[i].seq_no;
1272 	}
1273 
1274 	r = amdgpu_cs_wait_fences(fence_status, ib_cs_num, wait_all,
1275 				  AMDGPU_TIMEOUT_INFINITE,
1276 				  &expired, NULL);
1277 	igt_assert_eq(r, 0);
1278 
1279 	amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
1280 				 ib_result_mc_address, 4096);
1281 
1282 	amdgpu_bo_unmap_and_free(ib_result_ce_handle, va_handle_ce,
1283 				 ib_result_ce_mc_address, 4096);
1284 
1285 	r = amdgpu_bo_list_destroy(bo_list);
1286 	igt_assert_eq(r, 0);
1287 
1288 	r = amdgpu_cs_ctx_free(context_handle);
1289 	igt_assert_eq(r, 0);
1290 }
1291 
1292 static void amdgpu_command_submission_multi_fence(void)
1293 {
1294 	amdgpu_command_submission_multi_fence_wait_all(true);
1295 	amdgpu_command_submission_multi_fence_wait_all(false);
1296 }
1297 
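/*
 * Wrap an anonymous, page-aligned user allocation in a BO with
 * amdgpu_create_bo_from_user_mem(), map it into the GPU VA space, let SDMA
 * write a 0xdeadbeaf pattern into it and verify the result through the
 * original CPU pointer.
 */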
1298 static void amdgpu_userptr_test(void)
1299 {
1300 	int i, r, j;
1301 	uint32_t *pm4 = NULL;
1302 	uint64_t bo_mc;
1303 	void *ptr = NULL;
1304 	int pm4_dw = 256;
1305 	int sdma_write_length = 4;
1306 	amdgpu_bo_handle handle;
1307 	amdgpu_context_handle context_handle;
1308 	struct amdgpu_cs_ib_info *ib_info;
1309 	struct amdgpu_cs_request *ibs_request;
1310 	amdgpu_bo_handle buf_handle;
1311 	amdgpu_va_handle va_handle;
1312 
1313 	pm4 = calloc(pm4_dw, sizeof(*pm4));
1314 	igt_assert(pm4);
1315 
1316 	ib_info = calloc(1, sizeof(*ib_info));
1317 	igt_assert(ib_info);
1318 
1319 	ibs_request = calloc(1, sizeof(*ibs_request));
1320 	igt_assert(ibs_request);
1321 
1322 	r = amdgpu_cs_ctx_create(device, &context_handle);
1323 	igt_assert_eq(r, 0);
1324 
1325 	r = posix_memalign(&ptr, sysconf(_SC_PAGE_SIZE), BUFFER_SIZE);
1326 	igt_assert(r == 0 && ptr);
1327 	memset(ptr, 0, BUFFER_SIZE);
1328 
1329 	r = amdgpu_create_bo_from_user_mem(device,
1330 					   ptr, BUFFER_SIZE, &buf_handle);
1331 	igt_assert_eq(r, 0);
1332 
1333 	r = amdgpu_va_range_alloc(device,
1334 				  amdgpu_gpu_va_range_general,
1335 				  BUFFER_SIZE, 1, 0, &bo_mc,
1336 				  &va_handle, 0);
1337 	igt_assert_eq(r, 0);
1338 
1339 	r = amdgpu_bo_va_op(buf_handle, 0, BUFFER_SIZE, bo_mc, 0, AMDGPU_VA_OP_MAP);
1340 	igt_assert_eq(r, 0);
1341 
1342 	handle = buf_handle;
1343 
1344 	j = i = 0;
1345 	pm4[i++] = SDMA_PACKET(SDMA_OPCODE_WRITE,
1346 			       SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
1347 	pm4[i++] = 0xffffffff & bo_mc;
1348 	pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
1349 	pm4[i++] = sdma_write_length;
1350 
1351 	while (j++ < sdma_write_length)
1352 		pm4[i++] = 0xdeadbeaf;
1353 
1354 	amdgpu_test_exec_cs_helper(context_handle,
1355 				   AMDGPU_HW_IP_DMA, 0,
1356 				   i, pm4,
1357 				   1, &handle,
1358 				   ib_info, ibs_request);
1359 	i = 0;
1360 	while (i < sdma_write_length) {
1361 		igt_assert_eq(((int*)ptr)[i++], 0xdeadbeaf);
1362 	}
1363 	free(ibs_request);
1364 	free(ib_info);
1365 	free(pm4);
1366 
1367 	r = amdgpu_bo_va_op(buf_handle, 0, BUFFER_SIZE, bo_mc, 0, AMDGPU_VA_OP_UNMAP);
1368 	igt_assert_eq(r, 0);
1369 	r = amdgpu_va_range_free(va_handle);
1370 	igt_assert_eq(r, 0);
1371 	r = amdgpu_bo_free(buf_handle);
1372 	igt_assert_eq(r, 0);
1373 	free(ptr);
1374 
1375 	r = amdgpu_cs_ctx_free(context_handle);
1376 	igt_assert_eq(r, 0);
1377 }
1378 
1379 igt_main
1380 {
1381 	int fd = -1;
1382 
1383 	igt_fixture {
1384 		uint32_t major, minor;
1385 		int err;
1386 
1387 		fd = drm_open_driver(DRIVER_AMDGPU);
1388 
1389 		err = amdgpu_device_initialize(fd, &major, &minor, &device);
1390 		igt_require(err == 0);
1391 
1392 		igt_info("Initialized amdgpu, driver version %d.%d\n",
1393 			 major, minor);
1394 	}
1395 
1396 	igt_subtest("query-info")
1397 		amdgpu_query_info_test();
1398 
1399 	igt_subtest("memory-alloc")
1400 		amdgpu_memory_alloc();
1401 
1402 	igt_subtest("userptr")
1403 		amdgpu_userptr_test();
1404 
1405 	igt_subtest("cs-gfx")
1406 		amdgpu_command_submission_gfx();
1407 
1408 	igt_subtest("cs-compute")
1409 		amdgpu_command_submission_compute();
1410 
1411 	igt_subtest("cs-multi-fence")
1412 		amdgpu_command_submission_multi_fence();
1413 
1414 	igt_subtest("cs-sdma")
1415 		amdgpu_command_submission_sdma();
1416 
1417 	igt_subtest("semaphore")
1418 		amdgpu_semaphore_test();
1419 
1420 	igt_fixture {
1421 		amdgpu_device_deinitialize(device);
1422 		close(fd);
1423 	}
1424 }
1425