xref: /aosp_15_r20/external/libdrm/tests/amdgpu/basic_tests.c (revision 7688df22e49036ff52a766b7101da3a49edadb8c)
1 /*
2  * Copyright 2014 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22 */
23 
24 #include <stdio.h>
25 #include <stdlib.h>
26 #include <unistd.h>
27 #include <sys/types.h>
28 #ifdef MAJOR_IN_SYSMACROS
29 #include <sys/sysmacros.h>
30 #endif
31 #include <sys/stat.h>
32 #include <fcntl.h>
33 #if HAVE_ALLOCA_H
34 # include <alloca.h>
35 #endif
36 #include <sys/wait.h>
37 
38 #include "CUnit/Basic.h"
39 
40 #include "amdgpu_test.h"
41 #include "amdgpu_drm.h"
42 #include "amdgpu_internal.h"
43 #include "util_math.h"
44 
45 static  amdgpu_device_handle device_handle;
46 static  uint32_t  major_version;
47 static  uint32_t  minor_version;
48 static  uint32_t  family_id;
49 static  uint32_t  chip_id;
50 static  uint32_t  chip_rev;
51 
52 static void amdgpu_query_info_test(void);
53 static void amdgpu_command_submission_gfx(void);
54 static void amdgpu_command_submission_compute(void);
55 static void amdgpu_command_submission_multi_fence(void);
56 static void amdgpu_command_submission_sdma(void);
57 static void amdgpu_userptr_test(void);
58 static void amdgpu_semaphore_test(void);
59 static void amdgpu_sync_dependency_test(void);
60 static void amdgpu_bo_eviction_test(void);
61 static void amdgpu_compute_dispatch_test(void);
62 static void amdgpu_gfx_dispatch_test(void);
63 static void amdgpu_draw_test(void);
64 static void amdgpu_gpu_reset_test(void);
65 static void amdgpu_stable_pstate_test(void);
66 
67 static void amdgpu_command_submission_write_linear_helper(unsigned ip_type);
68 static void amdgpu_command_submission_const_fill_helper(unsigned ip_type);
69 static void amdgpu_command_submission_copy_linear_helper(unsigned ip_type);
70 static void amdgpu_test_exec_cs_helper(amdgpu_context_handle context_handle,
71 				       unsigned ip_type,
72 				       int instance, int pm4_dw, uint32_t *pm4_src,
73 				       int res_cnt, amdgpu_bo_handle *resources,
74 				       struct amdgpu_cs_ib_info *ib_info,
75 				       struct amdgpu_cs_request *ibs_request);
76 
77 CU_TestInfo basic_tests[] = {
78 	{ "Query Info Test",  amdgpu_query_info_test },
79 	{ "Userptr Test",  amdgpu_userptr_test },
80 	{ "bo eviction Test",  amdgpu_bo_eviction_test },
81 	{ "Command submission Test (GFX)",  amdgpu_command_submission_gfx },
82 	{ "Command submission Test (Compute)", amdgpu_command_submission_compute },
83 	{ "Command submission Test (Multi-Fence)", amdgpu_command_submission_multi_fence },
84 	{ "Command submission Test (SDMA)", amdgpu_command_submission_sdma },
85 	{ "SW semaphore Test",  amdgpu_semaphore_test },
86 	{ "Sync dependency Test",  amdgpu_sync_dependency_test },
87 	{ "Dispatch Test (Compute)",  amdgpu_compute_dispatch_test },
88 	{ "Dispatch Test (GFX)",  amdgpu_gfx_dispatch_test },
89 	{ "Draw Test",  amdgpu_draw_test },
90 	{ "GPU reset Test", amdgpu_gpu_reset_test },
91 	{ "Stable pstate Test", amdgpu_stable_pstate_test },
92 	CU_TEST_INFO_NULL,
93 };
94 #define BUFFER_SIZE (MAX2(8 * 1024, getpagesize()))
95 #define SDMA_PKT_HEADER_op_offset 0
96 #define SDMA_PKT_HEADER_op_mask   0x000000FF
97 #define SDMA_PKT_HEADER_op_shift  0
98 #define SDMA_PKT_HEADER_OP(x) (((x) & SDMA_PKT_HEADER_op_mask) << SDMA_PKT_HEADER_op_shift)
99 #define SDMA_OPCODE_CONSTANT_FILL  11
100 #       define SDMA_CONSTANT_FILL_EXTRA_SIZE(x)           ((x) << 14)
101 	/* 0 = byte fill
102 	 * 2 = DW fill
103 	 */
104 #define SDMA_PACKET(op, sub_op, e)	((((e) & 0xFFFF) << 16) |	\
105 					(((sub_op) & 0xFF) << 8) |	\
106 					(((op) & 0xFF) << 0))
107 #define	SDMA_OPCODE_WRITE				  2
108 #       define SDMA_WRITE_SUB_OPCODE_LINEAR               0
109 #       define SDMA_WRITE_SUB_OPCODE_TILED                1
110 
111 #define	SDMA_OPCODE_COPY				  1
112 #       define SDMA_COPY_SUB_OPCODE_LINEAR                0
113 
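/*
 * A worked expansion of the header macro above, for reference: the tests
 * below build their SDMA linear copy/write headers as
 *
 *   SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0)
 *     = (0 << 16) | (0 << 8) | (1 << 0) = 0x00000001
 *   SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0)
 *     = (0 << 16) | (0 << 8) | (2 << 0) = 0x00000002
 *
 * i.e. extra bits in [31:16], sub-opcode in [15:8], opcode in [7:0].
 */
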
114 #define	SDMA_OPCODE_ATOMIC				  10
115 #		define SDMA_ATOMIC_LOOP(x)               ((x) << 0)
116         /* 0 - single_pass_atomic.
117          * 1 - loop_until_compare_satisfied.
118          */
119 #		define SDMA_ATOMIC_TMZ(x)                ((x) << 2)
120 		/* 0 - non-TMZ.
121 		 * 1 - TMZ.
122 		 */
123 #		define SDMA_ATOMIC_OPCODE(x)             ((x) << 9)
124 		/* TC_OP_ATOMIC_CMPSWAP_RTN_32 0x00000008
125 		 * same as Packet 3
126 		 */
127 
128 #define GFX_COMPUTE_NOP  0xffff1000
129 #define SDMA_NOP  0x0
130 
131 /* PM4 */
132 #define	PACKET_TYPE0	0
133 #define	PACKET_TYPE1	1
134 #define	PACKET_TYPE2	2
135 #define	PACKET_TYPE3	3
136 
137 #define CP_PACKET_GET_TYPE(h) (((h) >> 30) & 3)
138 #define CP_PACKET_GET_COUNT(h) (((h) >> 16) & 0x3FFF)
139 #define CP_PACKET0_GET_REG(h) ((h) & 0xFFFF)
140 #define CP_PACKET3_GET_OPCODE(h) (((h) >> 8) & 0xFF)
141 #define PACKET0(reg, n)	((PACKET_TYPE0 << 30) |				\
142 			 ((reg) & 0xFFFF) |			\
143 			 ((n) & 0x3FFF) << 16)
144 #define CP_PACKET2			0x80000000
145 #define		PACKET2_PAD_SHIFT		0
146 #define		PACKET2_PAD_MASK		(0x3fffffff << 0)
147 
148 #define PACKET2(v)	(CP_PACKET2 | REG_SET(PACKET2_PAD, (v)))
149 
150 #define PACKET3(op, n)	((PACKET_TYPE3 << 30) |				\
151 			 (((op) & 0xFF) << 8) |				\
152 			 ((n) & 0x3FFF) << 16)
153 #define PACKET3_COMPUTE(op, n) (PACKET3(op, n) | (1 << 1))
154 
155 /* Packet 3 types */
156 #define	PACKET3_NOP					0x10
157 
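/*
 * Worked example: the compute NOP test below fills a 16-dword IB whose
 * header is PACKET3(PACKET3_NOP, 14), i.e.
 *
 *   (3 << 30) | (14 << 16) | (0x10 << 8) = 0xc00e1010
 *
 * (type 3, count 14 = 15 payload dwords, opcode 0x10).  The pre-computed
 * GFX_COMPUTE_NOP above is the same packet with the maximum count:
 * PACKET3(PACKET3_NOP, 0x3fff) = 0xffff1000.
 */
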
158 #define	PACKET3_WRITE_DATA				0x37
159 #define		WRITE_DATA_DST_SEL(x)                   ((x) << 8)
160 		/* 0 - register
161 		 * 1 - memory (sync - via GRBM)
162 		 * 2 - gl2
163 		 * 3 - gds
164 		 * 4 - reserved
165 		 * 5 - memory (async - direct)
166 		 */
167 #define		WR_ONE_ADDR                             (1 << 16)
168 #define		WR_CONFIRM                              (1 << 20)
169 #define		WRITE_DATA_CACHE_POLICY(x)              ((x) << 25)
170 		/* 0 - LRU
171 		 * 1 - Stream
172 		 */
173 #define		WRITE_DATA_ENGINE_SEL(x)                ((x) << 30)
174 		/* 0 - me
175 		 * 1 - pfp
176 		 * 2 - ce
177 		 */
178 
179 #define	PACKET3_ATOMIC_MEM				0x1E
180 #define     TC_OP_ATOMIC_CMPSWAP_RTN_32          0x00000008
181 #define     ATOMIC_MEM_COMMAND(x)               ((x) << 8)
182             /* 0 - single_pass_atomic.
183              * 1 - loop_until_compare_satisfied.
184              */
185 #define     ATOMIC_MEM_CACHEPOLICAY(x)          ((x) << 25)
186             /* 0 - lru.
187              * 1 - stream.
188              */
189 #define     ATOMIC_MEM_ENGINESEL(x)             ((x) << 30)
190             /* 0 - micro_engine.
191              */
192 
193 #define	PACKET3_DMA_DATA				0x50
194 /* 1. header
195  * 2. CONTROL
196  * 3. SRC_ADDR_LO or DATA [31:0]
197  * 4. SRC_ADDR_HI [31:0]
198  * 5. DST_ADDR_LO [31:0]
199  * 6. DST_ADDR_HI [7:0]
200  * 7. COMMAND [30:21] | BYTE_COUNT [20:0]
201  */
202 /* CONTROL */
203 #              define PACKET3_DMA_DATA_ENGINE(x)     ((x) << 0)
204 		/* 0 - ME
205 		 * 1 - PFP
206 		 */
207 #              define PACKET3_DMA_DATA_SRC_CACHE_POLICY(x) ((x) << 13)
208 		/* 0 - LRU
209 		 * 1 - Stream
210 		 * 2 - Bypass
211 		 */
212 #              define PACKET3_DMA_DATA_SRC_VOLATILE (1 << 15)
213 #              define PACKET3_DMA_DATA_DST_SEL(x)  ((x) << 20)
214 		/* 0 - DST_ADDR using DAS
215 		 * 1 - GDS
216 		 * 3 - DST_ADDR using L2
217 		 */
218 #              define PACKET3_DMA_DATA_DST_CACHE_POLICY(x) ((x) << 25)
219 		/* 0 - LRU
220 		 * 1 - Stream
221 		 * 2 - Bypass
222 		 */
223 #              define PACKET3_DMA_DATA_DST_VOLATILE (1 << 27)
224 #              define PACKET3_DMA_DATA_SRC_SEL(x)  ((x) << 29)
225 		/* 0 - SRC_ADDR using SAS
226 		 * 1 - GDS
227 		 * 2 - DATA
228 		 * 3 - SRC_ADDR using L2
229 		 */
230 #              define PACKET3_DMA_DATA_CP_SYNC     (1 << 31)
231 /* COMMAND */
232 #              define PACKET3_DMA_DATA_DIS_WC      (1 << 21)
233 #              define PACKET3_DMA_DATA_CMD_SRC_SWAP(x) ((x) << 22)
234 		/* 0 - none
235 		 * 1 - 8 in 16
236 		 * 2 - 8 in 32
237 		 * 3 - 8 in 64
238 		 */
239 #              define PACKET3_DMA_DATA_CMD_DST_SWAP(x) ((x) << 24)
240 		/* 0 - none
241 		 * 1 - 8 in 16
242 		 * 2 - 8 in 32
243 		 * 3 - 8 in 64
244 		 */
245 #              define PACKET3_DMA_DATA_CMD_SAS     (1 << 26)
246 		/* 0 - memory
247 		 * 1 - register
248 		 */
249 #              define PACKET3_DMA_DATA_CMD_DAS     (1 << 27)
250 		/* 0 - memory
251 		 * 1 - register
252 		 */
253 #              define PACKET3_DMA_DATA_CMD_SAIC    (1 << 28)
254 #              define PACKET3_DMA_DATA_CMD_DAIC    (1 << 29)
255 #              define PACKET3_DMA_DATA_CMD_RAW_WAIT  (1 << 30)
256 
257 #define SDMA_PACKET_SI(op, b, t, s, cnt)	((((op) & 0xF) << 28) |	\
258 						(((b) & 0x1) << 26) |		\
259 						(((t) & 0x1) << 23) |		\
260 						(((s) & 0x1) << 22) |		\
261 						(((cnt) & 0xFFFFF) << 0))
262 #define	SDMA_OPCODE_COPY_SI	3
263 #define SDMA_OPCODE_CONSTANT_FILL_SI	13
264 #define SDMA_NOP_SI  0xf
265 #define GFX_COMPUTE_NOP_SI 0x80000000
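
/*
 * Worked example for the SI variant: the whole copy descriptor fits in a
 * single header dword, so the SI paths below emit
 *
 *   SDMA_PACKET_SI(SDMA_OPCODE_COPY_SI, 0, 0, 0, len)
 *     = (3 << 28) | ((len) & 0xFFFFF)
 *
 * followed by the destination and source addresses.
 */
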
266 #define	PACKET3_DMA_DATA_SI	0x41
267 #              define PACKET3_DMA_DATA_SI_ENGINE(x)     ((x) << 27)
268 		/* 0 - ME
269 		 * 1 - PFP
270 		 */
271 #              define PACKET3_DMA_DATA_SI_DST_SEL(x)  ((x) << 20)
272 		/* 0 - DST_ADDR using DAS
273 		 * 1 - GDS
274 		 * 3 - DST_ADDR using L2
275 		 */
276 #              define PACKET3_DMA_DATA_SI_SRC_SEL(x)  ((x) << 29)
277 		/* 0 - SRC_ADDR using SAS
278 		 * 1 - GDS
279 		 * 2 - DATA
280 		 * 3 - SRC_ADDR using L2
281 		 */
282 #              define PACKET3_DMA_DATA_SI_CP_SYNC     (1 << 31)
283 
284 
285 #define PKT3_CONTEXT_CONTROL                   0x28
286 #define     CONTEXT_CONTROL_LOAD_ENABLE(x)     (((unsigned)(x) & 0x1) << 31)
287 #define     CONTEXT_CONTROL_LOAD_CE_RAM(x)     (((unsigned)(x) & 0x1) << 28)
288 #define     CONTEXT_CONTROL_SHADOW_ENABLE(x)   (((unsigned)(x) & 0x1) << 31)
289 
290 #define PKT3_CLEAR_STATE                       0x12
291 
292 #define PKT3_SET_SH_REG                        0x76
293 #define		PACKET3_SET_SH_REG_START			0x00002c00
294 
295 #define PKT3_SET_SH_REG_INDEX			0x9B
296 
297 #define	PACKET3_DISPATCH_DIRECT				0x15
298 #define PACKET3_EVENT_WRITE				0x46
299 #define PACKET3_ACQUIRE_MEM				0x58
300 #define PACKET3_SET_CONTEXT_REG				0x69
301 #define PACKET3_SET_UCONFIG_REG				0x79
302 #define PACKET3_DRAW_INDEX_AUTO				0x2D
303 /* gfx 8 */
304 #define mmCOMPUTE_PGM_LO                                                        0x2e0c
305 #define mmCOMPUTE_PGM_RSRC1                                                     0x2e12
306 #define mmCOMPUTE_TMPRING_SIZE                                                  0x2e18
307 #define mmCOMPUTE_USER_DATA_0                                                   0x2e40
308 #define mmCOMPUTE_USER_DATA_1                                                   0x2e41
309 #define mmCOMPUTE_RESOURCE_LIMITS                                               0x2e15
310 #define mmCOMPUTE_NUM_THREAD_X                                                  0x2e07
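
/*
 * For reference: SH registers such as these are programmed with
 * PACKET3(PKT3_SET_SH_REG, n) followed by the register offset relative to
 * PACKET3_SET_SH_REG_START and n register values, so mmCOMPUTE_PGM_LO is
 * addressed as 0x2e0c - 0x2c00 = 0x20c.
 */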
311 
312 
313 
314 #define SWAP_32(num) (((num & 0xff000000) >> 24) | \
315 		      ((num & 0x0000ff00) << 8) | \
316 		      ((num & 0x00ff0000) >> 8) | \
317 		      ((num & 0x000000ff) << 24))
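/* SWAP_32() reverses the byte order of a dword, e.g.
 * SWAP_32(0x11223344) == 0x44332211; shader_bin below runs every dword of
 * the shader binary through it.
 */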
318 
319 
320 /* Shader code:
321  * void main()
322  * {
323  *	float x = some_input;
324  *
325  *	for (unsigned i = 0; i < 1000000; i++)
326  *		x = sin(x);
327  *
328  *	u[0] = 42u;
329  * }
330  */
331 
332 static  uint32_t shader_bin[] = {
333 	SWAP_32(0x800082be), SWAP_32(0x02ff08bf), SWAP_32(0x7f969800), SWAP_32(0x040085bf),
334 	SWAP_32(0x02810281), SWAP_32(0x02ff08bf), SWAP_32(0x7f969800), SWAP_32(0xfcff84bf),
335 	SWAP_32(0xff0083be), SWAP_32(0x00f00000), SWAP_32(0xc10082be), SWAP_32(0xaa02007e),
336 	SWAP_32(0x000070e0), SWAP_32(0x00000080), SWAP_32(0x000081bf)
337 };
338 
339 #define CODE_OFFSET 512
340 #define DATA_OFFSET 1024
341 
342 enum cs_type {
343 	CS_BUFFERCLEAR,
344 	CS_BUFFERCOPY,
345 	CS_HANG,
346 	CS_HANG_SLOW
347 };
348 
349 static const uint32_t bufferclear_cs_shader_gfx9[] = {
350     0x260000ff, 0x000003ff, 0xd1fd0000, 0x04010c08,
351     0x7e020280, 0x7e040204, 0x7e060205, 0x7e080206,
352     0x7e0a0207, 0xe01c2000, 0x80000200, 0xbf8c0000,
353     0xbf810000
354 };
355 
356 static const uint32_t bufferclear_cs_shader_gfx10[] = {
357 	0xD7460004, 0x04010C08, 0x7E000204, 0x7E020205,
358 	0x7E040206, 0x7E060207, 0xE01C2000, 0x80000004,
359 	0xBF810000
360 };
361 
362 static const uint32_t bufferclear_cs_shader_registers_gfx9[][2] = {
363 	{0x2e12, 0x000C0041},	//{ mmCOMPUTE_PGM_RSRC1,	  0x000C0041 },
364 	{0x2e13, 0x00000090},	//{ mmCOMPUTE_PGM_RSRC2,	  0x00000090 },
365 	{0x2e07, 0x00000040},	//{ mmCOMPUTE_NUM_THREAD_X, 0x00000040 },
366 	{0x2e08, 0x00000001},	//{ mmCOMPUTE_NUM_THREAD_Y, 0x00000001 },
367 	{0x2e09, 0x00000001},	//{ mmCOMPUTE_NUM_THREAD_Z, 0x00000001 }
368 };
369 
370 static const uint32_t bufferclear_cs_shader_registers_num_gfx9 = 5;
371 
372 static const uint32_t buffercopy_cs_shader_gfx9[] = {
373     0x260000ff, 0x000003ff, 0xd1fd0000, 0x04010c08,
374     0x7e020280, 0xe00c2000, 0x80000200, 0xbf8c0f70,
375     0xe01c2000, 0x80010200, 0xbf810000
376 };
377 
378 static const uint32_t buffercopy_cs_shader_gfx10[] = {
379 	0xD7460001, 0x04010C08, 0xE00C2000, 0x80000201,
380 	0xBF8C3F70, 0xE01C2000, 0x80010201, 0xBF810000
381 };
382 
383 static const uint32_t preamblecache_gfx9[] = {
384 	0xc0026900, 0x81, 0x80000000, 0x40004000, 0xc0026900, 0x8c, 0xaa99aaaa, 0x0,
385 	0xc0026900, 0x90, 0x80000000, 0x40004000, 0xc0026900, 0x94, 0x80000000, 0x40004000,
386 	0xc0026900, 0xb4,  0x0, 0x3f800000, 0xc0016900, 0x103, 0x0,
387 	0xc0016900, 0x208, 0x0, 0xc0016900, 0x290, 0x0,
388 	0xc0016900, 0x2a1, 0x0, 0xc0026900, 0x2ad, 0x0, 0x0,
389 	0xc0016900, 0x2d5, 0x10000, 0xc0016900,  0x2dc, 0x0,
390 	0xc0066900, 0x2de, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0xc0026900, 0x2e5, 0x0, 0x0,
391 	0xc0056900, 0x2f9, 0x5, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000,
392 	0xc0036900, 0x311, 0x3, 0, 0x100000, 0xc0026900, 0x316, 0x1e, 0x20,
393 	0xc0016900, 0x349, 0x0, 0xc0016900, 0x358, 0x0, 0xc0016900, 0x367, 0x0,
394 	0xc0016900, 0x376, 0x0, 0xc0016900, 0x385, 0x0, 0xc0016900, 0x19, 0x0,
395 	0xc0056900, 0xe8, 0x0, 0x0, 0x0, 0x0, 0x0,
396 	0xc0076900, 0x1e1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
397 	0xc0026900, 0x204, 0x90000, 0x4, 0xc0046900, 0x20c, 0x0, 0x0, 0x0, 0x0,
398 	0xc0016900, 0x2b2, 0x0, 0xc0026900, 0x30e, 0xffffffff, 0xffffffff,
399 	0xc0016900, 0x314, 0x0, 0xc0016900, 0x2a6, 0, 0xc0016900, 0x210, 0,
400 	0xc0002f00, 0x1, 0xc0016900, 0x1, 0x1,
401 	0xc0016900, 0x18, 0x2, 0xc0016900, 0x206, 0x300, 0xc0017900, 0x20000243, 0x0,
402 	0xc0017900, 0x248, 0xffffffff, 0xc0017900, 0x249, 0x0, 0xc0017900, 0x24a, 0x0,
403 	0xc0017900, 0x24b, 0x0
404 };
405 
406 static const uint32_t preamblecache_gfx10[] = {
407 	0xc0026900, 0x81, 0x80000000, 0x40004000, 0xc0026900, 0x8c, 0xaa99aaaa, 0x0,
408 	0xc0026900, 0x90, 0x80000000, 0x40004000, 0xc0026900, 0x94, 0x80000000, 0x40004000,
409 	0xc0026900, 0xb4, 0x0, 0x3f800000, 0xc0016900, 0x103, 0x0,
410 	0xc0016900, 0x208, 0x0, 0xc0016900, 0x290, 0x0,
411 	0xc0016900, 0x2a1, 0x0, 0xc0026900, 0x2ad, 0x0, 0x0,
412 	0xc0016900, 0x2d5, 0x10000, 0xc0016900, 0x2dc, 0x0,
413 	0xc0066900, 0x2de, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0xc0026900, 0x2e5, 0x0, 0x0,
414 	0xc0056900, 0x2f9, 0x5, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000,
415 	0xc0046900, 0x310, 0, 0x3, 0, 0x100000, 0xc0026900, 0x316, 0xe, 0x20,
416 	0xc0016900, 0x349, 0x0, 0xc0016900, 0x358, 0x0, 0xc0016900, 0x367, 0x0,
417 	0xc0016900, 0x376, 0x0, 0xc0016900, 0x385, 0x0, 0xc0016900, 0x6, 0x0,
418 	0xc0056900, 0xe8, 0x0, 0x0, 0x0, 0x0, 0x0,
419 	0xc0076900, 0x1e1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
420 	0xc0026900, 0x204, 0x90000, 0x4, 0xc0046900, 0x20c, 0x0, 0x0, 0x0, 0x0,
421 	0xc0016900, 0x2b2, 0x0, 0xc0026900, 0x30e, 0xffffffff, 0xffffffff,
422 	0xc0016900, 0x314, 0x0, 0xc0016900, 0x10a, 0, 0xc0016900, 0x2a6, 0, 0xc0016900, 0x210, 0,
423 	0xc0016900, 0x2db, 0, 0xc0016900, 0x1d4, 0, 0xc0002f00, 0x1, 0xc0016900, 0x1, 0x1, 0xc0016900, 0xe, 0x2,
424 	0xc0016900, 0x206, 0x300, 0xc0016900, 0x212, 0x200, 0xc0017900, 0x7b, 0x20, 0xc0017a00, 0x20000243, 0x0,
425 	0xc0017900, 0x249, 0, 0xc0017900, 0x24a, 0, 0xc0017900, 0x24b, 0, 0xc0017900, 0x259, 0xffffffff,
426 	0xc0017900, 0x25f, 0, 0xc0017900, 0x260, 0, 0xc0017900, 0x262, 0,
427 	0xc0017600, 0x45, 0x0, 0xc0017600, 0x6, 0x0,
428 	0xc0067600, 0x70, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
429 	0xc0067600, 0x30, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0
430 };
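
/*
 * The preamble caches above are raw PM4; as a sanity check, the first
 * dword of each decodes as
 *
 *   0xc0026900 = (3 << 30) | (2 << 16) | (0x69 << 8)
 *              = PACKET3(PACKET3_SET_CONTEXT_REG, 2)
 *
 * i.e. three payload dwords follow: a context-register offset (0x81) and
 * two register values.
 */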
431 
432 enum ps_type {
433 	PS_CONST,
434 	PS_TEX,
435 	PS_HANG,
436 	PS_HANG_SLOW
437 };
438 
439 static const uint32_t ps_const_shader_gfx9[] = {
440     0x7E000200, 0x7E020201, 0x7E040202, 0x7E060203,
441     0xD2960000, 0x00020300, 0xD2960001, 0x00020702,
442     0xC4001C0F, 0x00000100, 0xBF810000
443 };
444 
445 static const uint32_t ps_const_shader_patchinfo_code_size_gfx9 = 6;
446 
447 static const uint32_t ps_const_shader_patchinfo_code_gfx9[][10][6] = {
448     {{ 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001890, 0x00000000 },
449      { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001801, 0x00000000 },
450      { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001803, 0x00000100 },
451      { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001803, 0x00000300 },
452      { 0xD2960000, 0x00020300, 0xD2960001, 0x00020702, 0xC4001C0F, 0x00000100 },
453      { 0xD2950000, 0x00020300, 0xD2950001, 0x00020702, 0xC4001C0F, 0x00000100 },
454      { 0xD2940000, 0x00020300, 0xD2940001, 0x00020702, 0xC4001C0F, 0x00000100 },
455      { 0xD2970000, 0x00020300, 0xD2970001, 0x00020702, 0xC4001C0F, 0x00000100 },
456      { 0xD2980000, 0x00020300, 0xD2980001, 0x00020702, 0xC4001C0F, 0x00000100 },
457      { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC400180F, 0x03020100 }
458     }
459 };
460 
461 static const uint32_t ps_const_shader_patchinfo_offset_gfx9[] = {
462     0x00000004
463 };
464 
465 static const uint32_t ps_num_sh_registers_gfx9 = 2;
466 
467 static const uint32_t ps_const_sh_registers_gfx9[][2] = {
468     {0x2C0A, 0x000C0040},//{ mmSPI_SHADER_PGM_RSRC1_PS, 0x000C0040 },
469     {0x2C0B, 0x00000008}, //{ mmSPI_SHADER_PGM_RSRC2_PS, 0x00000008 }
470 };
471 
472 static const uint32_t ps_num_context_registers_gfx9 = 7;
473 
474 static const uint32_t ps_const_context_reg_gfx9[][2] = {
475     {0xA1B4, 0x00000002}, //{ mmSPI_PS_INPUT_ADDR,       0x00000002 },
476     {0xA1B6, 0x00000000}, //{ mmSPI_PS_IN_CONTROL,       0x00000000 },
477     {0xA08F, 0x0000000F}, //{ mmCB_SHADER_MASK,          0x0000000F },
478     {0xA203, 0x00000010}, //{ mmDB_SHADER_CONTROL,       0x00000010 },
479     {0xA1C4, 0x00000000}, //{ mmSPI_SHADER_Z_FORMAT,     0x00000000 },
480     {0xA1B8, 0x00000000}, //{ mmSPI_BARYC_CNTL,          0x00000000 /* Always 0 for now */},
481     {0xA1C5, 0x00000004}, //{ mmSPI_SHADER_COL_FORMAT,   0x00000004 }
482 };
483 
484 static const uint32_t ps_const_shader_gfx10[] = {
485     0x7E000200, 0x7E020201, 0x7E040202, 0x7E060203,
486     0x5E000300, 0x5E020702, 0xBF800000, 0xBF800000,
487     0xF8001C0F, 0x00000100, 0xBF810000
488 };
489 
490 static const uint32_t ps_const_shader_patchinfo_code_size_gfx10 = 6;
491 
492 static const uint32_t ps_const_shader_patchinfo_code_gfx10[][10][6] = {
493     {{ 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xF8001890, 0x00000000 },
494      { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xF8001801, 0x00000000 },
495      { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xF8001803, 0x00000100 },
496      { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xF8001803, 0x00000300 },
497      { 0x5E000300, 0x5E020702, 0xBF800000, 0xBF800000, 0xF8001C0F, 0x00000100 },
498      { 0xD7690000, 0x00020300, 0xD7690001, 0x00020702, 0xF8001C0F, 0x00000100 },
499      { 0xD7680000, 0x00020300, 0xD7680001, 0x00020702, 0xF8001C0F, 0x00000100 },
500      { 0xD76A0000, 0x00020300, 0xD76A0001, 0x00020702, 0xF8001C0F, 0x00000100 },
501      { 0xD76B0000, 0x00020300, 0xD76B0001, 0x00020702, 0xF8001C0F, 0x00000100 },
502      { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xF800180F, 0x03020100 }
503     }
504 };
505 
506 static const uint32_t ps_const_shader_patchinfo_offset_gfx10[] = {
507     0x00000004
508 };
509 
510 static const uint32_t ps_num_sh_registers_gfx10 = 2;
511 
512 static const uint32_t ps_const_sh_registers_gfx10[][2] = {
513     {0x2C0A, 0x000C0000},//{ mmSPI_SHADER_PGM_RSRC1_PS, 0x000C0000 },
514     {0x2C0B, 0x00000008}, //{ mmSPI_SHADER_PGM_RSRC2_PS, 0x00000008 }
515 };
516 
517 static const uint32_t ps_tex_shader_gfx9[] = {
518     0xBEFC000C, 0xBE8E017E, 0xBEFE077E, 0xD4180000,
519     0xD4190001, 0xD41C0100, 0xD41D0101, 0xF0800F00,
520     0x00400206, 0xBEFE010E, 0xBF8C0F70, 0xD2960000,
521     0x00020702, 0xD2960001, 0x00020B04, 0xC4001C0F,
522     0x00000100, 0xBF810000
523 };
524 
525 static const uint32_t ps_tex_shader_patchinfo_offset_gfx9[] = {
526     0x0000000B
527 };
528 
529 static const uint32_t ps_tex_shader_patchinfo_code_size_gfx9 = 6;
530 
531 static const uint32_t ps_tex_shader_patchinfo_code_gfx9[][10][6] = {
532     {{ 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001890, 0x00000000 },
533      { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001801, 0x00000002 },
534      { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001803, 0x00000302 },
535      { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001803, 0x00000502 },
536      { 0xD2960000, 0x00020702, 0xD2960001, 0x00020B04, 0xC4001C0F, 0x00000100 },
537      { 0xD2950000, 0x00020702, 0xD2950001, 0x00020B04, 0xC4001C0F, 0x00000100 },
538      { 0xD2940000, 0x00020702, 0xD2940001, 0x00020B04, 0xC4001C0F, 0x00000100 },
539      { 0xD2970000, 0x00020702, 0xD2970001, 0x00020B04, 0xC4001C0F, 0x00000100 },
540      { 0xD2980000, 0x00020702, 0xD2980001, 0x00020B04, 0xC4001C0F, 0x00000100 },
541      { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC400180F, 0x05040302 }
542     }
543 };
544 
545 static const uint32_t ps_tex_sh_registers_gfx9[][2] = {
546     {0x2C0A, 0x000C0081},//{ mmSPI_SHADER_PGM_RSRC1_PS, 0x000C0081 },
547     {0x2C0B, 0x00000018}, //{ mmSPI_SHADER_PGM_RSRC2_PS, 0x00000018 }
548 };
549 
550 static const uint32_t ps_tex_context_reg_gfx9[][2] = {
551     {0xA1B4, 0x00000002}, //{ mmSPI_PS_INPUT_ADDR,       0x00000002 },
552     {0xA1B6, 0x00000001}, //{ mmSPI_PS_IN_CONTROL,       0x00000001 },
553     {0xA08F, 0x0000000F}, //{ mmCB_SHADER_MASK,          0x0000000F },
554     {0xA203, 0x00000010}, //{ mmDB_SHADER_CONTROL,       0x00000010 },
555     {0xA1C4, 0x00000000}, //{ mmSPI_SHADER_Z_FORMAT,     0x00000000 },
556     {0xA1B8, 0x00000000}, //{ mmSPI_BARYC_CNTL,          0x00000000 /* Always 0 for now */},
557     {0xA1C5, 0x00000004}, //{ mmSPI_SHADER_COL_FORMAT,   0x00000004  }
558 };
559 
560 static const uint32_t ps_tex_shader_gfx10[] = {
561     0xBEFC030C, 0xBE8E047E, 0xBEFE0A7E, 0xC8080000,
562     0xC80C0100, 0xC8090001, 0xC80D0101, 0xF0800F0A,
563     0x00400402, 0x00000003, 0xBEFE040E, 0xBF8C0F70,
564     0x5E000B04, 0x5E020F06, 0xBF800000, 0xBF800000,
565     0xF8001C0F, 0x00000100, 0xBF810000
566 };
567 
568 static const uint32_t ps_tex_shader_patchinfo_offset_gfx10[] = {
569     0x0000000C
570 };
571 
572 static const uint32_t ps_tex_shader_patchinfo_code_size_gfx10 = 6;
573 
574 static const uint32_t ps_tex_shader_patchinfo_code_gfx10[][10][6] = {
575     {{ 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xF8001890, 0x00000000 },
576      { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xF8001801, 0x00000004 },
577      { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xF8001803, 0x00000504 },
578      { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xF8001803, 0x00000704 },
579      { 0x5E000B04, 0x5E020F06, 0xBF800000, 0xBF800000, 0xF8001C0F, 0x00000100 },
580      { 0xD7690000, 0x00020B04, 0xD7690001, 0x00020F06, 0xF8001C0F, 0x00000100 },
581      { 0xD7680000, 0x00020B04, 0xD7680001, 0x00020F06, 0xF8001C0F, 0x00000100 },
582      { 0xD76A0000, 0x00020B04, 0xD76A0001, 0x00020F06, 0xF8001C0F, 0x00000100 },
583      { 0xD76B0000, 0x00020B04, 0xD76B0001, 0x00020F06, 0xF8001C0F, 0x00000100 },
584      { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xF800180F, 0x07060504 }
585     }
586 };
587 
588 static const uint32_t vs_RectPosTexFast_shader_gfx9[] = {
589     0x7E000B00, 0x020000F3, 0xD042000A, 0x00010100,
590     0x7E020202, 0x7E040200, 0x020000F3, 0x7E060206,
591     0x7E080204, 0xD1000001, 0x002A0302, 0x7C840080,
592     0x7E000200, 0x7E040203, 0x7E0A0201, 0xD1000003,
593     0x002A0704, 0x7E0C0207, 0x7E0E0205, 0x00000101,
594     0x00020505, 0x7E040208, 0x7E0A02F2, 0x00060903,
595     0x00080D07, 0x7E0C0209, 0xC40008CF, 0x05020100,
596     0xC400020F, 0x05060403, 0xBF810000
597 };
598 
599 static const uint32_t vs_RectPosTexFast_shader_gfx10[] = {
600     0x7E000B00, 0x060000F3, 0x7E020202, 0x7E040206,
601     0x7C040080, 0x060000F3, 0xD5010001, 0x01AA0200,
602     0x7E060203, 0xD5010002, 0x01AA0404, 0x7E080207,
603     0x7C040080, 0xD5010000, 0x01A80101, 0xD5010001,
604     0x01AA0601, 0x7E060208, 0x7E0A02F2, 0xD5010002,
605     0x01A80902, 0xD5010004, 0x01AA0805, 0x7E0C0209,
606     0xF80008CF, 0x05030100, 0xF800020F, 0x05060402,
607     0xBF810000
608 };
609 
610 static const uint32_t cached_cmd_gfx9[] = {
611 	0xc0016900, 0x0, 0x0, 0xc0026900, 0x3, 0x2a, 0x0,
612 	0xc0046900, 0xa, 0x0, 0x0, 0x0, 0x200020,
613 	0xc0016900, 0x83, 0xffff, 0xc0026900, 0x8e, 0xf, 0xf,
614 	0xc0056900, 0x105, 0x0, 0x0,  0x0, 0x0, 0x12,
615 	0xc0026900, 0x10b, 0x0, 0x0, 0xc0016900, 0x1e0, 0x0,
616 	0xc0036900, 0x200, 0x0, 0x10000, 0xcc0011,
617 	0xc0026900, 0x292, 0x20, 0x60201b8,
618 	0xc0026900, 0x2b0, 0x0, 0x0, 0xc0016900, 0x2f8, 0x0
619 };
620 
621 static const uint32_t cached_cmd_gfx10[] = {
622 	0xc0016900, 0x0, 0x0, 0xc0026900, 0x3, 0x2a, 0x0,
623 	0xc0046900, 0xa, 0x0, 0x0, 0x0, 0x200020,
624 	0xc0016900, 0x83, 0xffff, 0xc0026900, 0x8e, 0xf, 0xf,
625 	0xc0056900, 0x105, 0x0, 0x0, 0x0, 0x0, 0x18,
626 	0xc0026900, 0x10b, 0x0, 0x0, 0xc0016900, 0x1e0, 0x0,
627 	0xc0036900, 0x200, 0x0, 0x10000, 0xcc0011,
628 	0xc0026900, 0x292, 0x20, 0x6020000,
629 	0xc0026900, 0x2b0, 0x0, 0x0, 0xc0016900, 0x2f8, 0x0
630 };
631 
632 unsigned int memcpy_ps_hang[] = {
633         0xFFFFFFFF, 0xBEFE0A7E, 0xBEFC0304, 0xC0C20100,
634         0xC0800300, 0xC8080000, 0xC80C0100, 0xC8090001,
635         0xC80D0101, 0xBF8C007F, 0xF0800F00, 0x00010002,
636         0xBEFE040C, 0xBF8C0F70, 0xBF800000, 0xBF800000,
637         0xF800180F, 0x03020100, 0xBF810000
638 };
639 
640 struct amdgpu_test_shader {
641 	uint32_t *shader;
642 	uint32_t header_length;
643 	uint32_t body_length;
644 	uint32_t foot_length;
645 };
646 
647 unsigned int memcpy_cs_hang_slow_ai_codes[] = {
648     0xd1fd0000, 0x04010c08, 0xe00c2000, 0x80000100,
649     0xbf8c0f70, 0xe01c2000, 0x80010100, 0xbf810000
650 };
651 
652 struct amdgpu_test_shader memcpy_cs_hang_slow_ai = {
653         memcpy_cs_hang_slow_ai_codes,
654         4,
655         3,
656         1
657 };
658 
659 unsigned int memcpy_cs_hang_slow_rv_codes[] = {
660     0x8e00860c, 0x32000000, 0xe00c2000, 0x80010100,
661     0xbf8c0f70, 0xe01c2000, 0x80020100, 0xbf810000
662 };
663 
664 struct amdgpu_test_shader memcpy_cs_hang_slow_rv = {
665         memcpy_cs_hang_slow_rv_codes,
666         4,
667         3,
668         1
669 };
670 
671 unsigned int memcpy_cs_hang_slow_nv_codes[] = {
672     0xd7460000, 0x04010c08, 0xe00c2000, 0x80000100,
673     0xbf8c0f70, 0xe01ca000, 0x80010100, 0xbf810000
674 };
675 
676 struct amdgpu_test_shader memcpy_cs_hang_slow_nv = {
677         memcpy_cs_hang_slow_nv_codes,
678         4,
679         3,
680         1
681 };
682 
683 unsigned int memcpy_ps_hang_slow_ai_codes[] = {
684         0xbefc000c, 0xbe8e017e, 0xbefe077e, 0xd4080000,
685         0xd4090001, 0xd40c0100, 0xd40d0101, 0xf0800f00,
686         0x00400002, 0xbefe010e, 0xbf8c0f70, 0xbf800000,
687         0xbf800000, 0xbf800000, 0xbf800000, 0xc400180f,
688         0x03020100, 0xbf810000
689 };
690 
691 struct amdgpu_test_shader memcpy_ps_hang_slow_ai = {
692         memcpy_ps_hang_slow_ai_codes,
693         7,
694         2,
695         9
696 };
697 
698 int amdgpu_bo_alloc_and_map_raw(amdgpu_device_handle dev, unsigned size,
699 			unsigned alignment, unsigned heap, uint64_t alloc_flags,
700 			uint64_t mapping_flags, amdgpu_bo_handle *bo, void **cpu,
701 			uint64_t *mc_address,
702 			amdgpu_va_handle *va_handle)
703 {
704 	struct amdgpu_bo_alloc_request request = {};
705 	amdgpu_bo_handle buf_handle;
706 	amdgpu_va_handle handle;
707 	uint64_t vmc_addr;
708 	int r;
709 
710 	request.alloc_size = size;
711 	request.phys_alignment = alignment;
712 	request.preferred_heap = heap;
713 	request.flags = alloc_flags;
714 
715 	r = amdgpu_bo_alloc(dev, &request, &buf_handle);
716 	if (r)
717 		return r;
718 
719 	r = amdgpu_va_range_alloc(dev,
720 				  amdgpu_gpu_va_range_general,
721 				  size, alignment, 0, &vmc_addr,
722 				  &handle, 0);
723 	if (r)
724 		goto error_va_alloc;
725 
726 	r = amdgpu_bo_va_op_raw(dev, buf_handle, 0,  ALIGN(size, getpagesize()), vmc_addr,
727 				   AMDGPU_VM_PAGE_READABLE |
728 				   AMDGPU_VM_PAGE_WRITEABLE |
729 				   AMDGPU_VM_PAGE_EXECUTABLE |
730 				   mapping_flags,
731 				   AMDGPU_VA_OP_MAP);
732 	if (r)
733 		goto error_va_map;
734 
735 	r = amdgpu_bo_cpu_map(buf_handle, cpu);
736 	if (r)
737 		goto error_cpu_map;
738 
739 	*bo = buf_handle;
740 	*mc_address = vmc_addr;
741 	*va_handle = handle;
742 
743 	return 0;
744 
745  error_cpu_map:
746 	amdgpu_bo_cpu_unmap(buf_handle);
747 
748  error_va_map:
749 	amdgpu_bo_va_op(buf_handle, 0, size, vmc_addr, 0, AMDGPU_VA_OP_UNMAP);
750 
751  error_va_alloc:
752 	amdgpu_bo_free(buf_handle);
753 	return r;
754 }
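
/*
 * Minimal usage sketch for the raw helper above (error handling elided,
 * size and flags illustrative only):
 *
 *	amdgpu_bo_handle bo;
 *	amdgpu_va_handle va;
 *	uint64_t mc;
 *	void *cpu;
 *
 *	if (!amdgpu_bo_alloc_and_map_raw(device_handle, 4096, 4096,
 *					 AMDGPU_GEM_DOMAIN_GTT, 0, 0,
 *					 &bo, &cpu, &mc, &va)) {
 *		memset(cpu, 0, 4096);	// CPU writes via the mapping
 *		...			// GPU accesses the buffer via "mc"
 *		amdgpu_bo_unmap_and_free(bo, va, mc, 4096);
 *	}
 */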
755 
756 
757 
758 CU_BOOL suite_basic_tests_enable(void)
759 {
760 
761 	if (amdgpu_device_initialize(drm_amdgpu[0], &major_version,
762 					     &minor_version, &device_handle))
763 		return CU_FALSE;
764 
765 
766 	family_id = device_handle->info.family_id;
767 	chip_id = device_handle->info.chip_external_rev;
768 	chip_rev = device_handle->info.chip_rev;
769 
770 	if (amdgpu_device_deinitialize(device_handle))
771 		return CU_FALSE;
772 
773 	/* disable gfx engine basic test cases for ASICs that have no CPG */
774 	if (asic_is_gfx_pipe_removed(family_id, chip_id, chip_rev)) {
775 		if (amdgpu_set_test_active("Basic Tests",
776 					"Command submission Test (GFX)",
777 					CU_FALSE))
778 			fprintf(stderr, "test deactivation failed - %s\n",
779 				CU_get_error_msg());
780 
781 		if (amdgpu_set_test_active("Basic Tests",
782 					"Command submission Test (Multi-Fence)",
783 					CU_FALSE))
784 			fprintf(stderr, "test deactivation failed - %s\n",
785 				CU_get_error_msg());
786 
787 		if (amdgpu_set_test_active("Basic Tests",
788 					"Sync dependency Test",
789 					CU_FALSE))
790 			fprintf(stderr, "test deactivation failed - %s\n",
791 				CU_get_error_msg());
792 	}
793 
794 	return CU_TRUE;
795 }
796 
797 int suite_basic_tests_init(void)
798 {
799 	struct amdgpu_gpu_info gpu_info = {0};
800 	int r;
801 
802 	r = amdgpu_device_initialize(drm_amdgpu[0], &major_version,
803 				   &minor_version, &device_handle);
804 
805 	if (r) {
806 		if ((r == -EACCES) && (errno == EACCES))
807 			printf("\n\nError:%s. "
808 				"Hint:Try to run this test program as root.",
809 				strerror(errno));
810 		return CUE_SINIT_FAILED;
811 	}
812 
813 	r = amdgpu_query_gpu_info(device_handle, &gpu_info);
814 	if (r)
815 		return CUE_SINIT_FAILED;
816 
817 	family_id = gpu_info.family_id;
818 
819 	return CUE_SUCCESS;
820 }
821 
822 int suite_basic_tests_clean(void)
823 {
824 	int r = amdgpu_device_deinitialize(device_handle);
825 
826 	if (r == 0)
827 		return CUE_SUCCESS;
828 	else
829 		return CUE_SCLEAN_FAILED;
830 }
831 
832 static void amdgpu_query_info_test(void)
833 {
834 	struct amdgpu_gpu_info gpu_info = {0};
835 	uint32_t version, feature;
836 	int r;
837 
838 	r = amdgpu_query_gpu_info(device_handle, &gpu_info);
839 	CU_ASSERT_EQUAL(r, 0);
840 
841 	r = amdgpu_query_firmware_version(device_handle, AMDGPU_INFO_FW_VCE, 0,
842 					  0, &version, &feature);
843 	CU_ASSERT_EQUAL(r, 0);
844 }
845 
846 static void amdgpu_command_submission_gfx_separate_ibs(void)
847 {
848 	amdgpu_context_handle context_handle;
849 	amdgpu_bo_handle ib_result_handle, ib_result_ce_handle;
850 	void *ib_result_cpu, *ib_result_ce_cpu;
851 	uint64_t ib_result_mc_address, ib_result_ce_mc_address;
852 	struct amdgpu_cs_request ibs_request = {0};
853 	struct amdgpu_cs_ib_info ib_info[2];
854 	struct amdgpu_cs_fence fence_status = {0};
855 	uint32_t *ptr;
856 	uint32_t expired;
857 	amdgpu_bo_list_handle bo_list;
858 	amdgpu_va_handle va_handle, va_handle_ce;
859 	int r, i = 0;
860 	struct drm_amdgpu_info_hw_ip info;
861 
862 	r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_GFX, 0, &info);
863 	CU_ASSERT_EQUAL(r, 0);
864 
865 	if (info.hw_ip_version_major >= 11)
866 		return;
867 
868 	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
869 	CU_ASSERT_EQUAL(r, 0);
870 
871 	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
872 				    AMDGPU_GEM_DOMAIN_GTT, 0,
873 				    &ib_result_handle, &ib_result_cpu,
874 				    &ib_result_mc_address, &va_handle);
875 	CU_ASSERT_EQUAL(r, 0);
876 
877 	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
878 				    AMDGPU_GEM_DOMAIN_GTT, 0,
879 				    &ib_result_ce_handle, &ib_result_ce_cpu,
880 				    &ib_result_ce_mc_address, &va_handle_ce);
881 	CU_ASSERT_EQUAL(r, 0);
882 
883 	r = amdgpu_get_bo_list(device_handle, ib_result_handle,
884 			       ib_result_ce_handle, &bo_list);
885 	CU_ASSERT_EQUAL(r, 0);
886 
887 	memset(ib_info, 0, 2 * sizeof(struct amdgpu_cs_ib_info));
888 
889 	/* IT_SET_CE_DE_COUNTERS */
890 	ptr = ib_result_ce_cpu;
891 	if (family_id != AMDGPU_FAMILY_SI) {
892 		ptr[i++] = 0xc0008900;
893 		ptr[i++] = 0;
894 	}
895 	ptr[i++] = 0xc0008400;
896 	ptr[i++] = 1;
897 	ib_info[0].ib_mc_address = ib_result_ce_mc_address;
898 	ib_info[0].size = i;
899 	ib_info[0].flags = AMDGPU_IB_FLAG_CE;
900 
901 	/* IT_WAIT_ON_CE_COUNTER */
902 	ptr = ib_result_cpu;
903 	ptr[0] = 0xc0008600;
904 	ptr[1] = 0x00000001;
905 	ib_info[1].ib_mc_address = ib_result_mc_address;
906 	ib_info[1].size = 2;
907 
908 	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
909 	ibs_request.number_of_ibs = 2;
910 	ibs_request.ibs = ib_info;
911 	ibs_request.resources = bo_list;
912 	ibs_request.fence_info.handle = NULL;
913 
914 	r = amdgpu_cs_submit(context_handle, 0,&ibs_request, 1);
915 
916 	CU_ASSERT_EQUAL(r, 0);
917 
918 	fence_status.context = context_handle;
919 	fence_status.ip_type = AMDGPU_HW_IP_GFX;
920 	fence_status.ip_instance = 0;
921 	fence_status.fence = ibs_request.seq_no;
922 
923 	r = amdgpu_cs_query_fence_status(&fence_status,
924 					 AMDGPU_TIMEOUT_INFINITE,
925 					 0, &expired);
926 	CU_ASSERT_EQUAL(r, 0);
927 
928 	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
929 				     ib_result_mc_address, 4096);
930 	CU_ASSERT_EQUAL(r, 0);
931 
932 	r = amdgpu_bo_unmap_and_free(ib_result_ce_handle, va_handle_ce,
933 				     ib_result_ce_mc_address, 4096);
934 	CU_ASSERT_EQUAL(r, 0);
935 
936 	r = amdgpu_bo_list_destroy(bo_list);
937 	CU_ASSERT_EQUAL(r, 0);
938 
939 	r = amdgpu_cs_ctx_free(context_handle);
940 	CU_ASSERT_EQUAL(r, 0);
941 
942 }
943 
944 static void amdgpu_command_submission_gfx_shared_ib(void)
945 {
946 	amdgpu_context_handle context_handle;
947 	amdgpu_bo_handle ib_result_handle;
948 	void *ib_result_cpu;
949 	uint64_t ib_result_mc_address;
950 	struct amdgpu_cs_request ibs_request = {0};
951 	struct amdgpu_cs_ib_info ib_info[2];
952 	struct amdgpu_cs_fence fence_status = {0};
953 	uint32_t *ptr;
954 	uint32_t expired;
955 	amdgpu_bo_list_handle bo_list;
956 	amdgpu_va_handle va_handle;
957 	int r, i = 0;
958 	struct drm_amdgpu_info_hw_ip info;
959 
960 	r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_GFX, 0, &info);
961 	CU_ASSERT_EQUAL(r, 0);
962 
963 	if (info.hw_ip_version_major >= 11)
964 		return;
965 
966 	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
967 	CU_ASSERT_EQUAL(r, 0);
968 
969 	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
970 				    AMDGPU_GEM_DOMAIN_GTT, 0,
971 				    &ib_result_handle, &ib_result_cpu,
972 				    &ib_result_mc_address, &va_handle);
973 	CU_ASSERT_EQUAL(r, 0);
974 
975 	r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL,
976 			       &bo_list);
977 	CU_ASSERT_EQUAL(r, 0);
978 
979 	memset(ib_info, 0, 2 * sizeof(struct amdgpu_cs_ib_info));
980 
981 	/* IT_SET_CE_DE_COUNTERS */
982 	ptr = ib_result_cpu;
983 	if (family_id != AMDGPU_FAMILY_SI) {
984 		ptr[i++] = 0xc0008900;
985 		ptr[i++] = 0;
986 	}
987 	ptr[i++] = 0xc0008400;
988 	ptr[i++] = 1;
989 	ib_info[0].ib_mc_address = ib_result_mc_address;
990 	ib_info[0].size = i;
991 	ib_info[0].flags = AMDGPU_IB_FLAG_CE;
992 
993 	ptr = (uint32_t *)ib_result_cpu + 4;
994 	ptr[0] = 0xc0008600;
995 	ptr[1] = 0x00000001;
996 	ib_info[1].ib_mc_address = ib_result_mc_address + 16;
997 	ib_info[1].size = 2;
998 
999 	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
1000 	ibs_request.number_of_ibs = 2;
1001 	ibs_request.ibs = ib_info;
1002 	ibs_request.resources = bo_list;
1003 	ibs_request.fence_info.handle = NULL;
1004 
1005 	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
1006 
1007 	CU_ASSERT_EQUAL(r, 0);
1008 
1009 	fence_status.context = context_handle;
1010 	fence_status.ip_type = AMDGPU_HW_IP_GFX;
1011 	fence_status.ip_instance = 0;
1012 	fence_status.fence = ibs_request.seq_no;
1013 
1014 	r = amdgpu_cs_query_fence_status(&fence_status,
1015 					 AMDGPU_TIMEOUT_INFINITE,
1016 					 0, &expired);
1017 	CU_ASSERT_EQUAL(r, 0);
1018 
1019 	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
1020 				     ib_result_mc_address, 4096);
1021 	CU_ASSERT_EQUAL(r, 0);
1022 
1023 	r = amdgpu_bo_list_destroy(bo_list);
1024 	CU_ASSERT_EQUAL(r, 0);
1025 
1026 	r = amdgpu_cs_ctx_free(context_handle);
1027 	CU_ASSERT_EQUAL(r, 0);
1028 }
1029 
1030 static void amdgpu_command_submission_gfx_cp_write_data(void)
1031 {
1032 	amdgpu_command_submission_write_linear_helper(AMDGPU_HW_IP_GFX);
1033 }
1034 
1035 static void amdgpu_command_submission_gfx_cp_const_fill(void)
1036 {
1037 	amdgpu_command_submission_const_fill_helper(AMDGPU_HW_IP_GFX);
1038 }
1039 
1040 static void amdgpu_command_submission_gfx_cp_copy_data(void)
1041 {
1042 	amdgpu_command_submission_copy_linear_helper(AMDGPU_HW_IP_GFX);
1043 }
1044 
1045 static void amdgpu_bo_eviction_test(void)
1046 {
1047 	const int sdma_write_length = 1024;
1048 	const int pm4_dw = 256;
1049 	amdgpu_context_handle context_handle;
1050 	amdgpu_bo_handle bo1, bo2, vram_max[2], gtt_max[2];
1051 	amdgpu_bo_handle *resources;
1052 	uint32_t *pm4;
1053 	struct amdgpu_cs_ib_info *ib_info;
1054 	struct amdgpu_cs_request *ibs_request;
1055 	uint64_t bo1_mc, bo2_mc;
1056 	volatile unsigned char *bo1_cpu, *bo2_cpu;
1057 	int i, j, r, loop1, loop2;
1058 	uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
1059 	amdgpu_va_handle bo1_va_handle, bo2_va_handle;
1060 	struct amdgpu_heap_info vram_info, gtt_info;
1061 
1062 	pm4 = calloc(pm4_dw, sizeof(*pm4));
1063 	CU_ASSERT_NOT_EQUAL(pm4, NULL);
1064 
1065 	ib_info = calloc(1, sizeof(*ib_info));
1066 	CU_ASSERT_NOT_EQUAL(ib_info, NULL);
1067 
1068 	ibs_request = calloc(1, sizeof(*ibs_request));
1069 	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
1070 
1071 	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
1072 	CU_ASSERT_EQUAL(r, 0);
1073 
1074 	/* prepare resource */
1075 	resources = calloc(4, sizeof(amdgpu_bo_handle));
1076 	CU_ASSERT_NOT_EQUAL(resources, NULL);
1077 
1078 	r = amdgpu_query_heap_info(device_handle, AMDGPU_GEM_DOMAIN_VRAM,
1079 				   0, &vram_info);
1080 	CU_ASSERT_EQUAL(r, 0);
1081 
1082 	r = amdgpu_bo_alloc_wrap(device_handle, vram_info.max_allocation, 4096,
1083 				 AMDGPU_GEM_DOMAIN_VRAM, 0, &vram_max[0]);
1084 	CU_ASSERT_EQUAL(r, 0);
1085 	r = amdgpu_bo_alloc_wrap(device_handle, vram_info.max_allocation, 4096,
1086 				 AMDGPU_GEM_DOMAIN_VRAM, 0, &vram_max[1]);
1087 	CU_ASSERT_EQUAL(r, 0);
1088 
1089 	r = amdgpu_query_heap_info(device_handle, AMDGPU_GEM_DOMAIN_GTT,
1090 				   0, &gtt_info);
1091 	CU_ASSERT_EQUAL(r, 0);
1092 
1093 	r = amdgpu_bo_alloc_wrap(device_handle, gtt_info.max_allocation, 4096,
1094 				 AMDGPU_GEM_DOMAIN_GTT, 0, &gtt_max[0]);
1095 	CU_ASSERT_EQUAL(r, 0);
1096 	r = amdgpu_bo_alloc_wrap(device_handle, gtt_info.max_allocation, 4096,
1097 				 AMDGPU_GEM_DOMAIN_GTT, 0, &gtt_max[1]);
1098 	CU_ASSERT_EQUAL(r, 0);
1099 
1100 
1101 
1102 	loop1 = loop2 = 0;
1103 	/* run 4 iterations to test all mapping-flag combinations */
1104 	while(loop1 < 2) {
1105 		while(loop2 < 2) {
1106 			/* allocate UC bo1 for sDMA use */
1107 			r = amdgpu_bo_alloc_and_map(device_handle,
1108 						    sdma_write_length, 4096,
1109 						    AMDGPU_GEM_DOMAIN_GTT,
1110 						    gtt_flags[loop1], &bo1,
1111 						    (void**)&bo1_cpu, &bo1_mc,
1112 						    &bo1_va_handle);
1113 			CU_ASSERT_EQUAL(r, 0);
1114 
1115 			/* set bo1 */
1116 			memset((void*)bo1_cpu, 0xaa, sdma_write_length);
1117 
1118 			/* allocate UC bo2 for sDMA use */
1119 			r = amdgpu_bo_alloc_and_map(device_handle,
1120 						    sdma_write_length, 4096,
1121 						    AMDGPU_GEM_DOMAIN_GTT,
1122 						    gtt_flags[loop2], &bo2,
1123 						    (void**)&bo2_cpu, &bo2_mc,
1124 						    &bo2_va_handle);
1125 			CU_ASSERT_EQUAL(r, 0);
1126 
1127 			/* clear bo2 */
1128 			memset((void*)bo2_cpu, 0, sdma_write_length);
1129 
1130 			resources[0] = bo1;
1131 			resources[1] = bo2;
1132 			resources[2] = vram_max[loop2];
1133 			resources[3] = gtt_max[loop2];
1134 
1135 			/* fill PM4: test DMA copy linear */
1136 			i = j = 0;
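			/* Copy-linear packet layout, as emitted below: on SI the
			 * length is packed into the header and the addresses
			 * follow as dst-lo, src-lo, dst-hi, src-hi; otherwise
			 * the header is followed by the copy length (len - 1 on
			 * AI and newer), a parameter dword, then the src and
			 * dst addresses lo/hi.  Either way bo1 is copied into
			 * bo2. */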
1137 			if (family_id == AMDGPU_FAMILY_SI) {
1138 				pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_COPY_SI, 0, 0, 0,
1139 							  sdma_write_length);
1140 				pm4[i++] = 0xffffffff & bo2_mc;
1141 				pm4[i++] = 0xffffffff & bo1_mc;
1142 				pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
1143 				pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
1144 			} else {
1145 				pm4[i++] = SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0);
1146 				if (family_id >= AMDGPU_FAMILY_AI)
1147 					pm4[i++] = sdma_write_length - 1;
1148 				else
1149 					pm4[i++] = sdma_write_length;
1150 				pm4[i++] = 0;
1151 				pm4[i++] = 0xffffffff & bo1_mc;
1152 				pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
1153 				pm4[i++] = 0xffffffff & bo2_mc;
1154 				pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
1155 			}
1156 
1157 			amdgpu_test_exec_cs_helper(context_handle,
1158 						   AMDGPU_HW_IP_DMA, 0,
1159 						   i, pm4,
1160 						   4, resources,
1161 						   ib_info, ibs_request);
1162 
1163 			/* verify the SDMA copy result matches the expected pattern */
1164 			i = 0;
1165 			while(i < sdma_write_length) {
1166 				CU_ASSERT_EQUAL(bo2_cpu[i++], 0xaa);
1167 			}
1168 			r = amdgpu_bo_unmap_and_free(bo1, bo1_va_handle, bo1_mc,
1169 						     sdma_write_length);
1170 			CU_ASSERT_EQUAL(r, 0);
1171 			r = amdgpu_bo_unmap_and_free(bo2, bo2_va_handle, bo2_mc,
1172 						     sdma_write_length);
1173 			CU_ASSERT_EQUAL(r, 0);
1174 			loop2++;
1175 		}
1176 		loop2 = 0;
1177 		loop1++;
1178 	}
1179 	amdgpu_bo_free(vram_max[0]);
1180 	amdgpu_bo_free(vram_max[1]);
1181 	amdgpu_bo_free(gtt_max[0]);
1182 	amdgpu_bo_free(gtt_max[1]);
1183 	/* clean resources */
1184 	free(resources);
1185 	free(ibs_request);
1186 	free(ib_info);
1187 	free(pm4);
1188 
1189 	/* end of test */
1190 	r = amdgpu_cs_ctx_free(context_handle);
1191 	CU_ASSERT_EQUAL(r, 0);
1192 }
1193 
1194 
1195 static void amdgpu_command_submission_gfx(void)
1196 {
1197 	/* write data using the CP */
1198 	amdgpu_command_submission_gfx_cp_write_data();
1199 	/* const fill using the CP */
1200 	amdgpu_command_submission_gfx_cp_const_fill();
1201 	/* copy data using the CP */
1202 	amdgpu_command_submission_gfx_cp_copy_data();
1203 	/* separate IB buffers for multi-IB submission */
1204 	amdgpu_command_submission_gfx_separate_ibs();
1205 	/* shared IB buffer for multi-IB submission */
1206 	amdgpu_command_submission_gfx_shared_ib();
1207 }
1208 
1209 static void amdgpu_semaphore_test(void)
1210 {
1211 	amdgpu_context_handle context_handle[2];
1212 	amdgpu_semaphore_handle sem;
1213 	amdgpu_bo_handle ib_result_handle[2];
1214 	void *ib_result_cpu[2];
1215 	uint64_t ib_result_mc_address[2];
1216 	struct amdgpu_cs_request ibs_request[2] = {0};
1217 	struct amdgpu_cs_ib_info ib_info[2] = {0};
1218 	struct amdgpu_cs_fence fence_status = {0};
1219 	uint32_t *ptr;
1220 	uint32_t expired;
1221 	uint32_t sdma_nop, gfx_nop;
1222 	amdgpu_bo_list_handle bo_list[2];
1223 	amdgpu_va_handle va_handle[2];
1224 	int r, i;
1225 	struct amdgpu_gpu_info gpu_info = {0};
1226 	unsigned gc_ip_type;
1227 
1228 	r = amdgpu_query_gpu_info(device_handle, &gpu_info);
1229 	CU_ASSERT_EQUAL(r, 0);
1230 
1231 	gc_ip_type = (asic_is_gfx_pipe_removed(family_id, chip_id, chip_rev)) ?
1232 			AMDGPU_HW_IP_COMPUTE : AMDGPU_HW_IP_GFX;
1233 
1234 	if (family_id == AMDGPU_FAMILY_SI) {
1235 		sdma_nop = SDMA_PACKET_SI(SDMA_NOP_SI, 0, 0, 0, 0);
1236 		gfx_nop = GFX_COMPUTE_NOP_SI;
1237 	} else {
1238 		sdma_nop = SDMA_PKT_HEADER_OP(SDMA_NOP);
1239 		gfx_nop = GFX_COMPUTE_NOP;
1240 	}
1241 
1242 	r = amdgpu_cs_create_semaphore(&sem);
1243 	CU_ASSERT_EQUAL(r, 0);
1244 	for (i = 0; i < 2; i++) {
1245 		r = amdgpu_cs_ctx_create(device_handle, &context_handle[i]);
1246 		CU_ASSERT_EQUAL(r, 0);
1247 
1248 		r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
1249 					    AMDGPU_GEM_DOMAIN_GTT, 0,
1250 					    &ib_result_handle[i], &ib_result_cpu[i],
1251 					    &ib_result_mc_address[i], &va_handle[i]);
1252 		CU_ASSERT_EQUAL(r, 0);
1253 
1254 		r = amdgpu_get_bo_list(device_handle, ib_result_handle[i],
1255 				       NULL, &bo_list[i]);
1256 		CU_ASSERT_EQUAL(r, 0);
1257 	}
1258 
1259 	/* 1. same context different engine */
1260 	ptr = ib_result_cpu[0];
1261 	ptr[0] = sdma_nop;
1262 	ib_info[0].ib_mc_address = ib_result_mc_address[0];
1263 	ib_info[0].size = 1;
1264 
1265 	ibs_request[0].ip_type = AMDGPU_HW_IP_DMA;
1266 	ibs_request[0].number_of_ibs = 1;
1267 	ibs_request[0].ibs = &ib_info[0];
1268 	ibs_request[0].resources = bo_list[0];
1269 	ibs_request[0].fence_info.handle = NULL;
1270 	r = amdgpu_cs_submit(context_handle[0], 0,&ibs_request[0], 1);
1271 	CU_ASSERT_EQUAL(r, 0);
1272 	r = amdgpu_cs_signal_semaphore(context_handle[0], AMDGPU_HW_IP_DMA, 0, 0, sem);
1273 	CU_ASSERT_EQUAL(r, 0);
1274 
1275 	r = amdgpu_cs_wait_semaphore(context_handle[0], gc_ip_type, 0, 0, sem);
1276 	CU_ASSERT_EQUAL(r, 0);
1277 	ptr = ib_result_cpu[1];
1278 	ptr[0] = gfx_nop;
1279 	ib_info[1].ib_mc_address = ib_result_mc_address[1];
1280 	ib_info[1].size = 1;
1281 
1282 	ibs_request[1].ip_type = gc_ip_type;
1283 	ibs_request[1].number_of_ibs = 1;
1284 	ibs_request[1].ibs = &ib_info[1];
1285 	ibs_request[1].resources = bo_list[1];
1286 	ibs_request[1].fence_info.handle = NULL;
1287 
1288 	r = amdgpu_cs_submit(context_handle[0], 0,&ibs_request[1], 1);
1289 	CU_ASSERT_EQUAL(r, 0);
1290 
1291 	fence_status.context = context_handle[0];
1292 	fence_status.ip_type = gc_ip_type;
1293 	fence_status.ip_instance = 0;
1294 	fence_status.fence = ibs_request[1].seq_no;
1295 	r = amdgpu_cs_query_fence_status(&fence_status,
1296 					 AMDGPU_TIMEOUT_INFINITE, 0, &expired);
1297 	CU_ASSERT_EQUAL(r, 0);
1298 	CU_ASSERT_EQUAL(expired, true);
1299 
1300 	/* 2. same engine different context */
1301 	ptr = ib_result_cpu[0];
1302 	ptr[0] = gfx_nop;
1303 	ib_info[0].ib_mc_address = ib_result_mc_address[0];
1304 	ib_info[0].size = 1;
1305 
1306 	ibs_request[0].ip_type = gc_ip_type;
1307 	ibs_request[0].number_of_ibs = 1;
1308 	ibs_request[0].ibs = &ib_info[0];
1309 	ibs_request[0].resources = bo_list[0];
1310 	ibs_request[0].fence_info.handle = NULL;
1311 	r = amdgpu_cs_submit(context_handle[0], 0, &ibs_request[0], 1);
1312 	CU_ASSERT_EQUAL(r, 0);
1313 	r = amdgpu_cs_signal_semaphore(context_handle[0], gc_ip_type, 0, 0, sem);
1314 	CU_ASSERT_EQUAL(r, 0);
1315 
1316 	r = amdgpu_cs_wait_semaphore(context_handle[1], gc_ip_type, 0, 0, sem);
1317 	CU_ASSERT_EQUAL(r, 0);
1318 	ptr = ib_result_cpu[1];
1319 	ptr[0] = gfx_nop;
1320 	ib_info[1].ib_mc_address = ib_result_mc_address[1];
1321 	ib_info[1].size = 1;
1322 
1323 	ibs_request[1].ip_type = gc_ip_type;
1324 	ibs_request[1].number_of_ibs = 1;
1325 	ibs_request[1].ibs = &ib_info[1];
1326 	ibs_request[1].resources = bo_list[1];
1327 	ibs_request[1].fence_info.handle = NULL;
1328 	r = amdgpu_cs_submit(context_handle[1], 0, &ibs_request[1], 1);
1329 
1330 	CU_ASSERT_EQUAL(r, 0);
1331 
1332 	fence_status.context = context_handle[1];
1333 	fence_status.ip_type = gc_ip_type;
1334 	fence_status.ip_instance = 0;
1335 	fence_status.fence = ibs_request[1].seq_no;
1336 	r = amdgpu_cs_query_fence_status(&fence_status,
1337 					 AMDGPU_TIMEOUT_INFINITE, 0, &expired);
1338 	CU_ASSERT_EQUAL(r, 0);
1339 	CU_ASSERT_EQUAL(expired, true);
1340 
1341 	for (i = 0; i < 2; i++) {
1342 		r = amdgpu_bo_unmap_and_free(ib_result_handle[i], va_handle[i],
1343 					     ib_result_mc_address[i], 4096);
1344 		CU_ASSERT_EQUAL(r, 0);
1345 
1346 		r = amdgpu_bo_list_destroy(bo_list[i]);
1347 		CU_ASSERT_EQUAL(r, 0);
1348 
1349 		r = amdgpu_cs_ctx_free(context_handle[i]);
1350 		CU_ASSERT_EQUAL(r, 0);
1351 	}
1352 
1353 	r = amdgpu_cs_destroy_semaphore(sem);
1354 	CU_ASSERT_EQUAL(r, 0);
1355 }
1356 
1357 static void amdgpu_command_submission_compute_nop(void)
1358 {
1359 	amdgpu_context_handle context_handle;
1360 	amdgpu_bo_handle ib_result_handle;
1361 	void *ib_result_cpu;
1362 	uint64_t ib_result_mc_address;
1363 	struct amdgpu_cs_request ibs_request;
1364 	struct amdgpu_cs_ib_info ib_info;
1365 	struct amdgpu_cs_fence fence_status;
1366 	uint32_t *ptr;
1367 	uint32_t expired;
1368 	int r, instance;
1369 	amdgpu_bo_list_handle bo_list;
1370 	amdgpu_va_handle va_handle;
1371 	struct drm_amdgpu_info_hw_ip info;
1372 
1373 	r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_COMPUTE, 0, &info);
1374 	CU_ASSERT_EQUAL(r, 0);
1375 
1376 	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
1377 	CU_ASSERT_EQUAL(r, 0);
1378 
1379 	for (instance = 0; (1 << instance) & info.available_rings; instance++) {
1380 		r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
1381 					    AMDGPU_GEM_DOMAIN_GTT, 0,
1382 					    &ib_result_handle, &ib_result_cpu,
1383 					    &ib_result_mc_address, &va_handle);
1384 		CU_ASSERT_EQUAL(r, 0);
1385 
1386 		r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL,
1387 				       &bo_list);
1388 		CU_ASSERT_EQUAL(r, 0);
1389 
1390 		ptr = ib_result_cpu;
1391 		memset(ptr, 0, 16);
1392 		ptr[0] = PACKET3(PACKET3_NOP, 14);
1393 
1394 		memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info));
1395 		ib_info.ib_mc_address = ib_result_mc_address;
1396 		ib_info.size = 16;
1397 
1398 		memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request));
1399 		ibs_request.ip_type = AMDGPU_HW_IP_COMPUTE;
1400 		ibs_request.ring = instance;
1401 		ibs_request.number_of_ibs = 1;
1402 		ibs_request.ibs = &ib_info;
1403 		ibs_request.resources = bo_list;
1404 		ibs_request.fence_info.handle = NULL;
1405 
1406 		memset(&fence_status, 0, sizeof(struct amdgpu_cs_fence));
1407 		r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
1408 		CU_ASSERT_EQUAL(r, 0);
1409 
1410 		fence_status.context = context_handle;
1411 		fence_status.ip_type = AMDGPU_HW_IP_COMPUTE;
1412 		fence_status.ip_instance = 0;
1413 		fence_status.ring = instance;
1414 		fence_status.fence = ibs_request.seq_no;
1415 
1416 		r = amdgpu_cs_query_fence_status(&fence_status,
1417 						 AMDGPU_TIMEOUT_INFINITE,
1418 						 0, &expired);
1419 		CU_ASSERT_EQUAL(r, 0);
1420 
1421 		r = amdgpu_bo_list_destroy(bo_list);
1422 		CU_ASSERT_EQUAL(r, 0);
1423 
1424 		r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
1425 					     ib_result_mc_address, 4096);
1426 		CU_ASSERT_EQUAL(r, 0);
1427 	}
1428 
1429 	r = amdgpu_cs_ctx_free(context_handle);
1430 	CU_ASSERT_EQUAL(r, 0);
1431 }
1432 
1433 static void amdgpu_command_submission_compute_cp_write_data(void)
1434 {
1435 	amdgpu_command_submission_write_linear_helper(AMDGPU_HW_IP_COMPUTE);
1436 }
1437 
1438 static void amdgpu_command_submission_compute_cp_const_fill(void)
1439 {
1440 	amdgpu_command_submission_const_fill_helper(AMDGPU_HW_IP_COMPUTE);
1441 }
1442 
1443 static void amdgpu_command_submission_compute_cp_copy_data(void)
1444 {
1445 	amdgpu_command_submission_copy_linear_helper(AMDGPU_HW_IP_COMPUTE);
1446 }
1447 
1448 static void amdgpu_command_submission_compute(void)
1449 {
1450 	/* write data using the CP */
1451 	amdgpu_command_submission_compute_cp_write_data();
1452 	/* const fill using the CP */
1453 	amdgpu_command_submission_compute_cp_const_fill();
1454 	/* copy data using the CP */
1455 	amdgpu_command_submission_compute_cp_copy_data();
1456 	/* nop test */
1457 	amdgpu_command_submission_compute_nop();
1458 }
1459 
1460 /*
1461  * The caller must create and release pm4_src, resources, ib_info and
1462  * ibs_request; this helper submits the command stream described in
1463  * ibs_request and waits for the IB to complete.
1464  */
1465 void
1466 amdgpu_test_exec_cs_helper_raw(amdgpu_device_handle device_handle,
1467 			       amdgpu_context_handle context_handle,
1468 			       unsigned ip_type, int instance, int pm4_dw,
1469 			       uint32_t *pm4_src, int res_cnt,
1470 			       amdgpu_bo_handle *resources,
1471 			       struct amdgpu_cs_ib_info *ib_info,
1472 			       struct amdgpu_cs_request *ibs_request,
1473 			       bool secure)
1474 {
1475 	int r;
1476 	uint32_t expired;
1477 	uint32_t *ring_ptr;
1478 	amdgpu_bo_handle ib_result_handle;
1479 	void *ib_result_cpu;
1480 	uint64_t ib_result_mc_address;
1481 	struct amdgpu_cs_fence fence_status = {0};
1482 	amdgpu_bo_handle *all_res = alloca(sizeof(resources[0]) * (res_cnt + 1));
1483 	amdgpu_va_handle va_handle;
1484 
1485 	/* prepare CS */
1486 	CU_ASSERT_NOT_EQUAL(pm4_src, NULL);
1487 	CU_ASSERT_NOT_EQUAL(resources, NULL);
1488 	CU_ASSERT_NOT_EQUAL(ib_info, NULL);
1489 	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
1490 	CU_ASSERT_TRUE(pm4_dw <= 1024);
1491 
1492 	/* allocate IB */
1493 	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
1494 				    AMDGPU_GEM_DOMAIN_GTT, 0,
1495 				    &ib_result_handle, &ib_result_cpu,
1496 				    &ib_result_mc_address, &va_handle);
1497 	CU_ASSERT_EQUAL(r, 0);
1498 
1499 	/* copy PM4 packet to ring from caller */
1500 	ring_ptr = ib_result_cpu;
1501 	memcpy(ring_ptr, pm4_src, pm4_dw * sizeof(*pm4_src));
1502 
1503 	ib_info->ib_mc_address = ib_result_mc_address;
1504 	ib_info->size = pm4_dw;
1505 	if (secure)
1506 		ib_info->flags |= AMDGPU_IB_FLAGS_SECURE;
1507 
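	/* build the request: a single IB on the chosen ring, no user fence BO */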
1508 	ibs_request->ip_type = ip_type;
1509 	ibs_request->ring = instance;
1510 	ibs_request->number_of_ibs = 1;
1511 	ibs_request->ibs = ib_info;
1512 	ibs_request->fence_info.handle = NULL;
1513 
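	/* append the IB itself to the caller's resources so the whole set is
	 * made resident for the submission */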
1514 	memcpy(all_res, resources, sizeof(resources[0]) * res_cnt);
1515 	all_res[res_cnt] = ib_result_handle;
1516 
1517 	r = amdgpu_bo_list_create(device_handle, res_cnt+1, all_res,
1518 				  NULL, &ibs_request->resources);
1519 	CU_ASSERT_EQUAL(r, 0);
1520 
1521 	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
1522 
1523 	/* submit CS */
1524 	r = amdgpu_cs_submit(context_handle, 0, ibs_request, 1);
1525 	CU_ASSERT_EQUAL(r, 0);
1526 
1527 	r = amdgpu_bo_list_destroy(ibs_request->resources);
1528 	CU_ASSERT_EQUAL(r, 0);
1529 
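	/* identify the fence belonging to the submission above */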
1530 	fence_status.ip_type = ip_type;
1531 	fence_status.ip_instance = 0;
1532 	fence_status.ring = ibs_request->ring;
1533 	fence_status.context = context_handle;
1534 	fence_status.fence = ibs_request->seq_no;
1535 
1536 	/* wait for IB accomplished */
1537 	r = amdgpu_cs_query_fence_status(&fence_status,
1538 					 AMDGPU_TIMEOUT_INFINITE,
1539 					 0, &expired);
1540 	CU_ASSERT_EQUAL(r, 0);
1541 	CU_ASSERT_EQUAL(expired, true);
1542 
1543 	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
1544 				     ib_result_mc_address, 4096);
1545 	CU_ASSERT_EQUAL(r, 0);
1546 }
1547 
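/*
 * Convenience wrapper around amdgpu_test_exec_cs_helper_raw() for the common
 * case: global device handle, non-secure submission.
 */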
1548 static void
1549 amdgpu_test_exec_cs_helper(amdgpu_context_handle context_handle,
1550 			   unsigned ip_type, int instance, int pm4_dw,
1551 			   uint32_t *pm4_src, int res_cnt,
1552 			   amdgpu_bo_handle *resources,
1553 			   struct amdgpu_cs_ib_info *ib_info,
1554 			   struct amdgpu_cs_request *ibs_request)
1555 {
1556 	amdgpu_test_exec_cs_helper_raw(device_handle, context_handle,
1557 				       ip_type, instance, pm4_dw, pm4_src,
1558 				       res_cnt, resources, ib_info,
1559 				       ibs_request, false);
1560 }
1561 
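/*
 * Write a known pattern into a GTT buffer via SDMA or the CP on every
 * available ring of the given IP and verify it; optionally uses TMZ
 * (secure/encrypted) buffers.
 */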
1562 void
1563 amdgpu_command_submission_write_linear_helper_with_secure(amdgpu_device_handle
1564 							  device, unsigned
1565 							  ip_type, bool secure)
1566 {
1567 	const int sdma_write_length = 128;
1568 	const int pm4_dw = 256;
1569 	amdgpu_context_handle context_handle;
1570 	amdgpu_bo_handle bo;
1571 	amdgpu_bo_handle *resources;
1572 	uint32_t *pm4;
1573 	struct amdgpu_cs_ib_info *ib_info;
1574 	struct amdgpu_cs_request *ibs_request;
1575 	uint64_t bo_mc;
1576 	volatile uint32_t *bo_cpu;
1577 	uint32_t bo_cpu_origin;
1578 	int i, j, r, loop, ring_id;
1579 	uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
1580 	amdgpu_va_handle va_handle;
1581 	struct drm_amdgpu_info_hw_ip hw_ip_info;
1582 
1583 	pm4 = calloc(pm4_dw, sizeof(*pm4));
1584 	CU_ASSERT_NOT_EQUAL(pm4, NULL);
1585 
1586 	ib_info = calloc(1, sizeof(*ib_info));
1587 	CU_ASSERT_NOT_EQUAL(ib_info, NULL);
1588 
1589 	ibs_request = calloc(1, sizeof(*ibs_request));
1590 	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
1591 
1592 	r = amdgpu_query_hw_ip_info(device, ip_type, 0, &hw_ip_info);
1593 	CU_ASSERT_EQUAL(r, 0);
1594 
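	/* secure submissions operate on encrypted (TMZ) buffers */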
1595 	for (i = 0; secure && (i < 2); i++)
1596 		gtt_flags[i] |= AMDGPU_GEM_CREATE_ENCRYPTED;
1597 
1598 	r = amdgpu_cs_ctx_create(device, &context_handle);
1600 	CU_ASSERT_EQUAL(r, 0);
1601 
1602 	/* prepare resource */
1603 	resources = calloc(1, sizeof(amdgpu_bo_handle));
1604 	CU_ASSERT_NOT_EQUAL(resources, NULL);
1605 
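	/* test every available ring, once with cached and once with USWC GTT */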
1606 	for (ring_id = 0; (1 << ring_id) & hw_ip_info.available_rings; ring_id++) {
1607 		loop = 0;
1608 		while(loop < 2) {
1609 			/* allocate UC bo for sDMA use */
1610 			r = amdgpu_bo_alloc_and_map(device,
1611 						    sdma_write_length * sizeof(uint32_t),
1612 						    4096, AMDGPU_GEM_DOMAIN_GTT,
1613 						    gtt_flags[loop], &bo, (void**)&bo_cpu,
1614 						    &bo_mc, &va_handle);
1615 			CU_ASSERT_EQUAL(r, 0);
1616 
1617 			/* clear bo */
1618 			memset((void*)bo_cpu, 0, sdma_write_length * sizeof(uint32_t));
1619 
1620 			resources[0] = bo;
1621 
1622 			/* fill the PM4 stream: DMA write-linear test */
1623 			i = j = 0;
1624 			if (ip_type == AMDGPU_HW_IP_DMA) {
1625 				if (family_id == AMDGPU_FAMILY_SI)
1626 					pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_WRITE, 0, 0, 0,
1627 								  sdma_write_length);
1628 				else
1629 					pm4[i++] = SDMA_PACKET(SDMA_OPCODE_WRITE,
1630 							       SDMA_WRITE_SUB_OPCODE_LINEAR,
1631 							       secure ? SDMA_ATOMIC_TMZ(1) : 0);
1632 				pm4[i++] = 0xfffffffc & bo_mc;
1633 				pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
1634 				if (family_id >= AMDGPU_FAMILY_AI)
1635 					pm4[i++] = sdma_write_length - 1;
1636 				else if (family_id != AMDGPU_FAMILY_SI)
1637 					pm4[i++] = sdma_write_length;
1638 				while(j++ < sdma_write_length)
1639 					pm4[i++] = 0xdeadbeaf;
1640 			} else if ((ip_type == AMDGPU_HW_IP_GFX) ||
1641 				    (ip_type == AMDGPU_HW_IP_COMPUTE)) {
1642 				pm4[i++] = PACKET3(PACKET3_WRITE_DATA, 2 + sdma_write_length);
1643 				pm4[i++] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
1644 				pm4[i++] = 0xfffffffc & bo_mc;
1645 				pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
1646 				while(j++ < sdma_write_length)
1647 					pm4[i++] = 0xdeadbeaf;
1648 			}
1649 
1650 			amdgpu_test_exec_cs_helper_raw(device, context_handle,
1651 						       ip_type, ring_id, i, pm4,
1652 						       1, resources, ib_info,
1653 						       ibs_request, secure);
1654 
1655 			/* verify the result matches the expected pattern */
1656 			i = 0;
1657 			if (!secure) {
1658 				while(i < sdma_write_length) {
1659 					CU_ASSERT_EQUAL(bo_cpu[i++], 0xdeadbeaf);
1660 				}
1661 			} else if (ip_type == AMDGPU_HW_IP_GFX) {
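				/* a TMZ buffer cannot be verified by CPU reads;
				 * instead issue an atomic compare-swap that loops
				 * until the destination matches 0xdeadbeaf, so a
				 * failed write would hang the ring */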
1662 				memset((void*)pm4, 0, pm4_dw * sizeof(uint32_t));
1663 				pm4[i++] = PACKET3(PACKET3_ATOMIC_MEM, 7);
1664 				/* atomic opcode: 32-bit compare-swap with return
1665 				 * command: 1 = loop until compare satisfied
1666 				 * cache policy: 0 = LRU
1667 				 * engine_sel: 0 = micro engine
1668 				 */
1669 				pm4[i++] = (TC_OP_ATOMIC_CMPSWAP_RTN_32 |
1670 							ATOMIC_MEM_COMMAND(1) |
1671 							ATOMIC_MEM_CACHEPOLICAY(0) |
1672 							ATOMIC_MEM_ENGINESEL(0));
1673 				pm4[i++] = 0xfffffffc & bo_mc;
1674 				pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
1675 				pm4[i++] = 0x12345678;
1676 				pm4[i++] = 0x0;
1677 				pm4[i++] = 0xdeadbeaf;
1678 				pm4[i++] = 0x0;
1679 				pm4[i++] = 0x100;
1680 				amdgpu_test_exec_cs_helper_raw(device, context_handle,
1681 							ip_type, ring_id, i, pm4,
1682 							1, resources, ib_info,
1683 							ibs_request, true);
1684 			} else if (ip_type == AMDGPU_HW_IP_DMA) {
1685 				/* restore the bo_cpu to compare */
1686 				bo_cpu_origin = bo_cpu[0];
1687 				memset((void*)pm4, 0, pm4_dw * sizeof(uint32_t));
1688 				/* atomic opcode: 32-bit compare-swap with return
1689 				 * loop: 1 = loop until compare satisfied
1690 				 * single_pass_atomic: 0 = LRU
1691 				 */
1692 				pm4[i++] = SDMA_PACKET(SDMA_OPCODE_ATOMIC,
1693 							       0,
1694 							       SDMA_ATOMIC_LOOP(1) |
1695 							       SDMA_ATOMIC_TMZ(1) |
1696 							       SDMA_ATOMIC_OPCODE(TC_OP_ATOMIC_CMPSWAP_RTN_32));
1697 				pm4[i++] = 0xfffffffc & bo_mc;
1698 				pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
1699 				pm4[i++] = 0x12345678;
1700 				pm4[i++] = 0x0;
1701 				pm4[i++] = 0xdeadbeaf;
1702 				pm4[i++] = 0x0;
1703 				pm4[i++] = 0x100;
1704 				amdgpu_test_exec_cs_helper_raw(device, context_handle,
1705 							ip_type, ring_id, i, pm4,
1706 							1, resources, ib_info,
1707 							ibs_request, true);
1708 				/* DMA's atomic behavior differs from GFX's.
1709 				 * When the compare data does not equal the destination data:
1710 				 * GFX loops again until the GFX timeout (system hang);
1711 				 * DMA loops again until the timer expires, then raises an
1712 				 * interrupt, so the test cannot rely on the interrupt mechanism.
1713 				 * We verify another way: when the compare data does not equal
1714 				 * the destination data, the source data is written to the
1715 				 * destination buffer; otherwise the destination stays unchanged.
1716 				 * So if the bo_cpu data was overwritten, the test passed.
1717 				 */
1718 				CU_ASSERT_NOT_EQUAL(bo_cpu[0], bo_cpu_origin);
1719 
1720 				/* compare again for the case of dest_data != cmp_data */
1721 				i = 0;
1722 				/* restore again; dest_data here should be 0x12345678 */
1723 				bo_cpu_origin = bo_cpu[0];
1724 				memset((void*)pm4, 0, pm4_dw * sizeof(uint32_t));
1725 				pm4[i++] = SDMA_PACKET(SDMA_OPCODE_ATOMIC,
1726 							       0,
1727 							       SDMA_ATOMIC_LOOP(1) |
1728 							       SDMA_ATOMIC_TMZ(1) |
1729 							       SDMA_ATOMIC_OPCODE(TC_OP_ATOMIC_CMPSWAP_RTN_32));
1730 				pm4[i++] = 0xfffffffc & bo_mc;
1731 				pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
1732 				pm4[i++] = 0x87654321;
1733 				pm4[i++] = 0x0;
1734 				pm4[i++] = 0xdeadbeaf;
1735 				pm4[i++] = 0x0;
1736 				pm4[i++] = 0x100;
1737 				amdgpu_test_exec_cs_helper_raw(device, context_handle,
1738 							ip_type, ring_id, i, pm4,
1739 							1, resources, ib_info,
1740 							ibs_request, true);
1741 				/* bo_cpu[0] should be unchanged (still 0x12345678), otherwise the test failed */
1742 				CU_ASSERT_EQUAL(bo_cpu[0], bo_cpu_origin);
1743 			}
1744 
1745 			r = amdgpu_bo_unmap_and_free(bo, va_handle, bo_mc,
1746 						     sdma_write_length * sizeof(uint32_t));
1747 			CU_ASSERT_EQUAL(r, 0);
1748 			loop++;
1749 		}
1750 	}
1751 	/* clean resources */
1752 	free(resources);
1753 	free(ibs_request);
1754 	free(ib_info);
1755 	free(pm4);
1756 
1757 	/* end of test */
1758 	r = amdgpu_cs_ctx_free(context_handle);
1759 	CU_ASSERT_EQUAL(r, 0);
1760 }
1761 
1762 static void amdgpu_command_submission_write_linear_helper(unsigned ip_type)
1763 {
1764 	amdgpu_command_submission_write_linear_helper_with_secure(device_handle,
1765 								  ip_type,
1766 								  false);
1767 }
1768 
1769 static void amdgpu_command_submission_sdma_write_linear(void)
1770 {
1771 	amdgpu_command_submission_write_linear_helper(AMDGPU_HW_IP_DMA);
1772 }
1773 
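/*
 * Fill a buffer with a 32-bit constant via SDMA or CP DMA on every available
 * ring and verify the contents from the CPU.
 */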
1774 static void amdgpu_command_submission_const_fill_helper(unsigned ip_type)
1775 {
1776 	const int sdma_write_length = 1024 * 1024;
1777 	const int pm4_dw = 256;
1778 	amdgpu_context_handle context_handle;
1779 	amdgpu_bo_handle bo;
1780 	amdgpu_bo_handle *resources;
1781 	uint32_t *pm4;
1782 	struct amdgpu_cs_ib_info *ib_info;
1783 	struct amdgpu_cs_request *ibs_request;
1784 	uint64_t bo_mc;
1785 	volatile uint32_t *bo_cpu;
1786 	int i, j, r, loop, ring_id;
1787 	uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
1788 	amdgpu_va_handle va_handle;
1789 	struct drm_amdgpu_info_hw_ip hw_ip_info;
1790 
1791 	pm4 = calloc(pm4_dw, sizeof(*pm4));
1792 	CU_ASSERT_NOT_EQUAL(pm4, NULL);
1793 
1794 	ib_info = calloc(1, sizeof(*ib_info));
1795 	CU_ASSERT_NOT_EQUAL(ib_info, NULL);
1796 
1797 	ibs_request = calloc(1, sizeof(*ibs_request));
1798 	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
1799 
1800 	r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &hw_ip_info);
1801 	CU_ASSERT_EQUAL(r, 0);
1802 
1803 	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
1804 	CU_ASSERT_EQUAL(r, 0);
1805 
1806 	/* prepare resource */
1807 	resources = calloc(1, sizeof(amdgpu_bo_handle));
1808 	CU_ASSERT_NOT_EQUAL(resources, NULL);
1809 
1810 	for (ring_id = 0; (1 << ring_id) & hw_ip_info.available_rings; ring_id++) {
1811 		loop = 0;
1812 		while(loop < 2) {
1813 			/* allocate UC bo for sDMA use */
1814 			r = amdgpu_bo_alloc_and_map(device_handle,
1815 						    sdma_write_length, 4096,
1816 						    AMDGPU_GEM_DOMAIN_GTT,
1817 						    gtt_flags[loop], &bo, (void**)&bo_cpu,
1818 						    &bo_mc, &va_handle);
1819 			CU_ASSERT_EQUAL(r, 0);
1820 
1821 			/* clear bo */
1822 			memset((void*)bo_cpu, 0, sdma_write_length);
1823 
1824 			resources[0] = bo;
1825 
1826 			/* fill the PM4 stream: DMA constant-fill test */
1827 			i = j = 0;
1828 			if (ip_type == AMDGPU_HW_IP_DMA) {
1829 				if (family_id == AMDGPU_FAMILY_SI) {
1830 					pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_CONSTANT_FILL_SI,
1831 								  0, 0, 0,
1832 								  sdma_write_length / 4);
1833 					pm4[i++] = 0xfffffffc & bo_mc;
1834 					pm4[i++] = 0xdeadbeaf;
1835 					pm4[i++] = (0xffffffff00000000 & bo_mc) >> 16;
1836 				} else {
1837 					pm4[i++] = SDMA_PACKET(SDMA_OPCODE_CONSTANT_FILL, 0,
1838 							       SDMA_CONSTANT_FILL_EXTRA_SIZE(2));
1839 					pm4[i++] = 0xffffffff & bo_mc;
1840 					pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
1841 					pm4[i++] = 0xdeadbeaf;
1842 					if (family_id >= AMDGPU_FAMILY_AI)
1843 						pm4[i++] = sdma_write_length - 1;
1844 					else
1845 						pm4[i++] = sdma_write_length;
1846 				}
1847 			} else if ((ip_type == AMDGPU_HW_IP_GFX) ||
1848 				   (ip_type == AMDGPU_HW_IP_COMPUTE)) {
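				/* CP DMA packet; SRC_SEL(2) sources the constant
				 * from the packet's data field */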
1849 				if (family_id == AMDGPU_FAMILY_SI) {
1850 					pm4[i++] = PACKET3(PACKET3_DMA_DATA_SI, 4);
1851 					pm4[i++] = 0xdeadbeaf;
1852 					pm4[i++] = PACKET3_DMA_DATA_SI_ENGINE(0) |
1853 						   PACKET3_DMA_DATA_SI_DST_SEL(0) |
1854 						   PACKET3_DMA_DATA_SI_SRC_SEL(2) |
1855 						   PACKET3_DMA_DATA_SI_CP_SYNC;
1856 					pm4[i++] = 0xffffffff & bo_mc;
1857 					pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
1858 					pm4[i++] = sdma_write_length;
1859 				} else {
1860 					pm4[i++] = PACKET3(PACKET3_DMA_DATA, 5);
1861 					pm4[i++] = PACKET3_DMA_DATA_ENGINE(0) |
1862 						   PACKET3_DMA_DATA_DST_SEL(0) |
1863 						   PACKET3_DMA_DATA_SRC_SEL(2) |
1864 						   PACKET3_DMA_DATA_CP_SYNC;
1865 					pm4[i++] = 0xdeadbeaf;
1866 					pm4[i++] = 0;
1867 					pm4[i++] = 0xfffffffc & bo_mc;
1868 					pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
1869 					pm4[i++] = sdma_write_length;
1870 				}
1871 			}
1872 
1873 			amdgpu_test_exec_cs_helper(context_handle,
1874 						   ip_type, ring_id,
1875 						   i, pm4,
1876 						   1, resources,
1877 						   ib_info, ibs_request);
1878 
1879 			/* verify the result matches the expected pattern */
1880 			i = 0;
1881 			while(i < (sdma_write_length / 4)) {
1882 				CU_ASSERT_EQUAL(bo_cpu[i++], 0xdeadbeaf);
1883 			}
1884 
1885 			r = amdgpu_bo_unmap_and_free(bo, va_handle, bo_mc,
1886 						     sdma_write_length);
1887 			CU_ASSERT_EQUAL(r, 0);
1888 			loop++;
1889 		}
1890 	}
1891 	/* clean resources */
1892 	free(resources);
1893 	free(ibs_request);
1894 	free(ib_info);
1895 	free(pm4);
1896 
1897 	/* end of test */
1898 	r = amdgpu_cs_ctx_free(context_handle);
1899 	CU_ASSERT_EQUAL(r, 0);
1900 }
1901 
1902 static void amdgpu_command_submission_sdma_const_fill(void)
1903 {
1904 	amdgpu_command_submission_const_fill_helper(AMDGPU_HW_IP_DMA);
1905 }
1906 
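/*
 * Copy one GTT buffer to another via SDMA or CP DMA on every available ring
 * and verify the destination, covering cached and USWC mappings on each side.
 */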
1907 static void amdgpu_command_submission_copy_linear_helper(unsigned ip_type)
1908 {
1909 	const int sdma_write_length = 1024;
1910 	const int pm4_dw = 256;
1911 	amdgpu_context_handle context_handle;
1912 	amdgpu_bo_handle bo1, bo2;
1913 	amdgpu_bo_handle *resources;
1914 	uint32_t *pm4;
1915 	struct amdgpu_cs_ib_info *ib_info;
1916 	struct amdgpu_cs_request *ibs_request;
1917 	uint64_t bo1_mc, bo2_mc;
1918 	volatile unsigned char *bo1_cpu, *bo2_cpu;
1919 	int i, j, r, loop1, loop2, ring_id;
1920 	uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
1921 	amdgpu_va_handle bo1_va_handle, bo2_va_handle;
1922 	struct drm_amdgpu_info_hw_ip hw_ip_info;
1923 
1924 	pm4 = calloc(pm4_dw, sizeof(*pm4));
1925 	CU_ASSERT_NOT_EQUAL(pm4, NULL);
1926 
1927 	ib_info = calloc(1, sizeof(*ib_info));
1928 	CU_ASSERT_NOT_EQUAL(ib_info, NULL);
1929 
1930 	ibs_request = calloc(1, sizeof(*ibs_request));
1931 	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
1932 
1933 	r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &hw_ip_info);
1934 	CU_ASSERT_EQUAL(r, 0);
1935 
1936 	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
1937 	CU_ASSERT_EQUAL(r, 0);
1938 
1939 	/* prepare resource */
1940 	resources = calloc(2, sizeof(amdgpu_bo_handle));
1941 	CU_ASSERT_NOT_EQUAL(resources, NULL);
1942 
1943 	for (ring_id = 0; (1 << ring_id) & hw_ip_info.available_rings; ring_id++) {
1945 		/* run all four cached/USWC mapping combinations; the original
1946 		 * while loops never reset loop2, so later combinations were skipped */
1946 		for (loop1 = 0; loop1 < 2; loop1++) {
1947 			for (loop2 = 0; loop2 < 2; loop2++) {
1948 				/* allocate UC bo1 for SDMA use */
1949 				r = amdgpu_bo_alloc_and_map(device_handle,
1950 							    sdma_write_length, 4096,
1951 							    AMDGPU_GEM_DOMAIN_GTT,
1952 							    gtt_flags[loop1], &bo1,
1953 							    (void**)&bo1_cpu, &bo1_mc,
1954 							    &bo1_va_handle);
1955 				CU_ASSERT_EQUAL(r, 0);
1956 
1957 				/* set bo1 */
1958 				memset((void*)bo1_cpu, 0xaa, sdma_write_length);
1959 
1960 				/* allocate UC bo2 for sDMA use */
1961 				r = amdgpu_bo_alloc_and_map(device_handle,
1962 							    sdma_write_length, 4096,
1963 							    AMDGPU_GEM_DOMAIN_GTT,
1964 							    gtt_flags[loop2], &bo2,
1965 							    (void**)&bo2_cpu, &bo2_mc,
1966 							    &bo2_va_handle);
1967 				CU_ASSERT_EQUAL(r, 0);
1968 
1969 				/* clear bo2 */
1970 				memset((void*)bo2_cpu, 0, sdma_write_length);
1971 
1972 				resources[0] = bo1;
1973 				resources[1] = bo2;
1974 
1975 				/* fill the PM4 stream: DMA copy-linear test */
1976 				i = j = 0;
1977 				if (ip_type == AMDGPU_HW_IP_DMA) {
1978 					if (family_id == AMDGPU_FAMILY_SI) {
1979 						pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_COPY_SI,
1980 									  0, 0, 0,
1981 									  sdma_write_length);
1982 						pm4[i++] = 0xffffffff & bo2_mc;
1983 						pm4[i++] = 0xffffffff & bo1_mc;
1984 						pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
1985 						pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
1986 					} else {
1987 						pm4[i++] = SDMA_PACKET(SDMA_OPCODE_COPY,
1988 								       SDMA_COPY_SUB_OPCODE_LINEAR,
1989 								       0);
1990 						if (family_id >= AMDGPU_FAMILY_AI)
1991 							pm4[i++] = sdma_write_length - 1;
1992 						else
1993 							pm4[i++] = sdma_write_length;
1994 						pm4[i++] = 0;
1995 						pm4[i++] = 0xffffffff & bo1_mc;
1996 						pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
1997 						pm4[i++] = 0xffffffff & bo2_mc;
1998 						pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
1999 					}
2000 				} else if ((ip_type == AMDGPU_HW_IP_GFX) ||
2001 					   (ip_type == AMDGPU_HW_IP_COMPUTE)) {
2002 					if (family_id == AMDGPU_FAMILY_SI) {
2003 						pm4[i++] = PACKET3(PACKET3_DMA_DATA_SI, 4);
2004 						pm4[i++] = 0xfffffffc & bo1_mc;
2005 						pm4[i++] = PACKET3_DMA_DATA_SI_ENGINE(0) |
2006 							   PACKET3_DMA_DATA_SI_DST_SEL(0) |
2007 							   PACKET3_DMA_DATA_SI_SRC_SEL(0) |
2008 							   PACKET3_DMA_DATA_SI_CP_SYNC |
2009 							   (0xffff00000000 & bo1_mc) >> 32;
2010 						pm4[i++] = 0xfffffffc & bo2_mc;
2011 						pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
2012 						pm4[i++] = sdma_write_length;
2013 					} else {
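						/* CP DMA copy; SRC_SEL(0)/DST_SEL(0)
						 * select memory-to-memory */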
2014 						pm4[i++] = PACKET3(PACKET3_DMA_DATA, 5);
2015 						pm4[i++] = PACKET3_DMA_DATA_ENGINE(0) |
2016 							   PACKET3_DMA_DATA_DST_SEL(0) |
2017 							   PACKET3_DMA_DATA_SRC_SEL(0) |
2018 							   PACKET3_DMA_DATA_CP_SYNC;
2019 						pm4[i++] = 0xfffffffc & bo1_mc;
2020 						pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
2021 						pm4[i++] = 0xfffffffc & bo2_mc;
2022 						pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
2023 						pm4[i++] = sdma_write_length;
2024 					}
2025 				}
2026 
2027 				amdgpu_test_exec_cs_helper(context_handle,
2028 							   ip_type, ring_id,
2029 							   i, pm4,
2030 							   2, resources,
2031 							   ib_info, ibs_request);
2032 
2033 				/* verify the result matches the expected pattern */
2034 				i = 0;
2035 				while(i < sdma_write_length) {
2036 					CU_ASSERT_EQUAL(bo2_cpu[i++], 0xaa);
2037 				}
2038 				r = amdgpu_bo_unmap_and_free(bo1, bo1_va_handle, bo1_mc,
2039 							     sdma_write_length);
2040 				CU_ASSERT_EQUAL(r, 0);
2041 				r = amdgpu_bo_unmap_and_free(bo2, bo2_va_handle, bo2_mc,
2042 							     sdma_write_length);
2043 				CU_ASSERT_EQUAL(r, 0);
2045 			}
2047 		}
2048 	}
2049 	/* clean resources */
2050 	free(resources);
2051 	free(ibs_request);
2052 	free(ib_info);
2053 	free(pm4);
2054 
2055 	/* end of test */
2056 	r = amdgpu_cs_ctx_free(context_handle);
2057 	CU_ASSERT_EQUAL(r, 0);
2058 }
2059 
2060 static void amdgpu_command_submission_sdma_copy_linear(void)
2061 {
2062 	amdgpu_command_submission_copy_linear_helper(AMDGPU_HW_IP_DMA);
2063 }
2064 
2065 static void amdgpu_command_submission_sdma(void)
2066 {
2067 	amdgpu_command_submission_sdma_write_linear();
2068 	amdgpu_command_submission_sdma_const_fill();
2069 	amdgpu_command_submission_sdma_copy_linear();
2070 }
2071 
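/*
 * Submit two GFX command streams, each with a CE+DE IB pair, then wait on
 * both fences, either for all of them or only for the first to signal.
 */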
2072 static void amdgpu_command_submission_multi_fence_wait_all(bool wait_all)
2073 {
2074 	amdgpu_context_handle context_handle;
2075 	amdgpu_bo_handle ib_result_handle, ib_result_ce_handle;
2076 	void *ib_result_cpu, *ib_result_ce_cpu;
2077 	uint64_t ib_result_mc_address, ib_result_ce_mc_address;
2078 	struct amdgpu_cs_request ibs_request[2] = {0};
2079 	struct amdgpu_cs_ib_info ib_info[2];
2080 	struct amdgpu_cs_fence fence_status[2] = {0};
2081 	uint32_t *ptr;
2082 	uint32_t expired;
2083 	amdgpu_bo_list_handle bo_list;
2084 	amdgpu_va_handle va_handle, va_handle_ce;
2085 	int r;
2086 	int i = 0, ib_cs_num = 2;
2087 	struct drm_amdgpu_info_hw_ip info;
2088 
2089 	r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_GFX, 0, &info);
2090 	CU_ASSERT_EQUAL(r, 0);
2091 
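	/* the CE/DE counter packets used below are not supported on GFX v11+ */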
2092 	if (info.hw_ip_version_major >= 11)
2093 		return;
2094 
2095 	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
2096 	CU_ASSERT_EQUAL(r, 0);
2097 
2098 	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
2099 				    AMDGPU_GEM_DOMAIN_GTT, 0,
2100 				    &ib_result_handle, &ib_result_cpu,
2101 				    &ib_result_mc_address, &va_handle);
2102 	CU_ASSERT_EQUAL(r, 0);
2103 
2104 	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
2105 				    AMDGPU_GEM_DOMAIN_GTT, 0,
2106 				    &ib_result_ce_handle, &ib_result_ce_cpu,
2107 				    &ib_result_ce_mc_address, &va_handle_ce);
2108 	CU_ASSERT_EQUAL(r, 0);
2109 
2110 	r = amdgpu_get_bo_list(device_handle, ib_result_handle,
2111 			       ib_result_ce_handle, &bo_list);
2112 	CU_ASSERT_EQUAL(r, 0);
2113 
2114 	memset(ib_info, 0, 2 * sizeof(struct amdgpu_cs_ib_info));
2115 
2116 	/* IT_SET_CE_DE_COUNTERS */
2117 	ptr = ib_result_ce_cpu;
2118 	if (family_id != AMDGPU_FAMILY_SI) {
2119 		ptr[i++] = 0xc0008900;
2120 		ptr[i++] = 0;
2121 	}
2122 	ptr[i++] = 0xc0008400;
2123 	ptr[i++] = 1;
2124 	ib_info[0].ib_mc_address = ib_result_ce_mc_address;
2125 	ib_info[0].size = i;
2126 	ib_info[0].flags = AMDGPU_IB_FLAG_CE;
2127 
2128 	/* IT_WAIT_ON_CE_COUNTER */
2129 	ptr = ib_result_cpu;
2130 	ptr[0] = 0xc0008600;
2131 	ptr[1] = 0x00000001;
2132 	ib_info[1].ib_mc_address = ib_result_mc_address;
2133 	ib_info[1].size = 2;
2134 
2135 	for (i = 0; i < ib_cs_num; i++) {
2136 		ibs_request[i].ip_type = AMDGPU_HW_IP_GFX;
2137 		ibs_request[i].number_of_ibs = 2;
2138 		ibs_request[i].ibs = ib_info;
2139 		ibs_request[i].resources = bo_list;
2140 		ibs_request[i].fence_info.handle = NULL;
2141 	}
2142 
2143 	r = amdgpu_cs_submit(context_handle, 0, ibs_request, ib_cs_num);
2145 	CU_ASSERT_EQUAL(r, 0);
2146 
2147 	for (i = 0; i < ib_cs_num; i++) {
2148 		fence_status[i].context = context_handle;
2149 		fence_status[i].ip_type = AMDGPU_HW_IP_GFX;
2150 		fence_status[i].fence = ibs_request[i].seq_no;
2151 	}
2152 
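	/* with wait_all == false a single signaled fence completes the wait */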
2153 	r = amdgpu_cs_wait_fences(fence_status, ib_cs_num, wait_all,
2154 				AMDGPU_TIMEOUT_INFINITE,
2155 				&expired, NULL);
2156 	CU_ASSERT_EQUAL(r, 0);
2157 
2158 	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
2159 				     ib_result_mc_address, 4096);
2160 	CU_ASSERT_EQUAL(r, 0);
2161 
2162 	r = amdgpu_bo_unmap_and_free(ib_result_ce_handle, va_handle_ce,
2163 				     ib_result_ce_mc_address, 4096);
2164 	CU_ASSERT_EQUAL(r, 0);
2165 
2166 	r = amdgpu_bo_list_destroy(bo_list);
2167 	CU_ASSERT_EQUAL(r, 0);
2168 
2169 	r = amdgpu_cs_ctx_free(context_handle);
2170 	CU_ASSERT_EQUAL(r, 0);
2171 }
2172 
2173 static void amdgpu_command_submission_multi_fence(void)
2174 {
2175 	amdgpu_command_submission_multi_fence_wait_all(true);
2176 	amdgpu_command_submission_multi_fence_wait_all(false);
2177 }
2178 
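/*
 * Wrap anonymous user memory as a userptr BO, write it through SDMA and
 * verify from the CPU; a fork()ed child scribbles on its copy to check that
 * copy-on-write does not disturb the parent's buffers.
 */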
2179 static void amdgpu_userptr_test(void)
2180 {
2181 	int i, r, j;
2182 	uint32_t *pm4 = NULL;
2183 	uint64_t bo_mc;
2184 	void *ptr = NULL;
2185 	int pm4_dw = 256;
2186 	int sdma_write_length = 4;
2187 	amdgpu_bo_handle handle;
2188 	amdgpu_context_handle context_handle;
2189 	struct amdgpu_cs_ib_info *ib_info;
2190 	struct amdgpu_cs_request *ibs_request;
2191 	amdgpu_bo_handle buf_handle;
2192 	amdgpu_va_handle va_handle;
2193 
2194 	pm4 = calloc(pm4_dw, sizeof(*pm4));
2195 	CU_ASSERT_NOT_EQUAL(pm4, NULL);
2196 
2197 	ib_info = calloc(1, sizeof(*ib_info));
2198 	CU_ASSERT_NOT_EQUAL(ib_info, NULL);
2199 
2200 	ibs_request = calloc(1, sizeof(*ibs_request));
2201 	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
2202 
2203 	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
2204 	CU_ASSERT_EQUAL(r, 0);
2205 
2206 	r = posix_memalign(&ptr, sysconf(_SC_PAGE_SIZE), BUFFER_SIZE);
	CU_ASSERT_EQUAL(r, 0);
2207 	CU_ASSERT_NOT_EQUAL(ptr, NULL);
2208 	memset(ptr, 0, BUFFER_SIZE);
2209 
2210 	r = amdgpu_create_bo_from_user_mem(device_handle,
2211 					   ptr, BUFFER_SIZE, &buf_handle);
2212 	CU_ASSERT_EQUAL(r, 0);
2213 
2214 	r = amdgpu_va_range_alloc(device_handle,
2215 				  amdgpu_gpu_va_range_general,
2216 				  BUFFER_SIZE, 1, 0, &bo_mc,
2217 				  &va_handle, 0);
2218 	CU_ASSERT_EQUAL(r, 0);
2219 
2220 	r = amdgpu_bo_va_op(buf_handle, 0, BUFFER_SIZE, bo_mc, 0, AMDGPU_VA_OP_MAP);
2221 	CU_ASSERT_EQUAL(r, 0);
2222 
2223 	handle = buf_handle;
2224 
2225 	j = i = 0;
2226 
2227 	if (family_id == AMDGPU_FAMILY_SI)
2228 		pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_WRITE, 0, 0, 0,
2229 				sdma_write_length);
2230 	else
2231 		pm4[i++] = SDMA_PACKET(SDMA_OPCODE_WRITE,
2232 				SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
2233 	pm4[i++] = 0xffffffff & bo_mc;
2234 	pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
2235 	if (family_id >= AMDGPU_FAMILY_AI)
2236 		pm4[i++] = sdma_write_length - 1;
2237 	else if (family_id != AMDGPU_FAMILY_SI)
2238 		pm4[i++] = sdma_write_length;
2239 
2240 	while (j++ < sdma_write_length)
2241 		pm4[i++] = 0xdeadbeaf;
2242 
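	/* the child's write must land in its COW copy of pm4, leaving the
	 * parent's command stream intact */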
2243 	if (!fork()) {
2244 		pm4[0] = 0x0;
2245 		exit(0);
2246 	}
2247 
2248 	amdgpu_test_exec_cs_helper(context_handle,
2249 				   AMDGPU_HW_IP_DMA, 0,
2250 				   i, pm4,
2251 				   1, &handle,
2252 				   ib_info, ibs_request);
2253 	i = 0;
2254 	while (i < sdma_write_length) {
2255 		CU_ASSERT_EQUAL(((int*)ptr)[i++], 0xdeadbeaf);
2256 	}
2257 	free(ibs_request);
2258 	free(ib_info);
2259 	free(pm4);
2260 
2261 	r = amdgpu_bo_va_op(buf_handle, 0, BUFFER_SIZE, bo_mc, 0, AMDGPU_VA_OP_UNMAP);
2262 	CU_ASSERT_EQUAL(r, 0);
2263 	r = amdgpu_va_range_free(va_handle);
2264 	CU_ASSERT_EQUAL(r, 0);
2265 	r = amdgpu_bo_free(buf_handle);
2266 	CU_ASSERT_EQUAL(r, 0);
2267 	free(ptr);
2268 
2269 	r = amdgpu_cs_ctx_free(context_handle);
2270 	CU_ASSERT_EQUAL(r, 0);
2271 
2272 	wait(NULL);
2273 }
2274 
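/*
 * Submit a compute dispatch on one context, then a WRITE_DATA on another
 * context that declares the first submission as a dependency; the final
 * memory value proves the dependency ordered the two submissions.
 */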
2275 static void amdgpu_sync_dependency_test(void)
2276 {
2277 	amdgpu_context_handle context_handle[2];
2278 	amdgpu_bo_handle ib_result_handle;
2279 	void *ib_result_cpu;
2280 	uint64_t ib_result_mc_address;
2281 	struct amdgpu_cs_request ibs_request;
2282 	struct amdgpu_cs_ib_info ib_info;
2283 	struct amdgpu_cs_fence fence_status;
2284 	uint32_t expired;
2285 	int i, j, r;
2286 	amdgpu_bo_list_handle bo_list;
2287 	amdgpu_va_handle va_handle;
2288 	static uint32_t *ptr;
2289 	uint64_t seq_no;
2290 
2291 	r = amdgpu_cs_ctx_create(device_handle, &context_handle[0]);
2292 	CU_ASSERT_EQUAL(r, 0);
2293 	r = amdgpu_cs_ctx_create(device_handle, &context_handle[1]);
2294 	CU_ASSERT_EQUAL(r, 0);
2295 
2296 	r = amdgpu_bo_alloc_and_map(device_handle, 8192, 4096,
2297 			AMDGPU_GEM_DOMAIN_GTT, 0,
2298 						    &ib_result_handle, &ib_result_cpu,
2299 						    &ib_result_mc_address, &va_handle);
2300 	CU_ASSERT_EQUAL(r, 0);
2301 
2302 	r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL,
2303 			       &bo_list);
2304 	CU_ASSERT_EQUAL(r, 0);
2305 
2306 	ptr = ib_result_cpu;
2307 	i = 0;
2308 
2309 	memcpy(ptr + CODE_OFFSET, shader_bin, sizeof(shader_bin));
2310 
2311 	/* Dispatch minimal init config and verify it's executed */
2312 	ptr[i++] = PACKET3(PKT3_CONTEXT_CONTROL, 1);
2313 	ptr[i++] = 0x80000000;
2314 	ptr[i++] = 0x80000000;
2315 
2316 	ptr[i++] = PACKET3(PKT3_CLEAR_STATE, 0);
2317 	ptr[i++] = 0x80000000;
2318 
2319 
2320 	/* Program compute regs */
2321 	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2);
2322 	ptr[i++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
2323 	ptr[i++] = (ib_result_mc_address + CODE_OFFSET * 4) >> 8;
2324 	ptr[i++] = (ib_result_mc_address + CODE_OFFSET * 4) >> 40;
2325 
2326 
2327 	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2);
2328 	ptr[i++] = mmCOMPUTE_PGM_RSRC1 - PACKET3_SET_SH_REG_START;
2329 	/*
2330 	 * 002c0040 COMPUTE_PGM_RSRC1 <- VGPRS = 0
2331 	 *                               SGPRS = 1
2332 	 *                               PRIORITY = 0
2333 	 *                               FLOAT_MODE = 192 (0xc0)
2334 	 *                               PRIV = 0
2335 	 *                               DX10_CLAMP = 1
2336 	 *                               DEBUG_MODE = 0
2337 	 *                               IEEE_MODE = 0
2338 	 *                               BULKY = 0
2339 	 *                               CDBG_USER = 0
2340 	 *
2341 	 */
2342 	ptr[i++] = 0x002c0040;
2343 
2344 
2345 	/*
2346 	 * 00000010 COMPUTE_PGM_RSRC2 <- SCRATCH_EN = 0
2347 	 *                               USER_SGPR = 8
2348 	 *                               TRAP_PRESENT = 0
2349 	 *                               TGID_X_EN = 0
2350 	 *                               TGID_Y_EN = 0
2351 	 *                               TGID_Z_EN = 0
2352 	 *                               TG_SIZE_EN = 0
2353 	 *                               TIDIG_COMP_CNT = 0
2354 	 *                               EXCP_EN_MSB = 0
2355 	 *                               LDS_SIZE = 0
2356 	 *                               EXCP_EN = 0
2357 	 *
2358 	 */
2359 	ptr[i++] = 0x00000010;
2360 
2361 
2362 	/*
2363 	 * 00000100 COMPUTE_TMPRING_SIZE <- WAVES = 256 (0x100)
2364 	 *                                  WAVESIZE = 0
2365 	 *
2366 	 */
2367 	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
2368 	ptr[i++] = mmCOMPUTE_TMPRING_SIZE - PACKET3_SET_SH_REG_START;
2369 	ptr[i++] = 0x00000100;
2370 
2371 	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2);
2372 	ptr[i++] = mmCOMPUTE_USER_DATA_0 - PACKET3_SET_SH_REG_START;
2373 	ptr[i++] = 0xffffffff & (ib_result_mc_address + DATA_OFFSET * 4);
2374 	ptr[i++] = (0xffffffff00000000 & (ib_result_mc_address + DATA_OFFSET * 4)) >> 32;
2375 
2376 	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
2377 	ptr[i++] = mmCOMPUTE_RESOURCE_LIMITS - PACKET3_SET_SH_REG_START;
2378 	ptr[i++] = 0;
2379 
2380 	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 3);
2381 	ptr[i++] = mmCOMPUTE_NUM_THREAD_X - PACKET3_SET_SH_REG_START;
2382 	ptr[i++] = 1;
2383 	ptr[i++] = 1;
2384 	ptr[i++] = 1;
2385 
2386 
2387 	/* Dispatch */
2388 	ptr[i++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
2389 	ptr[i++] = 1;
2390 	ptr[i++] = 1;
2391 	ptr[i++] = 1;
2392 	ptr[i++] = 0x00000045; /* DISPATCH DIRECT field */
2393 
2394 
2395 	while (i & 7)
2396 		ptr[i++] = 0xffff1000; /* type3 nop packet */
2397 
2398 	memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info));
2399 	ib_info.ib_mc_address = ib_result_mc_address;
2400 	ib_info.size = i;
2401 
2402 	memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request));
2403 	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
2404 	ibs_request.ring = 0;
2405 	ibs_request.number_of_ibs = 1;
2406 	ibs_request.ibs = &ib_info;
2407 	ibs_request.resources = bo_list;
2408 	ibs_request.fence_info.handle = NULL;
2409 
2410 	r = amdgpu_cs_submit(context_handle[1], 0, &ibs_request, 1);
2411 	CU_ASSERT_EQUAL(r, 0);
2412 	seq_no = ibs_request.seq_no;
2413 
2416 	/* Prepare second command with dependency on the first */
2417 	j = i;
2418 	ptr[i++] = PACKET3(PACKET3_WRITE_DATA, 3);
2419 	ptr[i++] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
2420 	ptr[i++] =          0xfffffffc & (ib_result_mc_address + DATA_OFFSET * 4);
2421 	ptr[i++] = (0xffffffff00000000 & (ib_result_mc_address + DATA_OFFSET * 4)) >> 32;
2422 	ptr[i++] = 99;
2423 
2424 	while (i & 7)
2425 		ptr[i++] = 0xffff1000; /* type3 nop packet */
2426 
2427 	memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info));
2428 	ib_info.ib_mc_address = ib_result_mc_address + j * 4;
2429 	ib_info.size = i - j;
2430 
2431 	memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request));
2432 	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
2433 	ibs_request.ring = 0;
2434 	ibs_request.number_of_ibs = 1;
2435 	ibs_request.ibs = &ib_info;
2436 	ibs_request.resources = bo_list;
2437 	ibs_request.fence_info.handle = NULL;
2438 
2439 	ibs_request.number_of_dependencies = 1;
2440 
2441 	ibs_request.dependencies = calloc(1, sizeof(*ibs_request.dependencies));
2442 	ibs_request.dependencies[0].context = context_handle[1];
2443 	ibs_request.dependencies[0].ip_instance = 0;
2444 	ibs_request.dependencies[0].ring = 0;
2445 	ibs_request.dependencies[0].fence = seq_no;
2446 
2447 
2448 	r = amdgpu_cs_submit(context_handle[0], 0, &ibs_request, 1);
2449 	CU_ASSERT_EQUAL(r, 0);
2450 
2451 
2452 	memset(&fence_status, 0, sizeof(struct amdgpu_cs_fence));
2453 	fence_status.context = context_handle[0];
2454 	fence_status.ip_type = AMDGPU_HW_IP_GFX;
2455 	fence_status.ip_instance = 0;
2456 	fence_status.ring = 0;
2457 	fence_status.fence = ibs_request.seq_no;
2458 
2459 	r = amdgpu_cs_query_fence_status(&fence_status,
2460 					 AMDGPU_TIMEOUT_INFINITE, 0, &expired);
2461 	CU_ASSERT_EQUAL(r, 0);
2462 
2463 	/* Expect the second command to wait for shader to complete */
2464 	CU_ASSERT_EQUAL(ptr[DATA_OFFSET], 99);
2465 
2466 	r = amdgpu_bo_list_destroy(bo_list);
2467 	CU_ASSERT_EQUAL(r, 0);
2468 
2469 	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
2470 				     ib_result_mc_address, 4096);
2471 	CU_ASSERT_EQUAL(r, 0);
2472 
2473 	r = amdgpu_cs_ctx_free(context_handle[0]);
2474 	CU_ASSERT_EQUAL(r, 0);
2475 	r = amdgpu_cs_ctx_free(context_handle[1]);
2476 	CU_ASSERT_EQUAL(r, 0);
2477 
2478 	free(ibs_request.dependencies);
2479 }
2480 
2481 static void amdgpu_compute_dispatch_test(void)
2482 {
2483 	amdgpu_test_dispatch_helper(device_handle, AMDGPU_HW_IP_COMPUTE);
2484 }
2485 static void amdgpu_gfx_dispatch_test(void)
2486 {
2487 	amdgpu_test_dispatch_helper(device_handle, AMDGPU_HW_IP_GFX);
2488 }
2489 
2490 static void amdgpu_draw_test(void)
2491 {
2492 	amdgpu_test_draw_helper(device_handle);
2493 }
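/*
 * Force a GPU recovery through the amdgpu_gpu_recover debugfs entry, check
 * that the context reports a reset, then re-run the dispatch tests to make
 * sure the GPU came back.
 */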
2494 static void amdgpu_gpu_reset_test(void)
2495 {
2496 	int r;
2497 	char debugfs_path[256], tmp[10];
2498 	int fd;
2499 	struct stat sbuf;
2500 	amdgpu_context_handle context_handle;
2501 	uint32_t hang_state, hangs;
2502 
2503 	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
2504 	CU_ASSERT_EQUAL(r, 0);
2505 
2506 	r = fstat(drm_amdgpu[0], &sbuf);
2507 	CU_ASSERT_EQUAL(r, 0);
2508 
2509 	sprintf(debugfs_path, "/sys/kernel/debug/dri/%d/amdgpu_gpu_recover", minor(sbuf.st_rdev));
2510 	fd = open(debugfs_path, O_RDONLY);
2511 	CU_ASSERT(fd >= 0);
2512 
2513 	r = read(fd, tmp, sizeof(tmp)/sizeof(char));
2514 	CU_ASSERT(r > 0);
2515 
2516 	r = amdgpu_cs_query_reset_state(context_handle, &hang_state, &hangs);
2517 	CU_ASSERT_EQUAL(r, 0);
2518 	CU_ASSERT_EQUAL(hang_state, AMDGPU_CTX_UNKNOWN_RESET);
2519 
2520 	close(fd);
2521 	r = amdgpu_cs_ctx_free(context_handle);
2522 	CU_ASSERT_EQUAL(r, 0);
2523 
2524 	amdgpu_compute_dispatch_test();
2525 	amdgpu_gfx_dispatch_test();
2526 }
2527 
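/*
 * Verify GET/SET of the per-context stable pstate: NONE by default, then
 * set and read back PEAK.
 */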
2528 static void amdgpu_stable_pstate_test(void)
2529 {
2530 	int r;
2531 	amdgpu_context_handle context_handle;
2532 	uint32_t current_pstate = 0, new_pstate = 0;
2533 
2534 	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
2535 	CU_ASSERT_EQUAL(r, 0);
2536 
2537 	r = amdgpu_cs_ctx_stable_pstate(context_handle,
2538 					AMDGPU_CTX_OP_GET_STABLE_PSTATE,
2539 					0, &current_pstate);
2540 	CU_ASSERT_EQUAL(r, 0);
2541 	CU_ASSERT_EQUAL(current_pstate, AMDGPU_CTX_STABLE_PSTATE_NONE);
2542 
2543 	r = amdgpu_cs_ctx_stable_pstate(context_handle,
2544 					AMDGPU_CTX_OP_SET_STABLE_PSTATE,
2545 					AMDGPU_CTX_STABLE_PSTATE_PEAK, NULL);
2546 	CU_ASSERT_EQUAL(r, 0);
2547 
2548 	r = amdgpu_cs_ctx_stable_pstate(context_handle,
2549 					AMDGPU_CTX_OP_GET_STABLE_PSTATE,
2550 					0, &new_pstate);
2551 	CU_ASSERT_EQUAL(r, 0);
2552 	CU_ASSERT_EQUAL(new_pstate, AMDGPU_CTX_STABLE_PSTATE_PEAK);
2553 
2554 	r = amdgpu_cs_ctx_free(context_handle);
2555 	CU_ASSERT_EQUAL(r, 0);
2556 }
2557