/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <unistd.h>
#include <sys/types.h>
#ifdef MAJOR_IN_SYSMACROS
#include <sys/sysmacros.h>
#endif
#include <sys/stat.h>
#include <fcntl.h>
#if HAVE_ALLOCA_H
# include <alloca.h>
#endif
#include <sys/wait.h>

#include "CUnit/Basic.h"

#include "amdgpu_test.h"
#include "amdgpu_drm.h"
#include "amdgpu_internal.h"
#include "util_math.h"
static amdgpu_device_handle device_handle;
static uint32_t major_version;
static uint32_t minor_version;
static uint32_t family_id;
static uint32_t chip_id;
static uint32_t chip_rev;

static void amdgpu_query_info_test(void);
static void amdgpu_command_submission_gfx(void);
static void amdgpu_command_submission_compute(void);
static void amdgpu_command_submission_multi_fence(void);
static void amdgpu_command_submission_sdma(void);
static void amdgpu_userptr_test(void);
static void amdgpu_semaphore_test(void);
static void amdgpu_sync_dependency_test(void);
static void amdgpu_bo_eviction_test(void);
static void amdgpu_compute_dispatch_test(void);
static void amdgpu_gfx_dispatch_test(void);
static void amdgpu_draw_test(void);
static void amdgpu_gpu_reset_test(void);
static void amdgpu_stable_pstate_test(void);

static void amdgpu_command_submission_write_linear_helper(unsigned ip_type);
static void amdgpu_command_submission_const_fill_helper(unsigned ip_type);
static void amdgpu_command_submission_copy_linear_helper(unsigned ip_type);
static void amdgpu_test_exec_cs_helper(amdgpu_context_handle context_handle,
				       unsigned ip_type,
				       int instance, int pm4_dw, uint32_t *pm4_src,
				       int res_cnt, amdgpu_bo_handle *resources,
				       struct amdgpu_cs_ib_info *ib_info,
				       struct amdgpu_cs_request *ibs_request);

CU_TestInfo basic_tests[] = {
	{ "Query Info Test", amdgpu_query_info_test },
	{ "Userptr Test", amdgpu_userptr_test },
	{ "bo eviction Test", amdgpu_bo_eviction_test },
	{ "Command submission Test (GFX)", amdgpu_command_submission_gfx },
	{ "Command submission Test (Compute)", amdgpu_command_submission_compute },
	{ "Command submission Test (Multi-Fence)", amdgpu_command_submission_multi_fence },
	{ "Command submission Test (SDMA)", amdgpu_command_submission_sdma },
	{ "SW semaphore Test", amdgpu_semaphore_test },
	{ "Sync dependency Test", amdgpu_sync_dependency_test },
	{ "Dispatch Test (Compute)", amdgpu_compute_dispatch_test },
	{ "Dispatch Test (GFX)", amdgpu_gfx_dispatch_test },
	{ "Draw Test", amdgpu_draw_test },
	{ "GPU reset Test", amdgpu_gpu_reset_test },
	{ "Stable pstate Test", amdgpu_stable_pstate_test },
	CU_TEST_INFO_NULL,
};
#define BUFFER_SIZE (MAX2(8 * 1024, getpagesize()))
#define SDMA_PKT_HEADER_op_offset 0
#define SDMA_PKT_HEADER_op_mask   0x000000FF
#define SDMA_PKT_HEADER_op_shift  0
#define SDMA_PKT_HEADER_OP(x) (((x) & SDMA_PKT_HEADER_op_mask) << SDMA_PKT_HEADER_op_shift)
#define SDMA_OPCODE_CONSTANT_FILL  11
#	define SDMA_CONSTANT_FILL_EXTRA_SIZE(x)  ((x) << 14)
	/* 0 = byte fill
	 * 2 = DW fill
	 */
#define SDMA_PACKET(op, sub_op, e)	((((e) & 0xFFFF) << 16) |	\
					 (((sub_op) & 0xFF) << 8) |	\
					 (((op) & 0xFF) << 0))
#define SDMA_OPCODE_WRITE  2
#	define SDMA_WRITE_SUB_OPCODE_LINEAR  0
#	define SDMA_WRITE_SUB_OPCODE_TILED   1

#define SDMA_OPCODE_COPY  1
#	define SDMA_COPY_SUB_OPCODE_LINEAR  0
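/* Worked example, derived from the encoding above: the header DW for a
 * linear SDMA copy is
 *   SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0)
 *     = ((0 & 0xFFFF) << 16) | ((0 & 0xFF) << 8) | ((1 & 0xFF) << 0)
 *     = 0x00000001
 */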

#define SDMA_OPCODE_ATOMIC  10
#	define SDMA_ATOMIC_LOOP(x)	((x) << 0)
	/* 0 - single_pass_atomic.
	 * 1 - loop_until_compare_satisfied.
	 */
#	define SDMA_ATOMIC_TMZ(x)	((x) << 2)
	/* 0 - non-TMZ.
	 * 1 - TMZ.
	 */
#	define SDMA_ATOMIC_OPCODE(x)	((x) << 9)
	/* TC_OP_ATOMIC_CMPSWAP_RTN_32 0x00000008
	 * same as Packet 3
	 */
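/* For instance, the secure write-linear test below builds its SDMA atomic
 * header as
 *   SDMA_PACKET(SDMA_OPCODE_ATOMIC, 0,
 *               SDMA_ATOMIC_LOOP(1) | SDMA_ATOMIC_TMZ(1) |
 *               SDMA_ATOMIC_OPCODE(TC_OP_ATOMIC_CMPSWAP_RTN_32)),
 * i.e. a looping, TMZ 32-bit compare-and-swap
 * (TC_OP_ATOMIC_CMPSWAP_RTN_32 is defined with the PM4 atomics below).
 */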

#define GFX_COMPUTE_NOP  0xffff1000
#define SDMA_NOP  0x0

/* PM4 */
#define PACKET_TYPE0	0
#define PACKET_TYPE1	1
#define PACKET_TYPE2	2
#define PACKET_TYPE3	3

#define CP_PACKET_GET_TYPE(h) (((h) >> 30) & 3)
#define CP_PACKET_GET_COUNT(h) (((h) >> 16) & 0x3FFF)
#define CP_PACKET0_GET_REG(h) ((h) & 0xFFFF)
#define CP_PACKET3_GET_OPCODE(h) (((h) >> 8) & 0xFF)
#define PACKET0(reg, n)	((PACKET_TYPE0 << 30) |	\
			 ((reg) & 0xFFFF) |	\
			 ((n) & 0x3FFF) << 16)
#define CP_PACKET2	0x80000000
#define PACKET2_PAD_SHIFT	0
#define PACKET2_PAD_MASK	(0x3fffffff << 0)

#define PACKET2(v)	(CP_PACKET2 | REG_SET(PACKET2_PAD, (v)))

#define PACKET3(op, n)	((PACKET_TYPE3 << 30) |	\
			 (((op) & 0xFF) << 8) |	\
			 ((n) & 0x3FFF) << 16)
#define PACKET3_COMPUTE(op, n) PACKET3(op, n) | (1 << 1)

/* Packet 3 types */
#define PACKET3_NOP	0x10
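/* For example, the compute NOP emitted later in this file,
 * PACKET3(PACKET3_NOP, 14), encodes as
 *   (3 << 30) | (0x10 << 8) | (14 << 16) = 0xC00E1000,
 * a type-3 packet with opcode 0x10 and a count field of 14; the
 * CP_PACKET_GET_* macros above invert this encoding.
 */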

#define PACKET3_WRITE_DATA	0x37
#	define WRITE_DATA_DST_SEL(x)	((x) << 8)
	/* 0 - register
	 * 1 - memory (sync - via GRBM)
	 * 2 - gl2
	 * 3 - gds
	 * 4 - reserved
	 * 5 - memory (async - direct)
	 */
#	define WR_ONE_ADDR	(1 << 16)
#	define WR_CONFIRM	(1 << 20)
#	define WRITE_DATA_CACHE_POLICY(x)	((x) << 25)
	/* 0 - LRU
	 * 1 - Stream
	 */
#	define WRITE_DATA_ENGINE_SEL(x)	((x) << 30)
	/* 0 - me
	 * 1 - pfp
	 * 2 - ce
	 */
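/* Example control DW used by the write-linear helper below:
 *   WRITE_DATA_DST_SEL(5) | WR_CONFIRM = (5 << 8) | (1 << 20) = 0x00100500,
 * i.e. an asynchronous memory write with write confirmation.
 */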

#define PACKET3_ATOMIC_MEM	0x1E
#	define TC_OP_ATOMIC_CMPSWAP_RTN_32	0x00000008
#	define ATOMIC_MEM_COMMAND(x)	((x) << 8)
	/* 0 - single_pass_atomic.
	 * 1 - loop_until_compare_satisfied.
	 */
#	define ATOMIC_MEM_CACHEPOLICY(x)	((x) << 25)
	/* 0 - lru.
	 * 1 - stream.
	 */
#	define ATOMIC_MEM_ENGINESEL(x)	((x) << 30)
	/* 0 - micro_engine.
	 */

#define PACKET3_DMA_DATA	0x50
/* 1. header
 * 2. CONTROL
 * 3. SRC_ADDR_LO or DATA [31:0]
 * 4. SRC_ADDR_HI [31:0]
 * 5. DST_ADDR_LO [31:0]
 * 6. DST_ADDR_HI [7:0]
 * 7. COMMAND [30:21] | BYTE_COUNT [20:0]
 */
/* CONTROL */
#	define PACKET3_DMA_DATA_ENGINE(x)	((x) << 0)
	/* 0 - ME
	 * 1 - PFP
	 */
#	define PACKET3_DMA_DATA_SRC_CACHE_POLICY(x)	((x) << 13)
	/* 0 - LRU
	 * 1 - Stream
	 * 2 - Bypass
	 */
#	define PACKET3_DMA_DATA_SRC_VOLATILE	(1 << 15)
#	define PACKET3_DMA_DATA_DST_SEL(x)	((x) << 20)
	/* 0 - DST_ADDR using DAS
	 * 1 - GDS
	 * 3 - DST_ADDR using L2
	 */
#	define PACKET3_DMA_DATA_DST_CACHE_POLICY(x)	((x) << 25)
	/* 0 - LRU
	 * 1 - Stream
	 * 2 - Bypass
	 */
#	define PACKET3_DMA_DATA_DST_VOLATILE	(1 << 27)
#	define PACKET3_DMA_DATA_SRC_SEL(x)	((x) << 29)
	/* 0 - SRC_ADDR using SAS
	 * 1 - GDS
	 * 2 - DATA
	 * 3 - SRC_ADDR using L2
	 */
#	define PACKET3_DMA_DATA_CP_SYNC	(1 << 31)
/* COMMAND */
#	define PACKET3_DMA_DATA_DIS_WC	(1 << 21)
#	define PACKET3_DMA_DATA_CMD_SRC_SWAP(x)	((x) << 22)
	/* 0 - none
	 * 1 - 8 in 16
	 * 2 - 8 in 32
	 * 3 - 8 in 64
	 */
#	define PACKET3_DMA_DATA_CMD_DST_SWAP(x)	((x) << 24)
	/* 0 - none
	 * 1 - 8 in 16
	 * 2 - 8 in 32
	 * 3 - 8 in 64
	 */
#	define PACKET3_DMA_DATA_CMD_SAS	(1 << 26)
	/* 0 - memory
	 * 1 - register
	 */
#	define PACKET3_DMA_DATA_CMD_DAS	(1 << 27)
	/* 0 - memory
	 * 1 - register
	 */
#	define PACKET3_DMA_DATA_CMD_SAIC	(1 << 28)
#	define PACKET3_DMA_DATA_CMD_DAIC	(1 << 29)
#	define PACKET3_DMA_DATA_CMD_RAW_WAIT	(1 << 30)

#define SDMA_PACKET_SI(op, b, t, s, cnt)	((((op) & 0xF) << 28) |	\
						 (((b) & 0x1) << 26) |	\
						 (((t) & 0x1) << 23) |	\
						 (((s) & 0x1) << 22) |	\
						 (((cnt) & 0xFFFFF) << 0))
#define SDMA_OPCODE_COPY_SI	3
#define SDMA_OPCODE_CONSTANT_FILL_SI	13
#define SDMA_NOP_SI  0xf
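/* Worked example: the SI copy header built in the eviction test below,
 *   SDMA_PACKET_SI(SDMA_OPCODE_COPY_SI, 0, 0, 0, count),
 * packs opcode 3 into bits [31:28] and the transfer count into bits [19:0].
 */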
#define GFX_COMPUTE_NOP_SI  0x80000000
#define PACKET3_DMA_DATA_SI	0x41
#	define PACKET3_DMA_DATA_SI_ENGINE(x)	((x) << 27)
	/* 0 - ME
	 * 1 - PFP
	 */
#	define PACKET3_DMA_DATA_SI_DST_SEL(x)	((x) << 20)
	/* 0 - DST_ADDR using DAS
	 * 1 - GDS
	 * 3 - DST_ADDR using L2
	 */
#	define PACKET3_DMA_DATA_SI_SRC_SEL(x)	((x) << 29)
	/* 0 - SRC_ADDR using SAS
	 * 1 - GDS
	 * 2 - DATA
	 * 3 - SRC_ADDR using L2
	 */
#	define PACKET3_DMA_DATA_SI_CP_SYNC	(1 << 31)


#define PKT3_CONTEXT_CONTROL	0x28
#	define CONTEXT_CONTROL_LOAD_ENABLE(x)	(((unsigned)(x) & 0x1) << 31)
#	define CONTEXT_CONTROL_LOAD_CE_RAM(x)	(((unsigned)(x) & 0x1) << 28)
#	define CONTEXT_CONTROL_SHADOW_ENABLE(x)	(((unsigned)(x) & 0x1) << 31)

#define PKT3_CLEAR_STATE	0x12

#define PKT3_SET_SH_REG	0x76
#	define PACKET3_SET_SH_REG_START	0x00002c00

#define PKT3_SET_SH_REG_INDEX	0x9B

#define PACKET3_DISPATCH_DIRECT	0x15
#define PACKET3_EVENT_WRITE	0x46
#define PACKET3_ACQUIRE_MEM	0x58
#define PACKET3_SET_CONTEXT_REG	0x69
#define PACKET3_SET_UCONFIG_REG	0x79
#define PACKET3_DRAW_INDEX_AUTO	0x2D

/* gfx 8 */
#define mmCOMPUTE_PGM_LO		0x2e0c
#define mmCOMPUTE_PGM_RSRC1		0x2e12
#define mmCOMPUTE_TMPRING_SIZE		0x2e18
#define mmCOMPUTE_USER_DATA_0		0x2e40
#define mmCOMPUTE_USER_DATA_1		0x2e41
#define mmCOMPUTE_RESOURCE_LIMITS	0x2e15
#define mmCOMPUTE_NUM_THREAD_X		0x2e07


#define SWAP_32(num)	(((num & 0xff000000) >> 24) |	\
			 ((num & 0x0000ff00) << 8) |	\
			 ((num & 0x00ff0000) >> 8) |	\
			 ((num & 0x000000ff) << 24))
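/* SWAP_32 reverses the byte order of a 32-bit word, e.g.
 * SWAP_32(0x11223344) == 0x44332211; it is applied to every word of
 * shader_bin below.
 */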


/* Shader code
 * void main()
 * {
 *	float x = some_input;
 *	for (unsigned i = 0; i < 1000000; i++)
 *		x = sin(x);
 *
 *	u[0] = 42u;
 * }
 */

static uint32_t shader_bin[] = {
	SWAP_32(0x800082be), SWAP_32(0x02ff08bf), SWAP_32(0x7f969800), SWAP_32(0x040085bf),
	SWAP_32(0x02810281), SWAP_32(0x02ff08bf), SWAP_32(0x7f969800), SWAP_32(0xfcff84bf),
	SWAP_32(0xff0083be), SWAP_32(0x00f00000), SWAP_32(0xc10082be), SWAP_32(0xaa02007e),
	SWAP_32(0x000070e0), SWAP_32(0x00000080), SWAP_32(0x000081bf)
};

#define CODE_OFFSET 512
#define DATA_OFFSET 1024

enum cs_type {
	CS_BUFFERCLEAR,
	CS_BUFFERCOPY,
	CS_HANG,
	CS_HANG_SLOW
};

static const uint32_t bufferclear_cs_shader_gfx9[] = {
	0x260000ff, 0x000003ff, 0xd1fd0000, 0x04010c08,
	0x7e020280, 0x7e040204, 0x7e060205, 0x7e080206,
	0x7e0a0207, 0xe01c2000, 0x80000200, 0xbf8c0000,
	0xbf810000
};

static const uint32_t bufferclear_cs_shader_gfx10[] = {
	0xD7460004, 0x04010C08, 0x7E000204, 0x7E020205,
	0x7E040206, 0x7E060207, 0xE01C2000, 0x80000004,
	0xBF810000
};

static const uint32_t bufferclear_cs_shader_registers_gfx9[][2] = {
	{0x2e12, 0x000C0041},	//{ mmCOMPUTE_PGM_RSRC1, 0x000C0041 },
	{0x2e13, 0x00000090},	//{ mmCOMPUTE_PGM_RSRC2, 0x00000090 },
	{0x2e07, 0x00000040},	//{ mmCOMPUTE_NUM_THREAD_X, 0x00000040 },
	{0x2e08, 0x00000001},	//{ mmCOMPUTE_NUM_THREAD_Y, 0x00000001 },
	{0x2e09, 0x00000001},	//{ mmCOMPUTE_NUM_THREAD_Z, 0x00000001 }
};

static const uint32_t bufferclear_cs_shader_registers_num_gfx9 = 5;
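/* Note (an assumption about how the dispatch tests elsewhere in this file
 * consume these tables): register/value pairs like the one above are
 * programmed through PKT3_SET_SH_REG, emitting the register offset relative
 * to PACKET3_SET_SH_REG_START (0x2c00) followed by the value; e.g. 0x2e12
 * (mmCOMPUTE_PGM_RSRC1) would be written as 0x212.
 */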

static const uint32_t buffercopy_cs_shader_gfx9[] = {
	0x260000ff, 0x000003ff, 0xd1fd0000, 0x04010c08,
	0x7e020280, 0xe00c2000, 0x80000200, 0xbf8c0f70,
	0xe01c2000, 0x80010200, 0xbf810000
};

static const uint32_t buffercopy_cs_shader_gfx10[] = {
	0xD7460001, 0x04010C08, 0xE00C2000, 0x80000201,
	0xBF8C3F70, 0xE01C2000, 0x80010201, 0xBF810000
};

static const uint32_t preamblecache_gfx9[] = {
	0xc0026900, 0x81, 0x80000000, 0x40004000, 0xc0026900, 0x8c, 0xaa99aaaa, 0x0,
	0xc0026900, 0x90, 0x80000000, 0x40004000, 0xc0026900, 0x94, 0x80000000, 0x40004000,
	0xc0026900, 0xb4, 0x0, 0x3f800000, 0xc0016900, 0x103, 0x0,
	0xc0016900, 0x208, 0x0, 0xc0016900, 0x290, 0x0,
	0xc0016900, 0x2a1, 0x0, 0xc0026900, 0x2ad, 0x0, 0x0,
	0xc0016900, 0x2d5, 0x10000, 0xc0016900, 0x2dc, 0x0,
	0xc0066900, 0x2de, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0xc0026900, 0x2e5, 0x0, 0x0,
	0xc0056900, 0x2f9, 0x5, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000,
	0xc0036900, 0x311, 0x3, 0, 0x100000, 0xc0026900, 0x316, 0x1e, 0x20,
	0xc0016900, 0x349, 0x0, 0xc0016900, 0x358, 0x0, 0xc0016900, 0x367, 0x0,
	0xc0016900, 0x376, 0x0, 0xc0016900, 0x385, 0x0, 0xc0016900, 0x19, 0x0,
	0xc0056900, 0xe8, 0x0, 0x0, 0x0, 0x0, 0x0,
	0xc0076900, 0x1e1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
	0xc0026900, 0x204, 0x90000, 0x4, 0xc0046900, 0x20c, 0x0, 0x0, 0x0, 0x0,
	0xc0016900, 0x2b2, 0x0, 0xc0026900, 0x30e, 0xffffffff, 0xffffffff,
	0xc0016900, 0x314, 0x0, 0xc0016900, 0x2a6, 0, 0xc0016900, 0x210, 0,
	0xc0002f00, 0x1, 0xc0016900, 0x1, 0x1,
	0xc0016900, 0x18, 0x2, 0xc0016900, 0x206, 0x300, 0xc0017900, 0x20000243, 0x0,
	0xc0017900, 0x248, 0xffffffff, 0xc0017900, 0x249, 0x0, 0xc0017900, 0x24a, 0x0,
	0xc0017900, 0x24b, 0x0
};

static const uint32_t preamblecache_gfx10[] = {
	0xc0026900, 0x81, 0x80000000, 0x40004000, 0xc0026900, 0x8c, 0xaa99aaaa, 0x0,
	0xc0026900, 0x90, 0x80000000, 0x40004000, 0xc0026900, 0x94, 0x80000000, 0x40004000,
	0xc0026900, 0xb4, 0x0, 0x3f800000, 0xc0016900, 0x103, 0x0,
	0xc0016900, 0x208, 0x0, 0xc0016900, 0x290, 0x0,
	0xc0016900, 0x2a1, 0x0, 0xc0026900, 0x2ad, 0x0, 0x0,
	0xc0016900, 0x2d5, 0x10000, 0xc0016900, 0x2dc, 0x0,
	0xc0066900, 0x2de, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0xc0026900, 0x2e5, 0x0, 0x0,
	0xc0056900, 0x2f9, 0x5, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000,
	0xc0046900, 0x310, 0, 0x3, 0, 0x100000, 0xc0026900, 0x316, 0xe, 0x20,
	0xc0016900, 0x349, 0x0, 0xc0016900, 0x358, 0x0, 0xc0016900, 0x367, 0x0,
	0xc0016900, 0x376, 0x0, 0xc0016900, 0x385, 0x0, 0xc0016900, 0x6, 0x0,
	0xc0056900, 0xe8, 0x0, 0x0, 0x0, 0x0, 0x0,
	0xc0076900, 0x1e1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
	0xc0026900, 0x204, 0x90000, 0x4, 0xc0046900, 0x20c, 0x0, 0x0, 0x0, 0x0,
	0xc0016900, 0x2b2, 0x0, 0xc0026900, 0x30e, 0xffffffff, 0xffffffff,
	0xc0016900, 0x314, 0x0, 0xc0016900, 0x10a, 0, 0xc0016900, 0x2a6, 0, 0xc0016900, 0x210, 0,
	0xc0016900, 0x2db, 0, 0xc0016900, 0x1d4, 0, 0xc0002f00, 0x1, 0xc0016900, 0x1, 0x1, 0xc0016900, 0xe, 0x2,
	0xc0016900, 0x206, 0x300, 0xc0016900, 0x212, 0x200, 0xc0017900, 0x7b, 0x20, 0xc0017a00, 0x20000243, 0x0,
	0xc0017900, 0x249, 0, 0xc0017900, 0x24a, 0, 0xc0017900, 0x24b, 0, 0xc0017900, 0x259, 0xffffffff,
	0xc0017900, 0x25f, 0, 0xc0017900, 0x260, 0, 0xc0017900, 0x262, 0,
	0xc0017600, 0x45, 0x0, 0xc0017600, 0x6, 0x0,
	0xc0067600, 0x70, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
	0xc0067600, 0x30, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0
};

enum ps_type {
	PS_CONST,
	PS_TEX,
	PS_HANG,
	PS_HANG_SLOW
};

static const uint32_t ps_const_shader_gfx9[] = {
	0x7E000200, 0x7E020201, 0x7E040202, 0x7E060203,
	0xD2960000, 0x00020300, 0xD2960001, 0x00020702,
	0xC4001C0F, 0x00000100, 0xBF810000
};

static const uint32_t ps_const_shader_patchinfo_code_size_gfx9 = 6;

static const uint32_t ps_const_shader_patchinfo_code_gfx9[][10][6] = {
	{{ 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001890, 0x00000000 },
	 { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001801, 0x00000000 },
	 { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001803, 0x00000100 },
	 { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001803, 0x00000300 },
	 { 0xD2960000, 0x00020300, 0xD2960001, 0x00020702, 0xC4001C0F, 0x00000100 },
	 { 0xD2950000, 0x00020300, 0xD2950001, 0x00020702, 0xC4001C0F, 0x00000100 },
	 { 0xD2940000, 0x00020300, 0xD2940001, 0x00020702, 0xC4001C0F, 0x00000100 },
	 { 0xD2970000, 0x00020300, 0xD2970001, 0x00020702, 0xC4001C0F, 0x00000100 },
	 { 0xD2980000, 0x00020300, 0xD2980001, 0x00020702, 0xC4001C0F, 0x00000100 },
	 { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC400180F, 0x03020100 }
	}
};

static const uint32_t ps_const_shader_patchinfo_offset_gfx9[] = {
	0x00000004
};

static const uint32_t ps_num_sh_registers_gfx9 = 2;

static const uint32_t ps_const_sh_registers_gfx9[][2] = {
	{0x2C0A, 0x000C0040},	//{ mmSPI_SHADER_PGM_RSRC1_PS, 0x000C0040 },
	{0x2C0B, 0x00000008},	//{ mmSPI_SHADER_PGM_RSRC2_PS, 0x00000008 }
};

static const uint32_t ps_num_context_registers_gfx9 = 7;

static const uint32_t ps_const_context_reg_gfx9[][2] = {
	{0xA1B4, 0x00000002},	//{ mmSPI_PS_INPUT_ADDR, 0x00000002 },
	{0xA1B6, 0x00000000},	//{ mmSPI_PS_IN_CONTROL, 0x00000000 },
	{0xA08F, 0x0000000F},	//{ mmCB_SHADER_MASK, 0x0000000F },
	{0xA203, 0x00000010},	//{ mmDB_SHADER_CONTROL, 0x00000010 },
	{0xA1C4, 0x00000000},	//{ mmSPI_SHADER_Z_FORMAT, 0x00000000 },
	{0xA1B8, 0x00000000},	//{ mmSPI_BARYC_CNTL, 0x00000000 /* Always 0 for now */},
	{0xA1C5, 0x00000004},	//{ mmSPI_SHADER_COL_FORMAT, 0x00000004 }
};

static const uint32_t ps_const_shader_gfx10[] = {
	0x7E000200, 0x7E020201, 0x7E040202, 0x7E060203,
	0x5E000300, 0x5E020702, 0xBF800000, 0xBF800000,
	0xF8001C0F, 0x00000100, 0xBF810000
};

static const uint32_t ps_const_shader_patchinfo_code_size_gfx10 = 6;

static const uint32_t ps_const_shader_patchinfo_code_gfx10[][10][6] = {
	{{ 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xF8001890, 0x00000000 },
	 { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xF8001801, 0x00000000 },
	 { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xF8001803, 0x00000100 },
	 { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xF8001803, 0x00000300 },
	 { 0x5E000300, 0x5E020702, 0xBF800000, 0xBF800000, 0xF8001C0F, 0x00000100 },
	 { 0xD7690000, 0x00020300, 0xD7690001, 0x00020702, 0xF8001C0F, 0x00000100 },
	 { 0xD7680000, 0x00020300, 0xD7680001, 0x00020702, 0xF8001C0F, 0x00000100 },
	 { 0xD76A0000, 0x00020300, 0xD76A0001, 0x00020702, 0xF8001C0F, 0x00000100 },
	 { 0xD76B0000, 0x00020300, 0xD76B0001, 0x00020702, 0xF8001C0F, 0x00000100 },
	 { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xF800180F, 0x03020100 }
	}
};

static const uint32_t ps_const_shader_patchinfo_offset_gfx10[] = {
	0x00000004
};

static const uint32_t ps_num_sh_registers_gfx10 = 2;

static const uint32_t ps_const_sh_registers_gfx10[][2] = {
	{0x2C0A, 0x000C0000},	//{ mmSPI_SHADER_PGM_RSRC1_PS, 0x000C0000 },
	{0x2C0B, 0x00000008},	//{ mmSPI_SHADER_PGM_RSRC2_PS, 0x00000008 }
};

static const uint32_t ps_tex_shader_gfx9[] = {
	0xBEFC000C, 0xBE8E017E, 0xBEFE077E, 0xD4180000,
	0xD4190001, 0xD41C0100, 0xD41D0101, 0xF0800F00,
	0x00400206, 0xBEFE010E, 0xBF8C0F70, 0xD2960000,
	0x00020702, 0xD2960001, 0x00020B04, 0xC4001C0F,
	0x00000100, 0xBF810000
};

static const uint32_t ps_tex_shader_patchinfo_offset_gfx9[] = {
	0x0000000B
};

static const uint32_t ps_tex_shader_patchinfo_code_size_gfx9 = 6;

static const uint32_t ps_tex_shader_patchinfo_code_gfx9[][10][6] = {
	{{ 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001890, 0x00000000 },
	 { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001801, 0x00000002 },
	 { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001803, 0x00000302 },
	 { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001803, 0x00000502 },
	 { 0xD2960000, 0x00020702, 0xD2960001, 0x00020B04, 0xC4001C0F, 0x00000100 },
	 { 0xD2950000, 0x00020702, 0xD2950001, 0x00020B04, 0xC4001C0F, 0x00000100 },
	 { 0xD2940000, 0x00020702, 0xD2940001, 0x00020B04, 0xC4001C0F, 0x00000100 },
	 { 0xD2970000, 0x00020702, 0xD2970001, 0x00020B04, 0xC4001C0F, 0x00000100 },
	 { 0xD2980000, 0x00020702, 0xD2980001, 0x00020B04, 0xC4001C0F, 0x00000100 },
	 { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC400180F, 0x05040302 }
	}
};

static const uint32_t ps_tex_sh_registers_gfx9[][2] = {
	{0x2C0A, 0x000C0081},	//{ mmSPI_SHADER_PGM_RSRC1_PS, 0x000C0081 },
	{0x2C0B, 0x00000018},	//{ mmSPI_SHADER_PGM_RSRC2_PS, 0x00000018 }
};

static const uint32_t ps_tex_context_reg_gfx9[][2] = {
	{0xA1B4, 0x00000002},	//{ mmSPI_PS_INPUT_ADDR, 0x00000002 },
	{0xA1B6, 0x00000001},	//{ mmSPI_PS_IN_CONTROL, 0x00000001 },
	{0xA08F, 0x0000000F},	//{ mmCB_SHADER_MASK, 0x0000000F },
	{0xA203, 0x00000010},	//{ mmDB_SHADER_CONTROL, 0x00000010 },
	{0xA1C4, 0x00000000},	//{ mmSPI_SHADER_Z_FORMAT, 0x00000000 },
	{0xA1B8, 0x00000000},	//{ mmSPI_BARYC_CNTL, 0x00000000 /* Always 0 for now */},
	{0xA1C5, 0x00000004},	//{ mmSPI_SHADER_COL_FORMAT, 0x00000004 }
};

static const uint32_t ps_tex_shader_gfx10[] = {
	0xBEFC030C, 0xBE8E047E, 0xBEFE0A7E, 0xC8080000,
	0xC80C0100, 0xC8090001, 0xC80D0101, 0xF0800F0A,
	0x00400402, 0x00000003, 0xBEFE040E, 0xBF8C0F70,
	0x5E000B04, 0x5E020F06, 0xBF800000, 0xBF800000,
	0xF8001C0F, 0x00000100, 0xBF810000
};

static const uint32_t ps_tex_shader_patchinfo_offset_gfx10[] = {
	0x0000000C
};

static const uint32_t ps_tex_shader_patchinfo_code_size_gfx10 = 6;

static const uint32_t ps_tex_shader_patchinfo_code_gfx10[][10][6] = {
	{{ 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xF8001890, 0x00000000 },
	 { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xF8001801, 0x00000004 },
	 { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xF8001803, 0x00000504 },
	 { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xF8001803, 0x00000704 },
	 { 0x5E000B04, 0x5E020F06, 0xBF800000, 0xBF800000, 0xF8001C0F, 0x00000100 },
	 { 0xD7690000, 0x00020B04, 0xD7690001, 0x00020F06, 0xF8001C0F, 0x00000100 },
	 { 0xD7680000, 0x00020B04, 0xD7680001, 0x00020F06, 0xF8001C0F, 0x00000100 },
	 { 0xD76A0000, 0x00020B04, 0xD76A0001, 0x00020F06, 0xF8001C0F, 0x00000100 },
	 { 0xD76B0000, 0x00020B04, 0xD76B0001, 0x00020F06, 0xF8001C0F, 0x00000100 },
	 { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xF800180F, 0x07060504 }
	}
};

static const uint32_t vs_RectPosTexFast_shader_gfx9[] = {
	0x7E000B00, 0x020000F3, 0xD042000A, 0x00010100,
	0x7E020202, 0x7E040200, 0x020000F3, 0x7E060206,
	0x7E080204, 0xD1000001, 0x002A0302, 0x7C840080,
	0x7E000200, 0x7E040203, 0x7E0A0201, 0xD1000003,
	0x002A0704, 0x7E0C0207, 0x7E0E0205, 0x00000101,
	0x00020505, 0x7E040208, 0x7E0A02F2, 0x00060903,
	0x00080D07, 0x7E0C0209, 0xC40008CF, 0x05020100,
	0xC400020F, 0x05060403, 0xBF810000
};

static const uint32_t vs_RectPosTexFast_shader_gfx10[] = {
	0x7E000B00, 0x060000F3, 0x7E020202, 0x7E040206,
	0x7C040080, 0x060000F3, 0xD5010001, 0x01AA0200,
	0x7E060203, 0xD5010002, 0x01AA0404, 0x7E080207,
	0x7C040080, 0xD5010000, 0x01A80101, 0xD5010001,
	0x01AA0601, 0x7E060208, 0x7E0A02F2, 0xD5010002,
	0x01A80902, 0xD5010004, 0x01AA0805, 0x7E0C0209,
	0xF80008CF, 0x05030100, 0xF800020F, 0x05060402,
	0xBF810000
};

static const uint32_t cached_cmd_gfx9[] = {
	0xc0016900, 0x0, 0x0, 0xc0026900, 0x3, 0x2a, 0x0,
	0xc0046900, 0xa, 0x0, 0x0, 0x0, 0x200020,
	0xc0016900, 0x83, 0xffff, 0xc0026900, 0x8e, 0xf, 0xf,
	0xc0056900, 0x105, 0x0, 0x0, 0x0, 0x0, 0x12,
	0xc0026900, 0x10b, 0x0, 0x0, 0xc0016900, 0x1e0, 0x0,
	0xc0036900, 0x200, 0x0, 0x10000, 0xcc0011,
	0xc0026900, 0x292, 0x20, 0x60201b8,
	0xc0026900, 0x2b0, 0x0, 0x0, 0xc0016900, 0x2f8, 0x0
};

static const uint32_t cached_cmd_gfx10[] = {
	0xc0016900, 0x0, 0x0, 0xc0026900, 0x3, 0x2a, 0x0,
	0xc0046900, 0xa, 0x0, 0x0, 0x0, 0x200020,
	0xc0016900, 0x83, 0xffff, 0xc0026900, 0x8e, 0xf, 0xf,
	0xc0056900, 0x105, 0x0, 0x0, 0x0, 0x0, 0x18,
	0xc0026900, 0x10b, 0x0, 0x0, 0xc0016900, 0x1e0, 0x0,
	0xc0036900, 0x200, 0x0, 0x10000, 0xcc0011,
	0xc0026900, 0x292, 0x20, 0x6020000,
	0xc0026900, 0x2b0, 0x0, 0x0, 0xc0016900, 0x2f8, 0x0
};

unsigned int memcpy_ps_hang[] = {
	0xFFFFFFFF, 0xBEFE0A7E, 0xBEFC0304, 0xC0C20100,
	0xC0800300, 0xC8080000, 0xC80C0100, 0xC8090001,
	0xC80D0101, 0xBF8C007F, 0xF0800F00, 0x00010002,
	0xBEFE040C, 0xBF8C0F70, 0xBF800000, 0xBF800000,
	0xF800180F, 0x03020100, 0xBF810000
};

struct amdgpu_test_shader {
	uint32_t *shader;
	uint32_t header_length;
	uint32_t body_length;
	uint32_t foot_length;
};
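/* A hypothetical reading of the fields above, consistent with how the
 * instances below are initialized: header_length, body_length and
 * foot_length partition the shader's DWORDs, so a larger (slow) shader can
 * be built by replicating the body between the header and footer; e.g.
 * memcpy_cs_hang_slow_ai splits its 8 DWORDs as 4 + 3 + 1.
 */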

unsigned int memcpy_cs_hang_slow_ai_codes[] = {
	0xd1fd0000, 0x04010c08, 0xe00c2000, 0x80000100,
	0xbf8c0f70, 0xe01c2000, 0x80010100, 0xbf810000
};

struct amdgpu_test_shader memcpy_cs_hang_slow_ai = {
	memcpy_cs_hang_slow_ai_codes,
	4,
	3,
	1
};

unsigned int memcpy_cs_hang_slow_rv_codes[] = {
	0x8e00860c, 0x32000000, 0xe00c2000, 0x80010100,
	0xbf8c0f70, 0xe01c2000, 0x80020100, 0xbf810000
};

struct amdgpu_test_shader memcpy_cs_hang_slow_rv = {
	memcpy_cs_hang_slow_rv_codes,
	4,
	3,
	1
};

unsigned int memcpy_cs_hang_slow_nv_codes[] = {
	0xd7460000, 0x04010c08, 0xe00c2000, 0x80000100,
	0xbf8c0f70, 0xe01ca000, 0x80010100, 0xbf810000
};

struct amdgpu_test_shader memcpy_cs_hang_slow_nv = {
	memcpy_cs_hang_slow_nv_codes,
	4,
	3,
	1
};

unsigned int memcpy_ps_hang_slow_ai_codes[] = {
	0xbefc000c, 0xbe8e017e, 0xbefe077e, 0xd4080000,
	0xd4090001, 0xd40c0100, 0xd40d0101, 0xf0800f00,
	0x00400002, 0xbefe010e, 0xbf8c0f70, 0xbf800000,
	0xbf800000, 0xbf800000, 0xbf800000, 0xc400180f,
	0x03020100, 0xbf810000
};

struct amdgpu_test_shader memcpy_ps_hang_slow_ai = {
	memcpy_ps_hang_slow_ai_codes,
	7,
	2,
	9
};

int amdgpu_bo_alloc_and_map_raw(amdgpu_device_handle dev, unsigned size,
				unsigned alignment, unsigned heap, uint64_t alloc_flags,
				uint64_t mapping_flags, amdgpu_bo_handle *bo, void **cpu,
				uint64_t *mc_address,
				amdgpu_va_handle *va_handle)
{
	struct amdgpu_bo_alloc_request request = {};
	amdgpu_bo_handle buf_handle;
	amdgpu_va_handle handle;
	uint64_t vmc_addr;
	int r;

	request.alloc_size = size;
	request.phys_alignment = alignment;
	request.preferred_heap = heap;
	request.flags = alloc_flags;

	r = amdgpu_bo_alloc(dev, &request, &buf_handle);
	if (r)
		return r;

	r = amdgpu_va_range_alloc(dev,
				  amdgpu_gpu_va_range_general,
				  size, alignment, 0, &vmc_addr,
				  &handle, 0);
	if (r)
		goto error_va_alloc;

	r = amdgpu_bo_va_op_raw(dev, buf_handle, 0, ALIGN(size, getpagesize()), vmc_addr,
				AMDGPU_VM_PAGE_READABLE |
				AMDGPU_VM_PAGE_WRITEABLE |
				AMDGPU_VM_PAGE_EXECUTABLE |
				mapping_flags,
				AMDGPU_VA_OP_MAP);
	if (r)
		goto error_va_map;

	r = amdgpu_bo_cpu_map(buf_handle, cpu);
	if (r)
		goto error_cpu_map;

	*bo = buf_handle;
	*mc_address = vmc_addr;
	*va_handle = handle;

	return 0;

error_cpu_map:
	amdgpu_bo_cpu_unmap(buf_handle);

error_va_map:
	amdgpu_bo_va_op(buf_handle, 0, size, vmc_addr, 0, AMDGPU_VA_OP_UNMAP);

error_va_alloc:
	amdgpu_bo_free(buf_handle);
	return r;
}
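
/* Illustrative use of the helper above (a sketch, not a test in this suite;
 * the variable names are hypothetical): allocate a 4 KiB GTT buffer mapped
 * for both CPU and GPU access, then release it.
 *
 *	amdgpu_bo_handle bo;
 *	amdgpu_va_handle va;
 *	void *cpu;
 *	uint64_t mc;
 *	int r = amdgpu_bo_alloc_and_map_raw(dev, 4096, 4096,
 *					    AMDGPU_GEM_DOMAIN_GTT, 0, 0,
 *					    &bo, &cpu, &mc, &va);
 *	if (!r)
 *		amdgpu_bo_unmap_and_free(bo, va, mc, 4096);
 */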


CU_BOOL suite_basic_tests_enable(void)
{
	if (amdgpu_device_initialize(drm_amdgpu[0], &major_version,
				     &minor_version, &device_handle))
		return CU_FALSE;

	family_id = device_handle->info.family_id;
	chip_id = device_handle->info.chip_external_rev;
	chip_rev = device_handle->info.chip_rev;

	if (amdgpu_device_deinitialize(device_handle))
		return CU_FALSE;

	/* disable the GFX-engine basic test cases because some ASICs have no CPG */
	if (asic_is_gfx_pipe_removed(family_id, chip_id, chip_rev)) {
		if (amdgpu_set_test_active("Basic Tests",
					   "Command submission Test (GFX)",
					   CU_FALSE))
			fprintf(stderr, "test deactivation failed - %s\n",
				CU_get_error_msg());

		if (amdgpu_set_test_active("Basic Tests",
					   "Command submission Test (Multi-Fence)",
					   CU_FALSE))
			fprintf(stderr, "test deactivation failed - %s\n",
				CU_get_error_msg());

		if (amdgpu_set_test_active("Basic Tests",
					   "Sync dependency Test",
					   CU_FALSE))
			fprintf(stderr, "test deactivation failed - %s\n",
				CU_get_error_msg());
	}

	return CU_TRUE;
}

int suite_basic_tests_init(void)
{
	struct amdgpu_gpu_info gpu_info = {0};
	int r;

	r = amdgpu_device_initialize(drm_amdgpu[0], &major_version,
				     &minor_version, &device_handle);

	if (r) {
		if ((r == -EACCES) && (errno == EACCES))
			printf("\n\nError: %s. "
			       "Hint: try running this test program as root.",
			       strerror(errno));
		return CUE_SINIT_FAILED;
	}

	r = amdgpu_query_gpu_info(device_handle, &gpu_info);
	if (r)
		return CUE_SINIT_FAILED;

	family_id = gpu_info.family_id;

	return CUE_SUCCESS;
}

int suite_basic_tests_clean(void)
{
	int r = amdgpu_device_deinitialize(device_handle);

	if (r == 0)
		return CUE_SUCCESS;
	else
		return CUE_SCLEAN_FAILED;
}

static void amdgpu_query_info_test(void)
{
	struct amdgpu_gpu_info gpu_info = {0};
	uint32_t version, feature;
	int r;

	r = amdgpu_query_gpu_info(device_handle, &gpu_info);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_query_firmware_version(device_handle, AMDGPU_INFO_FW_VCE, 0,
					  0, &version, &feature);
	CU_ASSERT_EQUAL(r, 0);
}

static void amdgpu_command_submission_gfx_separate_ibs(void)
{
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle ib_result_handle, ib_result_ce_handle;
	void *ib_result_cpu, *ib_result_ce_cpu;
	uint64_t ib_result_mc_address, ib_result_ce_mc_address;
	struct amdgpu_cs_request ibs_request = {0};
	struct amdgpu_cs_ib_info ib_info[2];
	struct amdgpu_cs_fence fence_status = {0};
	uint32_t *ptr;
	uint32_t expired;
	amdgpu_bo_list_handle bo_list;
	amdgpu_va_handle va_handle, va_handle_ce;
	int r, i = 0;
	struct drm_amdgpu_info_hw_ip info;

	r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_GFX, 0, &info);
	CU_ASSERT_EQUAL(r, 0);

	if (info.hw_ip_version_major >= 11)
		return;

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &ib_result_handle, &ib_result_cpu,
				    &ib_result_mc_address, &va_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &ib_result_ce_handle, &ib_result_ce_cpu,
				    &ib_result_ce_mc_address, &va_handle_ce);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_get_bo_list(device_handle, ib_result_handle,
			       ib_result_ce_handle, &bo_list);
	CU_ASSERT_EQUAL(r, 0);

	memset(ib_info, 0, 2 * sizeof(struct amdgpu_cs_ib_info));

	/* IT_SET_CE_DE_COUNTERS */
	ptr = ib_result_ce_cpu;
	if (family_id != AMDGPU_FAMILY_SI) {
		ptr[i++] = 0xc0008900;
		ptr[i++] = 0;
	}
	ptr[i++] = 0xc0008400;
	ptr[i++] = 1;
	ib_info[0].ib_mc_address = ib_result_ce_mc_address;
	ib_info[0].size = i;
	ib_info[0].flags = AMDGPU_IB_FLAG_CE;

	/* IT_WAIT_ON_CE_COUNTER */
	ptr = ib_result_cpu;
	ptr[0] = 0xc0008600;
	ptr[1] = 0x00000001;
	ib_info[1].ib_mc_address = ib_result_mc_address;
	ib_info[1].size = 2;

	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
	ibs_request.number_of_ibs = 2;
	ibs_request.ibs = ib_info;
	ibs_request.resources = bo_list;
	ibs_request.fence_info.handle = NULL;

	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
	CU_ASSERT_EQUAL(r, 0);

	fence_status.context = context_handle;
	fence_status.ip_type = AMDGPU_HW_IP_GFX;
	fence_status.ip_instance = 0;
	fence_status.fence = ibs_request.seq_no;

	r = amdgpu_cs_query_fence_status(&fence_status,
					 AMDGPU_TIMEOUT_INFINITE,
					 0, &expired);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
				     ib_result_mc_address, 4096);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(ib_result_ce_handle, va_handle_ce,
				     ib_result_ce_mc_address, 4096);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_list_destroy(bo_list);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}

static void amdgpu_command_submission_gfx_shared_ib(void)
{
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle ib_result_handle;
	void *ib_result_cpu;
	uint64_t ib_result_mc_address;
	struct amdgpu_cs_request ibs_request = {0};
	struct amdgpu_cs_ib_info ib_info[2];
	struct amdgpu_cs_fence fence_status = {0};
	uint32_t *ptr;
	uint32_t expired;
	amdgpu_bo_list_handle bo_list;
	amdgpu_va_handle va_handle;
	int r, i = 0;
	struct drm_amdgpu_info_hw_ip info;

	r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_GFX, 0, &info);
	CU_ASSERT_EQUAL(r, 0);

	if (info.hw_ip_version_major >= 11)
		return;

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &ib_result_handle, &ib_result_cpu,
				    &ib_result_mc_address, &va_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL,
			       &bo_list);
	CU_ASSERT_EQUAL(r, 0);

	memset(ib_info, 0, 2 * sizeof(struct amdgpu_cs_ib_info));

	/* IT_SET_CE_DE_COUNTERS */
	ptr = ib_result_cpu;
	if (family_id != AMDGPU_FAMILY_SI) {
		ptr[i++] = 0xc0008900;
		ptr[i++] = 0;
	}
	ptr[i++] = 0xc0008400;
	ptr[i++] = 1;
	ib_info[0].ib_mc_address = ib_result_mc_address;
	ib_info[0].size = i;
	ib_info[0].flags = AMDGPU_IB_FLAG_CE;

	ptr = (uint32_t *)ib_result_cpu + 4;
	ptr[0] = 0xc0008600;
	ptr[1] = 0x00000001;
	ib_info[1].ib_mc_address = ib_result_mc_address + 16;
	ib_info[1].size = 2;

	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
	ibs_request.number_of_ibs = 2;
	ibs_request.ibs = ib_info;
	ibs_request.resources = bo_list;
	ibs_request.fence_info.handle = NULL;

	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
	CU_ASSERT_EQUAL(r, 0);

	fence_status.context = context_handle;
	fence_status.ip_type = AMDGPU_HW_IP_GFX;
	fence_status.ip_instance = 0;
	fence_status.fence = ibs_request.seq_no;

	r = amdgpu_cs_query_fence_status(&fence_status,
					 AMDGPU_TIMEOUT_INFINITE,
					 0, &expired);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
				     ib_result_mc_address, 4096);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_list_destroy(bo_list);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}

static void amdgpu_command_submission_gfx_cp_write_data(void)
{
	amdgpu_command_submission_write_linear_helper(AMDGPU_HW_IP_GFX);
}

static void amdgpu_command_submission_gfx_cp_const_fill(void)
{
	amdgpu_command_submission_const_fill_helper(AMDGPU_HW_IP_GFX);
}

static void amdgpu_command_submission_gfx_cp_copy_data(void)
{
	amdgpu_command_submission_copy_linear_helper(AMDGPU_HW_IP_GFX);
}

static void amdgpu_bo_eviction_test(void)
{
	const int sdma_write_length = 1024;
	const int pm4_dw = 256;
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle bo1, bo2, vram_max[2], gtt_max[2];
	amdgpu_bo_handle *resources;
	uint32_t *pm4;
	struct amdgpu_cs_ib_info *ib_info;
	struct amdgpu_cs_request *ibs_request;
	uint64_t bo1_mc, bo2_mc;
	volatile unsigned char *bo1_cpu, *bo2_cpu;
	int i, j, r, loop1, loop2;
	uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
	amdgpu_va_handle bo1_va_handle, bo2_va_handle;
	struct amdgpu_heap_info vram_info, gtt_info;

	pm4 = calloc(pm4_dw, sizeof(*pm4));
	CU_ASSERT_NOT_EQUAL(pm4, NULL);

	ib_info = calloc(1, sizeof(*ib_info));
	CU_ASSERT_NOT_EQUAL(ib_info, NULL);

	ibs_request = calloc(1, sizeof(*ibs_request));
	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	/* prepare resources */
	resources = calloc(4, sizeof(amdgpu_bo_handle));
	CU_ASSERT_NOT_EQUAL(resources, NULL);

	r = amdgpu_query_heap_info(device_handle, AMDGPU_GEM_DOMAIN_VRAM,
				   0, &vram_info);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_wrap(device_handle, vram_info.max_allocation, 4096,
				 AMDGPU_GEM_DOMAIN_VRAM, 0, &vram_max[0]);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_bo_alloc_wrap(device_handle, vram_info.max_allocation, 4096,
				 AMDGPU_GEM_DOMAIN_VRAM, 0, &vram_max[1]);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_query_heap_info(device_handle, AMDGPU_GEM_DOMAIN_GTT,
				   0, &gtt_info);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_wrap(device_handle, gtt_info.max_allocation, 4096,
				 AMDGPU_GEM_DOMAIN_GTT, 0, &gtt_max[0]);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_bo_alloc_wrap(device_handle, gtt_info.max_allocation, 4096,
				 AMDGPU_GEM_DOMAIN_GTT, 0, &gtt_max[1]);
	CU_ASSERT_EQUAL(r, 0);

	loop1 = loop2 = 0;
	/* run a 2x2 loop to test every mapping-flag combination for the two BOs */
	while (loop1 < 2) {
		while (loop2 < 2) {
			/* allocate UC bo1 for sDMA use */
			r = amdgpu_bo_alloc_and_map(device_handle,
						    sdma_write_length, 4096,
						    AMDGPU_GEM_DOMAIN_GTT,
						    gtt_flags[loop1], &bo1,
						    (void**)&bo1_cpu, &bo1_mc,
						    &bo1_va_handle);
			CU_ASSERT_EQUAL(r, 0);

			/* set bo1 */
			memset((void*)bo1_cpu, 0xaa, sdma_write_length);

			/* allocate UC bo2 for sDMA use */
			r = amdgpu_bo_alloc_and_map(device_handle,
						    sdma_write_length, 4096,
						    AMDGPU_GEM_DOMAIN_GTT,
						    gtt_flags[loop2], &bo2,
						    (void**)&bo2_cpu, &bo2_mc,
						    &bo2_va_handle);
			CU_ASSERT_EQUAL(r, 0);

			/* clear bo2 */
			memset((void*)bo2_cpu, 0, sdma_write_length);

			resources[0] = bo1;
			resources[1] = bo2;
			resources[2] = vram_max[loop2];
			resources[3] = gtt_max[loop2];

			/* fill the PM4 stream: test DMA copy linear */
			i = j = 0;
			if (family_id == AMDGPU_FAMILY_SI) {
				pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_COPY_SI, 0, 0, 0,
							  sdma_write_length);
				pm4[i++] = 0xffffffff & bo2_mc;
				pm4[i++] = 0xffffffff & bo1_mc;
				pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
				pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
			} else {
				pm4[i++] = SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0);
				if (family_id >= AMDGPU_FAMILY_AI)
					pm4[i++] = sdma_write_length - 1;
				else
					pm4[i++] = sdma_write_length;
				pm4[i++] = 0;
				pm4[i++] = 0xffffffff & bo1_mc;
				pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
				pm4[i++] = 0xffffffff & bo2_mc;
				pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
			}

			amdgpu_test_exec_cs_helper(context_handle,
						   AMDGPU_HW_IP_DMA, 0,
						   i, pm4,
						   4, resources,
						   ib_info, ibs_request);

			/* verify that the SDMA result matches the expected pattern */
			i = 0;
			while (i < sdma_write_length) {
				CU_ASSERT_EQUAL(bo2_cpu[i++], 0xaa);
			}
			r = amdgpu_bo_unmap_and_free(bo1, bo1_va_handle, bo1_mc,
						     sdma_write_length);
			CU_ASSERT_EQUAL(r, 0);
			r = amdgpu_bo_unmap_and_free(bo2, bo2_va_handle, bo2_mc,
						     sdma_write_length);
			CU_ASSERT_EQUAL(r, 0);
			loop2++;
		}
		loop2 = 0;
		loop1++;
	}
	amdgpu_bo_free(vram_max[0]);
	amdgpu_bo_free(vram_max[1]);
	amdgpu_bo_free(gtt_max[0]);
	amdgpu_bo_free(gtt_max[1]);
	/* clean up resources */
	free(resources);
	free(ibs_request);
	free(ib_info);
	free(pm4);

	/* end of test */
	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}

static void amdgpu_command_submission_gfx(void)
{
	/* write data using the CP */
	amdgpu_command_submission_gfx_cp_write_data();
	/* const fill using the CP */
	amdgpu_command_submission_gfx_cp_const_fill();
	/* copy data using the CP */
	amdgpu_command_submission_gfx_cp_copy_data();
	/* separate IB buffers for multi-IB submission */
	amdgpu_command_submission_gfx_separate_ibs();
	/* shared IB buffer for multi-IB submission */
	amdgpu_command_submission_gfx_shared_ib();
}

static void amdgpu_semaphore_test(void)
{
	amdgpu_context_handle context_handle[2];
	amdgpu_semaphore_handle sem;
	amdgpu_bo_handle ib_result_handle[2];
	void *ib_result_cpu[2];
	uint64_t ib_result_mc_address[2];
	struct amdgpu_cs_request ibs_request[2] = {0};
	struct amdgpu_cs_ib_info ib_info[2] = {0};
	struct amdgpu_cs_fence fence_status = {0};
	uint32_t *ptr;
	uint32_t expired;
	uint32_t sdma_nop, gfx_nop;
	amdgpu_bo_list_handle bo_list[2];
	amdgpu_va_handle va_handle[2];
	int r, i;
	struct amdgpu_gpu_info gpu_info = {0};
	unsigned gc_ip_type;

	r = amdgpu_query_gpu_info(device_handle, &gpu_info);
	CU_ASSERT_EQUAL(r, 0);

	gc_ip_type = (asic_is_gfx_pipe_removed(family_id, chip_id, chip_rev)) ?
			AMDGPU_HW_IP_COMPUTE : AMDGPU_HW_IP_GFX;

	if (family_id == AMDGPU_FAMILY_SI) {
		sdma_nop = SDMA_PACKET_SI(SDMA_NOP_SI, 0, 0, 0, 0);
		gfx_nop = GFX_COMPUTE_NOP_SI;
	} else {
		sdma_nop = SDMA_PKT_HEADER_OP(SDMA_NOP);
		gfx_nop = GFX_COMPUTE_NOP;
	}

	r = amdgpu_cs_create_semaphore(&sem);
	CU_ASSERT_EQUAL(r, 0);
	for (i = 0; i < 2; i++) {
		r = amdgpu_cs_ctx_create(device_handle, &context_handle[i]);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
					    AMDGPU_GEM_DOMAIN_GTT, 0,
					    &ib_result_handle[i], &ib_result_cpu[i],
					    &ib_result_mc_address[i], &va_handle[i]);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_get_bo_list(device_handle, ib_result_handle[i],
				       NULL, &bo_list[i]);
		CU_ASSERT_EQUAL(r, 0);
	}

	/* 1. same context, different engines */
	ptr = ib_result_cpu[0];
	ptr[0] = sdma_nop;
	ib_info[0].ib_mc_address = ib_result_mc_address[0];
	ib_info[0].size = 1;

	ibs_request[0].ip_type = AMDGPU_HW_IP_DMA;
	ibs_request[0].number_of_ibs = 1;
	ibs_request[0].ibs = &ib_info[0];
	ibs_request[0].resources = bo_list[0];
	ibs_request[0].fence_info.handle = NULL;
	r = amdgpu_cs_submit(context_handle[0], 0, &ibs_request[0], 1);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_cs_signal_semaphore(context_handle[0], AMDGPU_HW_IP_DMA, 0, 0, sem);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_wait_semaphore(context_handle[0], gc_ip_type, 0, 0, sem);
	CU_ASSERT_EQUAL(r, 0);
	ptr = ib_result_cpu[1];
	ptr[0] = gfx_nop;
	ib_info[1].ib_mc_address = ib_result_mc_address[1];
	ib_info[1].size = 1;

	ibs_request[1].ip_type = gc_ip_type;
	ibs_request[1].number_of_ibs = 1;
	ibs_request[1].ibs = &ib_info[1];
	ibs_request[1].resources = bo_list[1];
	ibs_request[1].fence_info.handle = NULL;

	r = amdgpu_cs_submit(context_handle[0], 0, &ibs_request[1], 1);
	CU_ASSERT_EQUAL(r, 0);

	fence_status.context = context_handle[0];
	fence_status.ip_type = gc_ip_type;
	fence_status.ip_instance = 0;
	fence_status.fence = ibs_request[1].seq_no;
	r = amdgpu_cs_query_fence_status(&fence_status,
					 AMDGPU_TIMEOUT_INFINITE, 0, &expired);
	CU_ASSERT_EQUAL(r, 0);
	CU_ASSERT_EQUAL(expired, true);

	/* 2. same engine, different contexts */
	ptr = ib_result_cpu[0];
	ptr[0] = gfx_nop;
	ib_info[0].ib_mc_address = ib_result_mc_address[0];
	ib_info[0].size = 1;

	ibs_request[0].ip_type = gc_ip_type;
	ibs_request[0].number_of_ibs = 1;
	ibs_request[0].ibs = &ib_info[0];
	ibs_request[0].resources = bo_list[0];
	ibs_request[0].fence_info.handle = NULL;
	r = amdgpu_cs_submit(context_handle[0], 0, &ibs_request[0], 1);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_cs_signal_semaphore(context_handle[0], gc_ip_type, 0, 0, sem);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_wait_semaphore(context_handle[1], gc_ip_type, 0, 0, sem);
	CU_ASSERT_EQUAL(r, 0);
	ptr = ib_result_cpu[1];
	ptr[0] = gfx_nop;
	ib_info[1].ib_mc_address = ib_result_mc_address[1];
	ib_info[1].size = 1;

	ibs_request[1].ip_type = gc_ip_type;
	ibs_request[1].number_of_ibs = 1;
	ibs_request[1].ibs = &ib_info[1];
	ibs_request[1].resources = bo_list[1];
	ibs_request[1].fence_info.handle = NULL;
	r = amdgpu_cs_submit(context_handle[1], 0, &ibs_request[1], 1);
	CU_ASSERT_EQUAL(r, 0);

	fence_status.context = context_handle[1];
	fence_status.ip_type = gc_ip_type;
	fence_status.ip_instance = 0;
	fence_status.fence = ibs_request[1].seq_no;
	r = amdgpu_cs_query_fence_status(&fence_status,
					 AMDGPU_TIMEOUT_INFINITE, 0, &expired);
	CU_ASSERT_EQUAL(r, 0);
	CU_ASSERT_EQUAL(expired, true);

	for (i = 0; i < 2; i++) {
		r = amdgpu_bo_unmap_and_free(ib_result_handle[i], va_handle[i],
					     ib_result_mc_address[i], 4096);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_bo_list_destroy(bo_list[i]);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_cs_ctx_free(context_handle[i]);
		CU_ASSERT_EQUAL(r, 0);
	}

	r = amdgpu_cs_destroy_semaphore(sem);
	CU_ASSERT_EQUAL(r, 0);
}

static void amdgpu_command_submission_compute_nop(void)
{
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle ib_result_handle;
	void *ib_result_cpu;
	uint64_t ib_result_mc_address;
	struct amdgpu_cs_request ibs_request;
	struct amdgpu_cs_ib_info ib_info;
	struct amdgpu_cs_fence fence_status;
	uint32_t *ptr;
	uint32_t expired;
	int r, instance;
	amdgpu_bo_list_handle bo_list;
	amdgpu_va_handle va_handle;
	struct drm_amdgpu_info_hw_ip info;

	r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_COMPUTE, 0, &info);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	for (instance = 0; (1 << instance) & info.available_rings; instance++) {
		r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
					    AMDGPU_GEM_DOMAIN_GTT, 0,
					    &ib_result_handle, &ib_result_cpu,
					    &ib_result_mc_address, &va_handle);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL,
				       &bo_list);
		CU_ASSERT_EQUAL(r, 0);

		ptr = ib_result_cpu;
		memset(ptr, 0, 16);
		ptr[0] = PACKET3(PACKET3_NOP, 14);

		memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info));
		ib_info.ib_mc_address = ib_result_mc_address;
		ib_info.size = 16;

		memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request));
		ibs_request.ip_type = AMDGPU_HW_IP_COMPUTE;
		ibs_request.ring = instance;
		ibs_request.number_of_ibs = 1;
		ibs_request.ibs = &ib_info;
		ibs_request.resources = bo_list;
		ibs_request.fence_info.handle = NULL;

		memset(&fence_status, 0, sizeof(struct amdgpu_cs_fence));
		r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
		CU_ASSERT_EQUAL(r, 0);

		fence_status.context = context_handle;
		fence_status.ip_type = AMDGPU_HW_IP_COMPUTE;
		fence_status.ip_instance = 0;
		fence_status.ring = instance;
		fence_status.fence = ibs_request.seq_no;

		r = amdgpu_cs_query_fence_status(&fence_status,
						 AMDGPU_TIMEOUT_INFINITE,
						 0, &expired);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_bo_list_destroy(bo_list);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
					     ib_result_mc_address, 4096);
		CU_ASSERT_EQUAL(r, 0);
	}

	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}

static void amdgpu_command_submission_compute_cp_write_data(void)
{
	amdgpu_command_submission_write_linear_helper(AMDGPU_HW_IP_COMPUTE);
}

static void amdgpu_command_submission_compute_cp_const_fill(void)
{
	amdgpu_command_submission_const_fill_helper(AMDGPU_HW_IP_COMPUTE);
}

static void amdgpu_command_submission_compute_cp_copy_data(void)
{
	amdgpu_command_submission_copy_linear_helper(AMDGPU_HW_IP_COMPUTE);
}

static void amdgpu_command_submission_compute(void)
{
	/* write data using the CP */
	amdgpu_command_submission_compute_cp_write_data();
	/* const fill using the CP */
	amdgpu_command_submission_compute_cp_const_fill();
	/* copy data using the CP */
	amdgpu_command_submission_compute_cp_copy_data();
	/* nop test */
	amdgpu_command_submission_compute_nop();
}

/*
 * The caller must create and release:
 * pm4_src, resources, ib_info, and ibs_request.
 * Submit the command stream described in ibs_request and wait for the IB to
 * complete.
 */
void
amdgpu_test_exec_cs_helper_raw(amdgpu_device_handle device_handle,
			       amdgpu_context_handle context_handle,
			       unsigned ip_type, int instance, int pm4_dw,
			       uint32_t *pm4_src, int res_cnt,
			       amdgpu_bo_handle *resources,
			       struct amdgpu_cs_ib_info *ib_info,
			       struct amdgpu_cs_request *ibs_request,
			       bool secure)
{
	int r;
	uint32_t expired;
	uint32_t *ring_ptr;
	amdgpu_bo_handle ib_result_handle;
	void *ib_result_cpu;
	uint64_t ib_result_mc_address;
	struct amdgpu_cs_fence fence_status = {0};
	amdgpu_bo_handle *all_res = alloca(sizeof(resources[0]) * (res_cnt + 1));
	amdgpu_va_handle va_handle;

	/* prepare CS */
	CU_ASSERT_NOT_EQUAL(pm4_src, NULL);
	CU_ASSERT_NOT_EQUAL(resources, NULL);
	CU_ASSERT_NOT_EQUAL(ib_info, NULL);
	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
	CU_ASSERT_TRUE(pm4_dw <= 1024);

	/* allocate IB */
	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &ib_result_handle, &ib_result_cpu,
				    &ib_result_mc_address, &va_handle);
	CU_ASSERT_EQUAL(r, 0);

	/* copy the caller's PM4 packets into the IB */
	ring_ptr = ib_result_cpu;
	memcpy(ring_ptr, pm4_src, pm4_dw * sizeof(*pm4_src));

	ib_info->ib_mc_address = ib_result_mc_address;
	ib_info->size = pm4_dw;
	if (secure)
		ib_info->flags |= AMDGPU_IB_FLAGS_SECURE;

	ibs_request->ip_type = ip_type;
	ibs_request->ring = instance;
	ibs_request->number_of_ibs = 1;
	ibs_request->ibs = ib_info;
	ibs_request->fence_info.handle = NULL;

	memcpy(all_res, resources, sizeof(resources[0]) * res_cnt);
	all_res[res_cnt] = ib_result_handle;

	r = amdgpu_bo_list_create(device_handle, res_cnt+1, all_res,
				  NULL, &ibs_request->resources);
	CU_ASSERT_EQUAL(r, 0);

	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);

	/* submit CS */
	r = amdgpu_cs_submit(context_handle, 0, ibs_request, 1);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_list_destroy(ibs_request->resources);
	CU_ASSERT_EQUAL(r, 0);

	fence_status.ip_type = ip_type;
	fence_status.ip_instance = 0;
	fence_status.ring = ibs_request->ring;
	fence_status.context = context_handle;
	fence_status.fence = ibs_request->seq_no;

	/* wait for the IB to complete */
	r = amdgpu_cs_query_fence_status(&fence_status,
					 AMDGPU_TIMEOUT_INFINITE,
					 0, &expired);
	CU_ASSERT_EQUAL(r, 0);
	CU_ASSERT_EQUAL(expired, true);

	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
				     ib_result_mc_address, 4096);
	CU_ASSERT_EQUAL(r, 0);
}

static void
amdgpu_test_exec_cs_helper(amdgpu_context_handle context_handle,
			   unsigned ip_type, int instance, int pm4_dw,
			   uint32_t *pm4_src, int res_cnt,
			   amdgpu_bo_handle *resources,
			   struct amdgpu_cs_ib_info *ib_info,
			   struct amdgpu_cs_request *ibs_request)
{
	amdgpu_test_exec_cs_helper_raw(device_handle, context_handle,
				       ip_type, instance, pm4_dw, pm4_src,
				       res_cnt, resources, ib_info,
				       ibs_request, false);
}
1561
1562 void
amdgpu_command_submission_write_linear_helper_with_secure(amdgpu_device_handle device,unsigned ip_type,bool secure)1563 amdgpu_command_submission_write_linear_helper_with_secure(amdgpu_device_handle
1564 device, unsigned
1565 ip_type, bool secure)
1566 {
1567 const int sdma_write_length = 128;
1568 const int pm4_dw = 256;
1569 amdgpu_context_handle context_handle;
1570 amdgpu_bo_handle bo;
1571 amdgpu_bo_handle *resources;
1572 uint32_t *pm4;
1573 struct amdgpu_cs_ib_info *ib_info;
1574 struct amdgpu_cs_request *ibs_request;
1575 uint64_t bo_mc;
1576 volatile uint32_t *bo_cpu;
1577 uint32_t bo_cpu_origin;
1578 int i, j, r, loop, ring_id;
1579 uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
1580 amdgpu_va_handle va_handle;
1581 struct drm_amdgpu_info_hw_ip hw_ip_info;
1582
1583 pm4 = calloc(pm4_dw, sizeof(*pm4));
1584 CU_ASSERT_NOT_EQUAL(pm4, NULL);
1585
1586 ib_info = calloc(1, sizeof(*ib_info));
1587 CU_ASSERT_NOT_EQUAL(ib_info, NULL);
1588
1589 ibs_request = calloc(1, sizeof(*ibs_request));
1590 CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
1591
1592 r = amdgpu_query_hw_ip_info(device, ip_type, 0, &hw_ip_info);
1593 CU_ASSERT_EQUAL(r, 0);
1594
1595 for (i = 0; secure && (i < 2); i++)
1596 gtt_flags[i] |= AMDGPU_GEM_CREATE_ENCRYPTED;
1597
1598 r = amdgpu_cs_ctx_create(device, &context_handle);
1599
1600 CU_ASSERT_EQUAL(r, 0);
1601
1602 /* prepare resource */
1603 resources = calloc(1, sizeof(amdgpu_bo_handle));
1604 CU_ASSERT_NOT_EQUAL(resources, NULL);
1605
1606 for (ring_id = 0; (1 << ring_id) & hw_ip_info.available_rings; ring_id++) {
1607 loop = 0;
1608 while(loop < 2) {
1609 /* allocate UC bo for sDMA use */
1610 r = amdgpu_bo_alloc_and_map(device,
1611 sdma_write_length * sizeof(uint32_t),
1612 4096, AMDGPU_GEM_DOMAIN_GTT,
1613 gtt_flags[loop], &bo, (void**)&bo_cpu,
1614 &bo_mc, &va_handle);
1615 CU_ASSERT_EQUAL(r, 0);
1616
1617 /* clear bo */
1618 memset((void*)bo_cpu, 0, sdma_write_length * sizeof(uint32_t));
1619
1620 resources[0] = bo;
1621
1622 /* fulfill PM4: test DMA write-linear */
1623 i = j = 0;
1624 if (ip_type == AMDGPU_HW_IP_DMA) {
1625 if (family_id == AMDGPU_FAMILY_SI)
1626 pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_WRITE, 0, 0, 0,
1627 sdma_write_length);
1628 else
1629 pm4[i++] = SDMA_PACKET(SDMA_OPCODE_WRITE,
1630 SDMA_WRITE_SUB_OPCODE_LINEAR,
1631 secure ? SDMA_ATOMIC_TMZ(1) : 0);
1632 pm4[i++] = 0xfffffffc & bo_mc;
1633 pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
1634 if (family_id >= AMDGPU_FAMILY_AI)
1635 pm4[i++] = sdma_write_length - 1;
1636 else if (family_id != AMDGPU_FAMILY_SI)
1637 pm4[i++] = sdma_write_length;
1638 while(j++ < sdma_write_length)
1639 pm4[i++] = 0xdeadbeaf;
1640 } else if ((ip_type == AMDGPU_HW_IP_GFX) ||
1641 (ip_type == AMDGPU_HW_IP_COMPUTE)) {
1642 pm4[i++] = PACKET3(PACKET3_WRITE_DATA, 2 + sdma_write_length);
1643 pm4[i++] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
1644 pm4[i++] = 0xfffffffc & bo_mc;
1645 pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
1646 while(j++ < sdma_write_length)
1647 pm4[i++] = 0xdeadbeaf;
1648 }
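/* At this point pm4[] holds one write-linear packet: a header, the 64-bit
 * destination address (low dword 4-byte aligned), an optional dword count
 * (family dependent, as built above), and sdma_write_length dwords of the
 * 0xdeadbeaf pattern. */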
1649
1650 amdgpu_test_exec_cs_helper_raw(device, context_handle,
1651 ip_type, ring_id, i, pm4,
1652 1, resources, ib_info,
1653 ibs_request, secure);
1654
1655 /* verify that the test result matches the expected pattern */
1656 i = 0;
1657 if (!secure) {
1658 while(i < sdma_write_length) {
1659 CU_ASSERT_EQUAL(bo_cpu[i++], 0xdeadbeaf);
1660 }
1661 } else if (ip_type == AMDGPU_HW_IP_GFX) {
1662 memset((void*)pm4, 0, pm4_dw * sizeof(uint32_t));
1663 pm4[i++] = PACKET3(PACKET3_ATOMIC_MEM, 7);
1664 /* atomic opcode: 32-bit compare-swap with return (TC_OP_ATOMIC_CMPSWAP_RTN_32)
1665  * command: 1 - loop until the compare is satisfied
1666  * cache policy: 0 - lru
1667  * engine_sel: 0 - micro engine
1668  */
1669 pm4[i++] = (TC_OP_ATOMIC_CMPSWAP_RTN_32 |
1670 ATOMIC_MEM_COMMAND(1) |
1671 ATOMIC_MEM_CACHEPOLICAY(0) |
1672 ATOMIC_MEM_ENGINESEL(0));
1673 pm4[i++] = 0xfffffffc & bo_mc;
1674 pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
1675 pm4[i++] = 0x12345678;
1676 pm4[i++] = 0x0;
1677 pm4[i++] = 0xdeadbeaf;
1678 pm4[i++] = 0x0;
1679 pm4[i++] = 0x100;
1680 amdgpu_test_exec_cs_helper_raw(device, context_handle,
1681 ip_type, ring_id, i, pm4,
1682 1, resources, ib_info,
1683 ibs_request, true);
1684 } else if (ip_type == AMDGPU_HW_IP_DMA) {
1685 /* restore the bo_cpu to compare */
1686 bo_cpu_origin = bo_cpu[0];
1687 memset((void*)pm4, 0, pm4_dw * sizeof(uint32_t));
1688 /* atomic opcode: 32-bit compare-swap with return (TC_OP_ATOMIC_CMPSWAP_RTN_32)
1689  * loop: 1 - loop until the compare is satisfied
1690  * single_pass_atomic: 0 - lru
1691  */
1692 pm4[i++] = SDMA_PACKET(SDMA_OPCODE_ATOMIC,
1693 0,
1694 SDMA_ATOMIC_LOOP(1) |
1695 SDMA_ATOMIC_TMZ(1) |
1696 SDMA_ATOMIC_OPCODE(TC_OP_ATOMIC_CMPSWAP_RTN_32));
1697 pm4[i++] = 0xfffffffc & bo_mc;
1698 pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
1699 pm4[i++] = 0x12345678;
1700 pm4[i++] = 0x0;
1701 pm4[i++] = 0xdeadbeaf;
1702 pm4[i++] = 0x0;
1703 pm4[i++] = 0x100;
1704 amdgpu_test_exec_cs_helper_raw(device, context_handle,
1705 ip_type, ring_id, i, pm4,
1706 1, resources, ib_info,
1707 ibs_request, true);
1708 /* The DMA engine's atomic behavior differs from GFX:
1709  * if the compare data does not match the destination data,
1710  * GFX loops again until the engine times out (system hang),
1711  * while DMA loops until a timer expires and then raises an
1712  * interrupt, so the test cannot rely on the interrupt mechanism.
1713  * We verify another way: when the compare data matches the
1714  * destination data, the atomic swaps the source data into the
1715  * destination buffer; otherwise the destination data is left
1716  * unchanged. So if the bo_cpu data has been overwritten, the
1717  * test passes. */
1718 CU_ASSERT_NOT_EQUAL(bo_cpu[0], bo_cpu_origin);
1719
1720 /* compare again for the case of dest_data != cmp_data */
1721 i = 0;
1722 /* restore again; dest_data should now be 0x12345678 */
1723 bo_cpu_origin = bo_cpu[0];
1724 memset((void*)pm4, 0, pm4_dw * sizeof(uint32_t));
1725 pm4[i++] = SDMA_PACKET(SDMA_OPCODE_ATOMIC,
1726 0,
1727 SDMA_ATOMIC_LOOP(1) |
1728 SDMA_ATOMIC_TMZ(1) |
1729 SDMA_ATOMIC_OPCODE(TC_OP_ATOMIC_CMPSWAP_RTN_32));
1730 pm4[i++] = 0xfffffffc & bo_mc;
1731 pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
1732 pm4[i++] = 0x87654321;
1733 pm4[i++] = 0x0;
1734 pm4[i++] = 0xdeadbeaf;
1735 pm4[i++] = 0x0;
1736 pm4[i++] = 0x100;
1737 amdgpu_test_exec_cs_helper_raw(device, context_handle,
1738 ip_type, ring_id, i, pm4,
1739 1, resources, ib_info,
1740 ibs_request, true);
1741 /* bo_cpu[0] should be unchanged (still 0x12345678); otherwise the test failed */
1742 CU_ASSERT_EQUAL(bo_cpu[0], bo_cpu_origin);
1743 }
1744
1745 r = amdgpu_bo_unmap_and_free(bo, va_handle, bo_mc,
1746 sdma_write_length * sizeof(uint32_t));
1747 CU_ASSERT_EQUAL(r, 0);
1748 loop++;
1749 }
1750 }
1751 /* clean resources */
1752 free(resources);
1753 free(ibs_request);
1754 free(ib_info);
1755 free(pm4);
1756
1757 /* end of test */
1758 r = amdgpu_cs_ctx_free(context_handle);
1759 CU_ASSERT_EQUAL(r, 0);
1760 }
1761
1762 static void amdgpu_command_submission_write_linear_helper(unsigned ip_type)
1763 {
1764 amdgpu_command_submission_write_linear_helper_with_secure(device_handle,
1765 ip_type,
1766 false);
1767 }
1768
1769 static void amdgpu_command_submission_sdma_write_linear(void)
1770 {
1771 amdgpu_command_submission_write_linear_helper(AMDGPU_HW_IP_DMA);
1772 }
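/*
 * A secure (TMZ) run uses the same entry point with secure = true; a
 * sketch of such a caller (illustrative only, the secure tests are
 * assumed to live in a separate suite):
 *
 *	amdgpu_command_submission_write_linear_helper_with_secure(
 *		device_handle, AMDGPU_HW_IP_DMA, true);
 */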
1773
1774 static void amdgpu_command_submission_const_fill_helper(unsigned ip_type)
1775 {
1776 const int sdma_write_length = 1024 * 1024;
1777 const int pm4_dw = 256;
1778 amdgpu_context_handle context_handle;
1779 amdgpu_bo_handle bo;
1780 amdgpu_bo_handle *resources;
1781 uint32_t *pm4;
1782 struct amdgpu_cs_ib_info *ib_info;
1783 struct amdgpu_cs_request *ibs_request;
1784 uint64_t bo_mc;
1785 volatile uint32_t *bo_cpu;
1786 int i, j, r, loop, ring_id;
1787 uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
1788 amdgpu_va_handle va_handle;
1789 struct drm_amdgpu_info_hw_ip hw_ip_info;
1790
1791 pm4 = calloc(pm4_dw, sizeof(*pm4));
1792 CU_ASSERT_NOT_EQUAL(pm4, NULL);
1793
1794 ib_info = calloc(1, sizeof(*ib_info));
1795 CU_ASSERT_NOT_EQUAL(ib_info, NULL);
1796
1797 ibs_request = calloc(1, sizeof(*ibs_request));
1798 CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
1799
1800 r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &hw_ip_info);
1801 CU_ASSERT_EQUAL(r, 0);
1802
1803 r = amdgpu_cs_ctx_create(device_handle, &context_handle);
1804 CU_ASSERT_EQUAL(r, 0);
1805
1806 /* prepare resource */
1807 resources = calloc(1, sizeof(amdgpu_bo_handle));
1808 CU_ASSERT_NOT_EQUAL(resources, NULL);
1809
1810 for (ring_id = 0; (1 << ring_id) & hw_ip_info.available_rings; ring_id++) {
1811 loop = 0;
1812 while(loop < 2) {
1813 /* allocate UC bo for sDMA use */
1814 r = amdgpu_bo_alloc_and_map(device_handle,
1815 sdma_write_length, 4096,
1816 AMDGPU_GEM_DOMAIN_GTT,
1817 gtt_flags[loop], &bo, (void**)&bo_cpu,
1818 &bo_mc, &va_handle);
1819 CU_ASSERT_EQUAL(r, 0);
1820
1821 /* clear bo */
1822 memset((void*)bo_cpu, 0, sdma_write_length);
1823
1824 resources[0] = bo;
1825
1826 /* fill the PM4 buffer: test DMA const fill */
1827 i = j = 0;
1828 if (ip_type == AMDGPU_HW_IP_DMA) {
1829 if (family_id == AMDGPU_FAMILY_SI) {
1830 pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_CONSTANT_FILL_SI,
1831 0, 0, 0,
1832 sdma_write_length / 4);
1833 pm4[i++] = 0xfffffffc & bo_mc;
1834 pm4[i++] = 0xdeadbeaf;
1835 pm4[i++] = (0xffffffff00000000 & bo_mc) >> 16;
1836 } else {
1837 pm4[i++] = SDMA_PACKET(SDMA_OPCODE_CONSTANT_FILL, 0,
1838 SDMA_CONSTANT_FILL_EXTRA_SIZE(2));
1839 pm4[i++] = 0xffffffff & bo_mc;
1840 pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
1841 pm4[i++] = 0xdeadbeaf;
1842 if (family_id >= AMDGPU_FAMILY_AI)
1843 pm4[i++] = sdma_write_length - 1;
1844 else
1845 pm4[i++] = sdma_write_length;
1846 }
1847 } else if ((ip_type == AMDGPU_HW_IP_GFX) ||
1848 (ip_type == AMDGPU_HW_IP_COMPUTE)) {
1849 if (family_id == AMDGPU_FAMILY_SI) {
1850 pm4[i++] = PACKET3(PACKET3_DMA_DATA_SI, 4);
1851 pm4[i++] = 0xdeadbeaf;
1852 pm4[i++] = PACKET3_DMA_DATA_SI_ENGINE(0) |
1853 PACKET3_DMA_DATA_SI_DST_SEL(0) |
1854 PACKET3_DMA_DATA_SI_SRC_SEL(2) |
1855 PACKET3_DMA_DATA_SI_CP_SYNC;
1856 pm4[i++] = 0xffffffff & bo_mc;
1857 pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
1858 pm4[i++] = sdma_write_length;
1859 } else {
1860 pm4[i++] = PACKET3(PACKET3_DMA_DATA, 5);
1861 pm4[i++] = PACKET3_DMA_DATA_ENGINE(0) |
1862 PACKET3_DMA_DATA_DST_SEL(0) |
1863 PACKET3_DMA_DATA_SRC_SEL(2) |
1864 PACKET3_DMA_DATA_CP_SYNC;
1865 pm4[i++] = 0xdeadbeaf;
1866 pm4[i++] = 0;
1867 pm4[i++] = 0xfffffffc & bo_mc;
1868 pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
1869 pm4[i++] = sdma_write_length;
1870 }
1871 }
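/* On GFX/COMPUTE the CP DMA_DATA packet is used as a fill: SRC_SEL(2)
 * selects the packet-embedded data dword as the source, so the
 * 0xdeadbeaf dword is replicated over the destination range. */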
1872
1873 amdgpu_test_exec_cs_helper(context_handle,
1874 ip_type, ring_id,
1875 i, pm4,
1876 1, resources,
1877 ib_info, ibs_request);
1878
1879 /* verify that the test result matches the expected pattern */
1880 i = 0;
1881 while(i < (sdma_write_length / 4)) {
1882 CU_ASSERT_EQUAL(bo_cpu[i++], 0xdeadbeaf);
1883 }
1884
1885 r = amdgpu_bo_unmap_and_free(bo, va_handle, bo_mc,
1886 sdma_write_length);
1887 CU_ASSERT_EQUAL(r, 0);
1888 loop++;
1889 }
1890 }
1891 /* clean resources */
1892 free(resources);
1893 free(ibs_request);
1894 free(ib_info);
1895 free(pm4);
1896
1897 /* end of test */
1898 r = amdgpu_cs_ctx_free(context_handle);
1899 CU_ASSERT_EQUAL(r, 0);
1900 }
1901
1902 static void amdgpu_command_submission_sdma_const_fill(void)
1903 {
1904 amdgpu_command_submission_const_fill_helper(AMDGPU_HW_IP_DMA);
1905 }
1906
1907 static void amdgpu_command_submission_copy_linear_helper(unsigned ip_type)
1908 {
1909 const int sdma_write_length = 1024;
1910 const int pm4_dw = 256;
1911 amdgpu_context_handle context_handle;
1912 amdgpu_bo_handle bo1, bo2;
1913 amdgpu_bo_handle *resources;
1914 uint32_t *pm4;
1915 struct amdgpu_cs_ib_info *ib_info;
1916 struct amdgpu_cs_request *ibs_request;
1917 uint64_t bo1_mc, bo2_mc;
1918 volatile unsigned char *bo1_cpu, *bo2_cpu;
1919 int i, j, r, loop1, loop2, ring_id;
1920 uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
1921 amdgpu_va_handle bo1_va_handle, bo2_va_handle;
1922 struct drm_amdgpu_info_hw_ip hw_ip_info;
1923
1924 pm4 = calloc(pm4_dw, sizeof(*pm4));
1925 CU_ASSERT_NOT_EQUAL(pm4, NULL);
1926
1927 ib_info = calloc(1, sizeof(*ib_info));
1928 CU_ASSERT_NOT_EQUAL(ib_info, NULL);
1929
1930 ibs_request = calloc(1, sizeof(*ibs_request));
1931 CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
1932
1933 r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &hw_ip_info);
1934 CU_ASSERT_EQUAL(r, 0);
1935
1936 r = amdgpu_cs_ctx_create(device_handle, &context_handle);
1937 CU_ASSERT_EQUAL(r, 0);
1938
1939 /* prepare resource */
1940 resources = calloc(2, sizeof(amdgpu_bo_handle));
1941 CU_ASSERT_NOT_EQUAL(resources, NULL);
1942
1943 for (ring_id = 0; (1 << ring_id) & hw_ip_info.available_rings; ring_id++) {
1944 loop1 = 0;
1945 /* run 4 iterations to test all four GTT-flag mapping combinations */
1946 while(loop1 < 2) {
1947 for (loop2 = 0; loop2 < 2; loop2++) {
1948 /* allocate UC bo1 for sDMA use */
1949 r = amdgpu_bo_alloc_and_map(device_handle,
1950 sdma_write_length, 4096,
1951 AMDGPU_GEM_DOMAIN_GTT,
1952 gtt_flags[loop1], &bo1,
1953 (void**)&bo1_cpu, &bo1_mc,
1954 &bo1_va_handle);
1955 CU_ASSERT_EQUAL(r, 0);
1956
1957 /* set bo1 */
1958 memset((void*)bo1_cpu, 0xaa, sdma_write_length);
1959
1960 /* allocate UC bo2 for sDMA use */
1961 r = amdgpu_bo_alloc_and_map(device_handle,
1962 sdma_write_length, 4096,
1963 AMDGPU_GEM_DOMAIN_GTT,
1964 gtt_flags[loop2], &bo2,
1965 (void**)&bo2_cpu, &bo2_mc,
1966 &bo2_va_handle);
1967 CU_ASSERT_EQUAL(r, 0);
1968
1969 /* clear bo2 */
1970 memset((void*)bo2_cpu, 0, sdma_write_length);
1971
1972 resources[0] = bo1;
1973 resources[1] = bo2;
1974
1975 /* fill the PM4 buffer: test DMA copy linear */
1976 i = j = 0;
1977 if (ip_type == AMDGPU_HW_IP_DMA) {
1978 if (family_id == AMDGPU_FAMILY_SI) {
1979 pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_COPY_SI,
1980 0, 0, 0,
1981 sdma_write_length);
1982 pm4[i++] = 0xffffffff & bo2_mc;
1983 pm4[i++] = 0xffffffff & bo1_mc;
1984 pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
1985 pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
1986 } else {
1987 pm4[i++] = SDMA_PACKET(SDMA_OPCODE_COPY,
1988 SDMA_COPY_SUB_OPCODE_LINEAR,
1989 0);
1990 if (family_id >= AMDGPU_FAMILY_AI)
1991 pm4[i++] = sdma_write_length - 1;
1992 else
1993 pm4[i++] = sdma_write_length;
1994 pm4[i++] = 0;
1995 pm4[i++] = 0xffffffff & bo1_mc;
1996 pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
1997 pm4[i++] = 0xffffffff & bo2_mc;
1998 pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
1999 }
2000 } else if ((ip_type == AMDGPU_HW_IP_GFX) ||
2001 (ip_type == AMDGPU_HW_IP_COMPUTE)) {
2002 if (family_id == AMDGPU_FAMILY_SI) {
2003 pm4[i++] = PACKET3(PACKET3_DMA_DATA_SI, 4);
2004 pm4[i++] = 0xfffffffc & bo1_mc;
2005 pm4[i++] = PACKET3_DMA_DATA_SI_ENGINE(0) |
2006 PACKET3_DMA_DATA_SI_DST_SEL(0) |
2007 PACKET3_DMA_DATA_SI_SRC_SEL(0) |
2008 PACKET3_DMA_DATA_SI_CP_SYNC |
2009 (0xffff00000000 & bo1_mc) >> 32;
2010 pm4[i++] = 0xfffffffc & bo2_mc;
2011 pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
2012 pm4[i++] = sdma_write_length;
2013 } else {
2014 pm4[i++] = PACKET3(PACKET3_DMA_DATA, 5);
2015 pm4[i++] = PACKET3_DMA_DATA_ENGINE(0) |
2016 PACKET3_DMA_DATA_DST_SEL(0) |
2017 PACKET3_DMA_DATA_SRC_SEL(0) |
2018 PACKET3_DMA_DATA_CP_SYNC;
2019 pm4[i++] = 0xfffffffc & bo1_mc;
2020 pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
2021 pm4[i++] = 0xfffffffc & bo2_mc;
2022 pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
2023 pm4[i++] = sdma_write_length;
2024 }
2025 }
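/* Unlike the fill case above, SRC_SEL(0) makes DMA_DATA read from the
 * bo1 source address, turning the packet into a memory-to-memory copy
 * into bo2. */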
2026
2027 amdgpu_test_exec_cs_helper(context_handle,
2028 ip_type, ring_id,
2029 i, pm4,
2030 2, resources,
2031 ib_info, ibs_request);
2032
2033 /* verify that the test result matches the expected pattern */
2034 i = 0;
2035 while(i < sdma_write_length) {
2036 CU_ASSERT_EQUAL(bo2_cpu[i++], 0xaa);
2037 }
2038 r = amdgpu_bo_unmap_and_free(bo1, bo1_va_handle, bo1_mc,
2039 sdma_write_length);
2040 CU_ASSERT_EQUAL(r, 0);
2041 r = amdgpu_bo_unmap_and_free(bo2, bo2_va_handle, bo2_mc,
2042 sdma_write_length);
2043 CU_ASSERT_EQUAL(r, 0);
2045 }
2046 loop1++;
2047 }
2048 }
2049 /* clean resources */
2050 free(resources);
2051 free(ibs_request);
2052 free(ib_info);
2053 free(pm4);
2054
2055 /* end of test */
2056 r = amdgpu_cs_ctx_free(context_handle);
2057 CU_ASSERT_EQUAL(r, 0);
2058 }
2059
2060 static void amdgpu_command_submission_sdma_copy_linear(void)
2061 {
2062 amdgpu_command_submission_copy_linear_helper(AMDGPU_HW_IP_DMA);
2063 }
2064
2065 static void amdgpu_command_submission_sdma(void)
2066 {
2067 amdgpu_command_submission_sdma_write_linear();
2068 amdgpu_command_submission_sdma_const_fill();
2069 amdgpu_command_submission_sdma_copy_linear();
2070 }
2071
2072 static void amdgpu_command_submission_multi_fence_wait_all(bool wait_all)
2073 {
2074 amdgpu_context_handle context_handle;
2075 amdgpu_bo_handle ib_result_handle, ib_result_ce_handle;
2076 void *ib_result_cpu, *ib_result_ce_cpu;
2077 uint64_t ib_result_mc_address, ib_result_ce_mc_address;
2078 struct amdgpu_cs_request ibs_request[2] = {0};
2079 struct amdgpu_cs_ib_info ib_info[2];
2080 struct amdgpu_cs_fence fence_status[2] = {0};
2081 uint32_t *ptr;
2082 uint32_t expired;
2083 amdgpu_bo_list_handle bo_list;
2084 amdgpu_va_handle va_handle, va_handle_ce;
2085 int r;
2086 int i = 0, ib_cs_num = 2;
2087 struct drm_amdgpu_info_hw_ip info;
2088
2089 r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_GFX, 0, &info);
2090 CU_ASSERT_EQUAL(r, 0);
2091
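/* The SET_CE_DE_COUNTERS / WAIT_ON_CE_COUNTER packets below are not
 * supported on GFX11 and newer, so skip this test there. */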
2092 if (info.hw_ip_version_major >= 11)
2093 return;
2094
2095 r = amdgpu_cs_ctx_create(device_handle, &context_handle);
2096 CU_ASSERT_EQUAL(r, 0);
2097
2098 r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
2099 AMDGPU_GEM_DOMAIN_GTT, 0,
2100 &ib_result_handle, &ib_result_cpu,
2101 &ib_result_mc_address, &va_handle);
2102 CU_ASSERT_EQUAL(r, 0);
2103
2104 r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
2105 AMDGPU_GEM_DOMAIN_GTT, 0,
2106 &ib_result_ce_handle, &ib_result_ce_cpu,
2107 &ib_result_ce_mc_address, &va_handle_ce);
2108 CU_ASSERT_EQUAL(r, 0);
2109
2110 r = amdgpu_get_bo_list(device_handle, ib_result_handle,
2111 ib_result_ce_handle, &bo_list);
2112 CU_ASSERT_EQUAL(r, 0);
2113
2114 memset(ib_info, 0, 2 * sizeof(struct amdgpu_cs_ib_info));
2115
2116 /* IT_SET_CE_DE_COUNTERS */
2117 ptr = ib_result_ce_cpu;
2118 if (family_id != AMDGPU_FAMILY_SI) {
2119 ptr[i++] = 0xc0008900;
2120 ptr[i++] = 0;
2121 }
2122 ptr[i++] = 0xc0008400;
2123 ptr[i++] = 1;
2124 ib_info[0].ib_mc_address = ib_result_ce_mc_address;
2125 ib_info[0].size = i;
2126 ib_info[0].flags = AMDGPU_IB_FLAG_CE;
2127
2128 /* IT_WAIT_ON_CE_COUNTER */
2129 ptr = ib_result_cpu;
2130 ptr[0] = 0xc0008600;
2131 ptr[1] = 0x00000001;
2132 ib_info[1].ib_mc_address = ib_result_mc_address;
2133 ib_info[1].size = 2;
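/* Submit ib_cs_num requests, each pairing the CE IB (flagged
 * AMDGPU_IB_FLAG_CE) with the DE IB that waits on the CE counter. */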
2134
2135 for (i = 0; i < ib_cs_num; i++) {
2136 ibs_request[i].ip_type = AMDGPU_HW_IP_GFX;
2137 ibs_request[i].number_of_ibs = 2;
2138 ibs_request[i].ibs = ib_info;
2139 ibs_request[i].resources = bo_list;
2140 ibs_request[i].fence_info.handle = NULL;
2141 }
2142
2143 r = amdgpu_cs_submit(context_handle, 0, ibs_request, ib_cs_num);
2144
2145 CU_ASSERT_EQUAL(r, 0);
2146
2147 for (i = 0; i < ib_cs_num; i++) {
2148 fence_status[i].context = context_handle;
2149 fence_status[i].ip_type = AMDGPU_HW_IP_GFX;
2150 fence_status[i].fence = ibs_request[i].seq_no;
2151 }
2152
2153 r = amdgpu_cs_wait_fences(fence_status, ib_cs_num, wait_all,
2154 AMDGPU_TIMEOUT_INFINITE,
2155 &expired, NULL);
2156 CU_ASSERT_EQUAL(r, 0);
2157
2158 r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
2159 ib_result_mc_address, 4096);
2160 CU_ASSERT_EQUAL(r, 0);
2161
2162 r = amdgpu_bo_unmap_and_free(ib_result_ce_handle, va_handle_ce,
2163 ib_result_ce_mc_address, 4096);
2164 CU_ASSERT_EQUAL(r, 0);
2165
2166 r = amdgpu_bo_list_destroy(bo_list);
2167 CU_ASSERT_EQUAL(r, 0);
2168
2169 r = amdgpu_cs_ctx_free(context_handle);
2170 CU_ASSERT_EQUAL(r, 0);
2171 }
2172
2173 static void amdgpu_command_submission_multi_fence(void)
2174 {
2175 amdgpu_command_submission_multi_fence_wait_all(true);
2176 amdgpu_command_submission_multi_fence_wait_all(false);
2177 }
2178
2179 static void amdgpu_userptr_test(void)
2180 {
2181 int i, r, j;
2182 uint32_t *pm4 = NULL;
2183 uint64_t bo_mc;
2184 void *ptr = NULL;
2185 int pm4_dw = 256;
2186 int sdma_write_length = 4;
2187 amdgpu_bo_handle handle;
2188 amdgpu_context_handle context_handle;
2189 struct amdgpu_cs_ib_info *ib_info;
2190 struct amdgpu_cs_request *ibs_request;
2191 amdgpu_bo_handle buf_handle;
2192 amdgpu_va_handle va_handle;
2193
2194 pm4 = calloc(pm4_dw, sizeof(*pm4));
2195 CU_ASSERT_NOT_EQUAL(pm4, NULL);
2196
2197 ib_info = calloc(1, sizeof(*ib_info));
2198 CU_ASSERT_NOT_EQUAL(ib_info, NULL);
2199
2200 ibs_request = calloc(1, sizeof(*ibs_request));
2201 CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
2202
2203 r = amdgpu_cs_ctx_create(device_handle, &context_handle);
2204 CU_ASSERT_EQUAL(r, 0);
2205
2206 posix_memalign(&ptr, sysconf(_SC_PAGE_SIZE), BUFFER_SIZE);
2207 CU_ASSERT_NOT_EQUAL(ptr, NULL);
2208 memset(ptr, 0, BUFFER_SIZE);
2209
2210 r = amdgpu_create_bo_from_user_mem(device_handle,
2211 ptr, BUFFER_SIZE, &buf_handle);
2212 CU_ASSERT_EQUAL(r, 0);
2213
2214 r = amdgpu_va_range_alloc(device_handle,
2215 amdgpu_gpu_va_range_general,
2216 BUFFER_SIZE, 1, 0, &bo_mc,
2217 &va_handle, 0);
2218 CU_ASSERT_EQUAL(r, 0);
2219
2220 r = amdgpu_bo_va_op(buf_handle, 0, BUFFER_SIZE, bo_mc, 0, AMDGPU_VA_OP_MAP);
2221 CU_ASSERT_EQUAL(r, 0);
2222
2223 handle = buf_handle;
2224
2225 j = i = 0;
2226
2227 if (family_id == AMDGPU_FAMILY_SI)
2228 pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_WRITE, 0, 0, 0,
2229 sdma_write_length);
2230 else
2231 pm4[i++] = SDMA_PACKET(SDMA_OPCODE_WRITE,
2232 SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
2233 pm4[i++] = 0xffffffff & bo_mc;
2234 pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
2235 if (family_id >= AMDGPU_FAMILY_AI)
2236 pm4[i++] = sdma_write_length - 1;
2237 else if (family_id != AMDGPU_FAMILY_SI)
2238 pm4[i++] = sdma_write_length;
2239
2240 while (j++ < sdma_write_length)
2241 pm4[i++] = 0xdeadbeaf;
2242
2243 if (!fork()) {
2244 pm4[0] = 0x0;
2245 exit(0);
2246 }
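/* The child scribbles on its copy-on-write copy of pm4[] and exits;
 * the parent's buffer must be unaffected by the fork so the submission
 * below still executes the packet built above. */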
2247
2248 amdgpu_test_exec_cs_helper(context_handle,
2249 AMDGPU_HW_IP_DMA, 0,
2250 i, pm4,
2251 1, &handle,
2252 ib_info, ibs_request);
2253 i = 0;
2254 while (i < sdma_write_length) {
2255 CU_ASSERT_EQUAL(((int*)ptr)[i++], 0xdeadbeaf);
2256 }
2257 free(ibs_request);
2258 free(ib_info);
2259 free(pm4);
2260
2261 r = amdgpu_bo_va_op(buf_handle, 0, BUFFER_SIZE, bo_mc, 0, AMDGPU_VA_OP_UNMAP);
2262 CU_ASSERT_EQUAL(r, 0);
2263 r = amdgpu_va_range_free(va_handle);
2264 CU_ASSERT_EQUAL(r, 0);
2265 r = amdgpu_bo_free(buf_handle);
2266 CU_ASSERT_EQUAL(r, 0);
2267 free(ptr);
2268
2269 r = amdgpu_cs_ctx_free(context_handle);
2270 CU_ASSERT_EQUAL(r, 0);
2271
2272 wait(NULL);
2273 }
2274
2275 static void amdgpu_sync_dependency_test(void)
2276 {
2277 amdgpu_context_handle context_handle[2];
2278 amdgpu_bo_handle ib_result_handle;
2279 void *ib_result_cpu;
2280 uint64_t ib_result_mc_address;
2281 struct amdgpu_cs_request ibs_request;
2282 struct amdgpu_cs_ib_info ib_info;
2283 struct amdgpu_cs_fence fence_status;
2284 uint32_t expired;
2285 int i, j, r;
2286 amdgpu_bo_list_handle bo_list;
2287 amdgpu_va_handle va_handle;
2288 static uint32_t *ptr;
2289 uint64_t seq_no;
2290
2291 r = amdgpu_cs_ctx_create(device_handle, &context_handle[0]);
2292 CU_ASSERT_EQUAL(r, 0);
2293 r = amdgpu_cs_ctx_create(device_handle, &context_handle[1]);
2294 CU_ASSERT_EQUAL(r, 0);
2295
2296 r = amdgpu_bo_alloc_and_map(device_handle, 8192, 4096,
2297 AMDGPU_GEM_DOMAIN_GTT, 0,
2298 &ib_result_handle, &ib_result_cpu,
2299 &ib_result_mc_address, &va_handle);
2300 CU_ASSERT_EQUAL(r, 0);
2301
2302 r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL,
2303 &bo_list);
2304 CU_ASSERT_EQUAL(r, 0);
2305
2306 ptr = ib_result_cpu;
2307 i = 0;
2308
2309 memcpy(ptr + CODE_OFFSET , shader_bin, sizeof(shader_bin));
2310
2311 /* Dispatch minimal init config and verify it's executed */
2312 ptr[i++] = PACKET3(PKT3_CONTEXT_CONTROL, 1);
2313 ptr[i++] = 0x80000000;
2314 ptr[i++] = 0x80000000;
2315
2316 ptr[i++] = PACKET3(PKT3_CLEAR_STATE, 0);
2317 ptr[i++] = 0x80000000;
2318
2319
2320 /* Program compute regs */
2321 ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2);
2322 ptr[i++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
2323 ptr[i++] = (ib_result_mc_address + CODE_OFFSET * 4) >> 8;
2324 ptr[i++] = (ib_result_mc_address + CODE_OFFSET * 4) >> 40;
2325
2326
2327 ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2);
2328 ptr[i++] = mmCOMPUTE_PGM_RSRC1 - PACKET3_SET_SH_REG_START;
2329 /*
2330 * 002c0040 COMPUTE_PGM_RSRC1 <- VGPRS = 0
2331 SGPRS = 1
2332 PRIORITY = 0
2333 FLOAT_MODE = 192 (0xc0)
2334 PRIV = 0
2335 DX10_CLAMP = 1
2336 DEBUG_MODE = 0
2337 IEEE_MODE = 0
2338 BULKY = 0
2339 CDBG_USER = 0
2340 *
2341 */
2342 ptr[i++] = 0x002c0040;
2343
2344
2345 /*
2346 * 00000010 COMPUTE_PGM_RSRC2 <- SCRATCH_EN = 0
2347 USER_SGPR = 8
2348 TRAP_PRESENT = 0
2349 TGID_X_EN = 0
2350 TGID_Y_EN = 0
2351 TGID_Z_EN = 0
2352 TG_SIZE_EN = 0
2353 TIDIG_COMP_CNT = 0
2354 EXCP_EN_MSB = 0
2355 LDS_SIZE = 0
2356 EXCP_EN = 0
2357 *
2358 */
2359 ptr[i++] = 0x00000010;
2360
2361
2362 /*
2363 * 00000100 COMPUTE_TMPRING_SIZE <- WAVES = 256 (0x100)
2364 WAVESIZE = 0
2365 *
2366 */
2367 ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
2368 ptr[i++] = mmCOMPUTE_TMPRING_SIZE - PACKET3_SET_SH_REG_START;
2369 ptr[i++] = 0x00000100;
2370
2371 ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2);
2372 ptr[i++] = mmCOMPUTE_USER_DATA_0 - PACKET3_SET_SH_REG_START;
2373 ptr[i++] = 0xffffffff & (ib_result_mc_address + DATA_OFFSET * 4);
2374 ptr[i++] = (0xffffffff00000000 & (ib_result_mc_address + DATA_OFFSET * 4)) >> 32;
2375
2376 ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
2377 ptr[i++] = mmCOMPUTE_RESOURCE_LIMITS - PACKET3_SET_SH_REG_START;
2378 ptr[i++] = 0;
2379
2380 ptr[i++] = PACKET3(PKT3_SET_SH_REG, 3);
2381 ptr[i++] = mmCOMPUTE_NUM_THREAD_X - PACKET3_SET_SH_REG_START;
2382 ptr[i++] = 1;
2383 ptr[i++] = 1;
2384 ptr[i++] = 1;
2385
2386
2387 /* Dispatch */
2388 ptr[i++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
2389 ptr[i++] = 1;
2390 ptr[i++] = 1;
2391 ptr[i++] = 1;
2392 ptr[i++] = 0x00000045; /* DISPATCH DIRECT field */
2393
2394
2395 while (i & 7)
2396 ptr[i++] = 0xffff1000; /* type3 nop packet */
2397
2398 memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info));
2399 ib_info.ib_mc_address = ib_result_mc_address;
2400 ib_info.size = i;
2401
2402 memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request));
2403 ibs_request.ip_type = AMDGPU_HW_IP_GFX;
2404 ibs_request.ring = 0;
2405 ibs_request.number_of_ibs = 1;
2406 ibs_request.ibs = &ib_info;
2407 ibs_request.resources = bo_list;
2408 ibs_request.fence_info.handle = NULL;
2409
2410 r = amdgpu_cs_submit(context_handle[1], 0, &ibs_request, 1);
2411 CU_ASSERT_EQUAL(r, 0);
2412 seq_no = ibs_request.seq_no;
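/* Remember the first submission's fence; the second submission will
 * declare an explicit dependency on it. */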
2413
2414
2415
2416 /* Prepare second command with dependency on the first */
2417 j = i;
2418 ptr[i++] = PACKET3(PACKET3_WRITE_DATA, 3);
2419 ptr[i++] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
2420 ptr[i++] = 0xfffffffc & (ib_result_mc_address + DATA_OFFSET * 4);
2421 ptr[i++] = (0xffffffff00000000 & (ib_result_mc_address + DATA_OFFSET * 4)) >> 32;
2422 ptr[i++] = 99;
2423
2424 while (i & 7)
2425 ptr[i++] = 0xffff1000; /* type3 nop packet */
2426
2427 memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info));
2428 ib_info.ib_mc_address = ib_result_mc_address + j * 4;
2429 ib_info.size = i - j;
2430
2431 memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request));
2432 ibs_request.ip_type = AMDGPU_HW_IP_GFX;
2433 ibs_request.ring = 0;
2434 ibs_request.number_of_ibs = 1;
2435 ibs_request.ibs = &ib_info;
2436 ibs_request.resources = bo_list;
2437 ibs_request.fence_info.handle = NULL;
2438
2439 ibs_request.number_of_dependencies = 1;
2440
2441 ibs_request.dependencies = calloc(1, sizeof(*ibs_request.dependencies));
2442 ibs_request.dependencies[0].context = context_handle[1];
2443 ibs_request.dependencies[0].ip_instance = 0;
2444 ibs_request.dependencies[0].ring = 0;
2445 ibs_request.dependencies[0].fence = seq_no;
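/* The dependency references context_handle[1]'s fence, so the kernel
 * must hold back this submission (on context_handle[0]) until the
 * compute dispatch above has completed. */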
2446
2447
2448 r = amdgpu_cs_submit(context_handle[0], 0, &ibs_request, 1);
2449 CU_ASSERT_EQUAL(r, 0);
2450
2451
2452 memset(&fence_status, 0, sizeof(struct amdgpu_cs_fence));
2453 fence_status.context = context_handle[0];
2454 fence_status.ip_type = AMDGPU_HW_IP_GFX;
2455 fence_status.ip_instance = 0;
2456 fence_status.ring = 0;
2457 fence_status.fence = ibs_request.seq_no;
2458
2459 r = amdgpu_cs_query_fence_status(&fence_status,
2460 AMDGPU_TIMEOUT_INFINITE, 0, &expired);
2461 CU_ASSERT_EQUAL(r, 0);
2462
2463 /* Expect the second command to wait for shader to complete */
2464 CU_ASSERT_EQUAL(ptr[DATA_OFFSET], 99);
2465
2466 r = amdgpu_bo_list_destroy(bo_list);
2467 CU_ASSERT_EQUAL(r, 0);
2468
2469 r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
2470 ib_result_mc_address, 4096);
2471 CU_ASSERT_EQUAL(r, 0);
2472
2473 r = amdgpu_cs_ctx_free(context_handle[0]);
2474 CU_ASSERT_EQUAL(r, 0);
2475 r = amdgpu_cs_ctx_free(context_handle[1]);
2476 CU_ASSERT_EQUAL(r, 0);
2477
2478 free(ibs_request.dependencies);
2479 }
2480
2481 static void amdgpu_compute_dispatch_test(void)
2482 {
2483 amdgpu_test_dispatch_helper(device_handle, AMDGPU_HW_IP_COMPUTE);
2484 }
2485 static void amdgpu_gfx_dispatch_test(void)
2486 {
2487 amdgpu_test_dispatch_helper(device_handle, AMDGPU_HW_IP_GFX);
2488 }
2489
2490 static void amdgpu_draw_test(void)
2491 {
2492 amdgpu_test_draw_helper(device_handle);
2493 }
2494 static void amdgpu_gpu_reset_test(void)
2495 {
2496 int r;
2497 char debugfs_path[256], tmp[10];
2498 int fd;
2499 struct stat sbuf;
2500 amdgpu_context_handle context_handle;
2501 uint32_t hang_state, hangs;
2502
2503 r = amdgpu_cs_ctx_create(device_handle, &context_handle);
2504 CU_ASSERT_EQUAL(r, 0);
2505
2506 r = fstat(drm_amdgpu[0], &sbuf);
2507 CU_ASSERT_EQUAL(r, 0);
2508
2509 sprintf(debugfs_path, "/sys/kernel/debug/dri/%d/amdgpu_gpu_recover", minor(sbuf.st_rdev));
2510 fd = open(debugfs_path, O_RDONLY);
2511 CU_ASSERT(fd >= 0);
2512
2513 r = read(fd, tmp, sizeof(tmp)/sizeof(char));
2514 CU_ASSERT(r > 0);
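/* Reading the amdgpu_gpu_recover debugfs file triggers a GPU reset;
 * afterwards the context should report that a reset occurred. */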
2515
2516 r = amdgpu_cs_query_reset_state(context_handle, &hang_state, &hangs);
2517 CU_ASSERT_EQUAL(r, 0);
2518 CU_ASSERT_EQUAL(hang_state, AMDGPU_CTX_UNKNOWN_RESET);
2519
2520 close(fd);
2521 r = amdgpu_cs_ctx_free(context_handle);
2522 CU_ASSERT_EQUAL(r, 0);
2523
2524 amdgpu_compute_dispatch_test();
2525 amdgpu_gfx_dispatch_test();
2526 }
2527
2528 static void amdgpu_stable_pstate_test(void)
2529 {
2530 int r;
2531 amdgpu_context_handle context_handle;
2532 uint32_t current_pstate = 0, new_pstate = 0;
2533
2534 r = amdgpu_cs_ctx_create(device_handle, &context_handle);
2535 CU_ASSERT_EQUAL(r, 0);
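/* Query the current stable pstate (expected NONE by default), force
 * the peak profile, then read it back to confirm the change. */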
2536
2537 r = amdgpu_cs_ctx_stable_pstate(context_handle,
2538 AMDGPU_CTX_OP_GET_STABLE_PSTATE,
2539 0, &current_pstate);
2540 CU_ASSERT_EQUAL(r, 0);
2541 CU_ASSERT_EQUAL(current_pstate, AMDGPU_CTX_STABLE_PSTATE_NONE);
2542
2543 r = amdgpu_cs_ctx_stable_pstate(context_handle,
2544 AMDGPU_CTX_OP_SET_STABLE_PSTATE,
2545 AMDGPU_CTX_STABLE_PSTATE_PEAK, NULL);
2546 CU_ASSERT_EQUAL(r, 0);
2547
2548 r = amdgpu_cs_ctx_stable_pstate(context_handle,
2549 AMDGPU_CTX_OP_GET_STABLE_PSTATE,
2550 0, &new_pstate);
2551 CU_ASSERT_EQUAL(r, 0);
2552 CU_ASSERT_EQUAL(new_pstate, AMDGPU_CTX_STABLE_PSTATE_PEAK);
2553
2554 r = amdgpu_cs_ctx_free(context_handle);
2555 CU_ASSERT_EQUAL(r, 0);
2556 }
2557