xref: /aosp_15_r20/external/libdrm/tests/amdgpu/amdgpu_stress.c (revision 7688df22e49036ff52a766b7101da3a49edadb8c)
1 /*
2  * Copyright 2021 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22 */
23 
24 #include <stdio.h>
25 #include <sys/types.h>
26 #include <sys/stat.h>
27 #include <fcntl.h>
28 #include <stdarg.h>
29 #include <string.h>
30 #include <errno.h>
31 #include <unistd.h>
32 #include <stdlib.h>
33 #include <inttypes.h>
34 
35 #include "drm.h"
36 #include "xf86drmMode.h"
37 #include "xf86drm.h"
38 #include "amdgpu.h"
39 #include "amdgpu_drm.h"
40 #include "amdgpu_internal.h"
41 
42 #define MAX_CARDS_SUPPORTED	4
43 #define NUM_BUFFER_OBJECTS	1024
44 
/*
 * Build the header dword of an SDMA packet as used by submit_ib() on
 * non-SI parts: opcode in bits 0-7, sub-opcode in bits 8-15 and a 16-bit
 * extra field in bits 16-31.
 *
 * Every field is converted to uint32_t before it is shifted so that a
 * value with its top bit set is never shifted into the sign bit of an int.
 */
#define SDMA_PACKET(op, sub_op, e)	((((uint32_t)(e) & 0xFFFF) << 16) | \
					(((uint32_t)(sub_op) & 0xFF) << 8) | \
					(((uint32_t)(op) & 0xFF) << 0))

/* Opcode / sub-opcode pair used for the linear copy packets. */
#define SDMA_OPCODE_COPY				  1
#       define SDMA_COPY_SUB_OPCODE_LINEAR		0
51 
52 
/*
 * Build the header dword of the copy packet used by submit_ib() on the SI
 * family: 4-bit opcode in bits 28-31, single-bit flags b/t/s in bits
 * 26/23/22 and a 20-bit count in bits 0-19.
 *
 * Every field is converted to uint32_t first: this keeps the shifts out of
 * signed arithmetic and makes the result a 32-bit value even when the
 * caller passes a 64-bit count.
 */
#define SDMA_PACKET_SI(op, b, t, s, cnt)	((((uint32_t)(op) & 0xF) << 28) | \
						(((uint32_t)(b) & 0x1) << 26) |	\
						(((uint32_t)(t) & 0x1) << 23) |	\
						(((uint32_t)(s) & 0x1) << 22) |	\
						(((uint32_t)(cnt) & 0xFFFFF) << 0))
#define SDMA_OPCODE_COPY_SI     3
59 
60 
/**
 * Help string for command line parameters.
 *
 * Used as a printf format; its single %s is replaced by the program name.
 * BO id `0` is the command buffer that main() allocates for itself, which
 * is why the first BO created with -b is reported as id `1`.
 */
static const char usage[] =
	"Usage: %s [-?h] [-b v|g|vg size] "
	"[-c from to size count]\n"
	"where:\n"
	"	b - Allocate a BO in VRAM, GTT or VRAM|GTT of size bytes.\n"
	"	    This flag can be used multiple times. The first bo will\n"
	"	    have id `1`, the second id `2`, ...\n"
	"       c - Copy size bytes from BO (from) to BO (to), count times\n"
	"       h - Display this help\n"
	"\n"
	"Sizes can be postfixed with k, m or g for kilo, mega and gigabyte scaling\n";

/**
 * Option string for getopt(): -? and -h take no argument, -b and -c take
 * one; their remaining operands are fetched by hand with next_arg().
 */
static const char options[]   = "?hb:c:";
76 
77 /* Open AMD devices.
78  * Returns the fd of the first device it could open.
79  */
amdgpu_open_device(void)80 static int amdgpu_open_device(void)
81 {
82 	drmDevicePtr devices[MAX_CARDS_SUPPORTED];
83 	unsigned int i;
84 	int drm_count;
85 
86 	drm_count = drmGetDevices2(0, devices, MAX_CARDS_SUPPORTED);
87 	if (drm_count < 0) {
88 		fprintf(stderr, "drmGetDevices2() returned an error %d\n",
89 			drm_count);
90 		return drm_count;
91 	}
92 
93 	for (i = 0; i < drm_count; i++) {
94 		drmVersionPtr version;
95 		int fd;
96 
97 		/* If this is not PCI device, skip*/
98 		if (devices[i]->bustype != DRM_BUS_PCI)
99 			continue;
100 
101 		/* If this is not AMD GPU vender ID, skip*/
102 		if (devices[i]->deviceinfo.pci->vendor_id != 0x1002)
103 			continue;
104 
105 		if (!(devices[i]->available_nodes & 1 << DRM_NODE_RENDER))
106 			continue;
107 
108 		fd = open(devices[i]->nodes[DRM_NODE_RENDER], O_RDWR | O_CLOEXEC);
109 
110 		/* This node is not available. */
111 		if (fd < 0) continue;
112 
113 		version = drmGetVersion(fd);
114 		if (!version) {
115 			fprintf(stderr,
116 				"Warning: Cannot get version for %s."
117 				"Error is %s\n",
118 				devices[i]->nodes[DRM_NODE_RENDER],
119 				strerror(errno));
120 			close(fd);
121 			continue;
122 		}
123 
124 		if (strcmp(version->name, "amdgpu")) {
125 			/* This is not AMDGPU driver, skip.*/
126 			drmFreeVersion(version);
127 			close(fd);
128 			continue;
129 		}
130 
131 		drmFreeVersion(version);
132 		drmFreeDevices(devices, drm_count);
133 		return fd;
134 	}
135 
136 	return -1;
137 }
138 
139 amdgpu_device_handle device_handle;
140 amdgpu_context_handle context_handle;
141 
142 amdgpu_bo_handle resources[NUM_BUFFER_OBJECTS];
143 uint64_t virtual[NUM_BUFFER_OBJECTS];
144 unsigned int num_buffers;
145 uint32_t *pm4;
146 
alloc_bo(uint32_t domain,uint64_t size)147 int alloc_bo(uint32_t domain, uint64_t size)
148 {
149 	struct amdgpu_bo_alloc_request request = {};
150 	amdgpu_bo_handle bo;
151 	amdgpu_va_handle va;
152 	uint64_t addr;
153 	int r;
154 
155 	if (num_buffers >= NUM_BUFFER_OBJECTS)
156 		return -ENOSPC;
157 
158 	request.alloc_size = size;
159 	request.phys_alignment = 0;
160 	request.preferred_heap = domain;
161 	request.flags = 0;
162 	r = amdgpu_bo_alloc(device_handle, &request, &bo);
163 	if (r)
164 		return r;
165 
166 	r = amdgpu_va_range_alloc(device_handle, amdgpu_gpu_va_range_general,
167 				  size, 0, 0, &addr, &va, 0);
168 	if (r)
169 		return r;
170 
171 	r = amdgpu_bo_va_op_raw(device_handle, bo, 0, size, addr,
172 				AMDGPU_VM_PAGE_READABLE | AMDGPU_VM_PAGE_WRITEABLE |
173 				AMDGPU_VM_PAGE_EXECUTABLE, AMDGPU_VA_OP_MAP);
174 	if (r)
175 		return r;
176 
177 	resources[num_buffers] = bo;
178 	virtual[num_buffers] = addr;
179 	fprintf(stdout, "Allocated BO number %u at 0x%" PRIx64 ", domain 0x%x, size %" PRIu64 "\n",
180 		num_buffers++, addr, domain, size);
181 	return 0;
182 }
183 
submit_ib(uint32_t from,uint32_t to,uint64_t size,uint32_t count)184 int submit_ib(uint32_t from, uint32_t to, uint64_t size, uint32_t count)
185 {
186 	struct amdgpu_cs_request ibs_request;
187 	struct amdgpu_cs_fence fence_status;
188 	struct amdgpu_cs_ib_info ib_info;
189 	uint64_t copied = size, delta;
190 	struct timespec start, stop;
191 
192 	uint64_t src = virtual[from];
193 	uint64_t dst = virtual[to];
194 	uint32_t expired;
195 	int i, r;
196 
197 	i = 0;
198 	while (size) {
199 		uint64_t bytes = size < 0x40000 ? size : 0x40000;
200 
201 		if (device_handle->info.family_id == AMDGPU_FAMILY_SI) {
202 			pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_COPY_SI, 0, 0, 0,
203 						  bytes);
204 			pm4[i++] = 0xffffffff & dst;
205 			pm4[i++] = 0xffffffff & src;
206 			pm4[i++] = (0xffffffff00000000 & dst) >> 32;
207 			pm4[i++] = (0xffffffff00000000 & src) >> 32;
208 		} else {
209 			pm4[i++] = SDMA_PACKET(SDMA_OPCODE_COPY,
210 					       SDMA_COPY_SUB_OPCODE_LINEAR,
211 					       0);
212 			if ( device_handle->info.family_id >= AMDGPU_FAMILY_AI)
213 				pm4[i++] = bytes - 1;
214 			else
215 				pm4[i++] = bytes;
216 			pm4[i++] = 0;
217 			pm4[i++] = 0xffffffff & src;
218 			pm4[i++] = (0xffffffff00000000 & src) >> 32;
219 			pm4[i++] = 0xffffffff & dst;
220 			pm4[i++] = (0xffffffff00000000 & dst) >> 32;
221 		}
222 
223 		size -= bytes;
224 		src += bytes;
225 		dst += bytes;
226 	}
227 
228 	memset(&ib_info, 0, sizeof(ib_info));
229 	ib_info.ib_mc_address = virtual[0];
230 	ib_info.size = i;
231 
232 	memset(&ibs_request, 0, sizeof(ibs_request));
233 	ibs_request.ip_type = AMDGPU_HW_IP_DMA;
234 	ibs_request.ring = 0;
235 	ibs_request.number_of_ibs = 1;
236 	ibs_request.ibs = &ib_info;
237 	ibs_request.fence_info.handle = NULL;
238 
239 	r = clock_gettime(CLOCK_MONOTONIC, &start);
240 	if (r)
241 		return errno;
242 
243 	r = amdgpu_bo_list_create(device_handle, num_buffers, resources, NULL,
244 				  &ibs_request.resources);
245 	if (r)
246 		return r;
247 
248 	for (i = 0; i < count; ++i) {
249 		r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
250 		if (r)
251 			return r;
252 	}
253 
254 	r = amdgpu_bo_list_destroy(ibs_request.resources);
255 	if (r)
256 		return r;
257 
258 	memset(&fence_status, 0, sizeof(fence_status));
259 	fence_status.ip_type = ibs_request.ip_type;
260 	fence_status.ip_instance = 0;
261 	fence_status.ring = ibs_request.ring;
262 	fence_status.context = context_handle;
263 	fence_status.fence = ibs_request.seq_no;
264 	r = amdgpu_cs_query_fence_status(&fence_status,
265 					 AMDGPU_TIMEOUT_INFINITE,
266 					 0, &expired);
267 	if (r)
268 		return r;
269 
270 	r = clock_gettime(CLOCK_MONOTONIC, &stop);
271 	if (r)
272 		return errno;
273 
274 	delta = stop.tv_nsec + stop.tv_sec * 1000000000UL;
275 	delta -= start.tv_nsec + start.tv_sec * 1000000000UL;
276 
277 	fprintf(stdout, "Submitted %u IBs to copy from %u(%" PRIx64 ") to %u(%" PRIx64 ") %" PRIu64 " bytes took %" PRIu64 " usec\n",
278 		count, from, virtual[from], to, virtual[to], copied, delta / 1000);
279 	return 0;
280 }
281 
/*
 * Consume the next command line word as an additional operand of the
 * option currently being parsed.
 *
 * getopt() only hands over the first operand of -b / -c; the remaining
 * ones are taken directly from argv here.  On success optarg points at the
 * consumed word and optind is advanced past it.  If there is no word left,
 * or the next word starts with '-', msg is printed to stderr and the
 * program exits with EXIT_FAILURE.
 */
void next_arg(int argc, char **argv, const char *msg)
{
	/* Check the range first so argv is only read where it holds a word. */
	if (optind >= argc || argv[optind][0] == '-') {
		fprintf(stderr, "%s\n", msg);
		exit(EXIT_FAILURE);
	}

	optarg = argv[optind++];
}
290 
/*
 * Parse the size held in optarg: an integer in decimal, octal (leading 0)
 * or hexadecimal (leading 0x), optionally followed directly by one of
 * k/K, m/M or g/G to scale it by 2^10, 2^20 or 2^30.
 *
 * Any other text after the number is ignored.  If no number can be parsed
 * or the result does not fit into 64 bits, a message is printed to stderr
 * and the program exits with EXIT_FAILURE.
 *
 * Returns the size in bytes.
 */
uint64_t parse_size(void)
{
	unsigned long long value;
	uint64_t scale = 1;
	char *end;

	errno = 0;
	value = strtoull(optarg, &end, 0);
	if (end == optarg || errno == ERANGE) {
		fprintf(stderr, "Can't parse size arg: %s\n", optarg);
		exit(EXIT_FAILURE);
	}

	switch (*end) {
	case 'k':
	case 'K':
		scale = UINT64_C(1) << 10;
		break;
	case 'm':
	case 'M':
		scale = UINT64_C(1) << 20;
		break;
	case 'g':
	case 'G':
		scale = UINT64_C(1) << 30;
		break;
	default:
		break;
	}

	/* Refuse to wrap around instead of returning a bogus small size. */
	if (value > UINT64_MAX / scale) {
		fprintf(stderr, "Size arg out of range: %s\n", optarg);
		exit(EXIT_FAILURE);
	}

	return (uint64_t)value * scale;
}
319 
main(int argc,char ** argv)320 int main(int argc, char **argv)
321 {
322 	uint32_t major_version, minor_version;
323 	uint32_t domain, from, to, count;
324        	uint64_t size;
325 	int fd, r, c;
326 
327 	fd = amdgpu_open_device();
328        	if (fd < 0) {
329 		perror("Cannot open AMDGPU device");
330 		exit(EXIT_FAILURE);
331 	}
332 
333 	r = amdgpu_device_initialize(fd, &major_version, &minor_version, &device_handle);
334 	if (r) {
335 		fprintf(stderr, "amdgpu_device_initialize returned %d\n", r);
336 		exit(EXIT_FAILURE);
337 	}
338 
339 	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
340 	if (r) {
341 		fprintf(stderr, "amdgpu_cs_ctx_create returned %d\n", r);
342 		exit(EXIT_FAILURE);
343 	}
344 
345 	if (argc == 1) {
346 		fprintf(stderr, usage, argv[0]);
347 		exit(EXIT_FAILURE);
348 	}
349 
350 	r = alloc_bo(AMDGPU_GEM_DOMAIN_GTT, 2ULL * 1024 * 1024);
351 	if (r) {
352 		fprintf(stderr, "Buffer allocation failed with %d\n", r);
353 		exit(EXIT_FAILURE);
354 	}
355 
356 	r = amdgpu_bo_cpu_map(resources[0], (void **)&pm4);
357 	if (r) {
358 		fprintf(stderr, "Buffer mapping failed with %d\n", r);
359 		exit(EXIT_FAILURE);
360 	}
361 
362 	opterr = 0;
363 	while ((c = getopt(argc, argv, options)) != -1) {
364 		switch (c) {
365 		case 'b':
366 			if (!strcmp(optarg, "v"))
367 				domain = AMDGPU_GEM_DOMAIN_VRAM;
368 			else if (!strcmp(optarg, "g"))
369 				domain = AMDGPU_GEM_DOMAIN_GTT;
370 			else if (!strcmp(optarg, "vg"))
371 				domain = AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GTT;
372 			else {
373 				fprintf(stderr, "Invalid domain: %s\n", optarg);
374 				exit(EXIT_FAILURE);
375 			}
376 			next_arg(argc, argv, "Missing buffer size");
377 			size = parse_size();
378 			if (size < getpagesize()) {
379 				fprintf(stderr, "Buffer size to small %" PRIu64 "\n", size);
380 				exit(EXIT_FAILURE);
381 			}
382 			r = alloc_bo(domain, size);
383 			if (r) {
384 				fprintf(stderr, "Buffer allocation failed with %d\n", r);
385 				exit(EXIT_FAILURE);
386 			}
387 			break;
388 		case 'c':
389 			if (sscanf(optarg, "%u", &from) != 1) {
390 				fprintf(stderr, "Can't parse from buffer: %s\n", optarg);
391 				exit(EXIT_FAILURE);
392 			}
393 			next_arg(argc, argv, "Missing to buffer");
394 			if (sscanf(optarg, "%u", &to) != 1) {
395 				fprintf(stderr, "Can't parse to buffer: %s\n", optarg);
396 				exit(EXIT_FAILURE);
397 			}
398 			next_arg(argc, argv, "Missing size");
399 			size = parse_size();
400 			next_arg(argc, argv, "Missing count");
401 			count = parse_size();
402 			r = submit_ib(from, to, size, count);
403 			if (r) {
404 				fprintf(stderr, "IB submission failed with %d\n", r);
405 				exit(EXIT_FAILURE);
406 			}
407 			break;
408 		case '?':
409 		case 'h':
410 			fprintf(stderr, usage, argv[0]);
411 			exit(EXIT_SUCCESS);
412 		default:
413 			fprintf(stderr, usage, argv[0]);
414 			exit(EXIT_FAILURE);
415 		}
416 	}
417 
418 	return EXIT_SUCCESS;
419 }
420