1 /*
2 * Copyright © 2022 Igalia S.L.
3 * SPDX-License-Identifier: MIT
4 */
5
6 #include <assert.h>
7 #include <ctype.h>
8 #include <err.h>
9 #include <errno.h>
10 #include <fcntl.h>
11 #include <getopt.h>
12 #include <inttypes.h>
13 #include <signal.h>
14 #include <stdarg.h>
15 #include <stdbool.h>
16 #include <stdint.h>
17 #include <stdio.h>
18 #include <stdlib.h>
19 #include <string.h>
20 #include <unistd.h>
21 #include <libgen.h>
22 #if FD_REPLAY_KGSL
23 #include "../vulkan/msm_kgsl.h"
24 #elif FD_REPLAY_MSM
25 #include <xf86drm.h>
26 #include "drm-uapi/msm_drm.h"
27 #elif FD_REPLAY_WSL
28 #define __KERNEL__
29 #include "drm-uapi/d3dkmthk.h"
30 #endif
31
32 #include <sys/ioctl.h>
33 #include <sys/mman.h>
34 #include <sys/stat.h>
35 #include <sys/types.h>
36 #include <sys/wait.h>
37
38 #include "util/os_time.h"
39 #include "util/rb_tree.h"
40 #include "util/u_vector.h"
41 #include "util/vma.h"
42 #include "buffers.h"
43 #include "cffdec.h"
44 #include "io.h"
45 #include "redump.h"
46 #include "rdutil.h"
47
48 /**
49 * Replay command stream obtained from:
50 * - /sys/kernel/debug/dri/0/rd
51 * - /sys/kernel/debug/dri/0/hangrd
52 * !!! Command stream capture should be done with ALL buffers:
53 * - echo 1 > /sys/module/msm/parameters/rd_full
54 *
55 * Requires a kernel with MSM_INFO_SET_IOVA support.
56 * In case userspace IOVAs are not supported, like on KGSL, we have to
57 * pre-allocate a single buffer and hope it is always allocated starting
58 * from the same address.
59 *
60 * TODO: Misrendering analysis would require marking framebuffer images
61 * at each renderpass in order to fetch and decode them.
62 *
63 * Code from Freedreno/Turnip is not re-used here since the relevant
64 * pieces may introduce additional allocations, which are not allowed
65 * during the replay.
66 *
67 * For a how-to, see freedreno.rst
68 */
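/* Illustrative invocation (file names and submit numbers below are
 * hypothetical; the option set matches print_usage() further down):
 *
 *   ./replay --override=1 --generator=./generate_rd trace.rd
 *
 * This replays every submit captured in trace.rd and substitutes submit № 1
 * with the cmdstream produced by the generator executable.
 */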
69
70 static const char *exename = NULL;
71
72 static const uint64_t FAKE_ADDRESS_SPACE_SIZE = 1024 * 1024 * 1024;
73
74 static int handle_file(const char *filename, uint32_t first_submit,
75 uint32_t last_submit, uint32_t submit_to_override,
76 uint64_t base_addr, const char *cmdstreamgen);
77
78 static void
79 print_usage(const char *name, const char *default_csgen)
80 {
81 /* clang-format off */
82 fprintf(stderr, "Usage:\n\n"
83 "\t%s [OPTIONS]... FILE...\n\n"
84 "Options:\n"
85 "\t-e, --exe=NAME - only use cmdstream from named process\n"
86 "\t-o --override=submit - № of the submit to override\n"
87 "\t-g --generator=path - executable which generate cmdstream for override (default: %s)\n"
88 "\t-f --first=submit - first submit № to replay\n"
89 "\t-l --last=submit - last submit № to replay\n"
90 "\t-a --address=address - base iova address on WSL\n"
91 "\t-h, --help - show this message\n"
92 , name, default_csgen);
93 /* clang-format on */
94 exit(2);
95 }
96
97 /* clang-format off */
98 static const struct option opts[] = {
99 { "exe", required_argument, 0, 'e' },
100 { "override", required_argument, 0, 'o' },
101 { "generator", required_argument, 0, 'g' },
102 { "first", required_argument, 0, 'f' },
103 { "last", required_argument, 0, 'l' },
104 { "address", required_argument, 0, 'a' },
105 { "help", no_argument, 0, 'h' },
{ 0 },
106 };
107 /* clang-format on */
108
109 int
110 main(int argc, char **argv)
111 {
112 int ret = -1;
113 int c;
114
115 uint32_t submit_to_override = -1;
116 uint32_t first_submit = 0;
117 uint32_t last_submit = -1;
118 uint64_t base_addr = 0;
119
120 char *default_csgen = malloc(PATH_MAX);
121 snprintf(default_csgen, PATH_MAX, "%s/generate_rd", dirname(argv[0]));
122
123 const char *csgen = default_csgen;
124
125 while ((c = getopt_long(argc, argv, "e:o:g:f:l:a:h", opts, NULL)) != -1) {
126 switch (c) {
127 case 0:
128 /* option that sets a flag, nothing to do */
129 break;
130 case 'e':
131 exename = optarg;
132 break;
133 case 'o':
134 submit_to_override = strtoul(optarg, NULL, 0);
135 break;
136 case 'g':
137 csgen = optarg;
138 break;
139 case 'f':
140 first_submit = strtoul(optarg, NULL, 0);
141 break;
142 case 'l':
143 last_submit = strtoul(optarg, NULL, 0);
144 break;
145 case 'a':
146 base_addr = strtoull(optarg, NULL, 0);
147 break;
148 case 'h':
149 default:
150 print_usage(argv[0], default_csgen);
151 }
152 }
153
154 while (optind < argc) {
155 ret = handle_file(argv[optind], first_submit, last_submit,
156 submit_to_override, base_addr, csgen);
157 if (ret) {
158 fprintf(stderr, "error reading: %s\n", argv[optind]);
159 fprintf(stderr, "continuing..\n");
160 }
161 optind++;
162 }
163
164 if (ret)
165 print_usage(argv[0], default_csgen);
166
167 return ret;
168 }
169
170 struct buffer {
171 struct rb_node node;
172
173 uint32_t gem_handle;
174 uint64_t size;
175 uint64_t iova;
176 void *map;
177
178 bool used;
179 uint32_t flags;
180 };
181
182 struct cmdstream {
183 uint64_t iova;
184 uint64_t size;
185 };
186
187 struct wrbuf {
188 uint64_t iova;
189 uint64_t size;
190 char* name;
191 };
192
193 struct device {
194 int fd;
195
196 struct rb_tree buffers;
197 struct util_vma_heap vma;
198
199 struct u_vector cmdstreams;
200
201 uint64_t shader_log_iova;
202 uint64_t cp_log_iova;
203
204 bool has_set_iova;
205
206 uint32_t va_id;
207 void *va_map;
208 uint64_t va_iova;
209
210 struct u_vector wrbufs;
211
212 #ifdef FD_REPLAY_MSM
213 uint32_t queue_id;
214 #endif
215
216 #ifdef FD_REPLAY_KGSL
217 uint32_t context_id;
218 #endif
219
220 #ifdef FD_REPLAY_WSL
221 struct d3dkmthandle device;
222 struct d3dkmthandle context;
223
224 /* At the moment we don't know a good way to wait for a submission to
225 * complete on WSL, so we use our own fences instead.
226 */
227 uint64_t fence_iova;
228 uint64_t fence_ib_iova;
229 volatile uint32_t *fence;
230 uint32_t *fence_ib;
231 #endif
232 };
233
234 void buffer_mem_free(struct device *dev, struct buffer *buf);
235
236 static int
237 rb_buffer_insert_cmp(const struct rb_node *n1, const struct rb_node *n2)
238 {
239 const struct buffer *buf1 = (const struct buffer *)n1;
240 const struct buffer *buf2 = (const struct buffer *)n2;
241 /* Note that gpuaddr comparisons can overflow an int: */
242 if (buf1->iova > buf2->iova)
243 return 1;
244 else if (buf1->iova < buf2->iova)
245 return -1;
246 return 0;
247 }
248
249 static int
250 rb_buffer_search_cmp(const struct rb_node *node, const void *addrptr)
251 {
252 const struct buffer *buf = (const struct buffer *)node;
253 uint64_t iova = *(uint64_t *)addrptr;
254 if (buf->iova + buf->size <= iova)
255 return -1;
256 else if (buf->iova > iova)
257 return 1;
258 return 0;
259 }
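/* The search comparator above treats each buffer as the half-open interval
 * [iova, iova + size). For example, a buffer at 0x1000 with size 0x2000
 * matches any lookup in 0x1000..0x2fff, which lets device_get_buffer()
 * resolve addresses pointing into the middle of an allocation.
 */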
260
261 static struct buffer *
262 device_get_buffer(struct device *dev, uint64_t iova)
263 {
264 if (iova == 0)
265 return NULL;
266 return (struct buffer *)rb_tree_search(&dev->buffers, &iova,
267 rb_buffer_search_cmp);
268 }
269
270 static void
271 device_mark_buffers(struct device *dev)
272 {
273 rb_tree_foreach_safe (struct buffer, buf, &dev->buffers, node) {
274 buf->used = false;
275 }
276 }
277
278 static void
279 device_free_buffers(struct device *dev)
280 {
281 rb_tree_foreach_safe (struct buffer, buf, &dev->buffers, node) {
282 buffer_mem_free(dev, buf);
283 rb_tree_remove(&dev->buffers, &buf->node);
284 free(buf);
285 }
286 }
287
288 static void
289 device_print_shader_log(struct device *dev)
290 {
291 struct shader_log {
292 uint64_t cur_iova;
293 union {
294 uint32_t entries_u32[0];
295 float entries_float[0];
296 };
297 };
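/* Presumed layout written by instrumented shaders: cur_iova acts as a write
 * cursor that starts right after the header and advances by 4 bytes per
 * logged value, so the entry count below is
 * (cur_iova - shader_log_iova - sizeof(header)) / 4.
 */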
298
299 if (dev->shader_log_iova != 0)
300 {
301 struct buffer *buf = device_get_buffer(dev, dev->shader_log_iova);
302 if (buf) {
303 struct shader_log *log = buf->map + (dev->shader_log_iova - buf->iova);
304 uint32_t count = (log->cur_iova - dev->shader_log_iova -
305 offsetof(struct shader_log, entries_u32)) / 4;
306
307 printf("Shader Log Entries: %u\n", count);
308
309 for (uint32_t i = 0; i < count; i++) {
310 printf("[%u] %08x %.4f\n", i, log->entries_u32[i],
311 log->entries_float[i]);
312 }
313
314 printf("========================================\n");
315 }
316 }
317 }
318
319 static void
320 device_print_cp_log(struct device *dev)
321 {
322 struct cp_log {
323 uint64_t cur_iova;
324 uint64_t tmp;
325 uint64_t first_entry_size;
326 };
327
328 struct cp_log_entry {
329 uint64_t size;
330 uint32_t data[0];
331 };
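/* The CP log is parsed as {size, data[size/4]} records packed back to back
 * after the header; the loop below stops at the first record whose size
 * field is zero.
 */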
332
333 if (dev->cp_log_iova == 0)
334 return;
335
336 struct buffer *buf = device_get_buffer(dev, dev->cp_log_iova);
337 if (!buf)
338 return;
339
340 struct cp_log *log = buf->map + (dev->cp_log_iova - buf->iova);
341 if (log->first_entry_size == 0)
342 return;
343
344 struct cp_log_entry *log_entry =
345 buf->map + offsetof(struct cp_log, first_entry_size);
346 uint32_t idx = 0;
347 while (log_entry->size != 0) {
348 printf("\nCP Log [%u]:\n", idx++);
349 uint32_t dwords = log_entry->size / 4;
350
351 for (uint32_t i = 0; i < dwords; i++) {
352 if (i % 8 == 0)
353 printf("\t");
354 printf("%08x ", log_entry->data[i]);
355 if (i % 8 == 7)
356 printf("\n");
357 }
358 printf("\n");
359
360 log_entry = (void *)log_entry + log_entry->size +
361 offsetof(struct cp_log_entry, data);
362 }
363 }
364
365 static void
366 device_dump_wrbuf(struct device *dev)
367 {
368 if (!u_vector_length(&dev->wrbufs))
369 return;
370
371 char buffer_dir[PATH_MAX];
372 getcwd(buffer_dir, sizeof(buffer_dir));
373 strcat(buffer_dir, "/buffers");
374 rmdir(buffer_dir);
375 mkdir(buffer_dir, 0777);
376
377 struct wrbuf *wrbuf;
378 u_vector_foreach(wrbuf, &dev->wrbufs) {
379 char buffer_path[PATH_MAX];
380 snprintf(buffer_path, sizeof(buffer_path), "%s/%s", buffer_dir, wrbuf->name);
381 FILE *f = fopen(buffer_path, "wb");
382 if (!f) {
383 fprintf(stderr, "Error opening %s\n", buffer_path);
384 continue; /* nothing to close yet */
385 }
386
387 struct buffer *buf = device_get_buffer(dev, wrbuf->iova);
388 if (!buf) {
389 fprintf(stderr, "Error getting buffer for %s\n", buffer_path);
390 goto end_it;
391 }
392
393 uint64_t offset = wrbuf->iova - buf->iova;
394 uint64_t size = MIN2(wrbuf->size, buf->size - offset);
395 if (size != wrbuf->size) {
396 fprintf(stderr, "Warning: Clamping buffer %s as it's smaller than expected (0x%" PRIx64 " < 0x%" PRIx64 ")\n", wrbuf->name, size, wrbuf->size);
397 }
398
399 printf("Dumping %s (0x%" PRIx64 " - 0x%" PRIx64 ")\n", wrbuf->name, wrbuf->iova, wrbuf->iova + size);
400
401 fwrite(buf->map + offset, size, 1, f);
402
403 end_it:
404 fclose(f);
405 }
406 }
407
408 #if FD_REPLAY_MSM
409 static inline void
410 get_abs_timeout(struct drm_msm_timespec *tv, uint64_t ns)
411 {
412 struct timespec t;
413 clock_gettime(CLOCK_MONOTONIC, &t);
414 tv->tv_sec = t.tv_sec + ns / 1000000000;
415 tv->tv_nsec = t.tv_nsec + ns % 1000000000;
416 }
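/* DRM_MSM_WAIT_FENCE expects an absolute timeout compared against
 * CLOCK_MONOTONIC, hence the helper above adds the relative timeout in
 * nanoseconds to the current time.
 */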
417
418 static struct device *
419 device_create(uint64_t base_addr)
420 {
421 struct device *dev = calloc(sizeof(struct device), 1);
422
423 dev->fd = drmOpenWithType("msm", NULL, DRM_NODE_RENDER);
424 if (dev->fd < 0) {
425 errx(1, "Cannot open MSM fd!");
426 }
427
428 uint64_t va_start, va_size;
429
430 struct drm_msm_param req = {
431 .pipe = MSM_PIPE_3D0,
432 .param = MSM_PARAM_VA_START,
433 };
434
435 int ret = drmCommandWriteRead(dev->fd, DRM_MSM_GET_PARAM, &req, sizeof(req));
436 va_start = req.value;
437
438 if (!ret) {
439 req.param = MSM_PARAM_VA_SIZE;
440 ret = drmCommandWriteRead(dev->fd, DRM_MSM_GET_PARAM, &req, sizeof(req));
441 va_size = req.value;
442
443 dev->has_set_iova = true;
444 }
445
446 if (ret) {
447 printf("MSM_INFO_SET_IOVA is not supported!\n");
448
449 struct drm_msm_gem_new req_new = {.size = FAKE_ADDRESS_SPACE_SIZE, .flags = MSM_BO_CACHED_COHERENT};
450 drmCommandWriteRead(dev->fd, DRM_MSM_GEM_NEW, &req_new, sizeof(req_new));
451 dev->va_id = req_new.handle;
452
453 struct drm_msm_gem_info req_info = {
454 .handle = req_new.handle,
455 .info = MSM_INFO_GET_IOVA,
456 };
457
458 drmCommandWriteRead(dev->fd,
459 DRM_MSM_GEM_INFO, &req_info, sizeof(req_info));
460 dev->va_iova = req_info.value;
461
462 struct drm_msm_gem_info req_offset = {
463 .handle = req_new.handle,
464 .info = MSM_INFO_GET_OFFSET,
465 };
466
467 drmCommandWriteRead(dev->fd, DRM_MSM_GEM_INFO, &req_offset, sizeof(req_offset));
468
469 dev->va_map = mmap(0, FAKE_ADDRESS_SPACE_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED,
470 dev->fd, req_offset.value);
471 if (dev->va_map == MAP_FAILED) {
472 err(1, "mmap failure");
473 }
474
475 va_start = dev->va_iova;
476 va_size = FAKE_ADDRESS_SPACE_SIZE;
477
478 printf("Allocated iova %" PRIx64 "\n", dev->va_iova);
479 }
480
481 struct drm_msm_submitqueue req_queue = {
482 .flags = 0,
483 .prio = 0,
484 };
485
486 ret = drmCommandWriteRead(dev->fd, DRM_MSM_SUBMITQUEUE_NEW, &req_queue,
487 sizeof(req_queue));
488 if (ret) {
489 err(1, "DRM_MSM_SUBMITQUEUE_NEW failure");
490 }
491
492 dev->queue_id = req_queue.id;
493
494 rb_tree_init(&dev->buffers);
495 util_vma_heap_init(&dev->vma, va_start, ROUND_DOWN_TO(va_size, 4096));
496 u_vector_init(&dev->cmdstreams, 8, sizeof(struct cmdstream));
497 u_vector_init(&dev->wrbufs, 8, sizeof(struct wrbuf));
498
499 return dev;
500 }
501
502 static void
503 device_submit_cmdstreams(struct device *dev)
504 {
505 if (!u_vector_length(&dev->cmdstreams)) {
506 device_free_buffers(dev);
507 return;
508 }
509
510 struct drm_msm_gem_submit_cmd cmds[u_vector_length(&dev->cmdstreams)];
511
512 uint32_t idx = 0;
513 struct cmdstream *cmd;
514 u_vector_foreach(cmd, &dev->cmdstreams) {
515 struct buffer *cmdstream_buf = device_get_buffer(dev, cmd->iova);
516
517 uint32_t bo_idx = 0;
518 rb_tree_foreach (struct buffer, buf, &dev->buffers, node) {
519 if (buf == cmdstream_buf)
520 break;
521
522 bo_idx++;
523 }
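/* bo_idx is the index the cmdstream BO will get in bo_list below: this loop
 * and the bo_list construction both walk dev->buffers with rb_tree_foreach
 * in the same order, so the indices line up.
 */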
524
525 if (cmdstream_buf)
526 cmdstream_buf->flags = MSM_SUBMIT_BO_DUMP;
527
528 struct drm_msm_gem_submit_cmd *submit_cmd = &cmds[idx];
529 submit_cmd->type = MSM_SUBMIT_CMD_BUF;
530 submit_cmd->submit_idx = dev->has_set_iova ? bo_idx : 0;
531 if (dev->has_set_iova) {
532 submit_cmd->submit_offset = cmd->iova - cmdstream_buf->iova;
533 } else {
534 submit_cmd->submit_offset = cmd->iova - dev->va_iova;
535 }
536 submit_cmd->size = cmd->size;
537 submit_cmd->pad = 0;
538 submit_cmd->nr_relocs = 0;
539 submit_cmd->relocs = 0;
540
541 idx++;
542 }
543
544 uint32_t bo_count = 0;
545 rb_tree_foreach (struct buffer, buf, &dev->buffers, node) {
546 if (buf)
547 bo_count++;
548 }
549
550 if (!dev->has_set_iova) {
551 bo_count = 1;
552 }
553
554 struct drm_msm_gem_submit_bo *bo_list =
555 calloc(sizeof(struct drm_msm_gem_submit_bo), bo_count);
556
557 if (dev->has_set_iova) {
558 uint32_t bo_idx = 0;
559 rb_tree_foreach (struct buffer, buf, &dev->buffers, node) {
560 struct drm_msm_gem_submit_bo *submit_bo = &bo_list[bo_idx++];
561 submit_bo->handle = buf->gem_handle;
562 submit_bo->flags =
563 buf->flags | MSM_SUBMIT_BO_READ | MSM_SUBMIT_BO_WRITE;
564 submit_bo->presumed = buf->iova;
565
566 buf->flags = 0;
567 }
568 } else {
569 bo_list[0].handle = dev->va_id;
570 bo_list[0].flags =
571 MSM_SUBMIT_BO_DUMP | MSM_SUBMIT_BO_READ | MSM_SUBMIT_BO_WRITE;
572 bo_list[0].presumed = dev->va_iova;
573 }
574
575 struct drm_msm_gem_submit submit_req = {
576 .flags = MSM_PIPE_3D0,
577 .queueid = dev->queue_id,
578 .bos = (uint64_t)(uintptr_t)bo_list,
579 .nr_bos = bo_count,
580 .cmds = (uint64_t)(uintptr_t)cmds,
581 .nr_cmds = u_vector_length(&dev->cmdstreams),
582 .in_syncobjs = 0,
583 .out_syncobjs = 0,
584 .nr_in_syncobjs = 0,
585 .nr_out_syncobjs = 0,
586 .syncobj_stride = sizeof(struct drm_msm_gem_submit_syncobj),
587 };
588
589 int ret = drmCommandWriteRead(dev->fd, DRM_MSM_GEM_SUBMIT, &submit_req,
590 sizeof(submit_req));
591
592 if (ret) {
593 err(1, "DRM_MSM_GEM_SUBMIT failure %d", ret);
594 }
595
596 /* Wait for the submission to complete to be sure that freeing the
597 * buffers also frees their VMAs in the kernel. This guarantees that
598 * new allocations won't clash with the old ones.
599 */
600 struct drm_msm_wait_fence wait_req = {
601 .fence = submit_req.fence,
602 .queueid = dev->queue_id,
603 };
604 get_abs_timeout(&wait_req.timeout, 1000000000);
605
606 ret =
607 drmCommandWrite(dev->fd, DRM_MSM_WAIT_FENCE, &wait_req, sizeof(wait_req));
608 if (ret && (ret != -ETIMEDOUT)) {
609 err(1, "DRM_MSM_WAIT_FENCE failure %d", ret);
610 }
611
612 u_vector_finish(&dev->cmdstreams);
613 u_vector_init(&dev->cmdstreams, 8, sizeof(struct cmdstream));
614
615 device_print_shader_log(dev);
616 device_print_cp_log(dev);
617
618 device_dump_wrbuf(dev);
619 u_vector_finish(&dev->wrbufs);
620 u_vector_init(&dev->wrbufs, 8, sizeof(struct wrbuf));
621
622 device_free_buffers(dev);
623 }
624
625 static void
626 buffer_mem_alloc(struct device *dev, struct buffer *buf)
627 {
628 bool success = util_vma_heap_alloc_addr(&dev->vma, buf->iova, buf->size);
629 if (!success)
630 errx(1, "Failed to allocate buffer");
631
632 if (!dev->has_set_iova) {
633 uint64_t offset = buf->iova - dev->va_iova;
634 assert(offset < FAKE_ADDRESS_SPACE_SIZE && (offset + buf->size) <= FAKE_ADDRESS_SPACE_SIZE);
635 buf->map = ((uint8_t*)dev->va_map) + offset;
636 return;
637 }
638
639 {
640 struct drm_msm_gem_new req = {.size = buf->size, .flags = MSM_BO_WC};
641
642 int ret =
643 drmCommandWriteRead(dev->fd, DRM_MSM_GEM_NEW, &req, sizeof(req));
644 if (ret) {
645 err(1, "DRM_MSM_GEM_NEW failure %d", ret);
646 }
647
648 buf->gem_handle = req.handle;
649 }
650
651 {
652 struct drm_msm_gem_info req = {
653 .handle = buf->gem_handle,
654 .info = MSM_INFO_SET_IOVA,
655 .value = buf->iova,
656 };
657
658 int ret =
659 drmCommandWriteRead(dev->fd, DRM_MSM_GEM_INFO, &req, sizeof(req));
660
661 if (ret) {
662 err(1, "MSM_INFO_SET_IOVA failure %d", ret);
663 }
664 }
665
666 {
667 struct drm_msm_gem_info req = {
668 .handle = buf->gem_handle,
669 .info = MSM_INFO_GET_OFFSET,
670 };
671
672 int ret =
673 drmCommandWriteRead(dev->fd, DRM_MSM_GEM_INFO, &req, sizeof(req));
674 if (ret) {
675 err(1, "MSM_INFO_GET_OFFSET failure %d", ret);
676 }
677
678 void *map = mmap(0, buf->size, PROT_READ | PROT_WRITE, MAP_SHARED,
679 dev->fd, req.value);
680 if (map == MAP_FAILED) {
681 err(1, "mmap failure");
682 }
683
684 buf->map = map;
685 }
686 }
687
688 void
689 buffer_mem_free(struct device *dev, struct buffer *buf)
690 {
691 if (dev->has_set_iova) {
692 munmap(buf->map, buf->size);
693
694 struct drm_msm_gem_info req_iova = {
695 .handle = buf->gem_handle,
696 .info = MSM_INFO_SET_IOVA,
697 .value = 0,
698 };
699
700 int ret = drmCommandWriteRead(dev->fd, DRM_MSM_GEM_INFO, &req_iova,
701 sizeof(req_iova));
702 if (ret < 0) {
703 err(1, "MSM_INFO_SET_IOVA(0) failed! %d", ret);
704 return;
705 }
706
707 struct drm_gem_close req = {
708 .handle = buf->gem_handle,
709 };
710 drmIoctl(dev->fd, DRM_IOCTL_GEM_CLOSE, &req);
711 }
712
713 util_vma_heap_free(&dev->vma, buf->iova, buf->size);
714 }
715
716 #elif FD_REPLAY_KGSL
717 static int
718 safe_ioctl(int fd, unsigned long request, void *arg)
719 {
720 int ret;
721
722 do {
723 ret = ioctl(fd, request, arg);
724 } while (ret == -1 && (errno == EINTR || errno == EAGAIN));
725
726 return ret;
727 }
728
729 static struct device *
730 device_create(uint64_t base_addr)
731 {
732 struct device *dev = calloc(sizeof(struct device), 1);
733
734 static const char path[] = "/dev/kgsl-3d0";
735
736 dev->fd = open(path, O_RDWR | O_CLOEXEC);
737 if (dev->fd < 0) {
738 errx(1, "Cannot open KGSL fd!");
739 }
740
741 struct kgsl_gpumem_alloc_id req = {
742 .size = FAKE_ADDRESS_SPACE_SIZE,
743 .flags = KGSL_MEMFLAGS_IOCOHERENT,
744 };
745
746 int ret = safe_ioctl(dev->fd, IOCTL_KGSL_GPUMEM_ALLOC_ID, &req);
747 if (ret) {
748 err(1, "IOCTL_KGSL_GPUMEM_ALLOC_ID failure");
749 }
750
751 dev->va_id = req.id;
752 dev->va_iova = req.gpuaddr;
753 dev->va_map = mmap(0, FAKE_ADDRESS_SPACE_SIZE, PROT_READ | PROT_WRITE,
754 MAP_SHARED, dev->fd, req.id << 12);
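/* KGSL maps an allocation by using its id shifted by the page shift
 * (id << 12) as the mmap offset on the device fd.
 */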
755
756 rb_tree_init(&dev->buffers);
757 util_vma_heap_init(&dev->vma, req.gpuaddr, ROUND_DOWN_TO(FAKE_ADDRESS_SPACE_SIZE, 4096));
758 u_vector_init(&dev->cmdstreams, 8, sizeof(struct cmdstream));
759 u_vector_init(&dev->wrbufs, 8, sizeof(struct wrbuf));
760
761 struct kgsl_drawctxt_create drawctxt_req = {
762 .flags = KGSL_CONTEXT_SAVE_GMEM |
763 KGSL_CONTEXT_NO_GMEM_ALLOC |
764 KGSL_CONTEXT_PREAMBLE,
765 };
766
767 ret = safe_ioctl(dev->fd, IOCTL_KGSL_DRAWCTXT_CREATE, &drawctxt_req);
768 if (ret) {
769 err(1, "IOCTL_KGSL_DRAWCTXT_CREATE failure");
770 }
771
772 printf("Allocated iova %" PRIx64 "\n", dev->va_iova);
773
774 dev->context_id = drawctxt_req.drawctxt_id;
775
776 return dev;
777 }
778
779 static void
780 device_submit_cmdstreams(struct device *dev)
781 {
782 if (!u_vector_length(&dev->cmdstreams)) {
783 device_free_buffers(dev);
784 return;
785 }
786
787 struct kgsl_command_object cmds[u_vector_length(&dev->cmdstreams)];
788
789 uint32_t idx = 0;
790 struct cmdstream *cmd;
791 u_vector_foreach(cmd, &dev->cmdstreams) {
792 struct kgsl_command_object *submit_cmd = &cmds[idx++];
793 submit_cmd->gpuaddr = cmd->iova;
794 submit_cmd->size = cmd->size;
795 submit_cmd->flags = KGSL_CMDLIST_IB;
796 submit_cmd->id = dev->va_id;
797 }
798
799 struct kgsl_gpu_command submit_req = {
800 .flags = KGSL_CMDBATCH_SUBMIT_IB_LIST,
801 .cmdlist = (uintptr_t) &cmds,
802 .cmdsize = sizeof(struct kgsl_command_object),
803 .numcmds = u_vector_length(&dev->cmdstreams),
804 .numsyncs = 0,
805 .context_id = dev->context_id,
806 };
807
808 int ret = safe_ioctl(dev->fd, IOCTL_KGSL_GPU_COMMAND, &submit_req);
809
810 if (ret) {
811 err(1, "IOCTL_KGSL_GPU_COMMAND failure %d", ret);
812 }
813
814 struct kgsl_device_waittimestamp_ctxtid wait = {
815 .context_id = dev->context_id,
816 .timestamp = submit_req.timestamp,
817 .timeout = 3000,
818 };
819
820 ret = safe_ioctl(dev->fd, IOCTL_KGSL_DEVICE_WAITTIMESTAMP_CTXTID, &wait);
821
822 if (ret) {
823 err(1, "IOCTL_KGSL_DEVICE_WAITTIMESTAMP_CTXTID failure %d", ret);
824 }
825
826 u_vector_finish(&dev->cmdstreams);
827 u_vector_init(&dev->cmdstreams, 8, sizeof(struct cmdstream));
828
829 device_print_shader_log(dev);
830 device_print_cp_log(dev);
831
832 device_dump_wrbuf(dev);
833 u_vector_finish(&dev->wrbufs);
834 u_vector_init(&dev->wrbufs, 8, sizeof(struct wrbuf));
835
836 device_free_buffers(dev);
837 }
838
839 static void
840 buffer_mem_alloc(struct device *dev, struct buffer *buf)
841 {
842 bool success = util_vma_heap_alloc_addr(&dev->vma, buf->iova, buf->size);
843 if (!success)
844 errx(1, "Failed to allocate buffer");
845
846 buf->map = ((uint8_t*)dev->va_map) + (buf->iova - dev->va_iova);
847 }
848
849 void
850 buffer_mem_free(struct device *dev, struct buffer *buf)
851 {
852 util_vma_heap_free(&dev->vma, buf->iova, buf->size);
853 }
854 #else
855
856 static int
857 safe_ioctl(int fd, unsigned long request, void *arg)
858 {
859 int ret;
860
861 do {
862 ret = ioctl(fd, request, arg);
863 } while (ret == -1 && (errno == EINTR || errno == EAGAIN));
864
865 return ret;
866 }
867
868 struct alloc_priv_info {
869 __u32 struct_size;
870 char _pad0[4];
871 __u32 unk0; // 1
872 char _pad1[4];
873 __u64 size;
874 __u32 alignment;
875 char _pad2[20];
876 __u64 allocated_size;
877 __u32 unk1; // 1
878 char _pad4[8]; /* offset: 60*/
879 __u32 unk2; // 61
880 char _pad5[76];
881 __u32 unk3; /* offset: 148 */ // 1
882 char _pad6[8];
883 __u32 unk4; /* offset: 160 */ // 1
884 char _pad7[44];
885 __u32 unk5; /* offset: 208 */ // 3
886 char _pad8[16];
887 __u32 size_2; /* offset: 228 */
888 __u32 unk6; // 1
889 __u32 size_3;
890 __u32 size_4;
891 __u32 unk7; /* offset: 244 */ // 1
892 char _pad9[56];
893 };
894 static_assert(sizeof(struct alloc_priv_info) == 304);
895 static_assert(offsetof(struct alloc_priv_info, unk1) == 56);
896 static_assert(offsetof(struct alloc_priv_info, unk3) == 148);
897 static_assert(offsetof(struct alloc_priv_info, unk5) == 208);
898
899 struct submit_priv_ib_info {
900 char _pad5[4];
901 __u32 size_dwords;
902 __u64 iova;
903 char _pad6[8];
904 } __attribute__((packed));
905
906 struct submit_priv_data {
907 __u32 magic0;
908 char _pad0[4];
909 __u32 struct_size;
910 char _pad1[4];
911 /* It seems that the priv data can contain several sub-blocks; cmdbuf is
912 * one of them, and after it there is another 8-byte struct without
913 * anything useful in it. That second block doesn't seem important for
914 * replaying.
915 */
916 __u32 datas_count;
917 char _pad2[32];
918 struct {
919 __u32 magic1;
920 __u32 data_size;
921
922 struct {
923 __u32 unk1;
924 __u32 cmdbuf_size;
925 char _pad3[32];
926 __u32 ib_count;
927 char _pad4[36];
928
929 struct submit_priv_ib_info ibs[];
930 } cmdbuf;
931 } data0;
932
933 // unsigned char magic2[8];
934 } __attribute__((packed));
935 static_assert(offsetof(struct submit_priv_data, data0) == 0x34);
936 static_assert(offsetof(struct submit_priv_data, data0.cmdbuf.ibs) == 0x8c);
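/* Both alloc_priv_info and submit_priv_data appear to mirror
 * reverse-engineered KMD-private structures: the unkN fields and magic
 * values are opaque but seem to be required for the submission to be
 * accepted, hence the static_asserts pinning their offsets.
 */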
937
938 static struct device *
939 device_create(uint64_t base_addr)
940 {
941 struct device *dev = calloc(sizeof(struct device), 1);
942
943 static const char path[] = "/dev/dxg";
944
945 dev->fd = open(path, O_RDWR | O_CLOEXEC);
946 if (dev->fd < 0) {
947 errx(1, "Cannot open /dev/dxg fd");
948 }
949
950 struct d3dkmt_adapterinfo adapters[1];
951 struct d3dkmt_enumadapters3 enum_adapters = {
952 .adapter_count = 1,
953 .adapters = adapters,
954 };
955 int ret = safe_ioctl(dev->fd, LX_DXENUMADAPTERS3, &enum_adapters);
956 if (ret) {
957 errx(1, "LX_DXENUMADAPTERS3 failure");
958 }
959
960 if (enum_adapters.adapter_count == 0) {
961 errx(1, "No adapters found");
962 }
963
964 struct winluid adapter_luid = enum_adapters.adapters[0].adapter_luid;
965
966 struct d3dkmt_openadapterfromluid open_adapter = {
967 .adapter_luid = adapter_luid,
968 };
969 ret = safe_ioctl(dev->fd, LX_DXOPENADAPTERFROMLUID, &open_adapter);
970 if (ret) {
971 errx(1, "LX_DXOPENADAPTERFROMLUID failure");
972 }
973
974 struct d3dkmthandle adapter = open_adapter.adapter_handle;
975
976 struct d3dkmt_createdevice create_device = {
977 .adapter = adapter,
978 };
979 ret = safe_ioctl(dev->fd, LX_DXCREATEDEVICE, &create_device);
980 if (ret) {
981 errx(1, "LX_DXCREATEDEVICE failure");
982 }
983
984 struct d3dkmthandle device = create_device.device;
985 dev->device = device;
986
987 unsigned char create_context_priv_data[] = {
988 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00,
989 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
990 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1c, 0x0c, 0x00, 0x00, 0x00,
991 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
992 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
993 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
994 };
995
996 struct d3dkmt_createcontextvirtual create_context = {
997 .device = device,
998 .node_ordinal = 0,
999 .engine_affinity = 1,
1000 .priv_drv_data = create_context_priv_data,
1001 .priv_drv_data_size = sizeof(create_context_priv_data),
1002 .client_hint = 16,
1003 };
1004 ret = safe_ioctl(dev->fd, LX_DXCREATECONTEXTVIRTUAL, &create_context);
1005 if (ret) {
1006 errx(1, "LX_DXCREATECONTEXTVIRTUAL failure");
1007 }
1008
1009 dev->context = create_context.context;
1010
1011 struct d3dkmt_createpagingqueue create_paging_queue = {
1012 .device = device,
1013 .priority = _D3DDDI_PAGINGQUEUE_PRIORITY_NORMAL,
1014 .physical_adapter_index = 0,
1015 };
1016 ret = safe_ioctl(dev->fd, LX_DXCREATEPAGINGQUEUE, &create_paging_queue);
1017 if (ret) {
1018 errx(1, "LX_DXCREATEPAGINGQUEUE failure");
1019 }
1020 struct d3dkmthandle paging_queue = create_paging_queue.paging_queue;
1021
1022
1023 uint32_t alloc_size = FAKE_ADDRESS_SPACE_SIZE;
1024 struct alloc_priv_info priv_alloc_info = {
1025 .struct_size = sizeof(struct alloc_priv_info),
1026 .unk0 = 1,
1027 .size = alloc_size,
1028 .alignment = 4096,
1029 .unk1 = 1,
1030 .unk2 = 61,
1031 .unk3 = 1,
1032 .unk4 = 1,
1033 .unk5 = 3,
1034 .size_2 = alloc_size,
1035 .unk6 = 1,
1036 .size_3 = alloc_size,
1037 .size_4 = alloc_size,
1038 .unk7 = 1,
1039 };
1040
1041 struct d3dddi_allocationinfo2 alloc_info = {
1042 .priv_drv_data = &priv_alloc_info,
1043 .priv_drv_data_size = sizeof(struct alloc_priv_info),
1044 };
1045
1046 struct d3dkmt_createallocation create_allocation = {
1047 .device = device,
1048 .alloc_count = 1,
1049 .allocation_info = &alloc_info,
1050 };
1051 ret = safe_ioctl(dev->fd, LX_DXCREATEALLOCATION, &create_allocation);
1052 if (ret) {
1053 errx(1, "LX_DXCREATEALLOCATION failure");
1054 }
1055
1056 assert(priv_alloc_info.allocated_size == alloc_size);
1057
1058 struct d3dddi_mapgpuvirtualaddress map_virtual_address = {
1059 .paging_queue = paging_queue,
1060 .base_address = base_addr,
1061 .maximum_address = 18446744073709551615ull,
1062 .allocation = create_allocation.allocation_info[0].allocation,
1063 .size_in_pages = MAX2(alloc_size / 4096, 1),
1064 .protection = {
1065 .write = 1,
1066 .execute = 1,
1067 },
1068 };
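/* Mapping the VA and the MakeResident call below complete asynchronously:
 * on success these ioctls are expected to return 259 (presumably
 * STATUS_PENDING, 0x103) rather than 0, hence the unusual checks.
 */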
1069 ret = safe_ioctl(dev->fd, LX_DXMAPGPUVIRTUALADDRESS, &map_virtual_address);
1070 if (ret != 259) {
1071 errx(1, "LX_DXMAPGPUVIRTUALADDRESS failure");
1072 }
1073
1074 __u32 priority = 0;
1075 struct d3dddi_makeresident make_resident = {
1076 .paging_queue = paging_queue,
1077 .alloc_count = 1,
1078 .allocation_list = &create_allocation.allocation_info[0].allocation,
1079 .priority_list = &priority,
1080 };
1081 ret = safe_ioctl(dev->fd, LX_DXMAKERESIDENT, &make_resident);
1082 if (ret != 259) {
1083 errx(1, "LX_DXMAKERESIDENT failure");
1084 }
1085
1086 struct d3dkmt_lock2 lock = {
1087 .device = device,
1088 .allocation = create_allocation.allocation_info[0].allocation,
1089 };
1090 ret = safe_ioctl(dev->fd, LX_DXLOCK2, &lock);
1091 if (ret) {
1092 errx(1, "LX_DXLOCK2 failure");
1093 }
1094
1095 dev->va_iova = map_virtual_address.virtual_address;
1096 dev->va_map = lock.data;
1097
1098 rb_tree_init(&dev->buffers);
1099 util_vma_heap_init(&dev->vma, dev->va_iova, ROUND_DOWN_TO(alloc_size, 4096));
1100 u_vector_init(&dev->cmdstreams, 8, sizeof(struct cmdstream));
1101 u_vector_init(&dev->wrbufs, 8, sizeof(struct wrbuf));
1102
1103 printf("Allocated iova at 0x%" PRIx64 "\n", dev->va_iova);
1104
1105 uint64_t hole_size = 4096;
1106 dev->vma.alloc_high = true;
1107 dev->fence_iova = util_vma_heap_alloc(&dev->vma, hole_size, 4096);
1108 dev->fence_ib_iova = dev->fence_iova + 8;
1109 dev->fence = (uint32_t *) ((uint8_t*)dev->va_map + (dev->fence_iova - dev->va_iova));
1110 dev->fence_ib = (uint32_t *) ((uint8_t*)dev->va_map + (dev->fence_ib_iova - dev->va_iova));
1111 dev->vma.alloc_high = false;
1112
1113 return dev;
1114 }
1115
1116 static void
1117 device_submit_cmdstreams(struct device *dev)
1118 {
1119 if (!u_vector_length(&dev->cmdstreams)) {
1120 device_free_buffers(dev);
1121 return;
1122 }
1123
1124 uint32_t cmdstream_count = u_vector_length(&dev->cmdstreams) + 1;
1125
1126 uint32_t priv_data_size =
1127 sizeof(struct submit_priv_data) +
1128 cmdstream_count * sizeof(struct submit_priv_ib_info);
1129
1130 struct submit_priv_data *priv_data = calloc(1, priv_data_size);
1131 priv_data->magic0 = 0xccaabbee;
1132 priv_data->struct_size = priv_data_size;
1133 priv_data->datas_count = 1;
1134
1135 priv_data->data0.magic1 = 0xfadcab02;
1136 priv_data->data0.data_size =
1137 sizeof(priv_data->data0) +
1138 cmdstream_count * sizeof(struct submit_priv_ib_info);
1139 priv_data->data0.cmdbuf.unk1 = 0xcccc0001;
1140 priv_data->data0.cmdbuf.cmdbuf_size = sizeof(priv_data->data0.cmdbuf) +
1141 cmdstream_count * sizeof(struct submit_priv_ib_info);
1142 priv_data->data0.cmdbuf.ib_count = cmdstream_count;
1143
1144 struct cmdstream *cmd;
1145 uint32_t idx = 0;
1146 u_vector_foreach(cmd, &dev->cmdstreams) {
1147 priv_data->data0.cmdbuf.ibs[idx].size_dwords = cmd->size / 4;
1148 priv_data->data0.cmdbuf.ibs[idx].iova = cmd->iova;
1149 idx++;
1150 }
1151
1152 priv_data->data0.cmdbuf.ibs[idx].size_dwords = 4;
1153 priv_data->data0.cmdbuf.ibs[idx].iova = dev->fence_ib_iova;
1154
1155 *dev->fence = 0x00000000;
1156 dev->fence_ib[0] = pm4_pkt7_hdr(0x3d, 3); // CP_MEM_WRITE
1157 dev->fence_ib[1] = dev->fence_iova;
1158 dev->fence_ib[2] = dev->fence_iova >> 32;
1159 dev->fence_ib[3] = 0xababfcfc;
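/* The extra IB above is a single CP_MEM_WRITE packet (opcode 0x3d with
 * three payload dwords: address low, address high, value) that stores the
 * 0xababfcfc marker to fence_iova; the CPU polls that location further
 * down to detect completion.
 */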
1160
1161 // Fill second (empty) data block
1162 // uint32_t *magic_end = (uint32_t *)(((char *) priv_data) + priv_data_size - 8);
1163 // magic_end[0] = 0xfadcab00;
1164 // magic_end[1] = 0x00000008;
1165
1166 struct d3dkmt_submitcommand submission = {
1167 .command_buffer = priv_data->data0.cmdbuf.ibs[0].iova,
1168 .command_length = priv_data->data0.cmdbuf.ibs[0].size_dwords * sizeof(uint32_t),
1169 .broadcast_context_count = 1,
1170 .broadcast_context[0] = dev->context,
1171 .priv_drv_data_size = priv_data_size,
1172 .priv_drv_data = priv_data,
1173 };
1174
1175 int ret = safe_ioctl(dev->fd, LX_DXSUBMITCOMMAND, &submission);
1176 if (ret) {
1177 errx(1, "LX_DXSUBMITCOMMAND failure");
1178 }
1179
1180 free(priv_data);
1181
1182 u_vector_finish(&dev->cmdstreams);
1183 u_vector_init(&dev->cmdstreams, 8, sizeof(struct cmdstream));
1184
1185 // TODO: better way to wait
1186 for (unsigned i = 0; i < 1000; i++) {
1187 usleep(1000);
1188 if (*dev->fence != 0)
1189 break;
1190 }
1191 if (*dev->fence == 0) {
1192 errx(1, "Waiting for submission failed! GPU faulted or kernel did not execute this submission.");
1193 }
1194
1195 device_print_shader_log(dev);
1196 device_print_cp_log(dev);
1197
1198 device_dump_wrbuf(dev);
1199 u_vector_finish(&dev->wrbufs);
1200 u_vector_init(&dev->wrbufs, 8, sizeof(struct wrbuf));
1201
1202 device_free_buffers(dev);
1203 }
1204
1205 static void
1206 buffer_mem_alloc(struct device *dev, struct buffer *buf)
1207 {
1208 bool success = util_vma_heap_alloc_addr(&dev->vma, buf->iova, buf->size);
1209 if (!success)
1210 errx(1, "Failed to allocate buffer");
1211
1212 buf->map = ((uint8_t*)dev->va_map) + (buf->iova - dev->va_iova);
1213 }
1214
1215 void
1216 buffer_mem_free(struct device *dev, struct buffer *buf)
1217 {
1218 util_vma_heap_free(&dev->vma, buf->iova, buf->size);
1219 }
1220
1221 #endif
1222
1223 static void
1224 upload_buffer(struct device *dev, uint64_t iova, unsigned int size,
1225 void *hostptr)
1226 {
1227 struct buffer *buf = device_get_buffer(dev, iova);
1228
1229 if (!buf) {
1230 buf = calloc(sizeof(struct buffer), 1);
1231 buf->iova = iova;
1232 buf->size = size;
1233
1234 rb_tree_insert(&dev->buffers, &buf->node, rb_buffer_insert_cmp);
1235
1236 buffer_mem_alloc(dev, buf);
1237 } else if (buf->size != size) {
1238 buffer_mem_free(dev, buf);
1239 buf->size = size;
1240 buffer_mem_alloc(dev, buf);
1241 }
1242
1243 memcpy(buf->map, hostptr, size);
1244
1245 buf->used = true;
1246 }
1247
1248 static int
1249 override_cmdstream(struct device *dev, struct cmdstream *cs,
1250 const char *cmdstreamgen)
1251 {
1252 #if FD_REPLAY_KGSL
1253 static const char *tmpfilename = "/sdcard/Download/cmdstream_override.rd";
1254 #elif FD_REPLAY_MSM || FD_REPLAY_WSL
1255 static const char *tmpfilename = "/tmp/cmdstream_override.rd";
1256 #endif
1257
1258
1259 /* Find free space for the new cmdstreams and the resources we will use
1260 * when overriding the existing cmdstream.
1261 */
1262 uint64_t hole_size = util_vma_heap_get_max_free_continuous_size(&dev->vma);
1263 uint64_t hole_iova = util_vma_heap_alloc(&dev->vma, hole_size, 1);
1264 util_vma_heap_free(&dev->vma, hole_iova, hole_size);
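/* Probing trick: allocating the largest free contiguous range and
 * immediately releasing it yields an address/size pair to hand to the
 * generator, so the generated cmdstream and its buffers land in a hole
 * that cannot collide with buffers already uploaded from the trace.
 */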
1265
1266 char cmd[2048];
1267 snprintf(cmd, sizeof(cmd),
1268 "%s --vastart=%" PRIu64 " --vasize=%" PRIu64 " %s", cmdstreamgen,
1269 hole_iova, hole_size, tmpfilename);
1270
1271 printf("generating cmdstream '%s'\n", cmd);
1272
1273 int ret = system(cmd);
1274 if (ret) {
1275 fprintf(stderr, "Error executing %s\n", cmd);
1276 return -1;
1277 }
1278
1279 struct io *io;
1280 struct rd_parsed_section ps = {0};
1281
1282 io = io_open(tmpfilename);
1283 if (!io) {
1284 fprintf(stderr, "could not open: %s\n", tmpfilename);
1285 return -1;
1286 }
1287
1288 struct {
1289 unsigned int len;
1290 uint64_t gpuaddr;
1291 } gpuaddr = {0};
1292
1293 while (parse_rd_section(io, &ps)) {
1294 switch (ps.type) {
1295 case RD_GPUADDR:
1296 parse_addr(ps.buf, ps.sz, &gpuaddr.len, &gpuaddr.gpuaddr);
1297 /* no-op */
1298 break;
1299 case RD_BUFFER_CONTENTS:
1300 upload_buffer(dev, gpuaddr.gpuaddr, gpuaddr.len, ps.buf);
1301 ps.buf = NULL;
1302 break;
1303 case RD_CMDSTREAM_ADDR: {
1304 unsigned int sizedwords;
1305 uint64_t gpuaddr;
1306 parse_addr(ps.buf, ps.sz, &sizedwords, &gpuaddr);
1307 printf("override cmdstream: %d dwords\n", sizedwords);
1308
1309 cs->iova = gpuaddr;
1310 cs->size = sizedwords * sizeof(uint32_t);
1311 break;
1312 }
1313 case RD_SHADER_LOG_BUFFER: {
1314 unsigned int sizedwords;
1315 parse_addr(ps.buf, ps.sz, &sizedwords, &dev->shader_log_iova);
1316 break;
1317 }
1318 case RD_CP_LOG_BUFFER: {
1319 unsigned int sizedwords;
1320 parse_addr(ps.buf, ps.sz, &sizedwords, &dev->cp_log_iova);
1321 break;
1322 }
1323 case RD_WRBUFFER: {
1324 struct wrbuf *wrbuf = u_vector_add(&dev->wrbufs);
1325 uint64_t *p = (uint64_t *)ps.buf;
1326 wrbuf->iova = p[0];
1327 wrbuf->size = p[1];
1328 bool clear = p[2];
1329 int name_len = ps.sz - (3 * sizeof(uint64_t));
1330 wrbuf->name = calloc(sizeof(char), name_len);
1331 memcpy(wrbuf->name, (char*)(p + 3), name_len); // includes null terminator
1332
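/* When requested by the capture, pre-fill the buffer with a 0xdeadbeef
 * pattern, presumably so that stale trace contents are distinguishable
 * from data actually written by the replayed submission when the buffer
 * is dumped later.
 */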
1333 if (clear) {
1334 struct buffer *buf = device_get_buffer(dev, wrbuf->iova);
1335 assert(buf);
1336
1337 uint64_t offset = wrbuf->iova - buf->iova;
1338 uint64_t end = MIN2(offset + wrbuf->size, buf->size);
1339 while (offset < end) {
1340 static const uint64_t clear_value = 0xdeadbeefdeadbeef;
1341 memcpy(buf->map + offset, &clear_value,
1342 MIN2(sizeof(clear_value), end - offset));
1343 offset += sizeof(clear_value);
1344 }
1345 }
1346
1347 break;
1348 }
1349 default:
1350 break;
1351 }
1352 }
1353
1354 io_close(io);
1355 if (ps.ret < 0) {
1356 fprintf(stderr, "corrupt file %s\n", tmpfilename);
1357 }
1358
1359 return ps.ret;
1360 }
1361
1362 static int
1363 handle_file(const char *filename, uint32_t first_submit, uint32_t last_submit,
1364 uint32_t submit_to_override, uint64_t base_addr, const char *cmdstreamgen)
1365 {
1366 struct io *io;
1367 int submit = 0;
1368 bool skip = false;
1369 bool need_submit = false;
1370 struct rd_parsed_section ps = {0};
1371
1372 printf("Reading %s...\n", filename);
1373
1374 if (!strcmp(filename, "-"))
1375 io = io_openfd(0);
1376 else
1377 io = io_open(filename);
1378
1379 if (!io) {
1380 fprintf(stderr, "could not open: %s\n", filename);
1381 return -1;
1382 }
1383
1384 struct device *dev = device_create(base_addr);
1385
1386 struct {
1387 unsigned int len;
1388 uint64_t gpuaddr;
1389 } gpuaddr = {0};
1390
1391 while (parse_rd_section(io, &ps)) {
1392 switch (ps.type) {
1393 case RD_TEST:
1394 case RD_VERT_SHADER:
1395 case RD_FRAG_SHADER:
1396 /* no-op */
1397 break;
1398 case RD_CMD:
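/* Each RD_CMD record names the process that produced the following
 * submits. With --exe only that process is replayed; otherwise known
 * noisy producers (fdperf, chrome, surfaceflinger and X clients) are
 * skipped.
 */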
1399 skip = false;
1400 if (exename) {
1401 skip |= (strstr(ps.buf, exename) != ps.buf);
1402 } else {
1403 skip |= (strstr(ps.buf, "fdperf") == ps.buf);
1404 skip |= (strstr(ps.buf, "chrome") == ps.buf);
1405 skip |= (strstr(ps.buf, "surfaceflinger") == ps.buf);
1406 skip |= ((char *)ps.buf)[0] == 'X';
1407 }
1408 break;
1409
1410 case RD_GPUADDR:
1411 if (need_submit) {
1412 need_submit = false;
1413 device_submit_cmdstreams(dev);
1414 }
1415
1416 parse_addr(ps.buf, ps.sz, &gpuaddr.len, &gpuaddr.gpuaddr);
1417 /* no-op */
1418 break;
1419 case RD_BUFFER_CONTENTS:
1420 /* TODO: skip uploading (and even reading) this buffer if it is only
1421 * used by submits outside of the [first_submit, last_submit] range.
1422 * A set of buffers is shared between several cmdstreams, so we'd
1423 * have to determine starting from which RD_CMD to upload
1424 * the buffers.
1425 */
1426 upload_buffer(dev, gpuaddr.gpuaddr, gpuaddr.len, ps.buf);
1427 break;
1428 case RD_CMDSTREAM_ADDR: {
1429 unsigned int sizedwords;
1430 uint64_t gpuaddr;
1431 parse_addr(ps.buf, ps.sz, &sizedwords, &gpuaddr);
1432
1433 bool add_submit = !skip && (submit >= first_submit) && (submit <= last_submit);
1434 printf("%scmdstream %d: %d dwords\n", add_submit ? "" : "skipped ",
1435 submit, sizedwords);
1436
1437 if (add_submit) {
1438 struct cmdstream *cs = u_vector_add(&dev->cmdstreams);
1439
1440 if (submit == submit_to_override) {
1441 if (override_cmdstream(dev, cs, cmdstreamgen) < 0)
1442 break;
1443 } else {
1444 cs->iova = gpuaddr;
1445 cs->size = sizedwords * sizeof(uint32_t);
1446 }
1447 }
1448
1449 need_submit = true;
1450
1451 submit++;
1452 break;
1453 }
1454 case RD_GPU_ID: {
1455 uint32_t gpu_id = parse_gpu_id(ps.buf);
1456 if (gpu_id)
1457 printf("gpuid: %d\n", gpu_id);
1458 break;
1459 }
1460 case RD_CHIP_ID: {
1461 uint64_t chip_id = parse_chip_id(ps.buf);
1462 printf("chip_id: 0x%" PRIx64 "\n", chip_id);
1463 break;
1464 }
1465 default:
1466 break;
1467 }
1468 }
1469
1470 if (need_submit)
1471 device_submit_cmdstreams(dev);
1472
1473 close(dev->fd);
1474
1475 io_close(io);
1476 fflush(stdout);
1477
1478 if (ps.ret < 0) {
1479 printf("corrupt file\n");
1480 }
1481 return 0;
1482 }
1483