/*
 * Copyright © 2022 Igalia S.L.
 * SPDX-License-Identifier: MIT
 */

#include <assert.h>
#include <ctype.h>
#include <err.h>
#include <errno.h>
#include <fcntl.h>
#include <getopt.h>
#include <inttypes.h>
#include <signal.h>
#include <stdarg.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <libgen.h>
#if FD_REPLAY_KGSL
#include "../vulkan/msm_kgsl.h"
#elif FD_REPLAY_MSM
#include <xf86drm.h>
#include "drm-uapi/msm_drm.h"
#elif FD_REPLAY_WSL
#define __KERNEL__
#include "drm-uapi/d3dkmthk.h"
#endif

#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/wait.h>

#include "util/os_time.h"
#include "util/rb_tree.h"
#include "util/u_vector.h"
#include "util/vma.h"
#include "buffers.h"
#include "cffdec.h"
#include "io.h"
#include "redump.h"
#include "rdutil.h"

/**
 * Replay command stream obtained from:
 * - /sys/kernel/debug/dri/0/rd
 * - /sys/kernel/debug/dri/0/hangrd
 * !!! Command stream capture should be done with ALL buffers:
 * - echo 1 > /sys/module/msm/parameters/rd_full
 *
 * Requires kernel with MSM_INFO_SET_IOVA support.
 * In case userspace IOVAs are not supported, like on KGSL, we have to
 * pre-allocate a single buffer and hope it is always allocated starting
 * from the same address.
 *
 * TODO: Misrendering replay would require marking framebuffer images
 *       at each renderpass in order to fetch and decode them.
 *
 * Code from Freedreno/Turnip is not re-used here since the relevant
 * pieces may introduce additional allocations which cannot be allowed
 * during the replay.
 *
 * For how-to see freedreno.rst
 */
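
/* Example invocations (illustrative; see freedreno.rst for the
 * authoritative how-to):
 *
 *   replay capture.rd                  - replay every submit in the capture
 *   replay -f 10 -l 20 capture.rd      - replay only submits 10..20
 *   replay -o 3 -g ./generate_rd FILE  - regenerate and override submit 3
 */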

static const char *exename = NULL;

static const uint64_t FAKE_ADDRESS_SPACE_SIZE = 1024 * 1024 * 1024;

static int handle_file(const char *filename, uint32_t first_submit,
                       uint32_t last_submit, uint32_t submit_to_override,
                       uint64_t base_addr, const char *cmdstreamgen);

static void
print_usage(const char *name, const char *default_csgen)
{
   /* clang-format off */
   fprintf(stderr, "Usage:\n\n"
           "\t%s [OPTIONS]... FILE...\n\n"
           "Options:\n"
           "\t-e, --exe=NAME         - only use cmdstream from named process\n"
           "\t-o  --override=submit  - № of the submit to override\n"
87            "\t-g  --generator=path   - executable which generate cmdstream for override (default: %s)\n"
88            "\t-f  --first=submit     - first submit № to replay\n"
89            "\t-l  --last=submit      - last submit № to replay\n"
90            "\t-a  --address=address  - base iova address on WSL\n"
91            "\t-h, --help             - show this message\n"
92            , name, default_csgen);
93    /* clang-format on */
94    exit(2);
95 }
96 
97 /* clang-format off */
98 static const struct option opts[] = {
99       { "exe",       required_argument, 0, 'e' },
100       { "override",  required_argument, 0, 'o' },
101       { "generator", required_argument, 0, 'g' },
102       { "first",     required_argument, 0, 'f' },
103       { "last",      required_argument, 0, 'l' },
104       { "address",   required_argument, 0, 'a' },
105       { "help",      no_argument,       0, 'h' },
106 };
107 /* clang-format on */
108 
109 int
main(int argc,char ** argv)110 main(int argc, char **argv)
111 {
112    int ret = -1;
113    int c;
114 
115    uint32_t submit_to_override = -1;
116    uint32_t first_submit = 0;
117    uint32_t last_submit = -1;
118    uint64_t base_addr = 0;
119 
   char *default_csgen = malloc(PATH_MAX);
   /* dirname() is allowed to modify the string passed to it, and argv[0]
    * is used again later by print_usage(), so run it on a copy.
    */
   char *argv0 = strdup(argv[0]);
   snprintf(default_csgen, PATH_MAX, "%s/generate_rd", dirname(argv0));
   free(argv0);

   const char *csgen = default_csgen;

   while ((c = getopt_long(argc, argv, "e:o:g:f:l:a:h", opts, NULL)) != -1) {
      switch (c) {
      case 0:
         /* option that sets a flag, nothing to do */
         break;
      case 'e':
         exename = optarg;
         break;
      case 'o':
         submit_to_override = strtoul(optarg, NULL, 0);
         break;
      case 'g':
         csgen = optarg;
         break;
      case 'f':
         first_submit = strtoul(optarg, NULL, 0);
         break;
      case 'l':
         last_submit = strtoul(optarg, NULL, 0);
         break;
      case 'a':
         base_addr = strtoull(optarg, NULL, 0);
         break;
      case 'h':
      default:
         print_usage(argv[0], default_csgen);
      }
   }

   while (optind < argc) {
      ret = handle_file(argv[optind], first_submit, last_submit,
                        submit_to_override, base_addr, csgen);
      if (ret) {
         fprintf(stderr, "error reading: %s\n", argv[optind]);
         fprintf(stderr, "continuing..\n");
      }
      optind++;
   }

   if (ret)
      print_usage(argv[0], default_csgen);

   return ret;
}

struct buffer {
   struct rb_node node;

   uint32_t gem_handle;
   uint64_t size;
   uint64_t iova;
   void *map;

   bool used;
   uint32_t flags;
};

struct cmdstream {
   uint64_t iova;
   uint64_t size;
};

struct wrbuf {
   uint64_t iova;
   uint64_t size;
   char* name;
};

struct device {
   int fd;

   struct rb_tree buffers;
   struct util_vma_heap vma;

   struct u_vector cmdstreams;

   uint64_t shader_log_iova;
   uint64_t cp_log_iova;

   bool has_set_iova;

   uint32_t va_id;
   void *va_map;
   uint64_t va_iova;

   struct u_vector wrbufs;

#ifdef FD_REPLAY_MSM
   uint32_t queue_id;
#endif

#ifdef FD_REPLAY_KGSL
   uint32_t context_id;
#endif

#ifdef FD_REPLAY_WSL
   struct d3dkmthandle device;
   struct d3dkmthandle context;

   /* We don't know of a good way to wait for a submission to complete
    * on WSL at the moment, so we use our own fences instead.
    */
   uint64_t fence_iova;
   uint64_t fence_ib_iova;
   volatile uint32_t *fence;
   uint32_t *fence_ib;
#endif
};

void buffer_mem_free(struct device *dev, struct buffer *buf);

static int
rb_buffer_insert_cmp(const struct rb_node *n1, const struct rb_node *n2)
{
   const struct buffer *buf1 = (const struct buffer *)n1;
   const struct buffer *buf2 = (const struct buffer *)n2;
   /* Note that gpuaddr comparisons can overflow an int: */
   if (buf1->iova > buf2->iova)
      return 1;
   else if (buf1->iova < buf2->iova)
      return -1;
   return 0;
}

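/* Range lookup: a node compares equal to any address that falls inside
 * [buf->iova, buf->iova + buf->size), so rb_tree_search() finds the
 * buffer containing a given iova, not just exact base addresses.
 */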
static int
rb_buffer_search_cmp(const struct rb_node *node, const void *addrptr)
{
   const struct buffer *buf = (const struct buffer *)node;
   uint64_t iova = *(uint64_t *)addrptr;
   if (buf->iova + buf->size <= iova)
      return -1;
   else if (buf->iova > iova)
      return 1;
   return 0;
}

static struct buffer *
device_get_buffer(struct device *dev, uint64_t iova)
{
   if (iova == 0)
      return NULL;
   return (struct buffer *)rb_tree_search(&dev->buffers, &iova,
                                          rb_buffer_search_cmp);
}

static void
device_mark_buffers(struct device *dev)
{
   rb_tree_foreach_safe (struct buffer, buf, &dev->buffers, node) {
      buf->used = false;
   }
}

static void
device_free_buffers(struct device *dev)
{
   rb_tree_foreach_safe (struct buffer, buf, &dev->buffers, node) {
      buffer_mem_free(dev, buf);
      rb_tree_remove(&dev->buffers, &buf->node);
      free(buf);
   }
}

static void
device_print_shader_log(struct device *dev)
{
   struct shader_log {
      uint64_t cur_iova;
      union {
         uint32_t entries_u32[0];
         float entries_float[0];
      };
   };
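
   /* Assumed layout: the replayed cmdstream appends 32-bit entries and
    * advances cur_iova past the last one, so the entry count can be
    * recovered from how far the cursor moved from the log's base iova.
    */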

   if (dev->shader_log_iova != 0)
   {
      struct buffer *buf = device_get_buffer(dev, dev->shader_log_iova);
      if (buf) {
         struct shader_log *log = buf->map + (dev->shader_log_iova - buf->iova);
         uint32_t count = (log->cur_iova - dev->shader_log_iova -
                           offsetof(struct shader_log, entries_u32)) / 4;

         printf("Shader Log Entries: %u\n", count);

         for (uint32_t i = 0; i < count; i++) {
            printf("[%u] %08x %.4f\n", i, log->entries_u32[i],
                   log->entries_float[i]);
         }

         printf("========================================\n");
      }
   }
}

static void
device_print_cp_log(struct device *dev)
{
   struct cp_log {
      uint64_t cur_iova;
      uint64_t tmp;
      uint64_t first_entry_size;
   };

   struct cp_log_entry {
      uint64_t size;
      uint32_t data[0];
   };

   if (dev->cp_log_iova == 0)
      return;

   struct buffer *buf = device_get_buffer(dev, dev->cp_log_iova);
   if (!buf)
      return;

   struct cp_log *log = buf->map + (dev->cp_log_iova - buf->iova);
   if (log->first_entry_size == 0)
      return;

   struct cp_log_entry *log_entry =
      buf->map + offsetof(struct cp_log, first_entry_size);
   uint32_t idx = 0;
   while (log_entry->size != 0) {
      printf("\nCP Log [%u]:\n", idx++);
      uint32_t dwords = log_entry->size / 4;

      for (uint32_t i = 0; i < dwords; i++) {
         if (i % 8 == 0)
            printf("\t");
         printf("%08x ", log_entry->data[i]);
         if (i % 8 == 7)
            printf("\n");
      }
      printf("\n");

      log_entry = (void *)log_entry + log_entry->size +
                  offsetof(struct cp_log_entry, data);
   }
}

static void
device_dump_wrbuf(struct device *dev)
{
   if (!u_vector_length(&dev->wrbufs))
      return;

   char buffer_dir[PATH_MAX];
   getcwd(buffer_dir, sizeof(buffer_dir));
   strcat(buffer_dir, "/buffers");
   rmdir(buffer_dir);
   mkdir(buffer_dir, 0777);

   struct wrbuf *wrbuf;
   u_vector_foreach(wrbuf, &dev->wrbufs) {
      char buffer_path[PATH_MAX];
      snprintf(buffer_path, sizeof(buffer_path), "%s/%s", buffer_dir, wrbuf->name);
      FILE *f = fopen(buffer_path, "wb");
      if (!f) {
         fprintf(stderr, "Error opening %s\n", buffer_path);
         /* No file was opened, so don't fall through to fclose(). */
         continue;
      }

      struct buffer *buf = device_get_buffer(dev, wrbuf->iova);
      if (!buf) {
         fprintf(stderr, "Error getting buffer for %s\n", buffer_path);
         goto end_it;
      }

      uint64_t offset = wrbuf->iova - buf->iova;
      uint64_t size = MIN2(wrbuf->size, buf->size - offset);
      if (size != wrbuf->size) {
         fprintf(stderr, "Warning: Clamping buffer %s as it's smaller than expected (0x%" PRIx64 " < 0x%" PRIx64 ")\n", wrbuf->name, size, wrbuf->size);
      }

      printf("Dumping %s (0x%" PRIx64 " - 0x%" PRIx64 ")\n", wrbuf->name, wrbuf->iova, wrbuf->iova + size);

      fwrite(buf->map + offset, size, 1, f);

      end_it:
      fclose(f);
   }
}

#if FD_REPLAY_MSM
static inline void
get_abs_timeout(struct drm_msm_timespec *tv, uint64_t ns)
{
   struct timespec t;
   clock_gettime(CLOCK_MONOTONIC, &t);
   tv->tv_sec = t.tv_sec + ns / 1000000000;
   tv->tv_nsec = t.tv_nsec + ns % 1000000000;
}

static struct device *
device_create(uint64_t base_addr)
{
   struct device *dev = calloc(1, sizeof(struct device));

   dev->fd = drmOpenWithType("msm", NULL, DRM_NODE_RENDER);
   if (dev->fd < 0) {
      errx(1, "Cannot open MSM fd!");
   }

   uint64_t va_start, va_size;

   struct drm_msm_param req = {
      .pipe = MSM_PIPE_3D0,
      .param = MSM_PARAM_VA_START,
   };

   int ret = drmCommandWriteRead(dev->fd, DRM_MSM_GET_PARAM, &req, sizeof(req));
   va_start = req.value;

   if (!ret) {
      req.param = MSM_PARAM_VA_SIZE;
      ret = drmCommandWriteRead(dev->fd, DRM_MSM_GET_PARAM, &req, sizeof(req));
      va_size = req.value;

      dev->has_set_iova = true;
   }

   if (ret) {
      printf("MSM_INFO_SET_IOVA is not supported!\n");

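      /* Fallback path (see the file comment): allocate one large BO up
       * front, carve every replayed buffer out of it via the software
       * VMA heap, and hope the kernel gives it the same iova as in the
       * capture on every run.
       */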
      struct drm_msm_gem_new req_new = {.size = FAKE_ADDRESS_SPACE_SIZE,
                                        .flags = MSM_BO_CACHED_COHERENT};
      drmCommandWriteRead(dev->fd, DRM_MSM_GEM_NEW, &req_new, sizeof(req_new));
      dev->va_id = req_new.handle;

      struct drm_msm_gem_info req_info = {
         .handle = req_new.handle,
         .info = MSM_INFO_GET_IOVA,
      };

      drmCommandWriteRead(dev->fd, DRM_MSM_GEM_INFO, &req_info, sizeof(req_info));
      dev->va_iova = req_info.value;

      struct drm_msm_gem_info req_offset = {
         .handle = req_new.handle,
         .info = MSM_INFO_GET_OFFSET,
      };

      drmCommandWriteRead(dev->fd, DRM_MSM_GEM_INFO, &req_offset, sizeof(req_offset));

      dev->va_map = mmap(0, FAKE_ADDRESS_SPACE_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED,
                         dev->fd, req_offset.value);
      if (dev->va_map == MAP_FAILED) {
         err(1, "mmap failure");
      }

      va_start = dev->va_iova;
      va_size = FAKE_ADDRESS_SPACE_SIZE;

      printf("Allocated iova %" PRIx64 "\n", dev->va_iova);
   }

   struct drm_msm_submitqueue req_queue = {
      .flags = 0,
      .prio = 0,
   };

   ret = drmCommandWriteRead(dev->fd, DRM_MSM_SUBMITQUEUE_NEW, &req_queue,
                             sizeof(req_queue));
   if (ret) {
      err(1, "DRM_MSM_SUBMITQUEUE_NEW failure");
   }

   dev->queue_id = req_queue.id;

   rb_tree_init(&dev->buffers);
   util_vma_heap_init(&dev->vma, va_start, ROUND_DOWN_TO(va_size, 4096));
   u_vector_init(&dev->cmdstreams, 8, sizeof(struct cmdstream));
   u_vector_init(&dev->wrbufs, 8, sizeof(struct wrbuf));

   return dev;
}

static void
device_submit_cmdstreams(struct device *dev)
{
   if (!u_vector_length(&dev->cmdstreams)) {
      device_free_buffers(dev);
      return;
   }

   struct drm_msm_gem_submit_cmd cmds[u_vector_length(&dev->cmdstreams)];

   uint32_t idx = 0;
   struct cmdstream *cmd;
   u_vector_foreach(cmd, &dev->cmdstreams) {
      struct buffer *cmdstream_buf = device_get_buffer(dev, cmd->iova);

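      /* submit_idx is an index into the bo_list built below; both walks
       * iterate the same rb-tree in the same order, so the positions
       * match up.
       */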
      uint32_t bo_idx = 0;
      rb_tree_foreach (struct buffer, buf, &dev->buffers, node) {
         if (buf == cmdstream_buf)
            break;

         bo_idx++;
      }

      if (cmdstream_buf)
         cmdstream_buf->flags = MSM_SUBMIT_BO_DUMP;

      struct drm_msm_gem_submit_cmd *submit_cmd = &cmds[idx];
      submit_cmd->type = MSM_SUBMIT_CMD_BUF;
      submit_cmd->submit_idx = dev->has_set_iova ? bo_idx : 0;
      if (dev->has_set_iova) {
         submit_cmd->submit_offset = cmd->iova - cmdstream_buf->iova;
      } else {
         submit_cmd->submit_offset = cmd->iova - dev->va_iova;
      }
      submit_cmd->size = cmd->size;
      submit_cmd->pad = 0;
      submit_cmd->nr_relocs = 0;
      submit_cmd->relocs = 0;

      idx++;
   }

   uint32_t bo_count = 0;
   rb_tree_foreach (struct buffer, buf, &dev->buffers, node) {
      if (buf)
         bo_count++;
   }

   if (!dev->has_set_iova) {
      bo_count = 1;
   }

   struct drm_msm_gem_submit_bo *bo_list =
      calloc(bo_count, sizeof(struct drm_msm_gem_submit_bo));

   if (dev->has_set_iova) {
      uint32_t bo_idx = 0;
      rb_tree_foreach (struct buffer, buf, &dev->buffers, node) {
         struct drm_msm_gem_submit_bo *submit_bo = &bo_list[bo_idx++];
         submit_bo->handle = buf->gem_handle;
         submit_bo->flags =
            buf->flags | MSM_SUBMIT_BO_READ | MSM_SUBMIT_BO_WRITE;
         submit_bo->presumed = buf->iova;

         buf->flags = 0;
      }
   } else {
      bo_list[0].handle = dev->va_id;
      bo_list[0].flags =
         MSM_SUBMIT_BO_DUMP | MSM_SUBMIT_BO_READ | MSM_SUBMIT_BO_WRITE;
      bo_list[0].presumed = dev->va_iova;
   }

   struct drm_msm_gem_submit submit_req = {
      .flags = MSM_PIPE_3D0,
      .queueid = dev->queue_id,
      .bos = (uint64_t)(uintptr_t)bo_list,
      .nr_bos = bo_count,
      .cmds = (uint64_t)(uintptr_t)cmds,
      .nr_cmds = u_vector_length(&dev->cmdstreams),
      .in_syncobjs = 0,
      .out_syncobjs = 0,
      .nr_in_syncobjs = 0,
      .nr_out_syncobjs = 0,
      .syncobj_stride = sizeof(struct drm_msm_gem_submit_syncobj),
   };

   int ret = drmCommandWriteRead(dev->fd, DRM_MSM_GEM_SUBMIT, &submit_req,
                                 sizeof(submit_req));

   if (ret) {
      err(1, "DRM_MSM_GEM_SUBMIT failure %d", ret);
   }

   /* Wait for the submission to complete so that freeing the buffers
    * also frees their VMAs in the kernel, which ensures that new
    * allocations won't clash with the old ones.
    */
   struct drm_msm_wait_fence wait_req = {
      .fence = submit_req.fence,
      .queueid = dev->queue_id,
   };
   get_abs_timeout(&wait_req.timeout, 1000000000);

   ret =
      drmCommandWrite(dev->fd, DRM_MSM_WAIT_FENCE, &wait_req, sizeof(wait_req));
   if (ret && (ret != -ETIMEDOUT)) {
      err(1, "DRM_MSM_WAIT_FENCE failure %d", ret);
   }

   u_vector_finish(&dev->cmdstreams);
   u_vector_init(&dev->cmdstreams, 8, sizeof(struct cmdstream));

   device_print_shader_log(dev);
   device_print_cp_log(dev);

   device_dump_wrbuf(dev);
   u_vector_finish(&dev->wrbufs);
   u_vector_init(&dev->wrbufs, 8, sizeof(struct wrbuf));

   device_free_buffers(dev);
}

static void
buffer_mem_alloc(struct device *dev, struct buffer *buf)
{
   bool success = util_vma_heap_alloc_addr(&dev->vma, buf->iova, buf->size);
   if (!success)
      errx(1, "Failed to allocate buffer");

   if (!dev->has_set_iova) {
      uint64_t offset = buf->iova - dev->va_iova;
      assert(offset < FAKE_ADDRESS_SPACE_SIZE && (offset + buf->size) <= FAKE_ADDRESS_SPACE_SIZE);
      buf->map = ((uint8_t*)dev->va_map) + offset;
      return;
   }

   {
      struct drm_msm_gem_new req = {.size = buf->size, .flags = MSM_BO_WC};

      int ret =
         drmCommandWriteRead(dev->fd, DRM_MSM_GEM_NEW, &req, sizeof(req));
      if (ret) {
         err(1, "DRM_MSM_GEM_NEW failure %d", ret);
      }

      buf->gem_handle = req.handle;
   }

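   /* Pin the fresh BO at the iova recorded in the capture so that any
    * pointers embedded in the replayed cmdstream remain valid.
    */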
   {
      struct drm_msm_gem_info req = {
         .handle = buf->gem_handle,
         .info = MSM_INFO_SET_IOVA,
         .value = buf->iova,
      };

      int ret =
         drmCommandWriteRead(dev->fd, DRM_MSM_GEM_INFO, &req, sizeof(req));

      if (ret) {
         err(1, "MSM_INFO_SET_IOVA failure %d", ret);
      }
   }

   {
      struct drm_msm_gem_info req = {
         .handle = buf->gem_handle,
         .info = MSM_INFO_GET_OFFSET,
      };

      int ret =
         drmCommandWriteRead(dev->fd, DRM_MSM_GEM_INFO, &req, sizeof(req));
      if (ret) {
         err(1, "MSM_INFO_GET_OFFSET failure %d", ret);
      }

      void *map = mmap(0, buf->size, PROT_READ | PROT_WRITE, MAP_SHARED,
                       dev->fd, req.value);
      if (map == MAP_FAILED) {
         err(1, "mmap failure");
      }

      buf->map = map;
   }
}

void
buffer_mem_free(struct device *dev, struct buffer *buf)
{
   if (dev->has_set_iova) {
      munmap(buf->map, buf->size);

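      /* Unbind the iova first (MSM_INFO_SET_IOVA with value 0) so the
       * kernel drops the VMA right away and the address range can be
       * reused by later allocations.
       */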
      struct drm_msm_gem_info req_iova = {
         .handle = buf->gem_handle,
         .info = MSM_INFO_SET_IOVA,
         .value = 0,
      };

      int ret = drmCommandWriteRead(dev->fd, DRM_MSM_GEM_INFO, &req_iova,
                                    sizeof(req_iova));
      if (ret < 0) {
         err(1, "MSM_INFO_SET_IOVA(0) failed! %d", ret);
         return;
      }

      struct drm_gem_close req = {
         .handle = buf->gem_handle,
      };
      drmIoctl(dev->fd, DRM_IOCTL_GEM_CLOSE, &req);
   }

   util_vma_heap_free(&dev->vma, buf->iova, buf->size);
}

#elif FD_REPLAY_KGSL
static int
safe_ioctl(int fd, unsigned long request, void *arg)
{
   int ret;

   do {
      ret = ioctl(fd, request, arg);
   } while (ret == -1 && (errno == EINTR || errno == EAGAIN));

   return ret;
}

static struct device *
device_create(uint64_t base_addr)
{
   struct device *dev = calloc(1, sizeof(struct device));

   static const char path[] = "/dev/kgsl-3d0";

   dev->fd = open(path, O_RDWR | O_CLOEXEC);
   if (dev->fd < 0) {
      errx(1, "Cannot open KGSL fd!");
   }

   struct kgsl_gpumem_alloc_id req = {
      .size = FAKE_ADDRESS_SPACE_SIZE,
      .flags = KGSL_MEMFLAGS_IOCOHERENT,
   };

   int ret = safe_ioctl(dev->fd, IOCTL_KGSL_GPUMEM_ALLOC_ID, &req);
   if (ret) {
      err(1, "IOCTL_KGSL_GPUMEM_ALLOC_ID failure");
   }

   dev->va_id = req.id;
   dev->va_iova = req.gpuaddr;
   dev->va_map = mmap(0, FAKE_ADDRESS_SPACE_SIZE, PROT_READ | PROT_WRITE,
                      MAP_SHARED, dev->fd, req.id << 12);
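   /* KGSL exposes GPU buffers for CPU access through the device fd,
    * with the allocation id (page-shifted) used as the mmap offset.
    */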

   rb_tree_init(&dev->buffers);
   util_vma_heap_init(&dev->vma, req.gpuaddr, ROUND_DOWN_TO(FAKE_ADDRESS_SPACE_SIZE, 4096));
   u_vector_init(&dev->cmdstreams, 8, sizeof(struct cmdstream));
   u_vector_init(&dev->wrbufs, 8, sizeof(struct wrbuf));

   struct kgsl_drawctxt_create drawctxt_req = {
      .flags = KGSL_CONTEXT_SAVE_GMEM |
               KGSL_CONTEXT_NO_GMEM_ALLOC |
               KGSL_CONTEXT_PREAMBLE,
   };

   ret = safe_ioctl(dev->fd, IOCTL_KGSL_DRAWCTXT_CREATE, &drawctxt_req);
   if (ret) {
      err(1, "IOCTL_KGSL_DRAWCTXT_CREATE failure");
   }

   printf("Allocated iova %" PRIx64 "\n", dev->va_iova);

   dev->context_id = drawctxt_req.drawctxt_id;

   return dev;
}

static void
device_submit_cmdstreams(struct device *dev)
{
   if (!u_vector_length(&dev->cmdstreams)) {
      device_free_buffers(dev);
      return;
   }

   struct kgsl_command_object cmds[u_vector_length(&dev->cmdstreams)];

   uint32_t idx = 0;
   struct cmdstream *cmd;
   u_vector_foreach(cmd, &dev->cmdstreams) {
      struct kgsl_command_object *submit_cmd = &cmds[idx++];
      submit_cmd->gpuaddr = cmd->iova;
      submit_cmd->size = cmd->size;
      submit_cmd->flags = KGSL_CMDLIST_IB;
      submit_cmd->id = dev->va_id;
   }

   struct kgsl_gpu_command submit_req = {
      .flags = KGSL_CMDBATCH_SUBMIT_IB_LIST,
      .cmdlist = (uintptr_t) &cmds,
      .cmdsize = sizeof(struct kgsl_command_object),
      .numcmds = u_vector_length(&dev->cmdstreams),
      .numsyncs = 0,
      .context_id = dev->context_id,
   };

   int ret = safe_ioctl(dev->fd, IOCTL_KGSL_GPU_COMMAND, &submit_req);

   if (ret) {
      err(1, "IOCTL_KGSL_GPU_COMMAND failure %d", ret);
   }

   struct kgsl_device_waittimestamp_ctxtid wait = {
      .context_id = dev->context_id,
      .timestamp = submit_req.timestamp,
      .timeout = 3000,
   };

   ret = safe_ioctl(dev->fd, IOCTL_KGSL_DEVICE_WAITTIMESTAMP_CTXTID, &wait);

   if (ret) {
      err(1, "IOCTL_KGSL_DEVICE_WAITTIMESTAMP_CTXTID failure %d", ret);
   }

   u_vector_finish(&dev->cmdstreams);
   u_vector_init(&dev->cmdstreams, 8, sizeof(struct cmdstream));

   device_print_shader_log(dev);
   device_print_cp_log(dev);

   device_dump_wrbuf(dev);
   u_vector_finish(&dev->wrbufs);
   u_vector_init(&dev->wrbufs, 8, sizeof(struct wrbuf));

   device_free_buffers(dev);
}

static void
buffer_mem_alloc(struct device *dev, struct buffer *buf)
{
   bool success = util_vma_heap_alloc_addr(&dev->vma, buf->iova, buf->size);
   if (!success)
      errx(1, "Failed to allocate buffer");

   buf->map = ((uint8_t*)dev->va_map) + (buf->iova - dev->va_iova);
}

void
buffer_mem_free(struct device *dev, struct buffer *buf)
{
   util_vma_heap_free(&dev->vma, buf->iova, buf->size);
}
#else

static int
safe_ioctl(int fd, unsigned long request, void *arg)
{
   int ret;

   do {
      ret = ioctl(fd, request, arg);
   } while (ret == -1 && (errno == EINTR || errno == EAGAIN));

   return ret;
}

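/* The structures below describe reverse-engineered private driver data
 * blobs passed to dxgkrnl; the unk* fields hold constants observed in
 * captured submissions (noted in the trailing comments).
 */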
struct alloc_priv_info {
   __u32 struct_size;
   char _pad0[4];
   __u32 unk0; // 1
   char _pad1[4];
   __u64 size;
   __u32 alignment;
   char _pad2[20];
   __u64 allocated_size;
   __u32 unk1;   // 1
   char _pad4[8]; /* offset: 60 */
   __u32 unk2;   // 61
   char _pad5[76];
   __u32 unk3; /* offset: 148 */ // 1
   char _pad6[8];
   __u32 unk4; /* offset: 160 */ // 1
   char _pad7[44];
   __u32 unk5; /* offset: 208 */ // 3
   char _pad8[16];
   __u32 size_2; /* offset: 228 */
   __u32 unk6;   // 1
   __u32 size_3;
   __u32 size_4;
   __u32 unk7; /* offset: 244 */ // 1
   char _pad9[56];
};
static_assert(sizeof(struct alloc_priv_info) == 304);
static_assert(offsetof(struct alloc_priv_info, unk1) == 56);
static_assert(offsetof(struct alloc_priv_info, unk3) == 148);
static_assert(offsetof(struct alloc_priv_info, unk5) == 208);

struct submit_priv_ib_info {
   char _pad5[4];
   __u32 size_dwords;
   __u64 iova;
   char _pad6[8];
} __attribute__((packed));

struct submit_priv_data {
   __u32 magic0;
   char _pad0[4];
   __u32 struct_size;
   char _pad1[4];
   /* It seems that priv data can have several sub-datas; cmdbuf is one
    * of them, and after it there is another 8-byte struct without
    * anything useful in it. That second data doesn't seem important
    * for replaying.
    */
   __u32 datas_count;
   char _pad2[32];
   struct {
      __u32 magic1;
      __u32 data_size;

      struct {
         __u32 unk1;
         __u32 cmdbuf_size;
         char _pad3[32];
         __u32 ib_count;
         char _pad4[36];

         struct submit_priv_ib_info ibs[];
      } cmdbuf;
   } data0;

   //    unsigned char magic2[8];
} __attribute__((packed));
static_assert(offsetof(struct submit_priv_data, data0) == 0x34);
static_assert(offsetof(struct submit_priv_data, data0.cmdbuf.ibs) == 0x8c);

static struct device *
device_create(uint64_t base_addr)
{
   struct device *dev = calloc(1, sizeof(struct device));

   static const char path[] = "/dev/dxg";

   dev->fd = open(path, O_RDWR | O_CLOEXEC);
   if (dev->fd < 0) {
      errx(1, "Cannot open /dev/dxg fd");
   }

   struct d3dkmt_adapterinfo adapters[1];
   struct d3dkmt_enumadapters3 enum_adapters = {
      .adapter_count = 1,
      .adapters = adapters,
   };
   int ret = safe_ioctl(dev->fd, LX_DXENUMADAPTERS3, &enum_adapters);
   if (ret) {
      errx(1, "LX_DXENUMADAPTERS3 failure");
   }

   if (enum_adapters.adapter_count == 0) {
      errx(1, "No adapters found");
   }

   struct winluid adapter_luid = enum_adapters.adapters[0].adapter_luid;

   struct d3dkmt_openadapterfromluid open_adapter = {
      .adapter_luid = adapter_luid,
   };
   ret = safe_ioctl(dev->fd, LX_DXOPENADAPTERFROMLUID, &open_adapter);
   if (ret) {
      errx(1, "LX_DXOPENADAPTERFROMLUID failure");
   }

   struct d3dkmthandle adapter = open_adapter.adapter_handle;

   struct d3dkmt_createdevice create_device = {
      .adapter = adapter,
   };
   ret = safe_ioctl(dev->fd, LX_DXCREATEDEVICE, &create_device);
   if (ret) {
      errx(1, "LX_DXCREATEDEVICE failure");
   }

   struct d3dkmthandle device = create_device.device;
   dev->device = device;

   unsigned char create_context_priv_data[] = {
      0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00,
      0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
      0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1c, 0x0c, 0x00, 0x00, 0x00,
      0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
      0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
      0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
   };

   struct d3dkmt_createcontextvirtual create_context = {
      .device = device,
      .node_ordinal = 0,
      .engine_affinity = 1,
      .priv_drv_data = create_context_priv_data,
      .priv_drv_data_size = sizeof(create_context_priv_data),
      .client_hint = 16,
   };
   ret = safe_ioctl(dev->fd, LX_DXCREATECONTEXTVIRTUAL, &create_context);
   if (ret) {
      errx(1, "LX_DXCREATECONTEXTVIRTUAL failure");
   }

   dev->context = create_context.context;

   struct d3dkmt_createpagingqueue create_paging_queue = {
      .device = device,
      .priority = _D3DDDI_PAGINGQUEUE_PRIORITY_NORMAL,
      .physical_adapter_index = 0,
   };
   ret = safe_ioctl(dev->fd, LX_DXCREATEPAGINGQUEUE, &create_paging_queue);
   if (ret) {
      errx(1, "LX_DXCREATEPAGINGQUEUE failure");
   }
   struct d3dkmthandle paging_queue = create_paging_queue.paging_queue;

   uint32_t alloc_size = FAKE_ADDRESS_SPACE_SIZE;
   struct alloc_priv_info priv_alloc_info = {
      .struct_size = sizeof(struct alloc_priv_info),
      .unk0 = 1,
      .size = alloc_size,
      .alignment = 4096,
      .unk1 = 1,
      .unk2 = 61,
      .unk3 = 1,
      .unk4 = 1,
      .unk5 = 3,
      .size_2 = alloc_size,
      .unk6 = 1,
      .size_3 = alloc_size,
      .size_4 = alloc_size,
      .unk7 = 1,
   };

   struct d3dddi_allocationinfo2 alloc_info = {
      .priv_drv_data = &priv_alloc_info,
      .priv_drv_data_size = sizeof(struct alloc_priv_info),
   };

   struct d3dkmt_createallocation create_allocation = {
      .device = device,
      .alloc_count = 1,
      .allocation_info = &alloc_info,
   };
   ret = safe_ioctl(dev->fd, LX_DXCREATEALLOCATION, &create_allocation);
   if (ret) {
      errx(1, "LX_DXCREATEALLOCATION failure");
   }

   assert(priv_alloc_info.allocated_size == alloc_size);

   struct d3dddi_mapgpuvirtualaddress map_virtual_address = {
      .paging_queue = paging_queue,
      .base_address = base_addr,
      .maximum_address = UINT64_MAX,
      .allocation = create_allocation.allocation_info[0].allocation,
      .size_in_pages = MAX2(alloc_size / 4096, 1),
      .protection = {
         .write = 1,
         .execute = 1,
      },
   };
   ret = safe_ioctl(dev->fd, LX_DXMAPGPUVIRTUALADDRESS, &map_virtual_address);
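   /* 259 is 0x103, i.e. STATUS_PENDING: the map (and the MakeResident
    * below) completes asynchronously on the paging queue, so this is
    * the expected success status here.
    */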
   if (ret != 259) {
      errx(1, "LX_DXMAPGPUVIRTUALADDRESS failure");
   }

   __u32 priority = 0;
   struct d3dddi_makeresident make_resident = {
      .paging_queue = paging_queue,
      .alloc_count = 1,
      .allocation_list = &create_allocation.allocation_info[0].allocation,
      .priority_list = &priority,
   };
   ret = safe_ioctl(dev->fd, LX_DXMAKERESIDENT, &make_resident);
   if (ret != 259) {
      errx(1, "LX_DXMAKERESIDENT failure");
   }

   struct d3dkmt_lock2 lock = {
      .device = device,
      .allocation = create_allocation.allocation_info[0].allocation,
   };
   ret = safe_ioctl(dev->fd, LX_DXLOCK2, &lock);
   if (ret) {
      errx(1, "LX_DXLOCK2 failure");
   }

   dev->va_iova = map_virtual_address.virtual_address;
   dev->va_map = lock.data;

   rb_tree_init(&dev->buffers);
   util_vma_heap_init(&dev->vma, dev->va_iova, ROUND_DOWN_TO(alloc_size, 4096));
   u_vector_init(&dev->cmdstreams, 8, sizeof(struct cmdstream));
   u_vector_init(&dev->wrbufs, 8, sizeof(struct wrbuf));

   printf("Allocated iova at 0x%" PRIx64 "\n", dev->va_iova);

   uint64_t hole_size = 4096;
   dev->vma.alloc_high = true;
   dev->fence_iova = util_vma_heap_alloc(&dev->vma, hole_size, 4096);
   dev->fence_ib_iova = dev->fence_iova + 8;
   dev->fence = (uint32_t *) ((uint8_t*)dev->va_map + (dev->fence_iova - dev->va_iova));
   dev->fence_ib = (uint32_t *) ((uint8_t*)dev->va_map + (dev->fence_ib_iova - dev->va_iova));
   dev->vma.alloc_high = false;

   return dev;
}

static void
device_submit_cmdstreams(struct device *dev)
{
   if (!u_vector_length(&dev->cmdstreams)) {
      device_free_buffers(dev);
      return;
   }

   uint32_t cmdstream_count = u_vector_length(&dev->cmdstreams) + 1;

   uint32_t priv_data_size =
      sizeof(struct submit_priv_data) +
      cmdstream_count * sizeof(struct submit_priv_ib_info);

   struct submit_priv_data *priv_data = calloc(1, priv_data_size);
   priv_data->magic0 = 0xccaabbee;
   priv_data->struct_size = priv_data_size;
   priv_data->datas_count = 1;

   priv_data->data0.magic1 = 0xfadcab02;
   priv_data->data0.data_size =
      sizeof(priv_data->data0) +
      cmdstream_count * sizeof(struct submit_priv_ib_info);
   priv_data->data0.cmdbuf.unk1 = 0xcccc0001;
   priv_data->data0.cmdbuf.cmdbuf_size = sizeof(priv_data->data0.cmdbuf) +
      cmdstream_count * sizeof(struct submit_priv_ib_info);
   priv_data->data0.cmdbuf.ib_count = cmdstream_count;

   struct cmdstream *cmd;
   uint32_t idx = 0;
   u_vector_foreach(cmd, &dev->cmdstreams) {
      priv_data->data0.cmdbuf.ibs[idx].size_dwords = cmd->size / 4;
      priv_data->data0.cmdbuf.ibs[idx].iova = cmd->iova;
      idx++;
   }

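   /* The extra IB appended here (hence cmdstream_count + 1 above) is a
    * CP_MEM_WRITE that stores a sentinel value to the fence BO; the CPU
    * polls for it below in lieu of a kernel-side wait.
    */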
   priv_data->data0.cmdbuf.ibs[idx].size_dwords = 4;
   priv_data->data0.cmdbuf.ibs[idx].iova = dev->fence_ib_iova;

   *dev->fence = 0x00000000;
   dev->fence_ib[0] = pm4_pkt7_hdr(0x3d, 3); // CP_MEM_WRITE
   dev->fence_ib[1] = dev->fence_iova;
   dev->fence_ib[2] = dev->fence_iova >> 32;
   dev->fence_ib[3] = 0xababfcfc;

   // Fill second (empty) data block
   // uint32_t *magic_end = (uint32_t *)(((char *) priv_data) + priv_data_size - 8);
   // magic_end[0] = 0xfadcab00;
   // magic_end[1] = 0x00000008;

   struct d3dkmt_submitcommand submission = {
      .command_buffer = priv_data->data0.cmdbuf.ibs[0].iova,
      .command_length = priv_data->data0.cmdbuf.ibs[0].size_dwords * sizeof(uint32_t),
      .broadcast_context_count = 1,
      .broadcast_context[0] = dev->context,
      .priv_drv_data_size = priv_data_size,
      .priv_drv_data = priv_data,
   };

   int ret = safe_ioctl(dev->fd, LX_DXSUBMITCOMMAND, &submission);
   if (ret) {
      errx(1, "LX_DXSUBMITCOMMAND failure");
   }

   free(priv_data);

   u_vector_finish(&dev->cmdstreams);
   u_vector_init(&dev->cmdstreams, 8, sizeof(struct cmdstream));

   // TODO: better way to wait
   for (unsigned i = 0; i < 1000; i++) {
      usleep(1000);
      if (*dev->fence != 0)
         break;
   }
   if (*dev->fence == 0) {
      errx(1, "Waiting for submission failed! GPU faulted or kernel did not execute this submission.");
   }

   device_print_shader_log(dev);
   device_print_cp_log(dev);

   device_dump_wrbuf(dev);
   u_vector_finish(&dev->wrbufs);
   u_vector_init(&dev->wrbufs, 8, sizeof(struct wrbuf));

   device_free_buffers(dev);
}

static void
buffer_mem_alloc(struct device *dev, struct buffer *buf)
{
   bool success = util_vma_heap_alloc_addr(&dev->vma, buf->iova, buf->size);
   if (!success)
      errx(1, "Failed to allocate buffer");

   buf->map = ((uint8_t*)dev->va_map) + (buf->iova - dev->va_iova);
}

void
buffer_mem_free(struct device *dev, struct buffer *buf)
{
   util_vma_heap_free(&dev->vma, buf->iova, buf->size);
}

#endif

static void
upload_buffer(struct device *dev, uint64_t iova, unsigned int size,
              void *hostptr)
{
   struct buffer *buf = device_get_buffer(dev, iova);

   if (!buf) {
      buf = calloc(1, sizeof(struct buffer));
      buf->iova = iova;
      buf->size = size;

      rb_tree_insert(&dev->buffers, &buf->node, rb_buffer_insert_cmp);

      buffer_mem_alloc(dev, buf);
   } else if (buf->size != size) {
      buffer_mem_free(dev, buf);
      buf->size = size;
      buffer_mem_alloc(dev, buf);
   }

   memcpy(buf->map, hostptr, size);

   buf->used = true;
}

static int
override_cmdstream(struct device *dev, struct cmdstream *cs,
                   const char *cmdstreamgen)
{
#if FD_REPLAY_KGSL
   static const char *tmpfilename = "/sdcard/Download/cmdstream_override.rd";
#elif FD_REPLAY_MSM || FD_REPLAY_WSL
   static const char *tmpfilename = "/tmp/cmdstream_override.rd";
#endif

   /* Find a free space for the new cmdstreams and resources we will use
    * when overriding existing cmdstream.
    */
   uint64_t hole_size = util_vma_heap_get_max_free_continuous_size(&dev->vma);
   uint64_t hole_iova = util_vma_heap_alloc(&dev->vma, hole_size, 1);
   util_vma_heap_free(&dev->vma, hole_iova, hole_size);
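   /* Alloc-then-free probe: we only need the address and size of the
    * largest free hole so the generator can place its buffers there;
    * nothing stays allocated on our side.
    */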

   char cmd[2048];
   snprintf(cmd, sizeof(cmd),
            "%s --vastart=%" PRIu64 " --vasize=%" PRIu64 " %s", cmdstreamgen,
            hole_iova, hole_size, tmpfilename);

   printf("generating cmdstream '%s'\n", cmd);

   int ret = system(cmd);
   if (ret) {
      fprintf(stderr, "Error executing %s\n", cmd);
      return -1;
   }

   struct io *io;
   struct rd_parsed_section ps = {0};

   io = io_open(tmpfilename);
   if (!io) {
      fprintf(stderr, "could not open: %s\n", tmpfilename);
      return -1;
   }

   struct {
      unsigned int len;
      uint64_t gpuaddr;
   } gpuaddr = {0};

   while (parse_rd_section(io, &ps)) {
      switch (ps.type) {
      case RD_GPUADDR:
         parse_addr(ps.buf, ps.sz, &gpuaddr.len, &gpuaddr.gpuaddr);
         /* no-op */
         break;
      case RD_BUFFER_CONTENTS:
         upload_buffer(dev, gpuaddr.gpuaddr, gpuaddr.len, ps.buf);
         ps.buf = NULL;
         break;
      case RD_CMDSTREAM_ADDR: {
         unsigned int sizedwords;
         uint64_t gpuaddr;
         parse_addr(ps.buf, ps.sz, &sizedwords, &gpuaddr);
         printf("override cmdstream: %d dwords\n", sizedwords);

         cs->iova = gpuaddr;
         cs->size = sizedwords * sizeof(uint32_t);
         break;
      }
      case RD_SHADER_LOG_BUFFER: {
         unsigned int sizedwords;
         parse_addr(ps.buf, ps.sz, &sizedwords, &dev->shader_log_iova);
         break;
      }
      case RD_CP_LOG_BUFFER: {
         unsigned int sizedwords;
         parse_addr(ps.buf, ps.sz, &sizedwords, &dev->cp_log_iova);
         break;
      }
      case RD_WRBUFFER: {
         struct wrbuf *wrbuf = u_vector_add(&dev->wrbufs);
         uint64_t *p = (uint64_t *)ps.buf;
         wrbuf->iova = p[0];
         wrbuf->size = p[1];
         bool clear = p[2];
         int name_len = ps.sz - (3 * sizeof(uint64_t));
         wrbuf->name = calloc(sizeof(char), name_len);
         memcpy(wrbuf->name, (char*)(p + 3), name_len); // includes null terminator

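         /* Poison the whole range first so bytes the GPU actually wrote
          * can be told apart from ones it never touched in the dump.
          */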
         if (clear) {
            struct buffer *buf = device_get_buffer(dev, wrbuf->iova);
            assert(buf);

            uint64_t offset = wrbuf->iova - buf->iova;
            uint64_t end = MIN2(offset + wrbuf->size, buf->size);
            while (offset < end) {
               static const uint64_t clear_value = 0xdeadbeefdeadbeef;
               memcpy(buf->map + offset, &clear_value,
                      MIN2(sizeof(clear_value), end - offset));
               offset += sizeof(clear_value);
            }
         }

         break;
      }
      default:
         break;
      }
   }

   io_close(io);
   if (ps.ret < 0) {
      fprintf(stderr, "corrupt file %s\n", tmpfilename);
   }

   return ps.ret;
}

static int
handle_file(const char *filename, uint32_t first_submit, uint32_t last_submit,
            uint32_t submit_to_override, uint64_t base_addr, const char *cmdstreamgen)
{
   struct io *io;
   int submit = 0;
   bool skip = false;
   bool need_submit = false;
   struct rd_parsed_section ps = {0};

   printf("Reading %s...\n", filename);

   if (!strcmp(filename, "-"))
      io = io_openfd(0);
   else
      io = io_open(filename);

   if (!io) {
      fprintf(stderr, "could not open: %s\n", filename);
      return -1;
   }

   struct device *dev = device_create(base_addr);

   struct {
      unsigned int len;
      uint64_t gpuaddr;
   } gpuaddr = {0};

   while (parse_rd_section(io, &ps)) {
      switch (ps.type) {
      case RD_TEST:
      case RD_VERT_SHADER:
      case RD_FRAG_SHADER:
         /* no-op */
         break;
      case RD_CMD:
         skip = false;
         if (exename) {
            skip |= (strstr(ps.buf, exename) != ps.buf);
         } else {
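            /* With no --exe filter, skip submissions from known-noisy
             * processes (perf tooling, browsers, compositors, X).
             */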
1403             skip |= (strstr(ps.buf, "fdperf") == ps.buf);
1404             skip |= (strstr(ps.buf, "chrome") == ps.buf);
1405             skip |= (strstr(ps.buf, "surfaceflinger") == ps.buf);
1406             skip |= ((char *)ps.buf)[0] == 'X';
1407          }
1408          break;
1409 
1410       case RD_GPUADDR:
1411          if (need_submit) {
1412             need_submit = false;
1413             device_submit_cmdstreams(dev);
1414          }
1415 
1416          parse_addr(ps.buf, ps.sz, &gpuaddr.len, &gpuaddr.gpuaddr);
1417          /* no-op */
1418          break;
1419       case RD_BUFFER_CONTENTS:
1420          /* TODO: skip buffer uploading and even reading if this buffer
1421           * is used for submit outside of [first_submit, last_submit]
1422           * range. A set of buffers is shared between several cmdstreams,
1423           * so we'd have to find starting from which RD_CMD to upload
1424           * the buffers.
1425           */
1426          upload_buffer(dev, gpuaddr.gpuaddr, gpuaddr.len, ps.buf);
1427          break;
1428       case RD_CMDSTREAM_ADDR: {
1429          unsigned int sizedwords;
1430          uint64_t gpuaddr;
1431          parse_addr(ps.buf, ps.sz, &sizedwords, &gpuaddr);
1432 
1433          bool add_submit = !skip && (submit >= first_submit) && (submit <= last_submit);
1434          printf("%scmdstream %d: %d dwords\n", add_submit ? "" : "skipped ",
1435                 submit, sizedwords);
1436 
1437          if (add_submit) {
1438             struct cmdstream *cs = u_vector_add(&dev->cmdstreams);
1439 
1440             if (submit == submit_to_override) {
1441                if (override_cmdstream(dev, cs, cmdstreamgen) < 0)
1442                   break;
1443             } else {
1444                cs->iova = gpuaddr;
1445                cs->size = sizedwords * sizeof(uint32_t);
1446             }
1447          }
1448 
1449          need_submit = true;
1450 
1451          submit++;
1452          break;
1453       }
1454       case RD_GPU_ID: {
1455          uint32_t gpu_id = parse_gpu_id(ps.buf);
1456          if (gpu_id)
1457             printf("gpuid: %d\n", gpu_id);
1458          break;
1459       }
1460       case RD_CHIP_ID: {
1461          uint64_t chip_id = parse_chip_id(ps.buf);
1462          printf("chip_id: 0x%" PRIx64 "\n", chip_id);
1463          break;
1464       }
1465       default:
1466          break;
1467       }
1468    }
1469 
1470    if (need_submit)
1471       device_submit_cmdstreams(dev);
1472 
1473    close(dev->fd);
1474 
1475    io_close(io);
1476    fflush(stdout);
1477 
1478    if (ps.ret < 0) {
1479       printf("corrupt file\n");
1480    }
1481    return 0;
1482 }