1 /*
2 * Copyright © 2022 Igalia S.L.
3 * SPDX-License-Identifier: MIT
4 */
5
6 #include <assert.h>
7 #include <err.h>
8 #include <getopt.h>
9 #include <stdint.h>
10 #include <stdio.h>
11 #include <stdlib.h>
12 #include <string.h>
13 #include <sys/types.h>
14
15 #include "redump.h"
16
17 #include "util/u_math.h"
18
19 #include "adreno_common.xml.h"
20 #include "adreno_pm4.xml.h"
21 #include "freedreno_pm4.h"
22
23 #include "a6xx.xml.h"
24
25 #include "ir3/ir3_assembler.h"
26 #include "ir3/ir3_compiler.h"
27 #include "ir3/ir3_shader.h"
28
29 #include "util/list.h"
30 #include "util/vma.h"
31
/* A CPU-side staging buffer of dwords together with the GPU address (iova)
 * it has been assigned from the replay context's address heap.
 */
struct cmdstream {
   struct list_head link; /* entry in replay_context::cs_list */

   uint32_t *mem;       /* backing storage, accessed as dwords */
   uint32_t total_size; /* capacity of mem, in dwords */
   uint32_t cur;        /* index of the next dword to be written */

   uint64_t iova; /* address assigned to mem[0] by util_vma_heap_alloc() */
};
41
42 static uint64_t
cs_get_cur_iova(struct cmdstream * cs)43 cs_get_cur_iova(struct cmdstream *cs)
44 {
45 return cs->iova + cs->cur * sizeof(uint32_t);
46 }
47
/* Describes one buffer range that is recorded in the output file as an
 * RD_WRBUFFER section (see rd_write_wrbuffer()).
 */
struct wrbuf {
   struct list_head link; /* entry in replay_context::wrbuf_list */

   uint64_t iova;    /* address of the buffer; must be non-zero */
   uint64_t size;    /* size of the range — presumably in bytes, confirm */
   uint64_t clear;   /* stores the boolean "clear" request as 0 or 1 */
   const char *name; /* NUL-terminated label written after the three qwords */
};
56
/* All state needed to build one output file: the address heap, the command
 * streams that end up in the file, and the shader assembler state.
 */
struct replay_context {
   void *mem_ctx; /* ralloc context owning the hash table and shader iovas */

   struct util_vma_heap vma; /* allocator for the iova of every cmdstream */

   struct cmdstream *submit_cs; /* stream announced via RD_CMDSTREAM_ADDR */
   struct cmdstream *state_cs;  /* target of begin/end_draw_state() */
   struct cmdstream *shader_cs; /* holds the binaries from upload_shader() */

   /* Log buffers; both are marked completely used at init so that their
    * whole allocation is described/written to the file.
    */
   struct cmdstream *shader_log;
   struct cmdstream *cp_log;

   struct list_head cs_list; /* every cmdstream created by cs_alloc() */

   struct list_head wrbuf_list; /* struct wrbuf entries, in creation order */

   struct ir3_compiler *compiler; /* used by upload_shader() to assemble */

   /* Maps a shader id to a ralloc'ed uint64_t holding the iova of its
    * assembled replacement in shader_cs.
    */
   struct hash_table_u64 *compiled_shaders;

   const char *output_name; /* path of the file written at finish time */
};
79
80 static void
pkt(struct cmdstream * cs,uint32_t payload)81 pkt(struct cmdstream *cs, uint32_t payload)
82 {
83 assert(cs->cur <= cs->total_size);
84 cs->mem[cs->cur++] = payload;
85 }
86
87 static void
pkt_qw(struct cmdstream * cs,uint64_t payload)88 pkt_qw(struct cmdstream *cs, uint64_t payload)
89 {
90 pkt(cs, payload);
91 pkt(cs, payload >> 32);
92 }
93
94 static uint64_t
pkt_blob(struct cmdstream * cs,void * payload,uint32_t size,uint32_t alignment)95 pkt_blob(struct cmdstream *cs, void *payload, uint32_t size, uint32_t alignment)
96 {
97 cs->cur = align(cs->cur, alignment / sizeof(uint32_t));
98 uint64_t start_iova = cs_get_cur_iova(cs);
99
100 memcpy(cs->mem + cs->cur, payload, size);
101 cs->cur += size;
102
103 return start_iova;
104 }
105
106 static void
pkt4(struct cmdstream * cs,uint16_t regindx,uint16_t cnt,uint32_t payload)107 pkt4(struct cmdstream *cs, uint16_t regindx, uint16_t cnt, uint32_t payload)
108 {
109 pkt(cs, pm4_pkt4_hdr(regindx, cnt));
110 pkt(cs, payload);
111 }
112
113 static void
pkt7(struct cmdstream * cs,uint8_t opcode,uint16_t cnt)114 pkt7(struct cmdstream *cs, uint8_t opcode, uint16_t cnt)
115 {
116 pkt(cs, pm4_pkt7_hdr(opcode, cnt));
117 }
118
/* Header written in front of every section of the output file: a type tag
 * followed by the size of the payload that comes directly after it.
 */
struct rd_section {
   uint32_t type; /* section kind; the writers store RD_* section values */
   uint32_t size; /* payload size in bytes, not counting this header */
};
123
124 static struct cmdstream *
cs_alloc(struct replay_context * ctx,uint32_t size)125 cs_alloc(struct replay_context *ctx, uint32_t size)
126 {
127 struct cmdstream *cs = (struct cmdstream *) calloc(1, sizeof(struct cmdstream));
128 cs->mem = (uint32_t *)calloc(1, size);
129 cs->total_size = size / sizeof(uint32_t);
130 cs->cur = 0;
131 cs->iova = util_vma_heap_alloc(&ctx->vma, size, 4096);
132
133 assert(cs->iova != 0);
134
135 list_addtail(&cs->link, &ctx->cs_list);
136
137 return cs;
138 }
139
140 static void
rd_write_gpu_addr_section(FILE * out,struct cmdstream * cs,enum rd_sect_type section)141 rd_write_gpu_addr_section(FILE *out, struct cmdstream *cs, enum rd_sect_type section)
142 {
143 const uint32_t packet[] = {(uint32_t)cs->iova,
144 (uint32_t)(cs->cur * sizeof(uint32_t)),
145 (uint32_t)(cs->iova >> 32)};
146 struct rd_section section_address = {.type = section,
147 .size = sizeof(packet)};
148 fwrite(§ion_address, sizeof(section_address), 1, out);
149 fwrite(packet, sizeof(packet), 1, out);
150 }
151
152 static void
rd_write_cs_buffer(FILE * out,struct cmdstream * cs)153 rd_write_cs_buffer(FILE *out, struct cmdstream *cs)
154 {
155 if (cs->cur == 0)
156 return;
157
158 rd_write_gpu_addr_section(out, cs, RD_GPUADDR);
159
160 struct rd_section section_contents = {.type = RD_BUFFER_CONTENTS,
161 .size = uint32_t(cs->cur * sizeof(uint32_t))};
162
163 fwrite(§ion_contents, sizeof(section_contents), 1, out);
164 fwrite(cs->mem, sizeof(uint32_t), cs->cur, out);
165 }
166
167 static void
rd_write_cs_submit(FILE * out,struct cmdstream * cs)168 rd_write_cs_submit(FILE *out, struct cmdstream *cs)
169 {
170 const uint32_t packet[] = {(uint32_t)cs->iova, cs->cur,
171 (uint32_t)(cs->iova >> 32)};
172 struct rd_section section_cmdstream = {.type = RD_CMDSTREAM_ADDR,
173 .size = sizeof(packet)};
174
175 fwrite(§ion_cmdstream, sizeof(section_cmdstream), 1, out);
176 fwrite(packet, sizeof(packet), 1, out);
177 }
178
179 static void
rd_write_wrbuffer(FILE * out,struct wrbuf * wrbuf)180 rd_write_wrbuffer(FILE *out, struct wrbuf *wrbuf)
181 {
182 uint32_t name_len = strlen(wrbuf->name) + 1;
183 struct rd_section section = {.type = RD_WRBUFFER,
184 .size = (uint32_t)(sizeof(uint64_t) * 3) + name_len};
185 fwrite(§ion, sizeof(section), 1, out);
186 fwrite(&wrbuf->iova, sizeof(uint64_t), 1, out);
187 fwrite(&wrbuf->size, sizeof(uint64_t), 1, out);
188 fwrite(&wrbuf->clear, sizeof(uint64_t), 1, out);
189 fwrite(wrbuf->name, sizeof(char), name_len, out);
190 }
191
/* Prints the command line synopsis to stderr and terminates the process
 * with exit status 2. `name` is shown as the program name.
 */
static void
print_usage(const char *name)
{
   /* clang-format off */
   fprintf(stderr, "Usage:\n\n\t%s [OPTIONS]... FILE...\n\n", name);
   fputs("Options:\n"
         "\t --vastart=offset\n"
         "\t --vasize=size\n"
         "\t-h, --help - show this message\n",
         stderr);
   /* clang-format on */

   exit(2);
}
206
/* Values returned by getopt_long() for the long-only options; chosen above
 * the range of any single-character option.
 */
#define OPT_VA_START 1000
#define OPT_VA_SIZE  1001

/* Long options accepted by replay_context_init(). getopt_long() finds the
 * end of the table by its all-zero last element, so that entry must stay.
 */
/* clang-format off */
static const struct option opts[] = {
   { "vastart", required_argument, 0, OPT_VA_START },
   { "vasize",  required_argument, 0, OPT_VA_SIZE },
   { "help",    no_argument,       0, 'h' },
   { 0,         0,                 0, 0 },
};
/* clang-format on */
217
218 static void
replay_context_init(struct replay_context * ctx,struct fd_dev_id * dev_id,int argc,char ** argv)219 replay_context_init(struct replay_context *ctx, struct fd_dev_id *dev_id,
220 int argc, char **argv)
221 {
222 uint64_t va_start = 0;
223 uint64_t va_size = 0;
224
225 int c;
226 while ((c = getopt_long(argc, argv, "h", opts, NULL)) != -1) {
227 switch (c) {
228 case OPT_VA_START:
229 va_start = strtoull(optarg, NULL, 0);
230 break;
231 case OPT_VA_SIZE:
232 va_size = strtoull(optarg, NULL, 0);
233 break;
234 case 'h':
235 default:
236 print_usage(argv[0]);
237 }
238 }
239
240 if (optind < argc) {
241 ctx->output_name = argv[optind];
242 } else {
243 }
244
245 if (!va_start || !va_size || !ctx->output_name) {
246 print_usage(argv[0]);
247 exit(1);
248 }
249
250 ctx->mem_ctx = ralloc_context(NULL);
251 list_inithead(&ctx->cs_list);
252 list_inithead(&ctx->wrbuf_list);
253
254 util_vma_heap_init(&ctx->vma, va_start, ROUND_DOWN_TO(va_size, 4096));
255
256 ctx->submit_cs = cs_alloc(ctx, 1024 * 1024);
257 ctx->state_cs = cs_alloc(ctx, 2 * 1024 * 1024);
258 ctx->shader_cs = cs_alloc(ctx, 8 * 1024 * 1024);
259
260 ctx->shader_log = cs_alloc(ctx, 1024 * 1024);
261 ctx->shader_log->mem[0] = (ctx->shader_log->iova & 0xffffffff) + sizeof(uint64_t);
262 ctx->shader_log->mem[1] = ctx->shader_log->iova >> 32;
263 ctx->shader_log->cur = ctx->shader_log->total_size;
264
265 ctx->cp_log = cs_alloc(ctx, 8 * 1024 * 1024);
266 ((uint64_t *)ctx->cp_log->mem)[0] = ctx->cp_log->iova + 2 * sizeof(uint64_t);
267 ((uint64_t *)ctx->cp_log->mem)[1] = sizeof(uint64_t);
268 ctx->cp_log->cur = ctx->cp_log->total_size;
269
270 struct ir3_compiler_options options{
271 .disable_cache = true,
272 };
273 ctx->compiler =
274 ir3_compiler_create(NULL, dev_id, fd_dev_info_raw(dev_id), &options);
275 ctx->compiled_shaders = _mesa_hash_table_u64_create(ctx->mem_ctx);
276 }
277
278 static void
replay_context_finish(struct replay_context * ctx)279 replay_context_finish(struct replay_context *ctx)
280 {
281 FILE *out = fopen(ctx->output_name, "w");
282 if (!out) {
283 errx(1, "Cannot open '%s' for writing\n", ctx->output_name);
284 }
285
286 static const uint32_t gpu_id = 660;
287 struct rd_section section_gpu_id = {.type = RD_GPU_ID,
288 .size = 1 * sizeof(uint32_t)};
289 fwrite(§ion_gpu_id, sizeof(section_gpu_id), 1, out);
290 fwrite(&gpu_id, sizeof(uint32_t), 1, out);
291
292 rd_write_gpu_addr_section(out, ctx->shader_log, RD_SHADER_LOG_BUFFER);
293 rd_write_gpu_addr_section(out, ctx->cp_log, RD_CP_LOG_BUFFER);
294
295 list_for_each_entry (struct cmdstream, cs, &ctx->cs_list, link) {
296 rd_write_cs_buffer(out, cs);
297 }
298 rd_write_cs_submit(out, ctx->submit_cs);
299
300 list_for_each_entry (struct wrbuf, wrbuf, &ctx->wrbuf_list, link) {
301 rd_write_wrbuffer(out, wrbuf);
302 }
303
304 fclose(out);
305 }
306
307 static void
upload_shader(struct replay_context * ctx,uint64_t id,const char * source)308 upload_shader(struct replay_context *ctx, uint64_t id, const char *source)
309 {
310 FILE *in = fmemopen((void *)source, strlen(source), "r");
311
312 struct ir3_kernel_info info = {
313 .shader_print_buffer_iova = ctx->shader_log->iova,
314 };
315 struct ir3_shader *shader = ir3_parse_asm(ctx->compiler, &info, in);
316 assert(shader);
317
318 fclose(in);
319
320 uint64_t *shader_iova = ralloc(ctx->mem_ctx, uint64_t);
321 *shader_iova = pkt_blob(ctx->shader_cs, shader->variants->bin,
322 shader->variants->info.size, 128);
323 ralloc_free(shader);
324
325 _mesa_hash_table_u64_insert(ctx->compiled_shaders, id, shader_iova);
326 }
327
328 static void
emit_shader_iova(struct replay_context * ctx,struct cmdstream * cs,uint64_t id)329 emit_shader_iova(struct replay_context *ctx, struct cmdstream *cs, uint64_t id)
330 {
331 uint64_t *shader_iova = (uint64_t *)
332 _mesa_hash_table_u64_search(ctx->compiled_shaders, id);
333 if (shader_iova) {
334 pkt_qw(cs, *shader_iova);
335 } else {
336 fprintf(stderr,
337 "Not override for shader at 0x%" PRIx64 ", using original\n", id);
338 pkt_qw(cs, id);
339 }
340 }
341
/* Redirects the packets that follow into ctx.state_cs.
 *
 * Expects a `struct replay_context ctx` object and a `struct cmdstream *cs`
 * to be visible. It declares `subcs_iova_start`, `prev_cs` (the previous
 * `cs`) and a new `cs` that shadows the previous one, so it has to be
 * expanded in a scope nested inside the one that declares the outer `cs`.
 */
#define begin_draw_state()                                                     \
   uint64_t subcs_iova_start = cs_get_cur_iova(ctx.state_cs);                  \
   struct cmdstream *prev_cs = cs;                                             \
   struct cmdstream *cs = ctx.state_cs;

/* Closes a begin_draw_state() region in the same scope.
 *
 * Measures how many dwords were added to ctx.state_cs since the region
 * began and emits a three-dword CP_SET_DRAW_STATE packet into `prev_cs`:
 * `params` OR'ed with that dword count, then the 64-bit start address of
 * the region.
 */
#define end_draw_state(params)                                                 \
   uint64_t subcs_iova_end = cs_get_cur_iova(ctx.state_cs);                    \
   uint32_t subcs_size =                                                       \
      (subcs_iova_end - subcs_iova_start) / sizeof(uint32_t);                  \
   pkt7(prev_cs, CP_SET_DRAW_STATE, 3);                                        \
   pkt(prev_cs, (params) | subcs_size);                                        \
   pkt_qw(prev_cs, subcs_iova_start);
354
/* Redirects the packets that follow into a newly allocated 1 MiB cmdstream.
 *
 * Expects a `struct replay_context ctx` object and a `struct cmdstream *cs`
 * to be visible. It declares `prev_cs` (the previous `cs`) and a new `cs`
 * that shadows the previous one, so it has to be expanded in a scope nested
 * inside the one that declares the outer `cs`.
 */
#define begin_ib()                                                             \
   struct cmdstream *prev_cs = cs;                                             \
   struct cmdstream *cs = cs_alloc(&ctx, 1024 * 1024);

/* Emits a three-dword CP_INDIRECT_BUFFER packet into `prev_cs`: the 64-bit
 * address of `cs`, then the number of dwords written to `cs` so far.
 * Requires `cs` and `prev_cs` to be in scope and declares `ibcs_size`.
 */
#define end_ib()                                                               \
   uint64_t ibcs_size = cs->cur;                                               \
   pkt7(prev_cs, CP_INDIRECT_BUFFER, 3);                                       \
   pkt_qw(prev_cs, cs->iova);                                                  \
   pkt(prev_cs, ibcs_size);
364
/* Emits commands into `_cs` that append `dwords` dwords read from `iova` to
 * the CP log buffer (ctx->cp_log) at this point of the command stream.
 *
 * The packets doing the actual work are placed in a freshly allocated
 * cmdstream with 0xdeadbeef placeholders where destination addresses go.
 * Packets emitted into `_cs` overwrite those placeholders first, and the new
 * cmdstream is then executed through CP_INDIRECT_BUFFER (see end_ib()).
 *
 * NOTE(review): the operand layout of CP_MEM_WRITE, CP_MEMCPY and
 * CP_MEM_TO_MEM is not visible in this file. The comments below assume the
 * first address operand is the destination and that CP_MEM_TO_MEM sums its
 * remaining operands — confirm against the PM4 packet definitions.
 */
static void
gpu_print(struct replay_context *ctx, struct cmdstream *_cs, uint64_t iova,
          uint32_t dwords)
{
   uint64_t header_iova, body_iova;
   /* `prev_cs` and `cs` are the names end_ib() at the bottom relies on. */
   struct cmdstream *prev_cs = _cs;
   struct cmdstream *cs = cs_alloc(ctx, 4096);
   /* Commands that are being modified should be in a separate cmdstream,
    * otherwise they would be prefetched and writes would not be visible.
    */
   {
      /* Write size into entry's header */
      pkt7(cs, CP_MEM_WRITE, 4);
      /* Address of the placeholder that gets patched from `_cs`. */
      header_iova = cs_get_cur_iova(cs);
      pkt_qw(cs, 0xdeadbeef);
      /* The size in bytes sits inline in this packet as a 64-bit value and
       * is reused below as an operand of CP_MEM_TO_MEM.
       */
      uint64_t size_iova = cs_get_cur_iova(cs);
      pkt(cs, dwords * 4);
      pkt(cs, 0);

      /* Copy the data into entry's body */
      pkt7(cs, CP_MEMCPY, 5);
      pkt(cs, dwords);
      pkt_qw(cs, iova);
      /* Address of the second placeholder that gets patched from `_cs`. */
      body_iova = cs_get_cur_iova(cs);
      pkt_qw(cs, 0xdeadbeef);

      /* iova = iova + body_size + header_size */
      pkt7(cs, CP_MEM_TO_MEM, 9);
      pkt(cs, CP_MEM_TO_MEM_0_DOUBLE | CP_MEM_TO_MEM_0_WAIT_FOR_MEM_WRITES);
      pkt_qw(cs, ctx->cp_log->iova);
      pkt_qw(cs, ctx->cp_log->iova);
      pkt_qw(cs, size_iova);
      pkt_qw(cs, ctx->cp_log->iova + sizeof(uint64_t));
   }

   {
      /* From here on packets go to the caller's cmdstream. */
      struct cmdstream *cs = prev_cs;
      /* Patch the first placeholder with the qword stored at the start of
       * the CP log.
       */
      pkt7(cs, CP_MEM_TO_MEM, 5);
      pkt(cs, CP_MEM_TO_MEM_0_DOUBLE | CP_MEM_TO_MEM_0_WAIT_FOR_MEM_WRITES);
      pkt_qw(cs, header_iova);
      pkt_qw(cs, ctx->cp_log->iova);

      /* Patch the second placeholder using the first two qwords of the CP
       * log (the second one is initialised to sizeof(uint64_t)).
       */
      pkt7(cs, CP_MEM_TO_MEM, 7);
      pkt(cs, CP_MEM_TO_MEM_0_DOUBLE);
      pkt_qw(cs, body_iova);
      pkt_qw(cs, ctx->cp_log->iova);
      pkt_qw(cs, ctx->cp_log->iova + sizeof(uint64_t));

      /* Emitted before the indirect buffer below — presumably so that the
       * patched values are in memory before it is fetched; confirm.
       */
      pkt7(cs, CP_WAIT_MEM_WRITES, 0);
      pkt7(cs, CP_WAIT_FOR_ME, 0);
   }

   end_ib();
}
419
420 /* This function is used to read a buffer from the GPU into a file.
421 * The buffer can optionally be cleared to 0xdeadbeef at the start
422 * of the cmdstream by setting the clear parameter to true.
423 *
424 * Note: Unlike gpu_print, this function isn't sequenced, it will
425 * read the state of the buffer at the end of the cmdstream, not
426 * at the point of the call.
427 */
428 static void
gpu_read_into_file(struct replay_context * ctx,struct cmdstream * _cs,uint64_t iova,uint64_t size,bool clear,const char * name)429 gpu_read_into_file(struct replay_context *ctx, struct cmdstream *_cs,
430 uint64_t iova, uint64_t size, bool clear, const char *name)
431 {
432 struct wrbuf *wrbuf = (struct wrbuf *) calloc(1, sizeof(struct wrbuf));
433 wrbuf->iova = iova;
434 wrbuf->size = size;
435 wrbuf->clear = clear;
436 wrbuf->name = strdup(name);
437
438 assert(wrbuf->iova != 0);
439
440 list_addtail(&wrbuf->link, &ctx->wrbuf_list);
441 }