1 /*
2 * Copyright © 2012 Rob Clark <[email protected]>
3 * SPDX-License-Identifier: MIT
4 */
5
6 #include <assert.h>
7 #include <ctype.h>
8 #include <err.h>
9 #include <errno.h>
10 #include <fcntl.h>
11 #include <inttypes.h>
12 #include <signal.h>
13 #include <stdarg.h>
14 #include <stdbool.h>
15 #include <stdint.h>
16 #include <stdio.h>
17 #include <stdlib.h>
18 #include <string.h>
19 #include <unistd.h>
20 #include <sys/stat.h>
21 #include <sys/types.h>
22 #include <sys/wait.h>
23
24 #include "freedreno_pm4.h"
25
26 #include "buffers.h"
27 #include "cffdec.h"
28 #include "disasm.h"
29 #include "redump.h"
30 #include "rnnutil.h"
31 #include "script.h"
32
33 /* ************************************************************************* */
34 /* originally based on kernel recovery dump code: */
35
/* Decoder options; set by cffdec_init() and read throughout this file. */
static const struct cffdec_options *options;

/* When set, dump_registers() reports writes to non-banked registers
 * as needing a WFI.
 */
static bool needs_wfi = false;
/* Copied from options->summary in cffdec_init(); consulted by quiet(). */
static bool summary = false;
/* The texture sampler/const dump callbacks only act while this is set. */
static bool in_summary = false;
static int vertices;
42
43 static inline unsigned
regcnt(void)44 regcnt(void)
45 {
46 if (options->info->chip >= 5)
47 return 0x3ffff;
48 else
49 return 0x7fff;
50 }
51
52 static int
is_64b(void)53 is_64b(void)
54 {
55 return options->info->chip >= 5;
56 }
57
static int draws[4];
/* Per-IB-level decode state, indexed the same way as options->ibs[]. */
static struct {
   uint64_t base;
   uint32_t size; /* in dwords */
   /* Generally cmdstream consists of multiple IB calls to different
    * buffers, which are themselves often re-used for each tile.  The
    * triggered flag serves two purposes to help make it more clear
    * what part of the cmdstream is before vs after the GPU hang:
    *
    * 1) if in IB2 we are past the point within the IB2 buffer where
    *    the GPU hung, but IB1 is not past the point within its
    *    buffer where the GPU had hung, then we know the GPU hang
    *    happens on a future use of that IB2 buffer.
    *
    * 2) if in an IB1 or IB2 buffer that is not the one where the GPU
    *    hung, but we've already passed the trigger point at the same
    *    IB level, we know that we are past the point where the GPU
    *    had hung.
    *
    * So this is a one way switch, false->true.  And a higher #'d
    * IB level isn't considered triggered unless the lower #'d IB
    * level is.
    */
   bool triggered : 1;
   /* Set by highlight_gpuaddr() when the crash range of this level is
    * reached but a crash location was also found at the next IB level.
    */
   bool base_seen : 1;
} ibs[4];
/* Current IB level; used as the index into ibs[] and options->ibs[]. */
static int ib;

static int draw_count;
/* Compared against options->draw_filter in quiet(). */
static int current_draw_count;
88
/* query mode.. to handle symbolic register name queries, we need to
 * defer parsing the query strings until after gpu_id is known and the
 * rnn db is loaded:
 */
93 static int *queryvals;
94
95 static bool
quiet(int lvl)96 quiet(int lvl)
97 {
98 if ((options->draw_filter != -1) &&
99 (options->draw_filter != current_draw_count))
100 return true;
101 if ((lvl >= 3) && (summary || options->querystrs || options->script))
102 return true;
103 if ((lvl >= 2) && (options->querystrs || options->script))
104 return true;
105 return false;
106 }
107
/* printf() wrapper that emits nothing when quiet(lvl) says output at
 * this level is suppressed.
 */
void
printl(int lvl, const char *fmt, ...)
{
   if (!quiet(lvl)) {
      va_list ap;

      va_start(ap, fmt);
      vprintf(fmt, ap);
      va_end(ap);
   }
}
118
/* Indentation prefixes indexed by nesting level: entries 0..8 hold 1..9
 * tabs, the remaining entries are "x" placeholders.
 */
static const char *levels[] = {
   "\t",
   "\t\t",
   "\t\t\t",
   "\t\t\t\t",
   "\t\t\t\t\t",
   "\t\t\t\t\t\t",
   "\t\t\t\t\t\t\t",
   "\t\t\t\t\t\t\t\t",
   "\t\t\t\t\t\t\t\t\t",
   "x",
   "x",
   "x",
   "x",
   "x",
   "x",
};
136
/* Source of a block of state, passed to dump_tex_samp().  The
 * register-triggered sampler dump in this file passes STATE_SRC_DIRECT.
 */
enum state_src_t {
   STATE_SRC_DIRECT,
   STATE_SRC_INDIRECT,
   STATE_SRC_BINDLESS,
};
142
/* SDS (CP_SET_DRAW_STATE) helpers: */
static void load_all_groups(int level);
static void disable_all_groups(void);

/* Texture state dumpers, forward-declared because the register
 * callbacks below call them.
 */
static void dump_tex_samp(uint32_t *texsamp, enum state_src_t src, int num_unit,
                          int level);
static void dump_tex_const(uint32_t *texsamp, int num_unit, int level);
150
151 static bool
highlight_gpuaddr(uint64_t gpuaddr)152 highlight_gpuaddr(uint64_t gpuaddr)
153 {
154 if (!options->ibs[ib].base)
155 return false;
156
157 if ((ib > 0) && options->ibs[ib - 1].base &&
158 !(ibs[ib - 1].triggered || ibs[ib - 1].base_seen))
159 return false;
160
161 if (ibs[ib].base_seen)
162 return false;
163
164 if (ibs[ib].triggered)
165 return options->color;
166
167 if (options->ibs[ib].base != ibs[ib].base)
168 return false;
169
170 uint64_t start = ibs[ib].base + 4 * (ibs[ib].size - options->ibs[ib].rem);
171 uint64_t end = ibs[ib].base + 4 * ibs[ib].size;
172
173 bool triggered = (start <= gpuaddr) && (gpuaddr <= end);
174
175 if (triggered && (ib < 2) && options->ibs[ib + 1].crash_found) {
176 ibs[ib].base_seen = true;
177 return false;
178 }
179
180 ibs[ib].triggered |= triggered;
181
182 if (triggered)
183 printf("ESTIMATED CRASH LOCATION!\n");
184
185 return triggered & options->color;
186 }
187
188 static void
dump_hex(uint32_t * dwords,uint32_t sizedwords,int level)189 dump_hex(uint32_t *dwords, uint32_t sizedwords, int level)
190 {
191 int i, j;
192 int lastzero = 1;
193
194 if (quiet(2))
195 return;
196
197 bool highlight = highlight_gpuaddr(gpuaddr(dwords) + 4 * sizedwords - 1);
198
199 for (i = 0; i < sizedwords; i += 8) {
200 int zero = 1;
201
202 /* always show first row: */
203 if (i == 0)
204 zero = 0;
205
206 for (j = 0; (j < 8) && (i + j < sizedwords) && zero; j++)
207 if (dwords[i + j])
208 zero = 0;
209
210 if (zero && !lastzero)
211 printf("*\n");
212
213 lastzero = zero;
214
215 if (zero)
216 continue;
217
218 uint64_t addr = gpuaddr(&dwords[i]);
219
220 if (highlight)
221 printf("\x1b[0;1;31m");
222
223 if (is_64b()) {
224 printf("%016" PRIx64 ":%s", addr, levels[level]);
225 } else {
226 printf("%08x:%s", (uint32_t)addr, levels[level]);
227 }
228
229 if (highlight)
230 printf("\x1b[0m");
231
232 printf("%04x:", i * 4);
233
234 for (j = 0; (j < 8) && (i + j < sizedwords); j++) {
235 printf(" %08x", dwords[i + j]);
236 }
237
238 printf("\n");
239 }
240 }
241
242 static void
dump_float(float * dwords,uint32_t sizedwords,int level)243 dump_float(float *dwords, uint32_t sizedwords, int level)
244 {
245 int i;
246 for (i = 0; i < sizedwords; i++) {
247 if ((i % 8) == 0) {
248 if (is_64b()) {
249 printf("%016" PRIx64 ":%s", gpuaddr(dwords), levels[level]);
250 } else {
251 printf("%08x:%s", (uint32_t)gpuaddr(dwords), levels[level]);
252 }
253 } else {
254 printf(" ");
255 }
256 printf("%8f", *(dwords++));
257 if ((i % 8) == 7)
258 printf("\n");
259 }
260 if (i % 8)
261 printf("\n");
262 }
263
/* Split a packed dword into an address part and a flags part; 'mask'
 * selects the bits that are returned in *flags, everything else ends up
 * in *gpuaddr.
 *
 * Original note: I believe the surface format is low bits:
 *   #define RB_COLOR_INFO__COLOR_FORMAT_MASK 0x0000000fL
 * comments in sys2gmem_tex_const indicate that address is [31:12], but
 * looks like at least some of the bits above the format have different
 * meaning..
 */
static void
parse_dword_addr(uint32_t dword, uint32_t *gpuaddr, uint32_t *flags,
                 uint32_t mask)
{
   assert(!is_64b()); /* this is only used on a2xx */

   const uint32_t addr_bits = dword & ~mask;
   const uint32_t flag_bits = dword & mask;

   *gpuaddr = addr_bits;
   *flags = flag_bits;
}
277
/* Shadow copy of register values, indexed by register offset; updated by
 * reg_set() and read back through reg_val().
 */
static uint32_t type0_reg_vals[0x3ffff + 1];
/* Bitmaps with one bit per register (byte regbase / 8, bit regbase % 8): */
static uint8_t type0_reg_rewritten[sizeof(type0_reg_vals) /
                                   8]; /* written since last draw */
static uint8_t type0_reg_written[sizeof(type0_reg_vals) / 8];
/* Per-register values returned by reg_lastval() and compared against the
 * current value by the query code.
 */
static uint32_t lastvals[ARRAY_SIZE(type0_reg_vals)];
283
284 static bool
reg_rewritten(uint32_t regbase)285 reg_rewritten(uint32_t regbase)
286 {
287 return !!(type0_reg_rewritten[regbase / 8] & (1 << (regbase % 8)));
288 }
289
290 bool
reg_written(uint32_t regbase)291 reg_written(uint32_t regbase)
292 {
293 return !!(type0_reg_written[regbase / 8] & (1 << (regbase % 8)));
294 }
295
296 static void
clear_rewritten(void)297 clear_rewritten(void)
298 {
299 memset(type0_reg_rewritten, 0, sizeof(type0_reg_rewritten));
300 }
301
302 static void
clear_written(void)303 clear_written(void)
304 {
305 memset(type0_reg_written, 0, sizeof(type0_reg_written));
306 clear_rewritten();
307 }
308
309 uint32_t
reg_lastval(uint32_t regbase)310 reg_lastval(uint32_t regbase)
311 {
312 return lastvals[regbase];
313 }
314
315 static void
clear_lastvals(void)316 clear_lastvals(void)
317 {
318 memset(lastvals, 0, sizeof(lastvals));
319 }
320
321 uint32_t
reg_val(uint32_t regbase)322 reg_val(uint32_t regbase)
323 {
324 return type0_reg_vals[regbase];
325 }
326
327 void
reg_set(uint32_t regbase,uint32_t val)328 reg_set(uint32_t regbase, uint32_t val)
329 {
330 assert(regbase < regcnt());
331 type0_reg_vals[regbase] = val;
332 type0_reg_written[regbase / 8] |= (1 << (regbase % 8));
333 type0_reg_rewritten[regbase / 8] |= (1 << (regbase % 8));
334 }
335
336 static void
reg_dump_scratch(const char * name,uint32_t dword,int level)337 reg_dump_scratch(const char *name, uint32_t dword, int level)
338 {
339 unsigned r;
340
341 if (quiet(3))
342 return;
343
344 r = regbase("CP_SCRATCH[0].REG");
345
346 // if not, try old a2xx/a3xx version:
347 if (!r)
348 r = regbase("CP_SCRATCH_REG0");
349
350 if (!r)
351 return;
352
353 printf("%s:%u,%u,%u,%u\n", levels[level], reg_val(r + 4), reg_val(r + 5),
354 reg_val(r + 6), reg_val(r + 7));
355 }
356
/* Hex-dump up to 'sizedwords' dwords starting at 'gpuaddr', one level
 * deeper than 'level'.  Nothing is printed when quiet(quietlvl) or when
 * the address is not backed by a known buffer.
 *
 * The dump is clamped to what hostlen() reports for the address, so that
 * a fixed-size request (see dump_gpuaddr()) near the end of a buffer does
 * not read past the host mapping.  disasm_gpuaddr() bounds its dump the
 * same way.
 */
static void
dump_gpuaddr_size(uint64_t gpuaddr, int level, int sizedwords, int quietlvl)
{
   if (quiet(quietlvl))
      return;

   void *buf = hostptr(gpuaddr);
   if (!buf)
      return;

   const uint32_t available = hostlen(gpuaddr) / 4;
   uint32_t count = sizedwords;

   if (count > available)
      count = available;

   dump_hex(buf, count, level + 1);
}
370
/* Dump the first 64 dwords at 'gpuaddr', at quiet level 3. */
static void
dump_gpuaddr(uint64_t gpuaddr, int level)
{
   enum { DUMP_DWORDS = 64, DUMP_QUIET_LVL = 3 };

   dump_gpuaddr_size(gpuaddr, level, DUMP_DWORDS, DUMP_QUIET_LVL);
}
376
/* Register callback for 32b address registers: dump the memory the
 * register value points at.  'name' is unused.
 */
static void
reg_dump_gpuaddr(const char *name, uint32_t dword, int level)
{
   const uint64_t addr = dword;

   dump_gpuaddr(addr, level);
}
382
/* Low half of a 64b address split over a _LO/_HI register pair; latched
 * by reg_gpuaddr_lo() and combined with the high half by the *_hi
 * callbacks.
 */
uint32_t gpuaddr_lo;

/* Register callback for the _LO half: only remembers the value. */
static void
reg_gpuaddr_lo(const char *name, uint32_t dword, int level)
{
   (void)name;
   (void)level;
   gpuaddr_lo = dword;
}
389
390 static void
reg_dump_gpuaddr_hi(const char * name,uint32_t dword,int level)391 reg_dump_gpuaddr_hi(const char *name, uint32_t dword, int level)
392 {
393 dump_gpuaddr(gpuaddr_lo | (((uint64_t)dword) << 32), level);
394 }
395
/* 64b register callback: dump the memory the register value points at.
 * 'name' is unused.
 */
static void
reg_dump_gpuaddr64(const char *name, uint64_t qword, int level)
{
   (void)name;
   dump_gpuaddr(qword, level);
}
401
402 static void
dump_shader(const char * ext,void * buf,int bufsz)403 dump_shader(const char *ext, void *buf, int bufsz)
404 {
405 if (options->dump_shaders) {
406 static int n = 0;
407 char filename[16];
408 int fd;
409 sprintf(filename, "%04d.%s", n++, ext);
410 fd = open(filename, O_WRONLY | O_TRUNC | O_CREAT, 0644);
411 if (fd != -1) {
412 write(fd, buf, bufsz);
413 close(fd);
414 }
415 }
416 }
417
418 static void
disasm_gpuaddr(const char * name,uint64_t gpuaddr,int level)419 disasm_gpuaddr(const char *name, uint64_t gpuaddr, int level)
420 {
421 void *buf;
422
423 gpuaddr &= 0xfffffffffffffff0;
424
425 if (quiet(3))
426 return;
427
428 buf = hostptr(gpuaddr);
429 if (buf) {
430 uint32_t sizedwords = hostlen(gpuaddr) / 4;
431 const char *ext;
432
433 dump_hex(buf, MIN2(64, sizedwords), level + 1);
434 try_disasm_a3xx(buf, sizedwords, level + 2, stdout, options->info->chip * 100);
435
436 /* this is a bit ugly way, but oh well.. */
437 if (strstr(name, "SP_VS_OBJ")) {
438 ext = "vo3";
439 } else if (strstr(name, "SP_FS_OBJ")) {
440 ext = "fo3";
441 } else if (strstr(name, "SP_GS_OBJ")) {
442 ext = "go3";
443 } else if (strstr(name, "SP_CS_OBJ")) {
444 ext = "co3";
445 } else {
446 ext = NULL;
447 }
448
449 if (ext)
450 dump_shader(ext, buf, sizedwords * 4);
451 }
452 }
453
/* Register callback for 32b shader address registers: disassemble the
 * shader the register value points at.
 */
static void
reg_disasm_gpuaddr(const char *name, uint32_t dword, int level)
{
   const uint64_t addr = dword;

   disasm_gpuaddr(name, addr, level);
}
459
460 static void
reg_disasm_gpuaddr_hi(const char * name,uint32_t dword,int level)461 reg_disasm_gpuaddr_hi(const char *name, uint32_t dword, int level)
462 {
463 disasm_gpuaddr(name, gpuaddr_lo | (((uint64_t)dword) << 32), level);
464 }
465
/* 64b register callback: disassemble the shader the register value
 * points at.
 */
static void
reg_disasm_gpuaddr64(const char *name, uint64_t qword, int level)
{
   const uint64_t addr = qword;

   disasm_gpuaddr(name, addr, level);
}
471
/* Find the value of the TEX_COUNT register that corresponds to the named
 * TEX_SAMP/TEX_CONST reg.
 *
 * The lookup name is built by keeping everything in 'name' before the
 * first "CONST" (or, failing that, "SAMP") and appending "COUNT".
 * Returns 0 if 'name' contains neither.
 *
 * Note, this kinda assumes an equal # of samplers and textures, but not
 * really sure if there is a much better option.  I suppose on a6xx we
 * could instead decode the bitfields in SP_xS_CONFIG
 */
static int
get_tex_count(const char *name)
{
   char count_reg[strlen(name) + 5];

   const char *tail = strstr(name, "CONST");
   if (tail == NULL)
      tail = strstr(name, "SAMP");
   if (tail == NULL)
      return 0;

   const size_t prefix_len = tail - name;

   memcpy(count_reg, name, prefix_len);
   /* sizeof includes the terminating NUL: */
   memcpy(count_reg + prefix_len, "COUNT", sizeof("COUNT"));

   return reg_val(regbase(count_reg));
}
497
498 static void
reg_dump_tex_samp_hi(const char * name,uint32_t dword,int level)499 reg_dump_tex_samp_hi(const char *name, uint32_t dword, int level)
500 {
501 if (!in_summary)
502 return;
503
504 int num_unit = get_tex_count(name);
505 uint64_t gpuaddr = gpuaddr_lo | (((uint64_t)dword) << 32);
506 void *buf = hostptr(gpuaddr);
507
508 if (!buf)
509 return;
510
511 dump_tex_samp(buf, STATE_SRC_DIRECT, num_unit, level + 1);
512 }
513
514 static void
reg_dump_tex_const_hi(const char * name,uint32_t dword,int level)515 reg_dump_tex_const_hi(const char *name, uint32_t dword, int level)
516 {
517 if (!in_summary)
518 return;
519
520 int num_unit = get_tex_count(name);
521 uint64_t gpuaddr = gpuaddr_lo | (((uint64_t)dword) << 32);
522 void *buf = hostptr(gpuaddr);
523
524 if (!buf)
525 return;
526
527 dump_tex_const(buf, num_unit, level + 1);
528 }
529
/*
 * Registers with special handling (rnndec_decode() handles rest):
 *
 * One NULL-terminated table per GPU generation.  Each entry names a
 * register and the callback to run when that register is dumped:
 *
 *   regname  - register name; resolved to 'regbase' by init_rnn(), which
 *              exits if the name is unknown
 *   fxn      - callback for 32b registers (entries created with REG())
 *   fxn64    - callback for 64b registers (entries created with REG64())
 *   regbase  - register offset, filled in by init_rnn()
 *   is_reg64 - selects fxn64 instead of fxn in dump_register()
 *
 * 'type0_reg' points at the table for the generation selected in
 * cffdec_init().
 */
#define REG(x, fxn)    { #x, fxn }
#define REG64(x, fxn)  { #x, .fxn64 = fxn, .is_reg64 = true }
static struct {
   const char *regname;
   void (*fxn)(const char *name, uint32_t dword, int level);
   void (*fxn64)(const char *name, uint64_t qword, int level);
   uint32_t regbase;
   bool is_reg64;
} reg_a2xx[] = {
      REG(CP_SCRATCH_REG0, reg_dump_scratch),
      REG(CP_SCRATCH_REG1, reg_dump_scratch),
      REG(CP_SCRATCH_REG2, reg_dump_scratch),
      REG(CP_SCRATCH_REG3, reg_dump_scratch),
      REG(CP_SCRATCH_REG4, reg_dump_scratch),
      REG(CP_SCRATCH_REG5, reg_dump_scratch),
      REG(CP_SCRATCH_REG6, reg_dump_scratch),
      REG(CP_SCRATCH_REG7, reg_dump_scratch),
      {NULL},
}, reg_a3xx[] = {
      REG(CP_SCRATCH_REG0, reg_dump_scratch),
      REG(CP_SCRATCH_REG1, reg_dump_scratch),
      REG(CP_SCRATCH_REG2, reg_dump_scratch),
      REG(CP_SCRATCH_REG3, reg_dump_scratch),
      REG(CP_SCRATCH_REG4, reg_dump_scratch),
      REG(CP_SCRATCH_REG5, reg_dump_scratch),
      REG(CP_SCRATCH_REG6, reg_dump_scratch),
      REG(CP_SCRATCH_REG7, reg_dump_scratch),
      REG(VSC_SIZE_ADDRESS, reg_dump_gpuaddr),
      REG(SP_VS_PVT_MEM_ADDR_REG, reg_dump_gpuaddr),
      REG(SP_FS_PVT_MEM_ADDR_REG, reg_dump_gpuaddr),
      REG(SP_VS_OBJ_START_REG, reg_disasm_gpuaddr),
      REG(SP_FS_OBJ_START_REG, reg_disasm_gpuaddr),
      REG(TPL1_TP_FS_BORDER_COLOR_BASE_ADDR, reg_dump_gpuaddr),
      {NULL},
}, reg_a4xx[] = {
      REG(CP_SCRATCH[0].REG, reg_dump_scratch),
      REG(CP_SCRATCH[0x1].REG, reg_dump_scratch),
      REG(CP_SCRATCH[0x2].REG, reg_dump_scratch),
      REG(CP_SCRATCH[0x3].REG, reg_dump_scratch),
      REG(CP_SCRATCH[0x4].REG, reg_dump_scratch),
      REG(CP_SCRATCH[0x5].REG, reg_dump_scratch),
      REG(CP_SCRATCH[0x6].REG, reg_dump_scratch),
      REG(CP_SCRATCH[0x7].REG, reg_dump_scratch),
      REG(SP_VS_PVT_MEM_ADDR, reg_dump_gpuaddr),
      REG(SP_FS_PVT_MEM_ADDR, reg_dump_gpuaddr),
      REG(SP_GS_PVT_MEM_ADDR, reg_dump_gpuaddr),
      REG(SP_HS_PVT_MEM_ADDR, reg_dump_gpuaddr),
      REG(SP_DS_PVT_MEM_ADDR, reg_dump_gpuaddr),
      REG(SP_CS_PVT_MEM_ADDR, reg_dump_gpuaddr),
      REG(SP_VS_OBJ_START, reg_disasm_gpuaddr),
      REG(SP_FS_OBJ_START, reg_disasm_gpuaddr),
      REG(SP_GS_OBJ_START, reg_disasm_gpuaddr),
      REG(SP_HS_OBJ_START, reg_disasm_gpuaddr),
      REG(SP_DS_OBJ_START, reg_disasm_gpuaddr),
      REG(SP_CS_OBJ_START, reg_disasm_gpuaddr),
      REG(TPL1_TP_VS_BORDER_COLOR_BASE_ADDR, reg_dump_gpuaddr),
      REG(TPL1_TP_HS_BORDER_COLOR_BASE_ADDR, reg_dump_gpuaddr),
      REG(TPL1_TP_DS_BORDER_COLOR_BASE_ADDR, reg_dump_gpuaddr),
      REG(TPL1_TP_GS_BORDER_COLOR_BASE_ADDR, reg_dump_gpuaddr),
      REG(TPL1_TP_FS_BORDER_COLOR_BASE_ADDR, reg_dump_gpuaddr),
      {NULL},
}, reg_a5xx[] = {
      REG(CP_SCRATCH[0x4].REG, reg_dump_scratch),
      REG(CP_SCRATCH[0x5].REG, reg_dump_scratch),
      REG(CP_SCRATCH[0x6].REG, reg_dump_scratch),
      REG(CP_SCRATCH[0x7].REG, reg_dump_scratch),
      /* a5xx addresses are split into _LO/_HI pairs: the _LO callback
       * latches the low half, the _HI callback acts on the full address.
       */
      REG(SP_VS_OBJ_START_LO, reg_gpuaddr_lo),
      REG(SP_VS_OBJ_START_HI, reg_disasm_gpuaddr_hi),
      REG(SP_HS_OBJ_START_LO, reg_gpuaddr_lo),
      REG(SP_HS_OBJ_START_HI, reg_disasm_gpuaddr_hi),
      REG(SP_DS_OBJ_START_LO, reg_gpuaddr_lo),
      REG(SP_DS_OBJ_START_HI, reg_disasm_gpuaddr_hi),
      REG(SP_GS_OBJ_START_LO, reg_gpuaddr_lo),
      REG(SP_GS_OBJ_START_HI, reg_disasm_gpuaddr_hi),
      REG(SP_FS_OBJ_START_LO, reg_gpuaddr_lo),
      REG(SP_FS_OBJ_START_HI, reg_disasm_gpuaddr_hi),
      REG(SP_CS_OBJ_START_LO, reg_gpuaddr_lo),
      REG(SP_CS_OBJ_START_HI, reg_disasm_gpuaddr_hi),
      REG(TPL1_VS_TEX_CONST_LO, reg_gpuaddr_lo),
      REG(TPL1_VS_TEX_CONST_HI, reg_dump_tex_const_hi),
      REG(TPL1_VS_TEX_SAMP_LO, reg_gpuaddr_lo),
      REG(TPL1_VS_TEX_SAMP_HI, reg_dump_tex_samp_hi),
      REG(TPL1_HS_TEX_CONST_LO, reg_gpuaddr_lo),
      REG(TPL1_HS_TEX_CONST_HI, reg_dump_tex_const_hi),
      REG(TPL1_HS_TEX_SAMP_LO, reg_gpuaddr_lo),
      REG(TPL1_HS_TEX_SAMP_HI, reg_dump_tex_samp_hi),
      REG(TPL1_DS_TEX_CONST_LO, reg_gpuaddr_lo),
      REG(TPL1_DS_TEX_CONST_HI, reg_dump_tex_const_hi),
      REG(TPL1_DS_TEX_SAMP_LO, reg_gpuaddr_lo),
      REG(TPL1_DS_TEX_SAMP_HI, reg_dump_tex_samp_hi),
      REG(TPL1_GS_TEX_CONST_LO, reg_gpuaddr_lo),
      REG(TPL1_GS_TEX_CONST_HI, reg_dump_tex_const_hi),
      REG(TPL1_GS_TEX_SAMP_LO, reg_gpuaddr_lo),
      REG(TPL1_GS_TEX_SAMP_HI, reg_dump_tex_samp_hi),
      REG(TPL1_FS_TEX_CONST_LO, reg_gpuaddr_lo),
      REG(TPL1_FS_TEX_CONST_HI, reg_dump_tex_const_hi),
      REG(TPL1_FS_TEX_SAMP_LO, reg_gpuaddr_lo),
      REG(TPL1_FS_TEX_SAMP_HI, reg_dump_tex_samp_hi),
      REG(TPL1_CS_TEX_CONST_LO, reg_gpuaddr_lo),
      REG(TPL1_CS_TEX_CONST_HI, reg_dump_tex_const_hi),
      REG(TPL1_CS_TEX_SAMP_LO, reg_gpuaddr_lo),
      REG(TPL1_CS_TEX_SAMP_HI, reg_dump_tex_samp_hi),
      REG(TPL1_TP_BORDER_COLOR_BASE_ADDR_LO, reg_gpuaddr_lo),
      REG(TPL1_TP_BORDER_COLOR_BASE_ADDR_HI, reg_dump_gpuaddr_hi),
      /* The entries below are intentionally disabled: */
//      REG(RB_MRT_FLAG_BUFFER[0].ADDR_LO, reg_gpuaddr_lo),
//      REG(RB_MRT_FLAG_BUFFER[0].ADDR_HI, reg_dump_gpuaddr_hi),
//      REG(RB_MRT_FLAG_BUFFER[1].ADDR_LO, reg_gpuaddr_lo),
//      REG(RB_MRT_FLAG_BUFFER[1].ADDR_HI, reg_dump_gpuaddr_hi),
//      REG(RB_MRT_FLAG_BUFFER[2].ADDR_LO, reg_gpuaddr_lo),
//      REG(RB_MRT_FLAG_BUFFER[2].ADDR_HI, reg_dump_gpuaddr_hi),
//      REG(RB_MRT_FLAG_BUFFER[3].ADDR_LO, reg_gpuaddr_lo),
//      REG(RB_MRT_FLAG_BUFFER[3].ADDR_HI, reg_dump_gpuaddr_hi),
//      REG(RB_MRT_FLAG_BUFFER[4].ADDR_LO, reg_gpuaddr_lo),
//      REG(RB_MRT_FLAG_BUFFER[4].ADDR_HI, reg_dump_gpuaddr_hi),
//      REG(RB_MRT_FLAG_BUFFER[5].ADDR_LO, reg_gpuaddr_lo),
//      REG(RB_MRT_FLAG_BUFFER[5].ADDR_HI, reg_dump_gpuaddr_hi),
//      REG(RB_MRT_FLAG_BUFFER[6].ADDR_LO, reg_gpuaddr_lo),
//      REG(RB_MRT_FLAG_BUFFER[6].ADDR_HI, reg_dump_gpuaddr_hi),
//      REG(RB_MRT_FLAG_BUFFER[7].ADDR_LO, reg_gpuaddr_lo),
//      REG(RB_MRT_FLAG_BUFFER[7].ADDR_HI, reg_dump_gpuaddr_hi),
//      REG(RB_BLIT_FLAG_DST_LO, reg_gpuaddr_lo),
//      REG(RB_BLIT_FLAG_DST_HI, reg_dump_gpuaddr_hi),
//      REG(RB_MRT[0].BASE_LO, reg_gpuaddr_lo),
//      REG(RB_MRT[0].BASE_HI, reg_dump_gpuaddr_hi),
//      REG(RB_DEPTH_BUFFER_BASE_LO, reg_gpuaddr_lo),
//      REG(RB_DEPTH_BUFFER_BASE_HI, reg_dump_gpuaddr_hi),
//      REG(RB_DEPTH_FLAG_BUFFER_BASE_LO, reg_gpuaddr_lo),
//      REG(RB_DEPTH_FLAG_BUFFER_BASE_HI, reg_dump_gpuaddr_hi),
//      REG(RB_BLIT_DST_LO, reg_gpuaddr_lo),
//      REG(RB_BLIT_DST_HI, reg_dump_gpuaddr_hi),

//      REG(RB_2D_SRC_LO, reg_gpuaddr_lo),
//      REG(RB_2D_SRC_HI, reg_dump_gpuaddr_hi),
//      REG(RB_2D_SRC_FLAGS_LO, reg_gpuaddr_lo),
//      REG(RB_2D_SRC_FLAGS_HI, reg_dump_gpuaddr_hi),
//      REG(RB_2D_DST_LO, reg_gpuaddr_lo),
//      REG(RB_2D_DST_HI, reg_dump_gpuaddr_hi),
//      REG(RB_2D_DST_FLAGS_LO, reg_gpuaddr_lo),
//      REG(RB_2D_DST_FLAGS_HI, reg_dump_gpuaddr_hi),

      {NULL},
}, reg_a6xx[] = {
      REG(CP_SCRATCH[0x4].REG, reg_dump_scratch),
      REG(CP_SCRATCH[0x5].REG, reg_dump_scratch),
      REG(CP_SCRATCH[0x6].REG, reg_dump_scratch),
      REG(CP_SCRATCH[0x7].REG, reg_dump_scratch),

      REG64(SP_VS_OBJ_START, reg_disasm_gpuaddr64),
      REG64(SP_HS_OBJ_START, reg_disasm_gpuaddr64),
      REG64(SP_DS_OBJ_START, reg_disasm_gpuaddr64),
      REG64(SP_GS_OBJ_START, reg_disasm_gpuaddr64),
      REG64(SP_FS_OBJ_START, reg_disasm_gpuaddr64),
      REG64(SP_CS_OBJ_START, reg_disasm_gpuaddr64),

      REG64(SP_VS_TEX_CONST, reg_dump_gpuaddr64),
      REG64(SP_VS_TEX_SAMP, reg_dump_gpuaddr64),
      REG64(SP_HS_TEX_CONST, reg_dump_gpuaddr64),
      REG64(SP_HS_TEX_SAMP, reg_dump_gpuaddr64),
      REG64(SP_DS_TEX_CONST, reg_dump_gpuaddr64),
      REG64(SP_DS_TEX_SAMP, reg_dump_gpuaddr64),
      REG64(SP_GS_TEX_CONST, reg_dump_gpuaddr64),
      REG64(SP_GS_TEX_SAMP, reg_dump_gpuaddr64),
      REG64(SP_FS_TEX_CONST, reg_dump_gpuaddr64),
      REG64(SP_FS_TEX_SAMP, reg_dump_gpuaddr64),
      REG64(SP_CS_TEX_CONST, reg_dump_gpuaddr64),
      REG64(SP_CS_TEX_SAMP, reg_dump_gpuaddr64),

      {NULL},
}, reg_a7xx[] = {
      REG64(SP_VS_OBJ_START, reg_disasm_gpuaddr64),
      REG64(SP_HS_OBJ_START, reg_disasm_gpuaddr64),
      REG64(SP_DS_OBJ_START, reg_disasm_gpuaddr64),
      REG64(SP_GS_OBJ_START, reg_disasm_gpuaddr64),
      REG64(SP_FS_OBJ_START, reg_disasm_gpuaddr64),
      REG64(SP_CS_OBJ_START, reg_disasm_gpuaddr64),

      {NULL},
}, *type0_reg;
711
712 static struct rnn *rnn;
713
714 static void
init_rnn(const char * gpuname)715 init_rnn(const char *gpuname)
716 {
717 rnn = rnn_new(!options->color);
718
719 rnn_load(rnn, gpuname);
720
721 if (options->querystrs) {
722 int i;
723 queryvals = calloc(options->nquery, sizeof(queryvals[0]));
724
725 for (i = 0; i < options->nquery; i++) {
726 int val = strtol(options->querystrs[i], NULL, 0);
727
728 if (val == 0)
729 val = regbase(options->querystrs[i]);
730
731 queryvals[i] = val;
732 printf("querystr: %s -> 0x%x\n", options->querystrs[i], queryvals[i]);
733 }
734 }
735
736 for (unsigned idx = 0; type0_reg[idx].regname; idx++) {
737 type0_reg[idx].regbase = regbase(type0_reg[idx].regname);
738 if (!type0_reg[idx].regbase) {
739 printf("invalid register name: %s\n", type0_reg[idx].regname);
740 exit(1);
741 }
742 }
743 }
744
745 void
reset_regs(void)746 reset_regs(void)
747 {
748 clear_written();
749 clear_lastvals();
750 memset(&ibs, 0, sizeof(ibs));
751 }
752
753 void
cffdec_init(const struct cffdec_options * _options)754 cffdec_init(const struct cffdec_options *_options)
755 {
756 options = _options;
757 summary = options->summary;
758
759 /* in case we're decoding multiple files: */
760 free(queryvals);
761 reset_regs();
762 draw_count = 0;
763
764 if (!options->info)
765 return;
766
767 switch (options->info->chip) {
768 case 2:
769 type0_reg = reg_a2xx;
770 init_rnn("a2xx");
771 break;
772 case 3:
773 type0_reg = reg_a3xx;
774 init_rnn("a3xx");
775 break;
776 case 4:
777 type0_reg = reg_a4xx;
778 init_rnn("a4xx");
779 break;
780 case 5:
781 type0_reg = reg_a5xx;
782 init_rnn("a5xx");
783 break;
784 case 6:
785 type0_reg = reg_a6xx;
786 init_rnn("a6xx");
787 break;
788 case 7:
789 type0_reg = reg_a7xx;
790 init_rnn("a7xx");
791 break;
792 default:
793 errx(-1, "unsupported generation: %u", options->info->chip);
794 }
795 }
796
797 const char *
pktname(unsigned opc)798 pktname(unsigned opc)
799 {
800 return rnn_enumname(rnn, "adreno_pm4_type3_packets", opc);
801 }
802
803 const char *
regname(uint32_t regbase,int color)804 regname(uint32_t regbase, int color)
805 {
806 return rnn_regname(rnn, regbase, color);
807 }
808
809 uint32_t
regbase(const char * name)810 regbase(const char *name)
811 {
812 return rnn_regbase(rnn, name);
813 }
814
/* Return non-zero if the (uncolored) name of the register at 'regbase'
 * ends with 'suffix'.
 *
 * The tail of the name is compared directly rather than searching with
 * strstr(), which only finds the first occurrence and would therefore
 * miss a suffix that also appears earlier in the name.
 */
static int
endswith(uint32_t regbase, const char *suffix)
{
   const char *name = regname(regbase, 0);

   /* NOTE(review): defensive; confirm whether regname() can return NULL. */
   if (!name)
      return 0;

   const size_t name_len = strlen(name);
   const size_t suffix_len = strlen(suffix);

   if (suffix_len > name_len)
      return 0;

   return strcmp(name + (name_len - suffix_len), suffix) == 0;
}
824
825 struct regacc
regacc(struct rnn * r)826 regacc(struct rnn *r)
827 {
828 if (!r)
829 r = rnn;
830
831 return (struct regacc){ .rnn = r };
832 }
833
834 /* returns true if the complete reg value has been accumulated: */
835 bool
regacc_push(struct regacc * r,uint32_t regbase,uint32_t dword)836 regacc_push(struct regacc *r, uint32_t regbase, uint32_t dword)
837 {
838 if (r->has_dword_lo) {
839 /* Work around kernel devcore dumps which accidentially miss half of a 64b reg
840 * see: https://patchwork.freedesktop.org/series/112302/
841 */
842 if (regbase != r->regbase + 1) {
843 printf("WARNING: 64b discontinuity (%x, expected %x)\n", regbase, r->regbase + 1);
844 r->has_dword_lo = false;
845 return true;
846 }
847
848 r->value |= ((uint64_t)dword) << 32;
849 r->has_dword_lo = false;
850
851 return true;
852 }
853
854 r->regbase = regbase;
855 r->value = dword;
856
857 struct rnndecaddrinfo *info = rnn_reginfo(r->rnn, regbase);
858 r->has_dword_lo = (info->width == 64);
859
860 /* Workaround for kernel devcore dump bugs: */
861 if ((info->width == 64) && endswith(regbase, "_HI")) {
862 printf("WARNING: 64b discontinuity (no _LO dword for %x)\n", regbase);
863 r->has_dword_lo = false;
864 }
865
866 rnn_reginfo_free(info);
867
868 return !r->has_dword_lo;
869 }
870
871 void
dump_register_val(struct regacc * r,int level)872 dump_register_val(struct regacc *r, int level)
873 {
874 struct rnndecaddrinfo *info = rnn_reginfo(rnn, r->regbase);
875
876 if (info && info->typeinfo) {
877 uint64_t gpuaddr = 0;
878 char *decoded = rnndec_decodeval(rnn->vc, info->typeinfo, r->value);
879 printf("%s%s: %s", levels[level], info->name, decoded);
880
881 /* Try and figure out if we are looking at a gpuaddr.. this
882 * might be useful for other gen's too, but at least a5xx has
883 * the _HI/_LO suffix we can look for. Maybe a better approach
884 * would be some special annotation in the xml..
885 * for a6xx use "address" and "waddress" types
886 */
887 if (options->info->chip >= 6) {
888 if (!strcmp(info->typeinfo->name, "address") ||
889 !strcmp(info->typeinfo->name, "waddress")) {
890 gpuaddr = r->value;
891 }
892 } else if (options->info->chip >= 5) {
893 /* TODO we shouldn't rely on reg_val() since reg_set() might
894 * not have been called yet for the other half of the 64b reg.
895 * We can remove this hack once a5xx.xml is converted to reg64
896 * and address/waddess.
897 */
898 if (endswith(r->regbase, "_HI") && endswith(r->regbase - 1, "_LO")) {
899 gpuaddr = (r->value << 32) | reg_val(r->regbase - 1);
900 } else if (endswith(r->regbase, "_LO") && endswith(r->regbase + 1, "_HI")) {
901 gpuaddr = (((uint64_t)reg_val(r->regbase + 1)) << 32) | r->value;
902 }
903 }
904
905 if (gpuaddr && hostptr(gpuaddr)) {
906 printf("\t\tbase=%" PRIx64 ", offset=%" PRIu64 ", size=%u",
907 gpubaseaddr(gpuaddr), gpuaddr - gpubaseaddr(gpuaddr),
908 hostlen(gpubaseaddr(gpuaddr)));
909 }
910
911 printf("\n");
912
913 free(decoded);
914 } else if (info) {
915 printf("%s%s: %08"PRIx64"\n", levels[level], info->name, r->value);
916 } else {
917 printf("%s<%04x>: %08"PRIx64"\n", levels[level], r->regbase, r->value);
918 }
919
920 rnn_reginfo_free(info);
921 }
922
923 static void
dump_register(struct regacc * r,int level)924 dump_register(struct regacc *r, int level)
925 {
926 if (!quiet(3)) {
927 dump_register_val(r, level);
928 }
929
930 for (unsigned idx = 0; type0_reg[idx].regname; idx++) {
931 if (type0_reg[idx].regbase == r->regbase) {
932 if (type0_reg[idx].is_reg64) {
933 type0_reg[idx].fxn64(type0_reg[idx].regname, r->value, level);
934 } else {
935 type0_reg[idx].fxn(type0_reg[idx].regname, (uint32_t)r->value, level);
936 }
937 break;
938 }
939 }
940 }
941
/* True for register offsets in the banked range [0x2000, 0x2400). */
static bool
is_banked_reg(uint32_t regbase)
{
   const bool below = regbase < 0x2000;
   const bool above = regbase >= 0x2400;

   return !(below || above);
}
947
948 static void
dump_registers(uint32_t regbase,uint32_t * dwords,uint32_t sizedwords,int level)949 dump_registers(uint32_t regbase, uint32_t *dwords, uint32_t sizedwords,
950 int level)
951 {
952 struct regacc r = regacc(NULL);
953
954 while (sizedwords--) {
955 int last_summary = summary;
956
957 /* access to non-banked registers needs a WFI:
958 * TODO banked register range for a2xx??
959 */
960 if (needs_wfi && !is_banked_reg(regbase))
961 printl(2, "NEEDS WFI: %s (%x)\n", regname(regbase, 1), regbase);
962
963 reg_set(regbase, *dwords);
964 if (regacc_push(&r, regbase, *dwords))
965 dump_register(&r, level);
966 regbase++;
967 dwords++;
968 summary = last_summary;
969 }
970 }
971
972 static void
dump_domain(uint32_t * dwords,uint32_t sizedwords,int level,const char * name)973 dump_domain(uint32_t *dwords, uint32_t sizedwords, int level, const char *name)
974 {
975 struct rnndomain *dom;
976 int i;
977
978 dom = rnn_finddomain(rnn->db, name);
979
980 if (!dom)
981 return;
982
983 if (script_packet)
984 script_packet(dwords, sizedwords, rnn, dom);
985
986 if (quiet(2))
987 return;
988
989 for (i = 0; i < sizedwords; i++) {
990 struct rnndecaddrinfo *info = rnndec_decodeaddr(rnn->vc, dom, i, 0);
991 char *decoded;
992 if (!(info && info->typeinfo))
993 break;
994 uint64_t value = dwords[i];
995 if (info->typeinfo->high >= 32 && i < sizedwords - 1) {
996 value |= (uint64_t)dwords[i + 1] << 32;
997 i++; /* skip the next dword since we're printing it now */
998 }
999 decoded = rnndec_decodeval(rnn->vc, info->typeinfo, value);
1000 /* Unlike the register printing path, we don't print the name
1001 * of the register, so if it doesn't contain other named
1002 * things (i.e. it isn't a bitset) then print the register
1003 * name as if it's a bitset with a single entry. This avoids
1004 * having to create a dummy register with a single entry to
1005 * get a name in the decoding.
1006 */
1007 if (info->typeinfo->type == RNN_TTYPE_BITSET ||
1008 info->typeinfo->type == RNN_TTYPE_INLINE_BITSET) {
1009 printf("%s%s\n", levels[level], decoded);
1010 } else {
1011 printf("%s{ %s%s%s = %s }\n", levels[level], rnn->vc->colors->rname,
1012 info->name, rnn->vc->colors->reset, decoded);
1013 }
1014 free(decoded);
1015 free(info->name);
1016 free(info);
1017 }
1018 }
1019
/* Bin bounds printed by __do_query(); on a5xx/a6xx they are refreshed
 * there from the GRAS_SC_WINDOW_SCISSOR_TL/BR registers.
 */
static uint32_t bin_x1, bin_x2, bin_y1, bin_y2;
static unsigned mode;
/* Printed by print_mode() and __do_query(); 'thread' is optional (may be
 * NULL).
 */
static const char *render_mode;
static const char *thread;
/* Bitmask of render modes; all modes are enabled by default. */
static enum {
   MODE_BINNING = 0x1,
   MODE_GMEM = 0x2,
   MODE_BYPASS = 0x4,
   MODE_ALL = MODE_BINNING | MODE_GMEM | MODE_BYPASS,
} enable_mask = MODE_ALL;
/* Global/local skip_ib2 state, reported by print_mode(). */
static bool skip_ib2_enable_global;
static bool skip_ib2_enable_local;
1032
1033 static void
print_mode(int level)1034 print_mode(int level)
1035 {
1036 if ((options->info->chip >= 5) && !quiet(2)) {
1037 printf("%smode: %s", levels[level], render_mode);
1038 if (thread)
1039 printf(":%s", thread);
1040 printf("\n");
1041 printf("%sskip_ib2: g=%d, l=%d\n", levels[level], skip_ib2_enable_global,
1042 skip_ib2_enable_local);
1043 }
1044 }
1045
1046 static bool
skip_query(void)1047 skip_query(void)
1048 {
1049 switch (options->query_mode) {
1050 case QUERY_ALL:
1051 /* never skip: */
1052 return false;
1053 case QUERY_WRITTEN:
1054 for (int i = 0; i < options->nquery; i++) {
1055 uint32_t regbase = queryvals[i];
1056 if (!reg_written(regbase)) {
1057 continue;
1058 }
1059 if (reg_rewritten(regbase)) {
1060 return false;
1061 }
1062 }
1063 return true;
1064 case QUERY_DELTA:
1065 for (int i = 0; i < options->nquery; i++) {
1066 uint32_t regbase = queryvals[i];
1067 if (!reg_written(regbase)) {
1068 continue;
1069 }
1070 uint32_t lastval = reg_val(regbase);
1071 if (lastval != lastvals[regbase]) {
1072 return false;
1073 }
1074 }
1075 return true;
1076 }
1077 return true;
1078 }
1079
1080 static void
__do_query(const char * primtype,uint32_t num_indices)1081 __do_query(const char *primtype, uint32_t num_indices)
1082 {
1083 int n = 0;
1084
1085 if ((5 <= options->info->chip) && (options->info->chip < 7)) {
1086 uint32_t scissor_tl = reg_val(regbase("GRAS_SC_WINDOW_SCISSOR_TL"));
1087 uint32_t scissor_br = reg_val(regbase("GRAS_SC_WINDOW_SCISSOR_BR"));
1088
1089 bin_x1 = scissor_tl & 0xffff;
1090 bin_y1 = scissor_tl >> 16;
1091 bin_x2 = scissor_br & 0xffff;
1092 bin_y2 = scissor_br >> 16;
1093 }
1094
1095 for (int i = 0; i < options->nquery; i++) {
1096 uint32_t regbase = queryvals[i];
1097 if (!reg_written(regbase))
1098 continue;
1099
1100 struct regacc r = regacc(NULL);
1101
1102 /* 64b regs require two successive 32b dwords: */
1103 for (int d = 0; d < 2; d++)
1104 if (regacc_push(&r, regbase + d, reg_val(regbase + d)))
1105 break;
1106
1107 printf("%4d: %s(%u,%u-%u,%u):%u:", draw_count, primtype, bin_x1,
1108 bin_y1, bin_x2, bin_y2, num_indices);
1109 if (options->info->chip >= 5)
1110 printf("%s:", render_mode);
1111 if (thread)
1112 printf("%s:", thread);
1113 printf("\t%08"PRIx64, r.value);
1114 if (r.value != lastvals[regbase]) {
1115 printf("!");
1116 } else {
1117 printf(" ");
1118 }
1119 if (reg_rewritten(regbase)) {
1120 printf("+");
1121 } else {
1122 printf(" ");
1123 }
1124 dump_register_val(&r, 0);
1125 n++;
1126 }
1127
1128 if (n > 1)
1129 printf("\n");
1130 }
1131
/* 'query-compare' variant of do_query(): dump the queried registers
 * twice, once as seen with only the binning mode enabled and once with
 * the gmem/bypass modes enabled.
 *
 * enable_mask and render_mode are overridden while this runs and are
 * restored before returning; all groups are disabled on the way out.
 */
static void
do_query_compare(const char *primtype, uint32_t num_indices)
{
   unsigned saved_enable_mask = enable_mask;
   const char *saved_render_mode = render_mode;

   /* in 'query-compare' mode, we want to see if the register is written
    * or changed in any mode:
    *
    * (NOTE: this could cause false-positive for 'query-delta' if the reg
    * is written with different values in binning vs sysmem/gmem mode, as
    * we don't track previous values per-mode, but I think we can live with
    * that)
    */
   enable_mask = MODE_ALL;

   /* Reload every group with all modes enabled so that skip_query()
    * sees writes from any mode.
    */
   clear_rewritten();
   load_all_groups(0);

   if (!skip_query()) {
      /* dump binning pass values: */
      enable_mask = MODE_BINNING;
      render_mode = "BINNING";
      clear_rewritten();
      load_all_groups(0);
      __do_query(primtype, num_indices);

      /* dump draw pass values: */
      enable_mask = MODE_GMEM | MODE_BYPASS;
      render_mode = "DRAW";
      clear_rewritten();
      load_all_groups(0);
      __do_query(primtype, num_indices);

      printf("\n");
   }

   /* Put back the caller's view of the render mode. */
   enable_mask = saved_enable_mask;
   render_mode = saved_render_mode;

   disable_all_groups();
}
1174
1175 /* well, actually query and script..
1176 * NOTE: call this before dump_register_summary()
1177 */
1178 static void
do_query(const char * primtype,uint32_t num_indices)1179 do_query(const char *primtype, uint32_t num_indices)
1180 {
1181 if (script_draw)
1182 script_draw(primtype, num_indices);
1183
1184 if (options->query_compare) {
1185 do_query_compare(primtype, num_indices);
1186 return;
1187 }
1188
1189 if (skip_query())
1190 return;
1191
1192 __do_query(primtype, num_indices);
1193 }
1194
1195 static void
cp_im_loadi(uint32_t * dwords,uint32_t sizedwords,int level)1196 cp_im_loadi(uint32_t *dwords, uint32_t sizedwords, int level)
1197 {
1198 uint32_t start = dwords[1] >> 16;
1199 uint32_t size = dwords[1] & 0xffff;
1200 const char *type = NULL, *ext = NULL;
1201 gl_shader_stage disasm_type;
1202
1203 switch (dwords[0]) {
1204 case 0:
1205 type = "vertex";
1206 ext = "vo";
1207 disasm_type = MESA_SHADER_VERTEX;
1208 break;
1209 case 1:
1210 type = "fragment";
1211 ext = "fo";
1212 disasm_type = MESA_SHADER_FRAGMENT;
1213 break;
1214 default:
1215 type = "<unknown>";
1216 disasm_type = 0;
1217 break;
1218 }
1219
1220 printf("%s%s shader, start=%04x, size=%04x\n", levels[level], type, start,
1221 size);
1222 disasm_a2xx(dwords + 2, sizedwords - 2, level + 2, disasm_type);
1223
1224 /* dump raw shader: */
1225 if (ext)
1226 dump_shader(ext, dwords + 2, (sizedwords - 2) * 4);
1227 }
1228
1229 static void
cp_wide_reg_write(uint32_t * dwords,uint32_t sizedwords,int level)1230 cp_wide_reg_write(uint32_t *dwords, uint32_t sizedwords, int level)
1231 {
1232 uint32_t reg = dwords[0] & 0xffff;
1233 struct regacc r = regacc(NULL);
1234 for (int i = 1; i < sizedwords; i++) {
1235 if (regacc_push(&r, reg, dwords[i]))
1236 dump_register(&r, level + 1);
1237 reg_set(reg, dwords[i]);
1238 reg++;
1239 }
1240 }
1241
/* Kind of state carried by a load-state packet; selects how
 * cp_load_state() dumps the payload.  Values start at 1, so a
 * zero-initialized lookup entry matches none of them.
 */
enum state_t {
   TEX_SAMP = 1,
   TEX_CONST,
   TEX_MIPADDR, /* a3xx only */
   SHADER_PROG,
   SHADER_CONST,

   // image/ssbo state:
   SSBO_0,
   SSBO_1,
   SSBO_2,

   UBO,

   // unknown things, just hexdump them:
   UNKNOWN_DWORDS,
   UNKNOWN_2DWORDS,
   UNKNOWN_4DWORDS,
};
1261
/* State block ids used as row designators of the lookup table in
 * a3xx_get_state_type(); the id there is a 3-bit field, so 0..7 covers
 * every possible value.  Only the *_TEX and VERT/FRAG_SHADER rows have
 * entries in that table.
 */
enum adreno_state_block {
   SB_VERT_TEX = 0,
   SB_VERT_MIPADDR = 1,
   SB_FRAG_TEX = 2,
   SB_FRAG_MIPADDR = 3,
   SB_VERT_SHADER = 4,
   SB_GEOM_SHADER = 5,
   SB_FRAG_SHADER = 6,
   SB_COMPUTE_SHADER = 7,
};
1272
1273 /* TODO there is probably a clever way to let rnndec parse things so
1274 * we don't have to care about packet format differences across gens
1275 */
1276
1277 static void
a3xx_get_state_type(uint32_t * dwords,gl_shader_stage * stage,enum state_t * state,enum state_src_t * src)1278 a3xx_get_state_type(uint32_t *dwords, gl_shader_stage *stage,
1279 enum state_t *state, enum state_src_t *src)
1280 {
1281 unsigned state_block_id = (dwords[0] >> 19) & 0x7;
1282 unsigned state_type = dwords[1] & 0x3;
1283 static const struct {
1284 gl_shader_stage stage;
1285 enum state_t state;
1286 } lookup[0xf][0x3] = {
1287 [SB_VERT_TEX][0] = {MESA_SHADER_VERTEX, TEX_SAMP},
1288 [SB_VERT_TEX][1] = {MESA_SHADER_VERTEX, TEX_CONST},
1289 [SB_FRAG_TEX][0] = {MESA_SHADER_FRAGMENT, TEX_SAMP},
1290 [SB_FRAG_TEX][1] = {MESA_SHADER_FRAGMENT, TEX_CONST},
1291 [SB_VERT_SHADER][0] = {MESA_SHADER_VERTEX, SHADER_PROG},
1292 [SB_VERT_SHADER][1] = {MESA_SHADER_VERTEX, SHADER_CONST},
1293 [SB_FRAG_SHADER][0] = {MESA_SHADER_FRAGMENT, SHADER_PROG},
1294 [SB_FRAG_SHADER][1] = {MESA_SHADER_FRAGMENT, SHADER_CONST},
1295 };
1296
1297 *stage = lookup[state_block_id][state_type].stage;
1298 *state = lookup[state_block_id][state_type].state;
1299 unsigned state_src = (dwords[0] >> 16) & 0x7;
1300 if (state_src == 0 /* SS_DIRECT */)
1301 *src = STATE_SRC_DIRECT;
1302 else
1303 *src = STATE_SRC_INDIRECT;
1304 }
1305
1306 static enum state_src_t
_get_state_src(unsigned dword0)1307 _get_state_src(unsigned dword0)
1308 {
1309 switch ((dword0 >> 16) & 0x3) {
1310 case 0: /* SS4_DIRECT / SS6_DIRECT */
1311 return STATE_SRC_DIRECT;
1312 case 2: /* SS4_INDIRECT / SS6_INDIRECT */
1313 return STATE_SRC_INDIRECT;
1314 case 1: /* SS6_BINDLESS */
1315 return STATE_SRC_BINDLESS;
1316 default:
1317 return STATE_SRC_DIRECT;
1318 }
1319 }
1320
/* Shared lookup for a4xx and later: translate a (state_block_id,
 * state_type) pair into the shader stage and state kind, returned
 * through *stage and *state.
 *
 * The table is 0x10 x 0x4, which matches the 4-bit block id and 2-bit
 * state type the callers in this file extract, so every masked input is
 * in range.  Pairs without an explicit entry read back as the
 * zero-initialized entry: stage 0 and a state value that matches no
 * enum state_t member.
 */
static void
_get_state_type(unsigned state_block_id, unsigned state_type,
                gl_shader_stage *stage, enum state_t *state)
{
   static const struct {
      gl_shader_stage stage;
      enum state_t state;
   } lookup[0x10][0x4] = {
      // SB4_VS_TEX:
      [0x0][0] = {MESA_SHADER_VERTEX, TEX_SAMP},
      [0x0][1] = {MESA_SHADER_VERTEX, TEX_CONST},
      [0x0][2] = {MESA_SHADER_VERTEX, UBO},
      // SB4_HS_TEX:
      [0x1][0] = {MESA_SHADER_TESS_CTRL, TEX_SAMP},
      [0x1][1] = {MESA_SHADER_TESS_CTRL, TEX_CONST},
      [0x1][2] = {MESA_SHADER_TESS_CTRL, UBO},
      // SB4_DS_TEX:
      [0x2][0] = {MESA_SHADER_TESS_EVAL, TEX_SAMP},
      [0x2][1] = {MESA_SHADER_TESS_EVAL, TEX_CONST},
      [0x2][2] = {MESA_SHADER_TESS_EVAL, UBO},
      // SB4_GS_TEX:
      [0x3][0] = {MESA_SHADER_GEOMETRY, TEX_SAMP},
      [0x3][1] = {MESA_SHADER_GEOMETRY, TEX_CONST},
      [0x3][2] = {MESA_SHADER_GEOMETRY, UBO},
      // SB4_FS_TEX:
      [0x4][0] = {MESA_SHADER_FRAGMENT, TEX_SAMP},
      [0x4][1] = {MESA_SHADER_FRAGMENT, TEX_CONST},
      [0x4][2] = {MESA_SHADER_FRAGMENT, UBO},
      // SB4_CS_TEX:
      [0x5][0] = {MESA_SHADER_COMPUTE, TEX_SAMP},
      [0x5][1] = {MESA_SHADER_COMPUTE, TEX_CONST},
      [0x5][2] = {MESA_SHADER_COMPUTE, UBO},
      // SB4_VS_SHADER:
      [0x8][0] = {MESA_SHADER_VERTEX, SHADER_PROG},
      [0x8][1] = {MESA_SHADER_VERTEX, SHADER_CONST},
      [0x8][2] = {MESA_SHADER_VERTEX, UBO},
      // SB4_HS_SHADER
      [0x9][0] = {MESA_SHADER_TESS_CTRL, SHADER_PROG},
      [0x9][1] = {MESA_SHADER_TESS_CTRL, SHADER_CONST},
      [0x9][2] = {MESA_SHADER_TESS_CTRL, UBO},
      // SB4_DS_SHADER
      [0xa][0] = {MESA_SHADER_TESS_EVAL, SHADER_PROG},
      [0xa][1] = {MESA_SHADER_TESS_EVAL, SHADER_CONST},
      [0xa][2] = {MESA_SHADER_TESS_EVAL, UBO},
      // SB4_GS_SHADER
      [0xb][0] = {MESA_SHADER_GEOMETRY, SHADER_PROG},
      [0xb][1] = {MESA_SHADER_GEOMETRY, SHADER_CONST},
      [0xb][2] = {MESA_SHADER_GEOMETRY, UBO},
      // SB4_FS_SHADER:
      [0xc][0] = {MESA_SHADER_FRAGMENT, SHADER_PROG},
      [0xc][1] = {MESA_SHADER_FRAGMENT, SHADER_CONST},
      [0xc][2] = {MESA_SHADER_FRAGMENT, UBO},
      // SB4_CS_SHADER:
      [0xd][0] = {MESA_SHADER_COMPUTE, SHADER_PROG},
      [0xd][1] = {MESA_SHADER_COMPUTE, SHADER_CONST},
      [0xd][2] = {MESA_SHADER_COMPUTE, UBO},
      [0xd][3] = {MESA_SHADER_COMPUTE, SSBO_0}, /* a6xx location */
      // SB4_SSBO (shared across all stages)
      [0xe][0] = {0, SSBO_0}, /* a5xx (and a4xx?) location */
      [0xe][1] = {0, SSBO_1},
      [0xe][2] = {0, SSBO_2},
      // SB4_CS_SSBO
      [0xf][0] = {MESA_SHADER_COMPUTE, SSBO_0},
      [0xf][1] = {MESA_SHADER_COMPUTE, SSBO_1},
      [0xf][2] = {MESA_SHADER_COMPUTE, SSBO_2},
      // unknown things
      /* This looks like combined UBO state for 3d stages (a5xx and
       * before?? I think a6xx has UBO state per shader stage:
       */
      [0x6][2] = {0, UBO},
      [0x7][1] = {0, UNKNOWN_2DWORDS},
   };

   *stage = lookup[state_block_id][state_type].stage;
   *state = lookup[state_block_id][state_type].state;
}
1397
1398 static void
a4xx_get_state_type(uint32_t * dwords,gl_shader_stage * stage,enum state_t * state,enum state_src_t * src)1399 a4xx_get_state_type(uint32_t *dwords, gl_shader_stage *stage,
1400 enum state_t *state, enum state_src_t *src)
1401 {
1402 unsigned state_block_id = (dwords[0] >> 18) & 0xf;
1403 unsigned state_type = dwords[1] & 0x3;
1404 _get_state_type(state_block_id, state_type, stage, state);
1405 *src = _get_state_src(dwords[0]);
1406 }
1407
1408 static void
a6xx_get_state_type(uint32_t * dwords,gl_shader_stage * stage,enum state_t * state,enum state_src_t * src)1409 a6xx_get_state_type(uint32_t *dwords, gl_shader_stage *stage,
1410 enum state_t *state, enum state_src_t *src)
1411 {
1412 unsigned state_block_id = (dwords[0] >> 18) & 0xf;
1413 unsigned state_type = (dwords[0] >> 14) & 0x3;
1414 _get_state_type(state_block_id, state_type, stage, state);
1415 *src = _get_state_src(dwords[0]);
1416 }
1417
1418 static void
dump_tex_samp(uint32_t * texsamp,enum state_src_t src,int num_unit,int level)1419 dump_tex_samp(uint32_t *texsamp, enum state_src_t src, int num_unit, int level)
1420 {
1421 for (int i = 0; i < num_unit; i++) {
1422 /* work-around to reduce noise for opencl blob which always
1423 * writes the max # regardless of # of textures used
1424 */
1425 if ((num_unit == 16) && (texsamp[0] == 0) && (texsamp[1] == 0))
1426 break;
1427
1428 if (options->info->chip == 3) {
1429 dump_domain(texsamp, 2, level + 2, "A3XX_TEX_SAMP");
1430 dump_hex(texsamp, 2, level + 1);
1431 texsamp += 2;
1432 } else if (options->info->chip == 4) {
1433 dump_domain(texsamp, 2, level + 2, "A4XX_TEX_SAMP");
1434 dump_hex(texsamp, 2, level + 1);
1435 texsamp += 2;
1436 } else if (options->info->chip == 5) {
1437 dump_domain(texsamp, 4, level + 2, "A5XX_TEX_SAMP");
1438 dump_hex(texsamp, 4, level + 1);
1439 texsamp += 4;
1440 } else if ((6 <= options->info->chip) && (options->info->chip < 8)) {
1441 dump_domain(texsamp, 4, level + 2, "A6XX_TEX_SAMP");
1442 dump_hex(texsamp, 4, level + 1);
1443 texsamp += src == STATE_SRC_BINDLESS ? 16 : 4;
1444 }
1445 }
1446 }
1447
1448 static void
dump_tex_const(uint32_t * texconst,int num_unit,int level)1449 dump_tex_const(uint32_t *texconst, int num_unit, int level)
1450 {
1451 for (int i = 0; i < num_unit; i++) {
1452 /* work-around to reduce noise for opencl blob which always
1453 * writes the max # regardless of # of textures used
1454 */
1455 if ((num_unit == 16) && (texconst[0] == 0) && (texconst[1] == 0) &&
1456 (texconst[2] == 0) && (texconst[3] == 0))
1457 break;
1458
1459 if (options->info->chip == 3) {
1460 dump_domain(texconst, 4, level + 2, "A3XX_TEX_CONST");
1461 dump_hex(texconst, 4, level + 1);
1462 texconst += 4;
1463 } else if (options->info->chip == 4) {
1464 dump_domain(texconst, 8, level + 2, "A4XX_TEX_CONST");
1465 if (options->dump_textures) {
1466 uint32_t addr = texconst[4] & ~0x1f;
1467 dump_gpuaddr(addr, level - 2);
1468 }
1469 dump_hex(texconst, 8, level + 1);
1470 texconst += 8;
1471 } else if (options->info->chip == 5) {
1472 dump_domain(texconst, 12, level + 2, "A5XX_TEX_CONST");
1473 if (options->dump_textures) {
1474 uint64_t addr =
1475 (((uint64_t)texconst[5] & 0x1ffff) << 32) | texconst[4];
1476 dump_gpuaddr_size(addr, level - 2, hostlen(addr) / 4, 3);
1477 }
1478 dump_hex(texconst, 12, level + 1);
1479 texconst += 12;
1480 } else if ((6 <= options->info->chip) && (options->info->chip < 8)) {
1481 dump_domain(texconst, 16, level + 2, "A6XX_TEX_CONST");
1482 if (options->dump_textures) {
1483 uint64_t addr =
1484 (((uint64_t)texconst[5] & 0x1ffff) << 32) | texconst[4];
1485 dump_gpuaddr_size(addr, level - 2, hostlen(addr) / 4, 3);
1486 }
1487 dump_hex(texconst, 16, level + 1);
1488 texconst += 16;
1489 }
1490 }
1491 }
1492
/* Decode and dump the payload of a load-state packet.
 *
 * The stage/state/source triple comes from the per-generation
 * *_get_state_type() helper.  The payload is then located according to
 * the source (inline in the packet, at an indirect GPU address, or at a
 * bindless base register plus offset) and dumped in a form that depends
 * on the state kind.
 *
 *   dwords:     packet dwords
 *   sizedwords: number of packet dwords (not used by this function)
 *   level:      indentation level for the output
 */
static void
cp_load_state(uint32_t *dwords, uint32_t sizedwords, int level)
{
   gl_shader_stage stage;
   enum state_t state;
   enum state_src_t src;
   /* unit count: 9-bit field at bits 22..30 of dword 0 */
   uint32_t num_unit = (dwords[0] >> 22) & 0x1ff;
   uint64_t ext_src_addr;
   void *contents;
   int i;

   /* Nothing to do when quiet(2) holds and no script is in use. */
   if (quiet(2) && !options->script)
      return;

   if (options->info->chip >= 6)
      a6xx_get_state_type(dwords, &stage, &state, &src);
   else if (options->info->chip >= 4)
      a4xx_get_state_type(dwords, &stage, &state, &src);
   else
      a3xx_get_state_type(dwords, &stage, &state, &src);

   /* Work out the GPU address of out-of-line state; zero means the
    * state is inline in the packet.
    *
    * NOTE(review): the switch has no default, so ext_src_addr is only
    * assigned because the helpers above return one of these three
    * values.
    */
   switch (src) {
   case STATE_SRC_DIRECT:
      ext_src_addr = 0;
      break;
   case STATE_SRC_INDIRECT:
      /* address in dword 1 (low two bits masked off), with the upper
       * 32 bits in dword 2 on 64b chips
       */
      if (is_64b()) {
         ext_src_addr = dwords[1] & 0xfffffffc;
         ext_src_addr |= ((uint64_t)dwords[2]) << 32;
      } else {
         ext_src_addr = dwords[1] & 0xfffffffc;
      }

      break;
   case STATE_SRC_BINDLESS: {
      const unsigned base_reg = stage == MESA_SHADER_COMPUTE
                                   ? regbase("HLSQ_CS_BINDLESS_BASE[0].DESCRIPTOR")
                                   : regbase("HLSQ_BINDLESS_BASE[0].DESCRIPTOR");

      /* The top four bits of dword 1 pick the base register (a pair
       * of registers per base on 64b chips).
       */
      if (is_64b()) {
         const unsigned reg = base_reg + (dwords[1] >> 28) * 2;
         ext_src_addr = reg_val(reg) & 0xfffffffc;
         ext_src_addr |= ((uint64_t)reg_val(reg + 1)) << 32;
      } else {
         const unsigned reg = base_reg + (dwords[1] >> 28);
         ext_src_addr = reg_val(reg) & 0xfffffffc;
      }

      /* low 24 bits of dword 1, scaled by 4, are added to the base */
      ext_src_addr += 4 * (dwords[1] & 0xffffff);
      break;
   }
   }

   /* Inline state follows the packet header: after three dwords on 64b
    * chips, after two otherwise.
    */
   if (ext_src_addr)
      contents = hostptr(ext_src_addr);
   else
      contents = is_64b() ? dwords + 3 : dwords + 2;

   if (!contents)
      return;

   switch (state) {
   case SHADER_PROG: {
      const char *ext = NULL;

      if (quiet(2))
         return;

      if (options->info->chip >= 4)
         num_unit *= 16;
      else if (options->info->chip >= 3)
         num_unit *= 4;

      /* shaders:
       *
       * note: num_unit seems to be # of instruction groups, where
       * an instruction group has 4 64bit instructions.
       */
      if (stage == MESA_SHADER_VERTEX) {
         ext = "vo3";
      } else if (stage == MESA_SHADER_GEOMETRY) {
         ext = "go3";
      } else if (stage == MESA_SHADER_COMPUTE) {
         ext = "co3";
      } else if (stage == MESA_SHADER_FRAGMENT) {
         ext = "fo3";
      }

      /* NOTE(review): contents was already checked for NULL above, so
       * this test is always true.
       */
      if (contents)
         try_disasm_a3xx(contents, num_unit * 2, level + 2, stdout,
                         options->info->chip * 100);

      /* dump raw shader: */
      if (ext)
         dump_shader(ext, contents, num_unit * 2 * 4);

      break;
   }
   case SHADER_CONST: {
      if (quiet(2))
         return;

      /* uniforms/consts:
       *
       * note: num_unit seems to be # of pairs of dwords??
       */

      if (options->info->chip >= 4)
         num_unit *= 2;

      dump_float(contents, num_unit * 2, level + 1);
      dump_hex(contents, num_unit * 2, level + 1);

      break;
   }
   case TEX_MIPADDR: {
      uint32_t *addrs = contents;

      if (quiet(2))
         return;

      /* mipmap consts block just appears to be array of num_unit gpu addr's: */
      for (i = 0; i < num_unit; i++) {
         void *ptr = hostptr(addrs[i]);
         printf("%s%2d: %08x\n", levels[level + 1], i, addrs[i]);
         if (options->dump_textures) {
            printf("base=%08x\n", (uint32_t)gpubaseaddr(addrs[i]));
            dump_hex(ptr, hostlen(addrs[i]) / 4, level + 1);
         }
      }
      break;
   }
   case TEX_SAMP: {
      dump_tex_samp(contents, src, num_unit, level);
      break;
   }
   case TEX_CONST: {
      dump_tex_const(contents, num_unit, level);
      break;
   }
   case SSBO_0: {
      uint32_t *ssboconst = (uint32_t *)contents;

      /* 4 dwords per unit, except on chips 6 and 7 where each unit is
       * 16 dwords and is decoded with the A6XX_TEX_CONST domain.
       */
      for (i = 0; i < num_unit; i++) {
         int sz = 4;
         if (options->info->chip == 4) {
            dump_domain(ssboconst, 4, level + 2, "A4XX_SSBO_0");
         } else if (options->info->chip == 5) {
            dump_domain(ssboconst, 4, level + 2, "A5XX_SSBO_0");
         } else if ((6 <= options->info->chip) && (options->info->chip < 8)) {
            sz = 16;
            dump_domain(ssboconst, 16, level + 2, "A6XX_TEX_CONST");
         }
         dump_hex(ssboconst, sz, level + 1);
         ssboconst += sz;
      }
      break;
   }
   case SSBO_1: {
      uint32_t *ssboconst = (uint32_t *)contents;

      /* 2 dwords per unit; only chips 4 and 5 have a decode domain,
       * other chips get the hexdump alone.
       */
      for (i = 0; i < num_unit; i++) {
         if (options->info->chip == 4)
            dump_domain(ssboconst, 2, level + 2, "A4XX_SSBO_1");
         else if (options->info->chip == 5)
            dump_domain(ssboconst, 2, level + 2, "A5XX_SSBO_1");
         dump_hex(ssboconst, 2, level + 1);
         ssboconst += 2;
      }
      break;
   }
   case SSBO_2: {
      uint32_t *ssboconst = (uint32_t *)contents;

      /* 2 dwords per unit, read as a GPU address when dumping the
       * referenced buffer.
       */
      for (i = 0; i < num_unit; i++) {
         /* TODO a4xx and a5xx might be same: */
         if (options->info->chip == 5) {
            dump_domain(ssboconst, 2, level + 2, "A5XX_SSBO_2");
            dump_hex(ssboconst, 2, level + 1);
         }
         if (options->dump_textures) {
            uint64_t addr =
               (((uint64_t)ssboconst[1] & 0x1ffff) << 32) | ssboconst[0];
            dump_gpuaddr_size(addr, level - 2, hostlen(addr) / 4, 3);
         }
         ssboconst += 2;
      }
      break;
   }
   case UBO: {
      uint32_t *uboconst = (uint32_t *)contents;

      /* 2 dwords decoded per unit; bindless descriptors are spaced
       * 16 dwords apart.
       *
       * NOTE(review): the other cases treat chips 6 and 7 alike, but
       * only chip == 6 gets a decode domain here -- confirm whether
       * chip 7 is meant to be hexdump-only.
       */
      for (i = 0; i < num_unit; i++) {
         // TODO probably similar on a4xx..
         if (options->info->chip == 5)
            dump_domain(uboconst, 2, level + 2, "A5XX_UBO");
         else if (options->info->chip == 6)
            dump_domain(uboconst, 2, level + 2, "A6XX_UBO");
         dump_hex(uboconst, 2, level + 1);
         uboconst += src == STATE_SRC_BINDLESS ? 16 : 2;
      }
      break;
   }
   case UNKNOWN_DWORDS: {
      if (quiet(2))
         return;
      dump_hex(contents, num_unit, level + 1);
      break;
   }
   case UNKNOWN_2DWORDS: {
      if (quiet(2))
         return;
      dump_hex(contents, num_unit * 2, level + 1);
      break;
   }
   case UNKNOWN_4DWORDS: {
      if (quiet(2))
         return;
      dump_hex(contents, num_unit * 4, level + 1);
      break;
   }
   default:
      /* also reached for lookup entries that were never filled in */
      if (quiet(2))
         return;
      /* hmm.. */
      dump_hex(contents, num_unit, level + 1);
      break;
   }
}
1722
1723 static void
cp_set_bin(uint32_t * dwords,uint32_t sizedwords,int level)1724 cp_set_bin(uint32_t *dwords, uint32_t sizedwords, int level)
1725 {
1726 bin_x1 = dwords[1] & 0xffff;
1727 bin_y1 = dwords[1] >> 16;
1728 bin_x2 = dwords[2] & 0xffff;
1729 bin_y2 = dwords[2] >> 16;
1730 }
1731
1732 static void
dump_a2xx_tex_const(uint32_t * dwords,uint32_t sizedwords,uint32_t val,int level)1733 dump_a2xx_tex_const(uint32_t *dwords, uint32_t sizedwords, uint32_t val,
1734 int level)
1735 {
1736 uint32_t w, h, p;
1737 uint32_t gpuaddr, flags, mip_gpuaddr, mip_flags;
1738 uint32_t min, mag, swiz, clamp_x, clamp_y, clamp_z;
1739 static const char *filter[] = {
1740 "point",
1741 "bilinear",
1742 "bicubic",
1743 };
1744 static const char *clamp[] = {
1745 "wrap",
1746 "mirror",
1747 "clamp-last-texel",
1748 };
1749 static const char swiznames[] = "xyzw01??";
1750
1751 /* see sys2gmem_tex_const[] in adreno_a2xxx.c */
1752
1753 /* Texture, FormatXYZW=Unsigned, ClampXYZ=Wrap/Repeat,
1754 * RFMode=ZeroClamp-1, Dim=1:2d, pitch
1755 */
1756 p = (dwords[0] >> 22) << 5;
1757 clamp_x = (dwords[0] >> 10) & 0x3;
1758 clamp_y = (dwords[0] >> 13) & 0x3;
1759 clamp_z = (dwords[0] >> 16) & 0x3;
1760
1761 /* Format=6:8888_WZYX, EndianSwap=0:None, ReqSize=0:256bit, DimHi=0,
1762 * NearestClamp=1:OGL Mode
1763 */
1764 parse_dword_addr(dwords[1], &gpuaddr, &flags, 0xfff);
1765
1766 /* Width, Height, EndianSwap=0:None */
1767 w = (dwords[2] & 0x1fff) + 1;
1768 h = ((dwords[2] >> 13) & 0x1fff) + 1;
1769
1770 /* NumFormat=0:RF, DstSelXYZW=XYZW, ExpAdj=0, MagFilt=MinFilt=0:Point,
1771 * Mip=2:BaseMap
1772 */
1773 mag = (dwords[3] >> 19) & 0x3;
1774 min = (dwords[3] >> 21) & 0x3;
1775 swiz = (dwords[3] >> 1) & 0xfff;
1776
1777 /* VolMag=VolMin=0:Point, MinMipLvl=0, MaxMipLvl=1, LodBiasH=V=0,
1778 * Dim3d=0
1779 */
1780 // XXX
1781
1782 /* BorderColor=0:ABGRBlack, ForceBC=0:diable, TriJuice=0, Aniso=0,
1783 * Dim=1:2d, MipPacking=0
1784 */
1785 parse_dword_addr(dwords[5], &mip_gpuaddr, &mip_flags, 0xfff);
1786
1787 printf("%sset texture const %04x\n", levels[level], val);
1788 printf("%sclamp x/y/z: %s/%s/%s\n", levels[level + 1], clamp[clamp_x],
1789 clamp[clamp_y], clamp[clamp_z]);
1790 printf("%sfilter min/mag: %s/%s\n", levels[level + 1], filter[min],
1791 filter[mag]);
1792 printf("%sswizzle: %c%c%c%c\n", levels[level + 1],
1793 swiznames[(swiz >> 0) & 0x7], swiznames[(swiz >> 3) & 0x7],
1794 swiznames[(swiz >> 6) & 0x7], swiznames[(swiz >> 9) & 0x7]);
1795 printf("%saddr=%08x (flags=%03x), size=%dx%d, pitch=%d, format=%s\n",
1796 levels[level + 1], gpuaddr, flags, w, h, p,
1797 rnn_enumname(rnn, "a2xx_sq_surfaceformat", flags & 0xf));
1798 printf("%smipaddr=%08x (flags=%03x)\n", levels[level + 1], mip_gpuaddr,
1799 mip_flags);
1800 }
1801
1802 static void
dump_a2xx_shader_const(uint32_t * dwords,uint32_t sizedwords,uint32_t val,int level)1803 dump_a2xx_shader_const(uint32_t *dwords, uint32_t sizedwords, uint32_t val,
1804 int level)
1805 {
1806 int i;
1807 printf("%sset shader const %04x\n", levels[level], val);
1808 for (i = 0; i < sizedwords;) {
1809 uint32_t gpuaddr, flags;
1810 parse_dword_addr(dwords[i++], &gpuaddr, &flags, 0xf);
1811 void *addr = hostptr(gpuaddr);
1812 if (addr) {
1813 const char *fmt =
1814 rnn_enumname(rnn, "a2xx_sq_surfaceformat", flags & 0xf);
1815 uint32_t size = dwords[i++];
1816 printf("%saddr=%08x, size=%d, format=%s\n", levels[level + 1], gpuaddr,
1817 size, fmt);
1818 // TODO maybe dump these as bytes instead of dwords?
1819 size = (size + 3) / 4; // for now convert to dwords
1820 dump_hex(addr, MIN2(size, 64), level + 1);
1821 if (size > MIN2(size, 64))
1822 printf("%s\t\t...\n", levels[level + 1]);
1823 dump_float(addr, MIN2(size, 64), level + 1);
1824 if (size > MIN2(size, 64))
1825 printf("%s\t\t...\n", levels[level + 1]);
1826 }
1827 }
1828 }
1829
1830 static void
cp_set_const(uint32_t * dwords,uint32_t sizedwords,int level)1831 cp_set_const(uint32_t *dwords, uint32_t sizedwords, int level)
1832 {
1833 uint32_t val = dwords[0] & 0xffff;
1834 switch ((dwords[0] >> 16) & 0xf) {
1835 case 0x0:
1836 dump_float((float *)(dwords + 1), sizedwords - 1, level + 1);
1837 break;
1838 case 0x1:
1839 /* need to figure out how const space is partitioned between
1840 * attributes, textures, etc..
1841 */
1842 if (val < 0x78) {
1843 dump_a2xx_tex_const(dwords + 1, sizedwords - 1, val, level);
1844 } else {
1845 dump_a2xx_shader_const(dwords + 1, sizedwords - 1, val, level);
1846 }
1847 break;
1848 case 0x2:
1849 printf("%sset bool const %04x\n", levels[level], val);
1850 break;
1851 case 0x3:
1852 printf("%sset loop const %04x\n", levels[level], val);
1853 break;
1854 case 0x4:
1855 val += 0x2000;
1856 if (dwords[0] & 0x80000000) {
1857 uint32_t srcreg = dwords[1];
1858 uint32_t dstval = dwords[2];
1859
1860 /* TODO: not sure what happens w/ payload != 2.. */
1861 assert(sizedwords == 3);
1862 assert(srcreg < ARRAY_SIZE(type0_reg_vals));
1863
1864 /* note: rnn_regname uses a static buf so we can't do
1865 * two regname() calls for one printf..
1866 */
1867 printf("%s%s = %08x + ", levels[level], regname(val, 1), dstval);
1868 printf("%s (%08x)\n", regname(srcreg, 1), type0_reg_vals[srcreg]);
1869
1870 dstval += type0_reg_vals[srcreg];
1871
1872 dump_registers(val, &dstval, 1, level + 1);
1873 } else {
1874 dump_registers(val, dwords + 1, sizedwords - 1, level + 1);
1875 }
1876 break;
1877 }
1878 }
1879
1880 static void dump_register_summary(int level);
1881
1882 static void
cp_event_write(uint32_t * dwords,uint32_t sizedwords,int level)1883 cp_event_write(uint32_t *dwords, uint32_t sizedwords, int level)
1884 {
1885 const char *name = rnn_enumname(rnn, "vgt_event_type", dwords[0] & 0xff);
1886 printl(2, "%sevent %s\n", levels[level], name);
1887
1888 if (name && (options->info->chip > 5)) {
1889 char eventname[64];
1890 snprintf(eventname, sizeof(eventname), "EVENT:%s", name);
1891 if (!strcmp(name, "BLIT") || !strcmp(name, "LRZ_CLEAR")) {
1892 do_query(eventname, 0);
1893 print_mode(level);
1894 dump_register_summary(level);
1895 }
1896 }
1897 }
1898
/* Dump the registers touched since the previous summary and advance
 * draw_count.
 *
 * Every register offset below regcnt() is visited.  A register is shown
 * when it has been written and either options->allregs is set or it is
 * flagged as rewritten.  Each printed register is prefixed with '!' when
 * its value differs from lastvals[] (which is updated on the way) and
 * with '+' when it was rewritten.  The rewritten flags are cleared at
 * the end.
 *
 * While this runs, `summary` is forced to false and `in_summary` is
 * true; `summary` is restored before returning.
 */
static void
dump_register_summary(int level)
{
   uint32_t i;
   bool saved_summary = summary;
   summary = false;

   in_summary = true;

   struct regacc r = regacc(NULL);

   /* dump current state of registers: */
   printl(2, "%sdraw[%i] register values\n", levels[level], draw_count);

   /* These accumulate over every dword pushed into `r` and are only
    * reset once regacc_push() returns non-zero and the register has
    * been printed.
    */
   bool changed = false;
   bool written = false;

   for (i = 0; i < regcnt(); i++) {
      uint32_t regbase = i;
      uint32_t lastval = reg_val(regbase);
      /* skip registers that haven't been updated since last draw/blit: */
      if (!(options->allregs || reg_rewritten(regbase)))
         continue;
      if (!reg_written(regbase))
         continue;
      /* lastvals[] is refreshed even when nothing is printed */
      if (lastval != lastvals[regbase]) {
         changed |= true;
         lastvals[regbase] = lastval;
      }
      if (reg_rewritten(regbase)) {
         written |= true;
      }
      if (!quiet(2)) {
         if (regacc_push(&r, regbase, lastval)) {
            if (changed) {
               printl(2, "!");
            } else {
               printl(2, " ");
            }
            if (written) {
               printl(2, "+");
            } else {
               printl(2, " ");
            }
            printl(2, "\t%08"PRIx64, r.value);
            dump_register(&r, level);

            changed = written = false;
         }
      }
   }

   clear_rewritten();

   in_summary = false;

   draw_count++;
   summary = saved_summary;
}
1958
1959 static uint32_t
draw_indx_common(uint32_t * dwords,int level)1960 draw_indx_common(uint32_t *dwords, int level)
1961 {
1962 uint32_t prim_type = dwords[1] & 0x1f;
1963 uint32_t source_select = (dwords[1] >> 6) & 0x3;
1964 uint32_t num_indices = dwords[2];
1965 const char *primtype;
1966
1967 primtype = rnn_enumname(rnn, "pc_di_primtype", prim_type);
1968
1969 do_query(primtype, num_indices);
1970
1971 printl(2, "%sdraw: %d\n", levels[level], draws[ib]);
1972 printl(2, "%sprim_type: %s (%d)\n", levels[level], primtype, prim_type);
1973 printl(2, "%ssource_select: %s (%d)\n", levels[level],
1974 rnn_enumname(rnn, "pc_di_src_sel", source_select), source_select);
1975 printl(2, "%snum_indices: %d\n", levels[level], num_indices);
1976
1977 vertices += num_indices;
1978
1979 draws[ib]++;
1980
1981 return num_indices;
1982 }
1983
/* Index element size as decoded by the draw_indx handlers.
 *
 * Note that INDEX_SIZE_IGN, INDEX_SIZE_16_BIT and INDEX_SIZE_INVALID all
 * share the value 0, so only three distinct values are named here.
 */
enum pc_di_index_size {
   INDEX_SIZE_IGN = 0,
   INDEX_SIZE_16_BIT = 0,
   INDEX_SIZE_32_BIT = 1,
   INDEX_SIZE_8_BIT = 2,
   INDEX_SIZE_INVALID = 0,
};
1991
1992 static void
cp_draw_indx(uint32_t * dwords,uint32_t sizedwords,int level)1993 cp_draw_indx(uint32_t *dwords, uint32_t sizedwords, int level)
1994 {
1995 uint32_t num_indices = draw_indx_common(dwords, level);
1996
1997 assert(!is_64b());
1998
1999 /* if we have an index buffer, dump that: */
2000 if (sizedwords == 5) {
2001 void *ptr = hostptr(dwords[3]);
2002 printl(2, "%sgpuaddr: %08x\n", levels[level], dwords[3]);
2003 printl(2, "%sidx_size: %d\n", levels[level], dwords[4]);
2004 if (ptr) {
2005 enum pc_di_index_size size =
2006 ((dwords[1] >> 11) & 1) | ((dwords[1] >> 12) & 2);
2007 if (!quiet(2)) {
2008 int i;
2009 printf("%sidxs: ", levels[level]);
2010 if (size == INDEX_SIZE_8_BIT) {
2011 uint8_t *idx = ptr;
2012 for (i = 0; i < dwords[4]; i++)
2013 printf(" %u", idx[i]);
2014 } else if (size == INDEX_SIZE_16_BIT) {
2015 uint16_t *idx = ptr;
2016 for (i = 0; i < dwords[4] / 2; i++)
2017 printf(" %u", idx[i]);
2018 } else if (size == INDEX_SIZE_32_BIT) {
2019 uint32_t *idx = ptr;
2020 for (i = 0; i < dwords[4] / 4; i++)
2021 printf(" %u", idx[i]);
2022 }
2023 printf("\n");
2024 dump_hex(ptr, dwords[4] / 4, level + 1);
2025 }
2026 }
2027 }
2028
2029 /* don't bother dumping registers for the dummy draw_indx's.. */
2030 if (num_indices > 0)
2031 dump_register_summary(level);
2032
2033 needs_wfi = true;
2034 }
2035
2036 static void
cp_draw_indx_2(uint32_t * dwords,uint32_t sizedwords,int level)2037 cp_draw_indx_2(uint32_t *dwords, uint32_t sizedwords, int level)
2038 {
2039 uint32_t num_indices = draw_indx_common(dwords, level);
2040 enum pc_di_index_size size =
2041 ((dwords[1] >> 11) & 1) | ((dwords[1] >> 12) & 2);
2042 void *ptr = &dwords[3];
2043 int sz = 0;
2044
2045 assert(!is_64b());
2046
2047 /* CP_DRAW_INDX_2 has embedded/inline idx buffer: */
2048 if (!quiet(2)) {
2049 int i;
2050 printf("%sidxs: ", levels[level]);
2051 if (size == INDEX_SIZE_8_BIT) {
2052 uint8_t *idx = ptr;
2053 for (i = 0; i < num_indices; i++)
2054 printf(" %u", idx[i]);
2055 sz = num_indices;
2056 } else if (size == INDEX_SIZE_16_BIT) {
2057 uint16_t *idx = ptr;
2058 for (i = 0; i < num_indices; i++)
2059 printf(" %u", idx[i]);
2060 sz = num_indices * 2;
2061 } else if (size == INDEX_SIZE_32_BIT) {
2062 uint32_t *idx = ptr;
2063 for (i = 0; i < num_indices; i++)
2064 printf(" %u", idx[i]);
2065 sz = num_indices * 4;
2066 }
2067 printf("\n");
2068 dump_hex(ptr, sz / 4, level + 1);
2069 }
2070
2071 /* don't bother dumping registers for the dummy draw_indx's.. */
2072 if (num_indices > 0)
2073 dump_register_summary(level);
2074 }
2075
2076 static void
cp_draw_indx_offset(uint32_t * dwords,uint32_t sizedwords,int level)2077 cp_draw_indx_offset(uint32_t *dwords, uint32_t sizedwords, int level)
2078 {
2079 uint32_t num_indices = dwords[2];
2080 uint32_t prim_type = dwords[0] & 0x1f;
2081
2082 do_query(rnn_enumname(rnn, "pc_di_primtype", prim_type), num_indices);
2083 print_mode(level);
2084
2085 /* don't bother dumping registers for the dummy draw_indx's.. */
2086 if (num_indices > 0)
2087 dump_register_summary(level);
2088 }
2089
2090 static void
cp_draw_indx_indirect(uint32_t * dwords,uint32_t sizedwords,int level)2091 cp_draw_indx_indirect(uint32_t *dwords, uint32_t sizedwords, int level)
2092 {
2093 uint32_t prim_type = dwords[0] & 0x1f;
2094 uint64_t addr;
2095
2096 do_query(rnn_enumname(rnn, "pc_di_primtype", prim_type), 0);
2097 print_mode(level);
2098
2099 if (is_64b())
2100 addr = (((uint64_t)dwords[2] & 0x1ffff) << 32) | dwords[1];
2101 else
2102 addr = dwords[1];
2103 dump_gpuaddr_size(addr, level, 0x10, 2);
2104
2105 if (is_64b())
2106 addr = (((uint64_t)dwords[5] & 0x1ffff) << 32) | dwords[4];
2107 else
2108 addr = dwords[3];
2109 dump_gpuaddr_size(addr, level, 0x10, 2);
2110
2111 dump_register_summary(level);
2112 }
2113
2114 static void
cp_draw_indirect(uint32_t * dwords,uint32_t sizedwords,int level)2115 cp_draw_indirect(uint32_t *dwords, uint32_t sizedwords, int level)
2116 {
2117 uint32_t prim_type = dwords[0] & 0x1f;
2118 uint64_t addr;
2119
2120 do_query(rnn_enumname(rnn, "pc_di_primtype", prim_type), 0);
2121 print_mode(level);
2122
2123 addr = (((uint64_t)dwords[2] & 0x1ffff) << 32) | dwords[1];
2124 dump_gpuaddr_size(addr, level, 0x10, 2);
2125
2126 dump_register_summary(level);
2127 }
2128
2129 static void
cp_draw_indirect_multi(uint32_t * dwords,uint32_t sizedwords,int level)2130 cp_draw_indirect_multi(uint32_t *dwords, uint32_t sizedwords, int level)
2131 {
2132 uint32_t prim_type = dwords[0] & 0x1f;
2133 uint32_t count = dwords[2];
2134
2135 do_query(rnn_enumname(rnn, "pc_di_primtype", prim_type), 0);
2136 print_mode(level);
2137
2138 struct rnndomain *domain = rnn_finddomain(rnn->db, "CP_DRAW_INDIRECT_MULTI");
2139 uint32_t count_dword = rnndec_decodereg(rnn->vc, domain, "INDIRECT_COUNT");
2140 uint32_t addr_dword = rnndec_decodereg(rnn->vc, domain, "INDIRECT");
2141 uint64_t stride_dword = rnndec_decodereg(rnn->vc, domain, "STRIDE");
2142
2143 if (count_dword) {
2144 uint64_t count_addr =
2145 ((uint64_t)dwords[count_dword + 1] << 32) | dwords[count_dword];
2146 uint32_t *buf = hostptr(count_addr);
2147
2148 /* Don't print more draws than this if we don't know the indirect
2149 * count. It's possible the user will give ~0 or some other large
2150 * value, expecting the GPU to fill in the draw count, and we don't
2151 * want to print a gazillion draws in that case:
2152 */
2153 const uint32_t max_draw_count = 0x100;
2154
2155 /* Assume the indirect count is garbage if it's larger than this
2156 * (quite large) value or 0. Hopefully this catches most cases.
2157 */
2158 const uint32_t max_indirect_draw_count = 0x10000;
2159
2160 if (buf) {
2161 printf("%sindirect count: %u\n", levels[level], *buf);
2162 if (*buf == 0 || *buf > max_indirect_draw_count) {
2163 /* garbage value */
2164 count = MIN2(count, max_draw_count);
2165 } else {
2166 /* not garbage */
2167 count = MIN2(count, *buf);
2168 }
2169 } else {
2170 count = MIN2(count, max_draw_count);
2171 }
2172 }
2173
2174 if (addr_dword && stride_dword) {
2175 uint64_t addr =
2176 ((uint64_t)dwords[addr_dword + 1] << 32) | dwords[addr_dword];
2177 uint32_t stride = dwords[stride_dword];
2178
2179 for (unsigned i = 0; i < count; i++, addr += stride) {
2180 printf("%sdraw %d:\n", levels[level], i);
2181 dump_gpuaddr_size(addr, level, 0x10, 2);
2182 }
2183 }
2184
2185 dump_register_summary(level);
2186 }
2187
2188 static void
cp_draw_auto(uint32_t * dwords,uint32_t sizedwords,int level)2189 cp_draw_auto(uint32_t *dwords, uint32_t sizedwords, int level)
2190 {
2191 uint32_t prim_type = dwords[0] & 0x1f;
2192
2193 do_query(rnn_enumname(rnn, "pc_di_primtype", prim_type), 0);
2194 print_mode(level);
2195
2196 dump_register_summary(level);
2197 }
2198
/* Decode CP_RUN_OPENCL: recorded as a "COMPUTE" query, followed by the
 * register summary.  The packet payload itself is not inspected.
 */
static void
cp_run_cl(uint32_t *dwords, uint32_t sizedwords, int level)
{
   (void)dwords;
   (void)sizedwords;

   do_query("COMPUTE", 1);
   dump_register_summary(level);
}
2205
/* Print the payload as a C string: stops at the first NUL byte or after
 * 4 * sizedwords bytes, whichever comes first, and silently drops bytes
 * that are not ASCII.
 */
static void
print_nop_tail_string(uint32_t *dwords, uint32_t sizedwords)
{
   const char *text = (void *)dwords;
   int pos = 0;

   while (pos < 4 * sizedwords) {
      const char ch = text[pos++];

      if (ch == '\0')
         return;
      if (!isascii(ch))
         continue;
      putchar(ch);
   }
}
2217
2218 static void
cp_nop(uint32_t * dwords,uint32_t sizedwords,int level)2219 cp_nop(uint32_t *dwords, uint32_t sizedwords, int level)
2220 {
2221 if (quiet(3))
2222 return;
2223
2224 /* NOP is used to encode special debug strings by Turnip.
2225 * See tu_cs_emit_debug_magic_strv(...)
2226 */
2227 static int scope_level = 0;
2228 uint32_t identifier = dwords[0];
2229 bool is_special = false;
2230 if (identifier == CP_NOP_MESG) {
2231 printf("### ");
2232 is_special = true;
2233 } else if (identifier == CP_NOP_BEGN) {
2234 printf(">>> #%d: ", ++scope_level);
2235 is_special = true;
2236 } else if (identifier == CP_NOP_END) {
2237 printf("<<< #%d: ", scope_level--);
2238 is_special = true;
2239 }
2240
2241 if (is_special) {
2242 if (sizedwords > 1) {
2243 print_nop_tail_string(dwords + 1, sizedwords - 1);
2244 printf("\n");
2245 }
2246 return;
2247 }
2248
2249 // blob doesn't use CP_NOP for string_marker but it does
2250 // use it for things that end up looking like, but aren't
2251 // ascii chars:
2252 if (!options->decode_markers)
2253 return;
2254
2255 print_nop_tail_string(dwords, sizedwords);
2256 printf("\n");
2257 }
2258
/* Extract the target address and size from an indirect-buffer packet.
 *
 * With 64b addressing the payload is three dwords (address low, address
 * high, size); otherwise it is two (address, size).  The results are stored
 * through ibaddr / ibsize and the returned pointer is the first dword past
 * the payload.
 */
uint32_t *
parse_cp_indirect(uint32_t *dwords, uint32_t sizedwords,
                  uint64_t *ibaddr, uint32_t *ibsize)
{
   if (!is_64b()) {
      assert(sizedwords == 2);

      *ibaddr = dwords[0];
      *ibsize = dwords[1];

      return &dwords[2];
   }

   assert(sizedwords == 3);

   /* a5xx+.. low then high 32b of gpu addr, then size: */
   *ibaddr = ((uint64_t)dwords[1] << 32) | dwords[0];
   *ibsize = dwords[2];

   return &dwords[3];
}
2281
2282 static void
cp_indirect(uint32_t * dwords,uint32_t sizedwords,int level)2283 cp_indirect(uint32_t *dwords, uint32_t sizedwords, int level)
2284 {
2285 /* traverse indirect buffers */
2286 uint64_t ibaddr;
2287 uint32_t ibsize;
2288 uint32_t *ptr = NULL;
2289
2290 dwords = parse_cp_indirect(dwords, sizedwords, &ibaddr, &ibsize);
2291
2292 if (!quiet(3)) {
2293 if (is_64b()) {
2294 printf("%sibaddr:%016" PRIx64 "\n", levels[level], ibaddr);
2295 } else {
2296 printf("%sibaddr:%08x\n", levels[level], (uint32_t)ibaddr);
2297 }
2298 printf("%sibsize:%08x\n", levels[level], ibsize);
2299 }
2300
2301 if (options->once && has_dumped(ibaddr, enable_mask))
2302 return;
2303
2304 /* 'query-compare' mode implies 'once' mode, although we need only to
2305 * process the cmdstream for *any* enable_mask mode, since we are
2306 * comparing binning vs draw reg values at the same time, ie. it is
2307 * not useful to process the same draw in both binning and draw pass.
2308 */
2309 if (options->query_compare && has_dumped(ibaddr, MODE_ALL))
2310 return;
2311
2312 /* map gpuaddr back to hostptr: */
2313 ptr = hostptr(ibaddr);
2314
2315 if (ptr) {
2316 /* If the GPU hung within the target IB, the trigger point will be
2317 * just after the current CP_INDIRECT_BUFFER. Because the IB is
2318 * executed but never returns. Account for this by checking if
2319 * the IB returned:
2320 */
2321 highlight_gpuaddr(gpuaddr(dwords));
2322
2323 ib++;
2324 ibs[ib].base = ibaddr;
2325 ibs[ib].size = ibsize;
2326
2327 dump_commands(ptr, ibsize, level);
2328 ib--;
2329 } else {
2330 fprintf(stderr, "could not find: %016" PRIx64 " (%d)\n", ibaddr, ibsize);
2331 }
2332 }
2333
2334 static void
cp_start_bin(uint32_t * dwords,uint32_t sizedwords,int level)2335 cp_start_bin(uint32_t *dwords, uint32_t sizedwords, int level)
2336 {
2337 uint64_t ibaddr;
2338 uint32_t ibsize;
2339 uint32_t loopcount;
2340 uint32_t *ptr = NULL;
2341
2342 loopcount = dwords[0];
2343 ibaddr = dwords[1];
2344 ibaddr |= ((uint64_t)dwords[2]) << 32;
2345 ibsize = dwords[3];
2346
2347 /* map gpuaddr back to hostptr: */
2348 ptr = hostptr(ibaddr);
2349
2350 if (ptr) {
2351 /* If the GPU hung within the target IB, the trigger point will be
2352 * just after the current CP_START_BIN. Because the IB is
2353 * executed but never returns. Account for this by checking if
2354 * the IB returned:
2355 */
2356 highlight_gpuaddr(gpuaddr(&dwords[5]));
2357
2358 /* TODO: we should duplicate the body of the loop after each bin, so
2359 * that draws get the correct state. We should also figure out if there
2360 * are any registers that can tell us what bin we're in when we hang so
2361 * that crashdec points to the right place.
2362 */
2363 ib++;
2364 for (uint32_t i = 0; i < loopcount; i++) {
2365 ibs[ib].base = ibaddr;
2366 ibs[ib].size = ibsize;
2367 printl(3, "%sbin %u\n", levels[level], i);
2368 dump_commands(ptr, ibsize, level);
2369 ibaddr += ibsize;
2370 ptr += ibsize;
2371 }
2372 ib--;
2373 } else {
2374 fprintf(stderr, "could not find: %016" PRIx64 " (%d)\n", ibaddr, ibsize);
2375 }
2376 }
2377
2378 static void
cp_fixed_stride_draw_table(uint32_t * dwords,uint32_t sizedwords,int level)2379 cp_fixed_stride_draw_table(uint32_t *dwords, uint32_t sizedwords, int level)
2380 {
2381 uint64_t ibaddr;
2382 uint32_t ibsize;
2383 uint32_t loopcount;
2384 uint32_t *ptr = NULL;
2385
2386 loopcount = dwords[3];
2387 ibaddr = dwords[0];
2388 ibaddr |= ((uint64_t)dwords[1]) << 32;
2389 ibsize = dwords[2] >> 20;
2390
2391 /* map gpuaddr back to hostptr: */
2392 ptr = hostptr(ibaddr);
2393
2394 if (ptr) {
2395 /* If the GPU hung within the target IB, the trigger point will be
2396 * just after the current CP_START_BIN. Because the IB is
2397 * executed but never returns. Account for this by checking if
2398 * the IB returned:
2399 */
2400 highlight_gpuaddr(gpuaddr(&dwords[5]));
2401
2402 ib++;
2403 for (uint32_t i = 0; i < loopcount; i++) {
2404 ibs[ib].base = ibaddr;
2405 ibs[ib].size = ibsize;
2406 printl(3, "%sdraw %u\n", levels[level], i);
2407 dump_commands(ptr, ibsize, level);
2408 ibaddr += ibsize;
2409 ptr += ibsize;
2410 }
2411 ib--;
2412 } else {
2413 fprintf(stderr, "could not find: %016" PRIx64 " (%d)\n", ibaddr, ibsize);
2414 }
2415 }
2416
2417 static void
cp_wfi(uint32_t * dwords,uint32_t sizedwords,int level)2418 cp_wfi(uint32_t *dwords, uint32_t sizedwords, int level)
2419 {
2420 needs_wfi = false;
2421 }
2422
2423 static void
cp_mem_write(uint32_t * dwords,uint32_t sizedwords,int level)2424 cp_mem_write(uint32_t *dwords, uint32_t sizedwords, int level)
2425 {
2426 if (quiet(2))
2427 return;
2428
2429 if (is_64b()) {
2430 uint64_t gpuaddr = dwords[0] | (((uint64_t)dwords[1]) << 32);
2431 printf("%sgpuaddr:%016" PRIx64 "\n", levels[level], gpuaddr);
2432 dump_hex(&dwords[2], sizedwords - 2, level + 1);
2433
2434 if (pkt_is_type4(dwords[2]) || pkt_is_type7(dwords[2]))
2435 dump_commands(&dwords[2], sizedwords - 2, level + 1);
2436 } else {
2437 uint32_t gpuaddr = dwords[0];
2438 printf("%sgpuaddr:%08x\n", levels[level], gpuaddr);
2439 dump_float((float *)&dwords[1], sizedwords - 1, level + 1);
2440 }
2441 }
2442
2443 static void
cp_rmw(uint32_t * dwords,uint32_t sizedwords,int level)2444 cp_rmw(uint32_t *dwords, uint32_t sizedwords, int level)
2445 {
2446 uint32_t val = dwords[0] & 0xffff;
2447 uint32_t and = dwords[1];
2448 uint32_t or = dwords[2];
2449 printl(3, "%srmw (%s & 0x%08x) | 0x%08x)\n", levels[level], regname(val, 1),
2450 and, or);
2451 if (needs_wfi)
2452 printl(2, "NEEDS WFI: rmw (%s & 0x%08x) | 0x%08x)\n", regname(val, 1),
2453 and, or);
2454 reg_set(val, (reg_val(val) & and) | or);
2455 }
2456
2457 static void
cp_reg_mem(uint32_t * dwords,uint32_t sizedwords,int level)2458 cp_reg_mem(uint32_t *dwords, uint32_t sizedwords, int level)
2459 {
2460 uint32_t val = dwords[0] & 0xffff;
2461 printl(3, "%sbase register: %s\n", levels[level], regname(val, 1));
2462
2463 if (quiet(2))
2464 return;
2465
2466 uint64_t gpuaddr = dwords[1] | (((uint64_t)dwords[2]) << 32);
2467 printf("%sgpuaddr:%016" PRIx64 "\n", levels[level], gpuaddr);
2468 void *ptr = hostptr(gpuaddr);
2469 if (ptr) {
2470 uint32_t cnt = (dwords[0] >> 19) & 0x3ff;
2471 dump_hex(ptr, cnt, level + 1);
2472 }
2473 }
2474
/* One draw-state group as recorded by cp_set_draw_state().  A group whose
 * count is zero is treated as disabled (load_group() skips it).
 */
struct draw_state {
   uint16_t enable_mask; /* 4-bit field from bits 20..23 of the entry header */
   uint16_t flags;       /* FLAG_* bits, from bits 16..19 of the entry header */
   uint32_t count;       /* size of the group's cmdstream, in dwords */
   uint64_t addr;        /* GPU address of the group's cmdstream */
};

/* Indexed by group id (5-bit field, so 32 slots) */
struct draw_state state[32];

/* Values of draw_state::flags.  Only the DISABLE*, and LOAD_IMMED bits are
 * acted on in cp_set_draw_state().
 */
#define FLAG_DIRTY              0x1
#define FLAG_DISABLE            0x2
#define FLAG_DISABLE_ALL_GROUPS 0x4
#define FLAG_LOAD_IMMED         0x8

/* Last value written by CP_SET_MODE (see cp_set_mode()) */
static int draw_mode;
2490
2491 static void
disable_group(unsigned group_id)2492 disable_group(unsigned group_id)
2493 {
2494 struct draw_state *ds = &state[group_id];
2495 memset(ds, 0, sizeof(*ds));
2496 }
2497
2498 static void
disable_all_groups(void)2499 disable_all_groups(void)
2500 {
2501 for (unsigned i = 0; i < ARRAY_SIZE(state); i++)
2502 disable_group(i);
2503 }
2504
2505 static void
load_group(unsigned group_id,int level)2506 load_group(unsigned group_id, int level)
2507 {
2508 struct draw_state *ds = &state[group_id];
2509
2510 if (!ds->count)
2511 return;
2512
2513 printl(2, "%sgroup_id: %u\n", levels[level], group_id);
2514 printl(2, "%scount: %d\n", levels[level], ds->count);
2515 printl(2, "%saddr: %016llx\n", levels[level], ds->addr);
2516 printl(2, "%sflags: %x\n", levels[level], ds->flags);
2517
2518 if (options->info->chip >= 6) {
2519 printl(2, "%senable_mask: 0x%x\n", levels[level], ds->enable_mask);
2520
2521 if (!(ds->enable_mask & enable_mask)) {
2522 printl(2, "%s\tskipped!\n\n", levels[level]);
2523 return;
2524 }
2525 }
2526
2527 void *ptr = hostptr(ds->addr);
2528 if (ptr) {
2529 if (!quiet(2))
2530 dump_hex(ptr, ds->count, level + 1);
2531
2532 ib++;
2533 dump_commands(ptr, ds->count, level + 1);
2534 ib--;
2535 }
2536 }
2537
2538 static void
load_all_groups(int level)2539 load_all_groups(int level)
2540 {
2541 /* sanity check, we should never recursively hit recursion here, and if
2542 * we do bad things happen:
2543 */
2544 static bool loading_groups = false;
2545 if (loading_groups) {
2546 printf("ERROR: nothing in draw state should trigger recursively loading "
2547 "groups!\n");
2548 return;
2549 }
2550 loading_groups = true;
2551 for (unsigned i = 0; i < ARRAY_SIZE(state); i++)
2552 load_group(i, level);
2553 loading_groups = false;
2554
2555 /* in 'query-compare' mode, defer disabling all groups until we have a
2556 * chance to process the query:
2557 */
2558 if (!options->query_compare)
2559 disable_all_groups();
2560 }
2561
2562 static void
cp_set_draw_state(uint32_t * dwords,uint32_t sizedwords,int level)2563 cp_set_draw_state(uint32_t *dwords, uint32_t sizedwords, int level)
2564 {
2565 uint32_t i;
2566
2567 for (i = 0; i < sizedwords;) {
2568 struct draw_state *ds;
2569 uint32_t count = dwords[i] & 0xffff;
2570 uint32_t group_id = (dwords[i] >> 24) & 0x1f;
2571 uint32_t enable_mask = (dwords[i] >> 20) & 0xf;
2572 uint32_t flags = (dwords[i] >> 16) & 0xf;
2573 uint64_t addr;
2574
2575 if (is_64b()) {
2576 addr = dwords[i + 1];
2577 addr |= ((uint64_t)dwords[i + 2]) << 32;
2578 i += 3;
2579 } else {
2580 addr = dwords[i + 1];
2581 i += 2;
2582 }
2583
2584 if (flags & FLAG_DISABLE_ALL_GROUPS) {
2585 disable_all_groups();
2586 continue;
2587 }
2588
2589 if (flags & FLAG_DISABLE) {
2590 disable_group(group_id);
2591 continue;
2592 }
2593
2594 assert(group_id < ARRAY_SIZE(state));
2595 disable_group(group_id);
2596
2597 ds = &state[group_id];
2598
2599 ds->enable_mask = enable_mask;
2600 ds->flags = flags;
2601 ds->count = count;
2602 ds->addr = addr;
2603
2604 if (flags & FLAG_LOAD_IMMED) {
2605 load_group(group_id, level);
2606 disable_group(group_id);
2607 }
2608 }
2609 }
2610
2611 static void
cp_set_mode(uint32_t * dwords,uint32_t sizedwords,int level)2612 cp_set_mode(uint32_t *dwords, uint32_t sizedwords, int level)
2613 {
2614 draw_mode = dwords[0];
2615 }
2616
/* Decode CP_EXEC_CS (execute compute shader): recorded as a "compute"
 * query, followed by the register summary.
 */
static void
cp_exec_cs(uint32_t *dwords, uint32_t sizedwords, int level)
{
   (void)dwords;
   (void)sizedwords;

   do_query("compute", 0);
   dump_register_summary(level);
}
2624
2625 static void
cp_exec_cs_indirect(uint32_t * dwords,uint32_t sizedwords,int level)2626 cp_exec_cs_indirect(uint32_t *dwords, uint32_t sizedwords, int level)
2627 {
2628 uint64_t addr;
2629
2630 if (is_64b()) {
2631 addr = (((uint64_t)dwords[2] & 0x1ffff) << 32) | dwords[1];
2632 } else {
2633 addr = dwords[1];
2634 }
2635
2636 printl(3, "%saddr: %016llx\n", levels[level], addr);
2637 dump_gpuaddr_size(addr, level, 0x10, 2);
2638
2639 do_query("compute", 0);
2640 dump_register_summary(level);
2641 }
2642
2643 static void
cp_set_marker(uint32_t * dwords,uint32_t sizedwords,int level)2644 cp_set_marker(uint32_t *dwords, uint32_t sizedwords, int level)
2645 {
2646 uint32_t val = dwords[0] & 0xf;
2647 const char *mode = rnn_enumname(rnn, "a6xx_marker", val);
2648
2649 if (!mode) {
2650 static char buf[8];
2651 sprintf(buf, "0x%x", val);
2652 render_mode = buf;
2653 return;
2654 }
2655
2656 render_mode = mode;
2657
2658 if (!strcmp(render_mode, "RM6_BINNING")) {
2659 enable_mask = MODE_BINNING;
2660 } else if (!strcmp(render_mode, "RM6_GMEM")) {
2661 enable_mask = MODE_GMEM;
2662 } else if (!strcmp(render_mode, "RM6_BYPASS")) {
2663 enable_mask = MODE_BYPASS;
2664 }
2665 }
2666
2667 static void
cp_set_thread_control(uint32_t * dwords,uint32_t sizedwords,int level)2668 cp_set_thread_control(uint32_t *dwords, uint32_t sizedwords, int level)
2669 {
2670 uint32_t val = dwords[0] & 0x3;
2671 thread = rnn_enumname(rnn, "cp_thread", val);
2672 }
2673
2674 static void
cp_set_render_mode(uint32_t * dwords,uint32_t sizedwords,int level)2675 cp_set_render_mode(uint32_t *dwords, uint32_t sizedwords, int level)
2676 {
2677 uint64_t addr;
2678 uint32_t *ptr, len;
2679
2680 assert(is_64b());
2681
2682 /* TODO seems to have two ptrs, 9 dwords total (incl pkt7 hdr)..
2683 * not sure if this can come in different sizes.
2684 *
2685 * First ptr doesn't seem to be cmdstream, second one does.
2686 *
2687 * Comment from downstream kernel:
2688 *
2689 * SRM -- set render mode (ex binning, direct render etc)
2690 * SRM is set by UMD usually at start of IB to tell CP the type of
2691 * preemption.
2692 * KMD needs to set SRM to NULL to indicate CP that rendering is
2693 * done by IB.
2694 * ------------------------------------------------------------------
2695 *
2696 * Seems to always be one of these two:
2697 * 70ec0008 00000001 001c0000 00000000 00000010 00000003 0000000d 001c2000
2698 * 00000000 70ec0008 00000001 001c0000 00000000 00000000 00000003 0000000d
2699 * 001c2000 00000000
2700 *
2701 */
2702
2703 assert(options->info->chip >= 5);
2704
2705 render_mode = rnn_enumname(rnn, "render_mode_cmd", dwords[0]);
2706
2707 if (sizedwords == 1)
2708 return;
2709
2710 addr = dwords[1];
2711 addr |= ((uint64_t)dwords[2]) << 32;
2712
2713 mode = dwords[3];
2714
2715 dump_gpuaddr(addr, level + 1);
2716
2717 if (sizedwords == 5)
2718 return;
2719
2720 assert(sizedwords == 8);
2721
2722 len = dwords[5];
2723 addr = dwords[6];
2724 addr |= ((uint64_t)dwords[7]) << 32;
2725
2726 printl(3, "%saddr: 0x%016lx\n", levels[level], addr);
2727 printl(3, "%slen: 0x%x\n", levels[level], len);
2728
2729 ptr = hostptr(addr);
2730
2731 if (ptr) {
2732 if (!quiet(2)) {
2733 ib++;
2734 dump_commands(ptr, len, level + 1);
2735 ib--;
2736 dump_hex(ptr, len, level + 1);
2737 }
2738 }
2739 }
2740
2741 static void
cp_compute_checkpoint(uint32_t * dwords,uint32_t sizedwords,int level)2742 cp_compute_checkpoint(uint32_t *dwords, uint32_t sizedwords, int level)
2743 {
2744 uint64_t addr;
2745 uint32_t *ptr, len;
2746
2747 assert(is_64b());
2748 assert(options->info->chip >= 5);
2749
2750 if (sizedwords == 8) {
2751 addr = dwords[5];
2752 addr |= ((uint64_t)dwords[6]) << 32;
2753 len = dwords[7];
2754 } else {
2755 addr = dwords[5];
2756 addr |= ((uint64_t)dwords[6]) << 32;
2757 len = dwords[4];
2758 }
2759
2760 printl(3, "%saddr: 0x%016" PRIx64 "\n", levels[level], addr);
2761 printl(3, "%slen: 0x%x\n", levels[level], len);
2762
2763 ptr = hostptr(addr);
2764
2765 if (ptr) {
2766 if (!quiet(2)) {
2767 ib++;
2768 dump_commands(ptr, len, level + 1);
2769 ib--;
2770 dump_hex(ptr, len, level + 1);
2771 }
2772 }
2773 }
2774
2775 static void
cp_blit(uint32_t * dwords,uint32_t sizedwords,int level)2776 cp_blit(uint32_t *dwords, uint32_t sizedwords, int level)
2777 {
2778 do_query(rnn_enumname(rnn, "cp_blit_cmd", dwords[0]), 0);
2779 print_mode(level);
2780 dump_register_summary(level);
2781 }
2782
2783 static void
cp_context_reg_bunch(uint32_t * dwords,uint32_t sizedwords,int level)2784 cp_context_reg_bunch(uint32_t *dwords, uint32_t sizedwords, int level)
2785 {
2786 int i;
2787
2788 /* NOTE: seems to write same reg multiple times.. not sure if different parts
2789 * of these are triggered by the FLUSH_SO_n events?? (if that is what they
2790 * actually are?)
2791 */
2792 bool saved_summary = summary;
2793 summary = false;
2794
2795 struct regacc r = regacc(NULL);
2796
2797 for (i = 0; i < sizedwords; i += 2) {
2798 if (regacc_push(&r, dwords[i + 0], dwords[i + 1]))
2799 dump_register(&r, level + 1);
2800 reg_set(dwords[i + 0], dwords[i + 1]);
2801 }
2802
2803 summary = saved_summary;
2804 }
2805
/* Looks similar to CP_CONTEXT_REG_BUNCH, but not quite the same...
 * discarding first two dwords??
 *
 * CP_CONTEXT_REG_BUNCH:
 *        0221: 9c1ff606  (rep)(xmov3)mov $usraddr, $data
 *        ; mov $data, $data
 *        ; mov $usraddr, $data
 *        ; mov $data, $data
 *        0222: d8000000  waitin
 *        0223: 981f0806  mov $01, $data
 *
 * CP_UNK5D:
 *        0224: 981f0006  mov $00, $data
 *        0225: 981f0006  mov $00, $data
 *        0226: 9c1ff206  (rep)(xmov1)mov $usraddr, $data
 *        ; mov $data, $data
 *        0227: d8000000  waitin
 *        0228: 981f0806  mov $01, $data
 *
 * Packets shorter than the two skipped dwords carry no register pairs and
 * are ignored.
 */
static void
cp_context_reg_bunch2(uint32_t *dwords, uint32_t sizedwords, int level)
{
   /* sizedwords is unsigned, so "sizedwords - 2" would wrap to a huge count
    * for a malformed (too short) packet:
    */
   if (sizedwords < 2)
      return;

   cp_context_reg_bunch(dwords + 2, sizedwords - 2, level);
}
2833
2834 static void
cp_reg_write(uint32_t * dwords,uint32_t sizedwords,int level)2835 cp_reg_write(uint32_t *dwords, uint32_t sizedwords, int level)
2836 {
2837 uint32_t reg = dwords[1] & 0xffff;
2838
2839 struct regacc r = regacc(NULL);
2840 if (regacc_push(&r, reg, dwords[2]))
2841 dump_register(&r, level + 1);
2842 reg_set(reg, dwords[2]);
2843 }
2844
2845 static void
cp_set_ctxswitch_ib(uint32_t * dwords,uint32_t sizedwords,int level)2846 cp_set_ctxswitch_ib(uint32_t *dwords, uint32_t sizedwords, int level)
2847 {
2848 uint64_t addr;
2849 uint32_t size = dwords[2] & 0xffff;
2850 void *ptr;
2851
2852 addr = dwords[0] | ((uint64_t)dwords[1] << 32);
2853
2854 if (!quiet(3)) {
2855 printf("%saddr=%" PRIx64 "\n", levels[level], addr);
2856 }
2857
2858 ptr = hostptr(addr);
2859 if (ptr) {
2860 dump_commands(ptr, size, level + 1);
2861 }
2862 }
2863
2864 static void
cp_skip_ib2_enable_global(uint32_t * dwords,uint32_t sizedwords,int level)2865 cp_skip_ib2_enable_global(uint32_t *dwords, uint32_t sizedwords, int level)
2866 {
2867 skip_ib2_enable_global = dwords[0];
2868 }
2869
2870 static void
cp_skip_ib2_enable_local(uint32_t * dwords,uint32_t sizedwords,int level)2871 cp_skip_ib2_enable_local(uint32_t *dwords, uint32_t sizedwords, int level)
2872 {
2873 skip_ib2_enable_local = dwords[0];
2874 }
2875
/* Builds one table entry: the packet name is "CP_" followed by x, fxn is
 * the handler, and any further arguments initialize the options member.
 */
#define CP(x, fxn, ...) { "CP_" #x, fxn, ##__VA_ARGS__ }

/* Table mapping packet names to their decode handlers.  Looked up by name
 * in get_type3_op(); packets without an entry get a no-op handler there.
 */
static const struct type3_op {
   /* full packet name, compared against pktname() */
   const char *name;
   /* handler; receives the packet payload, its size in dwords, and the
    * nesting level used for output indentation
    */
   void (*fxn)(uint32_t *dwords, uint32_t sizedwords, int level);
   struct {
      /* when set, dump_commands() calls load_all_groups() before invoking
       * the handler
       */
      bool load_all_groups;
   } options;
} type3_op[] = {
   CP(NOP, cp_nop),
   CP(INDIRECT_BUFFER, cp_indirect),
   CP(INDIRECT_BUFFER_PFD, cp_indirect),
   CP(WAIT_FOR_IDLE, cp_wfi),
   CP(REG_RMW, cp_rmw),
   CP(REG_TO_MEM, cp_reg_mem),
   CP(MEM_TO_REG, cp_reg_mem), /* same layout as CP_REG_TO_MEM */
   CP(MEM_WRITE, cp_mem_write),
   CP(EVENT_WRITE, cp_event_write),
   CP(RUN_OPENCL, cp_run_cl),
   CP(DRAW_INDX, cp_draw_indx, {.load_all_groups = true}),
   CP(DRAW_INDX_2, cp_draw_indx_2, {.load_all_groups = true}),
   CP(SET_CONSTANT, cp_set_const),
   CP(IM_LOAD_IMMEDIATE, cp_im_loadi),
   CP(WIDE_REG_WRITE, cp_wide_reg_write),

   /* for a3xx */
   CP(LOAD_STATE, cp_load_state),
   CP(SET_BIN, cp_set_bin),

   /* for a4xx */
   CP(LOAD_STATE4, cp_load_state),
   CP(SET_DRAW_STATE, cp_set_draw_state),
   CP(DRAW_INDX_OFFSET, cp_draw_indx_offset, {.load_all_groups = true}),
   CP(EXEC_CS, cp_exec_cs, {.load_all_groups = true}),
   CP(EXEC_CS_INDIRECT, cp_exec_cs_indirect, {.load_all_groups = true}),

   /* for a5xx */
   CP(SET_RENDER_MODE, cp_set_render_mode),
   CP(COMPUTE_CHECKPOINT, cp_compute_checkpoint),
   CP(BLIT, cp_blit),
   CP(CONTEXT_REG_BUNCH, cp_context_reg_bunch),
   CP(DRAW_INDIRECT, cp_draw_indirect, {.load_all_groups = true}),
   CP(DRAW_INDX_INDIRECT, cp_draw_indx_indirect, {.load_all_groups = true}),
   CP(DRAW_INDIRECT_MULTI, cp_draw_indirect_multi, {.load_all_groups = true}),
   CP(SKIP_IB2_ENABLE_GLOBAL, cp_skip_ib2_enable_global),
   CP(SKIP_IB2_ENABLE_LOCAL, cp_skip_ib2_enable_local),

   /* for a6xx */
   CP(LOAD_STATE6_GEOM, cp_load_state),
   CP(LOAD_STATE6_FRAG, cp_load_state),
   CP(LOAD_STATE6, cp_load_state),
   CP(SET_MODE, cp_set_mode),
   CP(SET_MARKER, cp_set_marker),
   CP(REG_WRITE, cp_reg_write),
   CP(DRAW_AUTO, cp_draw_auto, {.load_all_groups = true}),

   CP(SET_CTXSWITCH_IB, cp_set_ctxswitch_ib),

   CP(START_BIN, cp_start_bin),

   CP(FIXED_STRIDE_DRAW_TABLE, cp_fixed_stride_draw_table),

   /* for a7xx */
   CP(THREAD_CONTROL, cp_set_thread_control),
   CP(CONTEXT_REG_BUNCH2, cp_context_reg_bunch2),
   CP(EVENT_WRITE7, cp_event_write),
};
2942
/* Handler for packets that have no dedicated decoder: does nothing */
static void
noop_fxn(uint32_t *dwords, uint32_t sizedwords, int level)
{
   (void)dwords;
   (void)sizedwords;
   (void)level;
}
2947
2948 static const struct type3_op *
get_type3_op(unsigned opc)2949 get_type3_op(unsigned opc)
2950 {
2951 static const struct type3_op dummy_op = {
2952 .fxn = noop_fxn,
2953 };
2954 const char *name = pktname(opc);
2955
2956 if (!name)
2957 return &dummy_op;
2958
2959 for (unsigned i = 0; i < ARRAY_SIZE(type3_op); i++)
2960 if (!strcmp(name, type3_op[i].name))
2961 return &type3_op[i];
2962
2963 return &dummy_op;
2964 }
2965
2966 void
dump_commands(uint32_t * dwords,uint32_t sizedwords,int level)2967 dump_commands(uint32_t *dwords, uint32_t sizedwords, int level)
2968 {
2969 int dwords_left = sizedwords;
2970 uint32_t count = 0; /* dword count including packet header */
2971 uint32_t val;
2972
2973 // assert(dwords);
2974 if (!dwords) {
2975 printf("NULL cmd buffer!\n");
2976 return;
2977 }
2978
2979 assert(ib < ARRAY_SIZE(draws));
2980 draws[ib] = 0;
2981
2982 while (dwords_left > 0) {
2983
2984 current_draw_count = draw_count;
2985
2986 /* hack, this looks like a -1 underflow, in some versions
2987 * when it tries to write zero registers via pkt0
2988 */
2989 // if ((dwords[0] >> 16) == 0xffff)
2990 // goto skip;
2991
2992 if (pkt_is_regwrite(dwords[0], &val, &count)) {
2993 assert(val < regcnt());
2994 printl(3, "%swrite %s (%04x)\n", levels[level + 1], regname(val, 1),
2995 val);
2996 dump_registers(val, dwords + 1, count - 1, level + 2);
2997 if (!quiet(3))
2998 dump_hex(dwords, count, level + 1);
2999 #if 0
3000 } else if (pkt_is_type1(dwords[0])) {
3001 count = 3;
3002 val = dwords[0] & 0xfff;
3003 printl(3, "%swrite %s\n", levels[level+1], regname(val, 1));
3004 dump_registers(val, dwords+1, 1, level+2);
3005 val = (dwords[0] >> 12) & 0xfff;
3006 printl(3, "%swrite %s\n", levels[level+1], regname(val, 1));
3007 dump_registers(val, dwords+2, 1, level+2);
3008 if (!quiet(3))
3009 dump_hex(dwords, count, level+1);
3010 #endif
3011 } else if (pkt_is_opcode(dwords[0], &val, &count)) {
3012 const struct type3_op *op = get_type3_op(val);
3013 if (op->options.load_all_groups)
3014 load_all_groups(level + 1);
3015 const char *name = pktname(val);
3016 if (!quiet(2)) {
3017 printf("\t%sopcode: %s%s%s (%02x) (%d dwords)\n", levels[level],
3018 rnn->vc->colors->bctarg, name, rnn->vc->colors->reset, val,
3019 count);
3020 }
3021 if (name) {
3022 /* special hack for two packets that decode the same way
3023 * on a6xx:
3024 */
3025 if (!strcmp(name, "CP_LOAD_STATE6_FRAG") ||
3026 !strcmp(name, "CP_LOAD_STATE6_GEOM"))
3027 name = "CP_LOAD_STATE6";
3028 dump_domain(dwords + 1, count - 1, level + 2, name);
3029 }
3030 op->fxn(dwords + 1, count - 1, level + 1);
3031 if (!quiet(2))
3032 dump_hex(dwords, count, level + 1);
3033 } else if (pkt_is_type2(dwords[0])) {
3034 printl(3, "%snop\n", levels[level + 1]);
3035 count = 1;
3036 } else {
3037 printf("bad type! %08x\n", dwords[0]);
3038 /* for 5xx+ we can do a passable job of looking for start of next valid
3039 * packet: */
3040 if (options->info->chip >= 5) {
3041 count = find_next_packet(dwords, dwords_left);
3042 } else {
3043 return;
3044 }
3045 }
3046
3047 dwords += count;
3048 dwords_left -= count;
3049 }
3050
3051 if (dwords_left < 0)
3052 printf("**** this ain't right!! dwords_left=%d\n", dwords_left);
3053 }
3054