xref: /aosp_15_r20/external/mesa3d/src/freedreno/decode/cffdec.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright © 2012 Rob Clark <[email protected]>
3  * SPDX-License-Identifier: MIT
4  */
5 
6 #include <assert.h>
7 #include <ctype.h>
8 #include <err.h>
9 #include <errno.h>
10 #include <fcntl.h>
11 #include <inttypes.h>
12 #include <signal.h>
13 #include <stdarg.h>
14 #include <stdbool.h>
15 #include <stdint.h>
16 #include <stdio.h>
17 #include <stdlib.h>
18 #include <string.h>
19 #include <unistd.h>
20 #include <sys/stat.h>
21 #include <sys/types.h>
22 #include <sys/wait.h>
23 
24 #include "freedreno_pm4.h"
25 
26 #include "buffers.h"
27 #include "cffdec.h"
28 #include "disasm.h"
29 #include "redump.h"
30 #include "rnnutil.h"
31 #include "script.h"
32 
33 /* ************************************************************************* */
34 /* originally based on kernel recovery dump code: */
35 
/* Decoder configuration; assigned by cffdec_init(). */
static const struct cffdec_options *options;

/* Vertex counter (no user is visible in this part of the file). */
static int vertices;

/*
 * Output-mode flags:
 *  - summary:    seeded from options->summary in cffdec_init(), consulted
 *                by quiet()
 *  - in_summary: gates the texture state dumps in reg_dump_tex_*_hi()
 *  - needs_wfi:  makes dump_registers() flag non-banked register writes
 */
static bool summary = false;
static bool in_summary = false;
static bool needs_wfi = false;
42 
43 static inline unsigned
regcnt(void)44 regcnt(void)
45 {
46    if (options->info->chip >= 5)
47       return 0x3ffff;
48    else
49       return 0x7fff;
50 }
51 
52 static int
is_64b(void)53 is_64b(void)
54 {
55    return options->info->chip >= 5;
56 }
57 
static int draws[4];

/* Index of the IB level currently being decoded (indexes ibs[]). */
static int ib;

/* Per-IB-level decode state. */
static struct {
   uint64_t base;
   uint32_t size; /* in dwords */
   /* A cmdstream generally consists of multiple IB calls to different
    * buffers, which are themselves often re-used for each tile.  The
    * triggered flag serves two purposes, helping to make clear which
    * part of the cmdstream is before vs. after the GPU hang:
    *
    * 1) if, in IB2, we are past the point within the IB2 buffer where
    *    the GPU hung, but IB1 is not yet past the point within its
    *    buffer where the GPU hung, then we know the hang happens on a
    *    future use of that IB2 buffer.
    *
    * 2) if we are in an IB1 or IB2 buffer that is not the one where
    *    the GPU hung, but we have already passed the trigger point at
    *    the same IB level, then we know we are past the point where
    *    the GPU hung.
    *
    * So this is a one-way switch, false->true.  And a higher-numbered
    * IB level isn't considered triggered unless the lower-numbered IB
    * level is.
    */
   bool triggered : 1;
   bool base_seen : 1;
} ibs[4];

static int draw_count;
static int current_draw_count;

/* Query mode: to handle symbolic register-name queries, parsing of the
 * query strings has to be deferred until the gpu_id is known and the
 * rnn db is loaded.
 */
static int *queryvals;
94 
95 static bool
quiet(int lvl)96 quiet(int lvl)
97 {
98    if ((options->draw_filter != -1) &&
99        (options->draw_filter != current_draw_count))
100       return true;
101    if ((lvl >= 3) && (summary || options->querystrs || options->script))
102       return true;
103    if ((lvl >= 2) && (options->querystrs || options->script))
104       return true;
105    return false;
106 }
107 
/* printf() wrapper that emits nothing when quiet(lvl) says to suppress. */
void
printl(int lvl, const char *fmt, ...)
{
   if (!quiet(lvl)) {
      va_list ap;

      va_start(ap, fmt);
      vprintf(fmt, ap);
      va_end(ap);
   }
}
118 
/*
 * Indentation prefix per nesting level: entry N holds N+1 tabs for
 * levels 0..8; the remaining six entries are the literal "x".
 */
static const char *levels[] = {
   /* 0..2 */ "\t", "\t\t", "\t\t\t",
   /* 3..5 */ "\t\t\t\t", "\t\t\t\t\t", "\t\t\t\t\t\t",
   /* 6..8 */ "\t\t\t\t\t\t\t", "\t\t\t\t\t\t\t\t", "\t\t\t\t\t\t\t\t\t",
   /* 9..14 */ "x", "x", "x", "x", "x", "x",
};
136 
/* State source selector passed to dump_tex_samp(). */
enum state_src_t {
   STATE_SRC_DIRECT,
   STATE_SRC_INDIRECT,
   STATE_SRC_BINDLESS,
};

/* Forward declarations: SDS (CP_SET_DRAW_STATE) helpers. */
static void load_all_groups(int level);
static void disable_all_groups(void);

/* Forward declarations: texture state dumpers. */
static void dump_tex_samp(uint32_t *texsamp, enum state_src_t src,
                          int num_unit, int level);
static void dump_tex_const(uint32_t *texsamp, int num_unit, int level);
150 
151 static bool
highlight_gpuaddr(uint64_t gpuaddr)152 highlight_gpuaddr(uint64_t gpuaddr)
153 {
154    if (!options->ibs[ib].base)
155       return false;
156 
157    if ((ib > 0) && options->ibs[ib - 1].base &&
158        !(ibs[ib - 1].triggered || ibs[ib - 1].base_seen))
159       return false;
160 
161    if (ibs[ib].base_seen)
162       return false;
163 
164    if (ibs[ib].triggered)
165       return options->color;
166 
167    if (options->ibs[ib].base != ibs[ib].base)
168       return false;
169 
170    uint64_t start = ibs[ib].base + 4 * (ibs[ib].size - options->ibs[ib].rem);
171    uint64_t end = ibs[ib].base + 4 * ibs[ib].size;
172 
173    bool triggered = (start <= gpuaddr) && (gpuaddr <= end);
174 
175    if (triggered && (ib < 2) && options->ibs[ib + 1].crash_found) {
176       ibs[ib].base_seen = true;
177       return false;
178    }
179 
180    ibs[ib].triggered |= triggered;
181 
182    if (triggered)
183       printf("ESTIMATED CRASH LOCATION!\n");
184 
185    return triggered & options->color;
186 }
187 
188 static void
dump_hex(uint32_t * dwords,uint32_t sizedwords,int level)189 dump_hex(uint32_t *dwords, uint32_t sizedwords, int level)
190 {
191    int i, j;
192    int lastzero = 1;
193 
194    if (quiet(2))
195       return;
196 
197    bool highlight = highlight_gpuaddr(gpuaddr(dwords) + 4 * sizedwords - 1);
198 
199    for (i = 0; i < sizedwords; i += 8) {
200       int zero = 1;
201 
202       /* always show first row: */
203       if (i == 0)
204          zero = 0;
205 
206       for (j = 0; (j < 8) && (i + j < sizedwords) && zero; j++)
207          if (dwords[i + j])
208             zero = 0;
209 
210       if (zero && !lastzero)
211          printf("*\n");
212 
213       lastzero = zero;
214 
215       if (zero)
216          continue;
217 
218       uint64_t addr = gpuaddr(&dwords[i]);
219 
220       if (highlight)
221          printf("\x1b[0;1;31m");
222 
223       if (is_64b()) {
224          printf("%016" PRIx64 ":%s", addr, levels[level]);
225       } else {
226          printf("%08x:%s", (uint32_t)addr, levels[level]);
227       }
228 
229       if (highlight)
230          printf("\x1b[0m");
231 
232       printf("%04x:", i * 4);
233 
234       for (j = 0; (j < 8) && (i + j < sizedwords); j++) {
235          printf(" %08x", dwords[i + j]);
236       }
237 
238       printf("\n");
239    }
240 }
241 
242 static void
dump_float(float * dwords,uint32_t sizedwords,int level)243 dump_float(float *dwords, uint32_t sizedwords, int level)
244 {
245    int i;
246    for (i = 0; i < sizedwords; i++) {
247       if ((i % 8) == 0) {
248          if (is_64b()) {
249             printf("%016" PRIx64 ":%s", gpuaddr(dwords), levels[level]);
250          } else {
251             printf("%08x:%s", (uint32_t)gpuaddr(dwords), levels[level]);
252          }
253       } else {
254          printf(" ");
255       }
256       printf("%8f", *(dwords++));
257       if ((i % 8) == 7)
258          printf("\n");
259    }
260    if (i % 8)
261       printf("\n");
262 }
263 
/* I believe the surface format is in the low bits:
#define RB_COLOR_INFO__COLOR_FORMAT_MASK                   0x0000000fL
Comments in sys2gmem_tex_const indicate that the address is [31:12], but
it looks like at least some of the bits above the format have a different
meaning..

Splits `dword` into an address part (bits outside `mask`) and a flags
part (bits inside `mask`).
*/
static void
parse_dword_addr(uint32_t dword, uint32_t *gpuaddr, uint32_t *flags,
                 uint32_t mask)
{
   assert(!is_64b()); /* this is only used on a2xx */

   const uint32_t addr_bits = dword & ~mask;
   const uint32_t flag_bits = dword & mask;

   *gpuaddr = addr_bits;
   *flags = flag_bits;
}
277 
278 static uint32_t type0_reg_vals[0x3ffff + 1];
279 static uint8_t type0_reg_rewritten[sizeof(type0_reg_vals) /
280                                    8]; /* written since last draw */
281 static uint8_t type0_reg_written[sizeof(type0_reg_vals) / 8];
282 static uint32_t lastvals[ARRAY_SIZE(type0_reg_vals)];
283 
284 static bool
reg_rewritten(uint32_t regbase)285 reg_rewritten(uint32_t regbase)
286 {
287    return !!(type0_reg_rewritten[regbase / 8] & (1 << (regbase % 8)));
288 }
289 
290 bool
reg_written(uint32_t regbase)291 reg_written(uint32_t regbase)
292 {
293    return !!(type0_reg_written[regbase / 8] & (1 << (regbase % 8)));
294 }
295 
296 static void
clear_rewritten(void)297 clear_rewritten(void)
298 {
299    memset(type0_reg_rewritten, 0, sizeof(type0_reg_rewritten));
300 }
301 
302 static void
clear_written(void)303 clear_written(void)
304 {
305    memset(type0_reg_written, 0, sizeof(type0_reg_written));
306    clear_rewritten();
307 }
308 
309 uint32_t
reg_lastval(uint32_t regbase)310 reg_lastval(uint32_t regbase)
311 {
312    return lastvals[regbase];
313 }
314 
315 static void
clear_lastvals(void)316 clear_lastvals(void)
317 {
318    memset(lastvals, 0, sizeof(lastvals));
319 }
320 
321 uint32_t
reg_val(uint32_t regbase)322 reg_val(uint32_t regbase)
323 {
324    return type0_reg_vals[regbase];
325 }
326 
327 void
reg_set(uint32_t regbase,uint32_t val)328 reg_set(uint32_t regbase, uint32_t val)
329 {
330    assert(regbase < regcnt());
331    type0_reg_vals[regbase] = val;
332    type0_reg_written[regbase / 8] |= (1 << (regbase % 8));
333    type0_reg_rewritten[regbase / 8] |= (1 << (regbase % 8));
334 }
335 
336 static void
reg_dump_scratch(const char * name,uint32_t dword,int level)337 reg_dump_scratch(const char *name, uint32_t dword, int level)
338 {
339    unsigned r;
340 
341    if (quiet(3))
342       return;
343 
344    r = regbase("CP_SCRATCH[0].REG");
345 
346    // if not, try old a2xx/a3xx version:
347    if (!r)
348       r = regbase("CP_SCRATCH_REG0");
349 
350    if (!r)
351       return;
352 
353    printf("%s:%u,%u,%u,%u\n", levels[level], reg_val(r + 4), reg_val(r + 5),
354           reg_val(r + 6), reg_val(r + 7));
355 }
356 
/*
 * Hex-dump `sizedwords` dwords starting at `gpuaddr`, one indentation
 * level deeper than `level`.  Skipped when quiet(quietlvl) is in effect
 * or when hostptr() has no mapping for the address.
 */
static void
dump_gpuaddr_size(uint64_t gpuaddr, int level, int sizedwords, int quietlvl)
{
   if (quiet(quietlvl))
      return;

   void *host = hostptr(gpuaddr);
   if (!host)
      return;

   dump_hex(host, sizedwords, level + 1);
}
370 
/* Hex-dump 64 dwords at `gpuaddr`, at quiet level 3. */
static void
dump_gpuaddr(uint64_t gpuaddr, int level)
{
   const int sizedwords = 64;
   const int quietlvl = 3;

   dump_gpuaddr_size(gpuaddr, level, sizedwords, quietlvl);
}
376 
/* Register handler: treat the 32-bit register value as a GPU address
 * and dump the memory it points at.  `name` is not used.
 */
static void
reg_dump_gpuaddr(const char *name, uint32_t dword, int level)
{
   const uint64_t addr = dword;

   dump_gpuaddr(addr, level);
}
382 
/* Low half of a 64-bit address, remembered until the matching high-half
 * register handler runs.
 */
uint32_t gpuaddr_lo;

/* Register handler: latch `dword` as the low 32 address bits.  `name`
 * and `level` are not used.
 */
static void
reg_gpuaddr_lo(const char *name, uint32_t dword, int level)
{
   (void)name;
   (void)level;
   gpuaddr_lo = dword;
}
389 
390 static void
reg_dump_gpuaddr_hi(const char * name,uint32_t dword,int level)391 reg_dump_gpuaddr_hi(const char *name, uint32_t dword, int level)
392 {
393    dump_gpuaddr(gpuaddr_lo | (((uint64_t)dword) << 32), level);
394 }
395 
/* 64-bit register handler: dump the memory at address `qword`.  `name`
 * is not used.
 */
static void
reg_dump_gpuaddr64(const char *name, uint64_t qword, int level)
{
   const uint64_t addr = qword;

   dump_gpuaddr(addr, level);
}
401 
402 static void
dump_shader(const char * ext,void * buf,int bufsz)403 dump_shader(const char *ext, void *buf, int bufsz)
404 {
405    if (options->dump_shaders) {
406       static int n = 0;
407       char filename[16];
408       int fd;
409       sprintf(filename, "%04d.%s", n++, ext);
410       fd = open(filename, O_WRONLY | O_TRUNC | O_CREAT, 0644);
411       if (fd != -1) {
412          write(fd, buf, bufsz);
413          close(fd);
414       }
415    }
416 }
417 
418 static void
disasm_gpuaddr(const char * name,uint64_t gpuaddr,int level)419 disasm_gpuaddr(const char *name, uint64_t gpuaddr, int level)
420 {
421    void *buf;
422 
423    gpuaddr &= 0xfffffffffffffff0;
424 
425    if (quiet(3))
426       return;
427 
428    buf = hostptr(gpuaddr);
429    if (buf) {
430       uint32_t sizedwords = hostlen(gpuaddr) / 4;
431       const char *ext;
432 
433       dump_hex(buf, MIN2(64, sizedwords), level + 1);
434       try_disasm_a3xx(buf, sizedwords, level + 2, stdout, options->info->chip * 100);
435 
436       /* this is a bit ugly way, but oh well.. */
437       if (strstr(name, "SP_VS_OBJ")) {
438          ext = "vo3";
439       } else if (strstr(name, "SP_FS_OBJ")) {
440          ext = "fo3";
441       } else if (strstr(name, "SP_GS_OBJ")) {
442          ext = "go3";
443       } else if (strstr(name, "SP_CS_OBJ")) {
444          ext = "co3";
445       } else {
446          ext = NULL;
447       }
448 
449       if (ext)
450          dump_shader(ext, buf, sizedwords * 4);
451    }
452 }
453 
/* Register handler: disassemble the shader at the 32-bit address held
 * in `dword`.
 */
static void
reg_disasm_gpuaddr(const char *name, uint32_t dword, int level)
{
   const uint64_t addr = dword;

   disasm_gpuaddr(name, addr, level);
}
459 
460 static void
reg_disasm_gpuaddr_hi(const char * name,uint32_t dword,int level)461 reg_disasm_gpuaddr_hi(const char *name, uint32_t dword, int level)
462 {
463    disasm_gpuaddr(name, gpuaddr_lo | (((uint64_t)dword) << 32), level);
464 }
465 
/* 64-bit register handler: disassemble the shader at address `qword`. */
static void
reg_disasm_gpuaddr64(const char *name, uint64_t qword, int level)
{
   const uint64_t addr = qword;

   disasm_gpuaddr(name, addr, level);
}
471 
/* Find the value of the TEX_COUNT register that corresponds to the named
 * TEX_SAMP/TEX_CONST reg.
 *
 * The count register name is formed by keeping everything in `name` up
 * to the first "CONST" (or, failing that, "SAMP") and appending "COUNT".
 * Returns 0 when `name` contains neither marker or when the derived
 * register name is unknown to the register database (regbase() == 0),
 * so that an unrelated register is never read by mistake.
 *
 * Note, this kinda assumes an equal # of samplers and textures, but not
 * really sure if there is a much better option.  I suppose on a6xx we
 * could instead decode the bitfields in SP_xS_CONFIG
 */
static int
get_tex_count(const char *name)
{
   static const char suffix[] = "COUNT";
   const char *marker;

   marker = strstr(name, "CONST");
   if (!marker)
      marker = strstr(name, "SAMP");
   if (!marker)
      return 0;

   /* prefix + "COUNT" + NUL, sized exactly */
   const size_t prefix_len = (size_t)(marker - name);
   char count_reg[prefix_len + sizeof(suffix)];

   memcpy(count_reg, name, prefix_len);
   memcpy(count_reg + prefix_len, suffix, sizeof(suffix));

   const uint32_t count_base = regbase(count_reg);
   if (!count_base)
      return 0;

   return reg_val(count_base);
}
497 
498 static void
reg_dump_tex_samp_hi(const char * name,uint32_t dword,int level)499 reg_dump_tex_samp_hi(const char *name, uint32_t dword, int level)
500 {
501    if (!in_summary)
502       return;
503 
504    int num_unit = get_tex_count(name);
505    uint64_t gpuaddr = gpuaddr_lo | (((uint64_t)dword) << 32);
506    void *buf = hostptr(gpuaddr);
507 
508    if (!buf)
509       return;
510 
511    dump_tex_samp(buf, STATE_SRC_DIRECT, num_unit, level + 1);
512 }
513 
514 static void
reg_dump_tex_const_hi(const char * name,uint32_t dword,int level)515 reg_dump_tex_const_hi(const char *name, uint32_t dword, int level)
516 {
517    if (!in_summary)
518       return;
519 
520    int num_unit = get_tex_count(name);
521    uint64_t gpuaddr = gpuaddr_lo | (((uint64_t)dword) << 32);
522    void *buf = hostptr(gpuaddr);
523 
524    if (!buf)
525       return;
526 
527    dump_tex_const(buf, num_unit, level + 1);
528 }
529 
530 /*
531  * Registers with special handling (rnndec_decode() handles rest):
532  */
533 #define REG(x, fxn)    { #x, fxn }
534 #define REG64(x, fxn)  { #x, .fxn64 = fxn, .is_reg64 = true }
535 static struct {
536    const char *regname;
537    void (*fxn)(const char *name, uint32_t dword, int level);
538    void (*fxn64)(const char *name, uint64_t qword, int level);
539    uint32_t regbase;
540    bool is_reg64;
541 } reg_a2xx[] = {
542       REG(CP_SCRATCH_REG0, reg_dump_scratch),
543       REG(CP_SCRATCH_REG1, reg_dump_scratch),
544       REG(CP_SCRATCH_REG2, reg_dump_scratch),
545       REG(CP_SCRATCH_REG3, reg_dump_scratch),
546       REG(CP_SCRATCH_REG4, reg_dump_scratch),
547       REG(CP_SCRATCH_REG5, reg_dump_scratch),
548       REG(CP_SCRATCH_REG6, reg_dump_scratch),
549       REG(CP_SCRATCH_REG7, reg_dump_scratch),
550       {NULL},
551 }, reg_a3xx[] = {
552       REG(CP_SCRATCH_REG0, reg_dump_scratch),
553       REG(CP_SCRATCH_REG1, reg_dump_scratch),
554       REG(CP_SCRATCH_REG2, reg_dump_scratch),
555       REG(CP_SCRATCH_REG3, reg_dump_scratch),
556       REG(CP_SCRATCH_REG4, reg_dump_scratch),
557       REG(CP_SCRATCH_REG5, reg_dump_scratch),
558       REG(CP_SCRATCH_REG6, reg_dump_scratch),
559       REG(CP_SCRATCH_REG7, reg_dump_scratch),
560       REG(VSC_SIZE_ADDRESS, reg_dump_gpuaddr),
561       REG(SP_VS_PVT_MEM_ADDR_REG, reg_dump_gpuaddr),
562       REG(SP_FS_PVT_MEM_ADDR_REG, reg_dump_gpuaddr),
563       REG(SP_VS_OBJ_START_REG, reg_disasm_gpuaddr),
564       REG(SP_FS_OBJ_START_REG, reg_disasm_gpuaddr),
565       REG(TPL1_TP_FS_BORDER_COLOR_BASE_ADDR, reg_dump_gpuaddr),
566       {NULL},
567 }, reg_a4xx[] = {
568       REG(CP_SCRATCH[0].REG, reg_dump_scratch),
569       REG(CP_SCRATCH[0x1].REG, reg_dump_scratch),
570       REG(CP_SCRATCH[0x2].REG, reg_dump_scratch),
571       REG(CP_SCRATCH[0x3].REG, reg_dump_scratch),
572       REG(CP_SCRATCH[0x4].REG, reg_dump_scratch),
573       REG(CP_SCRATCH[0x5].REG, reg_dump_scratch),
574       REG(CP_SCRATCH[0x6].REG, reg_dump_scratch),
575       REG(CP_SCRATCH[0x7].REG, reg_dump_scratch),
576       REG(SP_VS_PVT_MEM_ADDR, reg_dump_gpuaddr),
577       REG(SP_FS_PVT_MEM_ADDR, reg_dump_gpuaddr),
578       REG(SP_GS_PVT_MEM_ADDR, reg_dump_gpuaddr),
579       REG(SP_HS_PVT_MEM_ADDR, reg_dump_gpuaddr),
580       REG(SP_DS_PVT_MEM_ADDR, reg_dump_gpuaddr),
581       REG(SP_CS_PVT_MEM_ADDR, reg_dump_gpuaddr),
582       REG(SP_VS_OBJ_START, reg_disasm_gpuaddr),
583       REG(SP_FS_OBJ_START, reg_disasm_gpuaddr),
584       REG(SP_GS_OBJ_START, reg_disasm_gpuaddr),
585       REG(SP_HS_OBJ_START, reg_disasm_gpuaddr),
586       REG(SP_DS_OBJ_START, reg_disasm_gpuaddr),
587       REG(SP_CS_OBJ_START, reg_disasm_gpuaddr),
588       REG(TPL1_TP_VS_BORDER_COLOR_BASE_ADDR, reg_dump_gpuaddr),
589       REG(TPL1_TP_HS_BORDER_COLOR_BASE_ADDR, reg_dump_gpuaddr),
590       REG(TPL1_TP_DS_BORDER_COLOR_BASE_ADDR, reg_dump_gpuaddr),
591       REG(TPL1_TP_GS_BORDER_COLOR_BASE_ADDR, reg_dump_gpuaddr),
592       REG(TPL1_TP_FS_BORDER_COLOR_BASE_ADDR, reg_dump_gpuaddr),
593       {NULL},
594 }, reg_a5xx[] = {
595       REG(CP_SCRATCH[0x4].REG, reg_dump_scratch),
596       REG(CP_SCRATCH[0x5].REG, reg_dump_scratch),
597       REG(CP_SCRATCH[0x6].REG, reg_dump_scratch),
598       REG(CP_SCRATCH[0x7].REG, reg_dump_scratch),
599       REG(SP_VS_OBJ_START_LO, reg_gpuaddr_lo),
600       REG(SP_VS_OBJ_START_HI, reg_disasm_gpuaddr_hi),
601       REG(SP_HS_OBJ_START_LO, reg_gpuaddr_lo),
602       REG(SP_HS_OBJ_START_HI, reg_disasm_gpuaddr_hi),
603       REG(SP_DS_OBJ_START_LO, reg_gpuaddr_lo),
604       REG(SP_DS_OBJ_START_HI, reg_disasm_gpuaddr_hi),
605       REG(SP_GS_OBJ_START_LO, reg_gpuaddr_lo),
606       REG(SP_GS_OBJ_START_HI, reg_disasm_gpuaddr_hi),
607       REG(SP_FS_OBJ_START_LO, reg_gpuaddr_lo),
608       REG(SP_FS_OBJ_START_HI, reg_disasm_gpuaddr_hi),
609       REG(SP_CS_OBJ_START_LO, reg_gpuaddr_lo),
610       REG(SP_CS_OBJ_START_HI, reg_disasm_gpuaddr_hi),
611       REG(TPL1_VS_TEX_CONST_LO, reg_gpuaddr_lo),
612       REG(TPL1_VS_TEX_CONST_HI, reg_dump_tex_const_hi),
613       REG(TPL1_VS_TEX_SAMP_LO, reg_gpuaddr_lo),
614       REG(TPL1_VS_TEX_SAMP_HI, reg_dump_tex_samp_hi),
615       REG(TPL1_HS_TEX_CONST_LO, reg_gpuaddr_lo),
616       REG(TPL1_HS_TEX_CONST_HI, reg_dump_tex_const_hi),
617       REG(TPL1_HS_TEX_SAMP_LO, reg_gpuaddr_lo),
618       REG(TPL1_HS_TEX_SAMP_HI, reg_dump_tex_samp_hi),
619       REG(TPL1_DS_TEX_CONST_LO, reg_gpuaddr_lo),
620       REG(TPL1_DS_TEX_CONST_HI, reg_dump_tex_const_hi),
621       REG(TPL1_DS_TEX_SAMP_LO, reg_gpuaddr_lo),
622       REG(TPL1_DS_TEX_SAMP_HI, reg_dump_tex_samp_hi),
623       REG(TPL1_GS_TEX_CONST_LO, reg_gpuaddr_lo),
624       REG(TPL1_GS_TEX_CONST_HI, reg_dump_tex_const_hi),
625       REG(TPL1_GS_TEX_SAMP_LO, reg_gpuaddr_lo),
626       REG(TPL1_GS_TEX_SAMP_HI, reg_dump_tex_samp_hi),
627       REG(TPL1_FS_TEX_CONST_LO, reg_gpuaddr_lo),
628       REG(TPL1_FS_TEX_CONST_HI, reg_dump_tex_const_hi),
629       REG(TPL1_FS_TEX_SAMP_LO, reg_gpuaddr_lo),
630       REG(TPL1_FS_TEX_SAMP_HI, reg_dump_tex_samp_hi),
631       REG(TPL1_CS_TEX_CONST_LO, reg_gpuaddr_lo),
632       REG(TPL1_CS_TEX_CONST_HI, reg_dump_tex_const_hi),
633       REG(TPL1_CS_TEX_SAMP_LO, reg_gpuaddr_lo),
634       REG(TPL1_CS_TEX_SAMP_HI, reg_dump_tex_samp_hi),
635       REG(TPL1_TP_BORDER_COLOR_BASE_ADDR_LO, reg_gpuaddr_lo),
636       REG(TPL1_TP_BORDER_COLOR_BASE_ADDR_HI, reg_dump_gpuaddr_hi),
637 //      REG(RB_MRT_FLAG_BUFFER[0].ADDR_LO, reg_gpuaddr_lo),
638 //      REG(RB_MRT_FLAG_BUFFER[0].ADDR_HI, reg_dump_gpuaddr_hi),
639 //      REG(RB_MRT_FLAG_BUFFER[1].ADDR_LO, reg_gpuaddr_lo),
640 //      REG(RB_MRT_FLAG_BUFFER[1].ADDR_HI, reg_dump_gpuaddr_hi),
641 //      REG(RB_MRT_FLAG_BUFFER[2].ADDR_LO, reg_gpuaddr_lo),
642 //      REG(RB_MRT_FLAG_BUFFER[2].ADDR_HI, reg_dump_gpuaddr_hi),
643 //      REG(RB_MRT_FLAG_BUFFER[3].ADDR_LO, reg_gpuaddr_lo),
644 //      REG(RB_MRT_FLAG_BUFFER[3].ADDR_HI, reg_dump_gpuaddr_hi),
645 //      REG(RB_MRT_FLAG_BUFFER[4].ADDR_LO, reg_gpuaddr_lo),
646 //      REG(RB_MRT_FLAG_BUFFER[4].ADDR_HI, reg_dump_gpuaddr_hi),
647 //      REG(RB_MRT_FLAG_BUFFER[5].ADDR_LO, reg_gpuaddr_lo),
648 //      REG(RB_MRT_FLAG_BUFFER[5].ADDR_HI, reg_dump_gpuaddr_hi),
649 //      REG(RB_MRT_FLAG_BUFFER[6].ADDR_LO, reg_gpuaddr_lo),
650 //      REG(RB_MRT_FLAG_BUFFER[6].ADDR_HI, reg_dump_gpuaddr_hi),
651 //      REG(RB_MRT_FLAG_BUFFER[7].ADDR_LO, reg_gpuaddr_lo),
652 //      REG(RB_MRT_FLAG_BUFFER[7].ADDR_HI, reg_dump_gpuaddr_hi),
653 //      REG(RB_BLIT_FLAG_DST_LO, reg_gpuaddr_lo),
654 //      REG(RB_BLIT_FLAG_DST_HI, reg_dump_gpuaddr_hi),
655 //      REG(RB_MRT[0].BASE_LO, reg_gpuaddr_lo),
656 //      REG(RB_MRT[0].BASE_HI, reg_dump_gpuaddr_hi),
657 //      REG(RB_DEPTH_BUFFER_BASE_LO, reg_gpuaddr_lo),
658 //      REG(RB_DEPTH_BUFFER_BASE_HI, reg_dump_gpuaddr_hi),
659 //      REG(RB_DEPTH_FLAG_BUFFER_BASE_LO, reg_gpuaddr_lo),
660 //      REG(RB_DEPTH_FLAG_BUFFER_BASE_HI, reg_dump_gpuaddr_hi),
661 //      REG(RB_BLIT_DST_LO, reg_gpuaddr_lo),
662 //      REG(RB_BLIT_DST_HI, reg_dump_gpuaddr_hi),
663 
664 //      REG(RB_2D_SRC_LO, reg_gpuaddr_lo),
665 //      REG(RB_2D_SRC_HI, reg_dump_gpuaddr_hi),
666 //      REG(RB_2D_SRC_FLAGS_LO, reg_gpuaddr_lo),
667 //      REG(RB_2D_SRC_FLAGS_HI, reg_dump_gpuaddr_hi),
668 //      REG(RB_2D_DST_LO, reg_gpuaddr_lo),
669 //      REG(RB_2D_DST_HI, reg_dump_gpuaddr_hi),
670 //      REG(RB_2D_DST_FLAGS_LO, reg_gpuaddr_lo),
671 //      REG(RB_2D_DST_FLAGS_HI, reg_dump_gpuaddr_hi),
672 
673       {NULL},
674 }, reg_a6xx[] = {
675       REG(CP_SCRATCH[0x4].REG, reg_dump_scratch),
676       REG(CP_SCRATCH[0x5].REG, reg_dump_scratch),
677       REG(CP_SCRATCH[0x6].REG, reg_dump_scratch),
678       REG(CP_SCRATCH[0x7].REG, reg_dump_scratch),
679 
680       REG64(SP_VS_OBJ_START, reg_disasm_gpuaddr64),
681       REG64(SP_HS_OBJ_START, reg_disasm_gpuaddr64),
682       REG64(SP_DS_OBJ_START, reg_disasm_gpuaddr64),
683       REG64(SP_GS_OBJ_START, reg_disasm_gpuaddr64),
684       REG64(SP_FS_OBJ_START, reg_disasm_gpuaddr64),
685       REG64(SP_CS_OBJ_START, reg_disasm_gpuaddr64),
686 
687       REG64(SP_VS_TEX_CONST, reg_dump_gpuaddr64),
688       REG64(SP_VS_TEX_SAMP, reg_dump_gpuaddr64),
689       REG64(SP_HS_TEX_CONST, reg_dump_gpuaddr64),
690       REG64(SP_HS_TEX_SAMP, reg_dump_gpuaddr64),
691       REG64(SP_DS_TEX_CONST, reg_dump_gpuaddr64),
692       REG64(SP_DS_TEX_SAMP, reg_dump_gpuaddr64),
693       REG64(SP_GS_TEX_CONST, reg_dump_gpuaddr64),
694       REG64(SP_GS_TEX_SAMP, reg_dump_gpuaddr64),
695       REG64(SP_FS_TEX_CONST, reg_dump_gpuaddr64),
696       REG64(SP_FS_TEX_SAMP, reg_dump_gpuaddr64),
697       REG64(SP_CS_TEX_CONST, reg_dump_gpuaddr64),
698       REG64(SP_CS_TEX_SAMP, reg_dump_gpuaddr64),
699 
700       {NULL},
701 }, reg_a7xx[] = {
702       REG64(SP_VS_OBJ_START, reg_disasm_gpuaddr64),
703       REG64(SP_HS_OBJ_START, reg_disasm_gpuaddr64),
704       REG64(SP_DS_OBJ_START, reg_disasm_gpuaddr64),
705       REG64(SP_GS_OBJ_START, reg_disasm_gpuaddr64),
706       REG64(SP_FS_OBJ_START, reg_disasm_gpuaddr64),
707       REG64(SP_CS_OBJ_START, reg_disasm_gpuaddr64),
708 
709       {NULL},
710 }, *type0_reg;
711 
712 static struct rnn *rnn;
713 
714 static void
init_rnn(const char * gpuname)715 init_rnn(const char *gpuname)
716 {
717    rnn = rnn_new(!options->color);
718 
719    rnn_load(rnn, gpuname);
720 
721    if (options->querystrs) {
722       int i;
723       queryvals = calloc(options->nquery, sizeof(queryvals[0]));
724 
725       for (i = 0; i < options->nquery; i++) {
726          int val = strtol(options->querystrs[i], NULL, 0);
727 
728          if (val == 0)
729             val = regbase(options->querystrs[i]);
730 
731          queryvals[i] = val;
732          printf("querystr: %s -> 0x%x\n", options->querystrs[i], queryvals[i]);
733       }
734    }
735 
736    for (unsigned idx = 0; type0_reg[idx].regname; idx++) {
737       type0_reg[idx].regbase = regbase(type0_reg[idx].regname);
738       if (!type0_reg[idx].regbase) {
739          printf("invalid register name: %s\n", type0_reg[idx].regname);
740          exit(1);
741       }
742    }
743 }
744 
745 void
reset_regs(void)746 reset_regs(void)
747 {
748    clear_written();
749    clear_lastvals();
750    memset(&ibs, 0, sizeof(ibs));
751 }
752 
753 void
cffdec_init(const struct cffdec_options * _options)754 cffdec_init(const struct cffdec_options *_options)
755 {
756    options = _options;
757    summary = options->summary;
758 
759    /* in case we're decoding multiple files: */
760    free(queryvals);
761    reset_regs();
762    draw_count = 0;
763 
764    if (!options->info)
765       return;
766 
767    switch (options->info->chip) {
768    case 2:
769       type0_reg = reg_a2xx;
770       init_rnn("a2xx");
771       break;
772    case 3:
773       type0_reg = reg_a3xx;
774       init_rnn("a3xx");
775       break;
776    case 4:
777       type0_reg = reg_a4xx;
778       init_rnn("a4xx");
779       break;
780    case 5:
781       type0_reg = reg_a5xx;
782       init_rnn("a5xx");
783       break;
784    case 6:
785       type0_reg = reg_a6xx;
786       init_rnn("a6xx");
787       break;
788    case 7:
789       type0_reg = reg_a7xx;
790       init_rnn("a7xx");
791       break;
792    default:
793       errx(-1, "unsupported generation: %u", options->info->chip);
794    }
795 }
796 
797 const char *
pktname(unsigned opc)798 pktname(unsigned opc)
799 {
800    return rnn_enumname(rnn, "adreno_pm4_type3_packets", opc);
801 }
802 
803 const char *
regname(uint32_t regbase,int color)804 regname(uint32_t regbase, int color)
805 {
806    return rnn_regname(rnn, regbase, color);
807 }
808 
809 uint32_t
regbase(const char * name)810 regbase(const char *name)
811 {
812    return rnn_regbase(rnn, name);
813 }
814 
/*
 * Return non-zero if the name of the register at offset `regbase` ends
 * with `suffix`, zero otherwise.
 *
 * Only the tail of the name is compared, so a suffix that also occurs
 * earlier in the name (e.g. "FOO_HI_BAR_HI" vs "_HI") is still
 * recognised.  A register without a name never matches.
 */
static int
endswith(uint32_t regbase, const char *suffix)
{
   const char *name = regname(regbase, 0);

   /* presumably regname() can fail for unknown offsets; be defensive */
   if (!name)
      return 0;

   const size_t name_len = strlen(name);
   const size_t suffix_len = strlen(suffix);

   if (suffix_len > name_len)
      return 0;

   return memcmp(name + (name_len - suffix_len), suffix, suffix_len) == 0;
}
824 
825 struct regacc
regacc(struct rnn * r)826 regacc(struct rnn *r)
827 {
828    if (!r)
829       r = rnn;
830 
831    return (struct regacc){ .rnn = r };
832 }
833 
834 /* returns true if the complete reg value has been accumulated: */
835 bool
regacc_push(struct regacc * r,uint32_t regbase,uint32_t dword)836 regacc_push(struct regacc *r, uint32_t regbase, uint32_t dword)
837 {
838    if (r->has_dword_lo) {
839       /* Work around kernel devcore dumps which accidentially miss half of a 64b reg
840        * see: https://patchwork.freedesktop.org/series/112302/
841        */
842       if (regbase != r->regbase + 1) {
843          printf("WARNING: 64b discontinuity (%x, expected %x)\n", regbase, r->regbase + 1);
844          r->has_dword_lo = false;
845          return true;
846       }
847 
848       r->value |= ((uint64_t)dword) << 32;
849       r->has_dword_lo = false;
850 
851       return true;
852    }
853 
854    r->regbase = regbase;
855    r->value = dword;
856 
857    struct rnndecaddrinfo *info = rnn_reginfo(r->rnn, regbase);
858    r->has_dword_lo = (info->width == 64);
859 
860    /* Workaround for kernel devcore dump bugs: */
861    if ((info->width == 64) && endswith(regbase, "_HI")) {
862       printf("WARNING: 64b discontinuity (no _LO dword for %x)\n", regbase);
863       r->has_dword_lo = false;
864    }
865 
866    rnn_reginfo_free(info);
867 
868    return !r->has_dword_lo;
869 }
870 
871 void
dump_register_val(struct regacc * r,int level)872 dump_register_val(struct regacc *r, int level)
873 {
874    struct rnndecaddrinfo *info = rnn_reginfo(rnn, r->regbase);
875 
876    if (info && info->typeinfo) {
877       uint64_t gpuaddr = 0;
878       char *decoded = rnndec_decodeval(rnn->vc, info->typeinfo, r->value);
879       printf("%s%s: %s", levels[level], info->name, decoded);
880 
881       /* Try and figure out if we are looking at a gpuaddr.. this
882        * might be useful for other gen's too, but at least a5xx has
883        * the _HI/_LO suffix we can look for.  Maybe a better approach
884        * would be some special annotation in the xml..
885        * for a6xx use "address" and "waddress" types
886        */
887       if (options->info->chip >= 6) {
888          if (!strcmp(info->typeinfo->name, "address") ||
889              !strcmp(info->typeinfo->name, "waddress")) {
890             gpuaddr = r->value;
891          }
892       } else if (options->info->chip >= 5) {
893          /* TODO we shouldn't rely on reg_val() since reg_set() might
894           * not have been called yet for the other half of the 64b reg.
895           * We can remove this hack once a5xx.xml is converted to reg64
896           * and address/waddess.
897           */
898          if (endswith(r->regbase, "_HI") && endswith(r->regbase - 1, "_LO")) {
899             gpuaddr = (r->value << 32) | reg_val(r->regbase - 1);
900          } else if (endswith(r->regbase, "_LO") && endswith(r->regbase + 1, "_HI")) {
901             gpuaddr = (((uint64_t)reg_val(r->regbase + 1)) << 32) | r->value;
902          }
903       }
904 
905       if (gpuaddr && hostptr(gpuaddr)) {
906          printf("\t\tbase=%" PRIx64 ", offset=%" PRIu64 ", size=%u",
907                 gpubaseaddr(gpuaddr), gpuaddr - gpubaseaddr(gpuaddr),
908                 hostlen(gpubaseaddr(gpuaddr)));
909       }
910 
911       printf("\n");
912 
913       free(decoded);
914    } else if (info) {
915       printf("%s%s: %08"PRIx64"\n", levels[level], info->name, r->value);
916    } else {
917       printf("%s<%04x>: %08"PRIx64"\n", levels[level], r->regbase, r->value);
918    }
919 
920    rnn_reginfo_free(info);
921 }
922 
923 static void
dump_register(struct regacc * r,int level)924 dump_register(struct regacc *r, int level)
925 {
926    if (!quiet(3)) {
927       dump_register_val(r, level);
928    }
929 
930    for (unsigned idx = 0; type0_reg[idx].regname; idx++) {
931       if (type0_reg[idx].regbase == r->regbase) {
932          if (type0_reg[idx].is_reg64) {
933             type0_reg[idx].fxn64(type0_reg[idx].regname, r->value, level);
934          } else {
935             type0_reg[idx].fxn(type0_reg[idx].regname, (uint32_t)r->value, level);
936          }
937          break;
938       }
939    }
940 }
941 
/* True for register offsets in the range [0x2000, 0x2400). */
static bool
is_banked_reg(uint32_t regbase)
{
   if (regbase < 0x2000)
      return false;

   return regbase < 0x2400;
}
947 
948 static void
dump_registers(uint32_t regbase,uint32_t * dwords,uint32_t sizedwords,int level)949 dump_registers(uint32_t regbase, uint32_t *dwords, uint32_t sizedwords,
950                int level)
951 {
952    struct regacc r = regacc(NULL);
953 
954    while (sizedwords--) {
955       int last_summary = summary;
956 
957       /* access to non-banked registers needs a WFI:
958        * TODO banked register range for a2xx??
959        */
960       if (needs_wfi && !is_banked_reg(regbase))
961          printl(2, "NEEDS WFI: %s (%x)\n", regname(regbase, 1), regbase);
962 
963       reg_set(regbase, *dwords);
964       if (regacc_push(&r, regbase, *dwords))
965          dump_register(&r, level);
966       regbase++;
967       dwords++;
968       summary = last_summary;
969    }
970 }
971 
972 static void
dump_domain(uint32_t * dwords,uint32_t sizedwords,int level,const char * name)973 dump_domain(uint32_t *dwords, uint32_t sizedwords, int level, const char *name)
974 {
975    struct rnndomain *dom;
976    int i;
977 
978    dom = rnn_finddomain(rnn->db, name);
979 
980    if (!dom)
981       return;
982 
983    if (script_packet)
984       script_packet(dwords, sizedwords, rnn, dom);
985 
986    if (quiet(2))
987       return;
988 
989    for (i = 0; i < sizedwords; i++) {
990       struct rnndecaddrinfo *info = rnndec_decodeaddr(rnn->vc, dom, i, 0);
991       char *decoded;
992       if (!(info && info->typeinfo))
993          break;
994       uint64_t value = dwords[i];
995       if (info->typeinfo->high >= 32 && i < sizedwords - 1) {
996          value |= (uint64_t)dwords[i + 1] << 32;
997          i++; /* skip the next dword since we're printing it now */
998       }
999       decoded = rnndec_decodeval(rnn->vc, info->typeinfo, value);
1000       /* Unlike the register printing path, we don't print the name
1001        * of the register, so if it doesn't contain other named
1002        * things (i.e. it isn't a bitset) then print the register
1003        * name as if it's a bitset with a single entry. This avoids
1004        * having to create a dummy register with a single entry to
1005        * get a name in the decoding.
1006        */
1007       if (info->typeinfo->type == RNN_TTYPE_BITSET ||
1008           info->typeinfo->type == RNN_TTYPE_INLINE_BITSET) {
1009          printf("%s%s\n", levels[level], decoded);
1010       } else {
1011          printf("%s{ %s%s%s = %s }\n", levels[level], rnn->vc->colors->rname,
1012                 info->name, rnn->vc->colors->reset, decoded);
1013       }
1014       free(decoded);
1015       free(info->name);
1016       free(info);
1017    }
1018 }
1019 
/* Current bin rectangle, as last set by cp_set_bin() or derived from the
 * window scissor registers in __do_query(); printed with query results.
 */
static uint32_t bin_x1;
static uint32_t bin_x2;
static uint32_t bin_y1;
static uint32_t bin_y2;

static unsigned mode;

/* Name of the current render mode and (optional, may be NULL) thread, as
 * printed by print_mode() and __do_query().
 */
static const char *render_mode;
static const char *thread;

/* Bitmask of render modes; do_query_compare() temporarily overrides
 * enable_mask to look at register state per mode.
 */
static enum {
   MODE_BINNING = 1 << 0,
   MODE_GMEM = 1 << 1,
   MODE_BYPASS = 1 << 2,
   MODE_ALL = MODE_BINNING | MODE_GMEM | MODE_BYPASS,
} enable_mask = MODE_ALL;

/* Reported by print_mode() as "skip_ib2: g=.., l=..": */
static bool skip_ib2_enable_global;
static bool skip_ib2_enable_local;
1032 
1033 static void
print_mode(int level)1034 print_mode(int level)
1035 {
1036    if ((options->info->chip >= 5) && !quiet(2)) {
1037       printf("%smode: %s", levels[level], render_mode);
1038       if (thread)
1039          printf(":%s", thread);
1040       printf("\n");
1041       printf("%sskip_ib2: g=%d, l=%d\n", levels[level], skip_ib2_enable_global,
1042              skip_ib2_enable_local);
1043    }
1044 }
1045 
1046 static bool
skip_query(void)1047 skip_query(void)
1048 {
1049    switch (options->query_mode) {
1050    case QUERY_ALL:
1051       /* never skip: */
1052       return false;
1053    case QUERY_WRITTEN:
1054       for (int i = 0; i < options->nquery; i++) {
1055          uint32_t regbase = queryvals[i];
1056          if (!reg_written(regbase)) {
1057             continue;
1058          }
1059          if (reg_rewritten(regbase)) {
1060             return false;
1061          }
1062       }
1063       return true;
1064    case QUERY_DELTA:
1065       for (int i = 0; i < options->nquery; i++) {
1066          uint32_t regbase = queryvals[i];
1067          if (!reg_written(regbase)) {
1068             continue;
1069          }
1070          uint32_t lastval = reg_val(regbase);
1071          if (lastval != lastvals[regbase]) {
1072             return false;
1073          }
1074       }
1075       return true;
1076    }
1077    return true;
1078 }
1079 
1080 static void
__do_query(const char * primtype,uint32_t num_indices)1081 __do_query(const char *primtype, uint32_t num_indices)
1082 {
1083    int n = 0;
1084 
1085    if ((5 <= options->info->chip) && (options->info->chip < 7)) {
1086       uint32_t scissor_tl = reg_val(regbase("GRAS_SC_WINDOW_SCISSOR_TL"));
1087       uint32_t scissor_br = reg_val(regbase("GRAS_SC_WINDOW_SCISSOR_BR"));
1088 
1089       bin_x1 = scissor_tl & 0xffff;
1090       bin_y1 = scissor_tl >> 16;
1091       bin_x2 = scissor_br & 0xffff;
1092       bin_y2 = scissor_br >> 16;
1093    }
1094 
1095    for (int i = 0; i < options->nquery; i++) {
1096       uint32_t regbase = queryvals[i];
1097       if (!reg_written(regbase))
1098          continue;
1099 
1100       struct regacc r = regacc(NULL);
1101 
1102       /* 64b regs require two successive 32b dwords: */
1103       for (int d = 0; d < 2; d++)
1104          if (regacc_push(&r, regbase + d, reg_val(regbase + d)))
1105             break;
1106 
1107       printf("%4d: %s(%u,%u-%u,%u):%u:", draw_count, primtype, bin_x1,
1108              bin_y1, bin_x2, bin_y2, num_indices);
1109       if (options->info->chip >= 5)
1110          printf("%s:", render_mode);
1111       if (thread)
1112          printf("%s:", thread);
1113       printf("\t%08"PRIx64, r.value);
1114       if (r.value != lastvals[regbase]) {
1115          printf("!");
1116       } else {
1117          printf(" ");
1118       }
1119       if (reg_rewritten(regbase)) {
1120          printf("+");
1121       } else {
1122          printf(" ");
1123       }
1124       dump_register_val(&r, 0);
1125       n++;
1126    }
1127 
1128    if (n > 1)
1129       printf("\n");
1130 }
1131 
1132 static void
do_query_compare(const char * primtype,uint32_t num_indices)1133 do_query_compare(const char *primtype, uint32_t num_indices)
1134 {
1135    unsigned saved_enable_mask = enable_mask;
1136    const char *saved_render_mode = render_mode;
1137 
1138    /* in 'query-compare' mode, we want to see if the register is writtten
1139     * or changed in any mode:
1140     *
1141     * (NOTE: this could cause false-positive for 'query-delta' if the reg
1142     * is written with different values in binning vs sysmem/gmem mode, as
1143     * we don't track previous values per-mode, but I think we can live with
1144     * that)
1145     */
1146    enable_mask = MODE_ALL;
1147 
1148    clear_rewritten();
1149    load_all_groups(0);
1150 
1151    if (!skip_query()) {
1152       /* dump binning pass values: */
1153       enable_mask = MODE_BINNING;
1154       render_mode = "BINNING";
1155       clear_rewritten();
1156       load_all_groups(0);
1157       __do_query(primtype, num_indices);
1158 
1159       /* dump draw pass values: */
1160       enable_mask = MODE_GMEM | MODE_BYPASS;
1161       render_mode = "DRAW";
1162       clear_rewritten();
1163       load_all_groups(0);
1164       __do_query(primtype, num_indices);
1165 
1166       printf("\n");
1167    }
1168 
1169    enable_mask = saved_enable_mask;
1170    render_mode = saved_render_mode;
1171 
1172    disable_all_groups();
1173 }
1174 
1175 /* well, actually query and script..
1176  * NOTE: call this before dump_register_summary()
1177  */
1178 static void
do_query(const char * primtype,uint32_t num_indices)1179 do_query(const char *primtype, uint32_t num_indices)
1180 {
1181    if (script_draw)
1182       script_draw(primtype, num_indices);
1183 
1184    if (options->query_compare) {
1185       do_query_compare(primtype, num_indices);
1186       return;
1187    }
1188 
1189    if (skip_query())
1190       return;
1191 
1192    __do_query(primtype, num_indices);
1193 }
1194 
1195 static void
cp_im_loadi(uint32_t * dwords,uint32_t sizedwords,int level)1196 cp_im_loadi(uint32_t *dwords, uint32_t sizedwords, int level)
1197 {
1198    uint32_t start = dwords[1] >> 16;
1199    uint32_t size = dwords[1] & 0xffff;
1200    const char *type = NULL, *ext = NULL;
1201    gl_shader_stage disasm_type;
1202 
1203    switch (dwords[0]) {
1204    case 0:
1205       type = "vertex";
1206       ext = "vo";
1207       disasm_type = MESA_SHADER_VERTEX;
1208       break;
1209    case 1:
1210       type = "fragment";
1211       ext = "fo";
1212       disasm_type = MESA_SHADER_FRAGMENT;
1213       break;
1214    default:
1215       type = "<unknown>";
1216       disasm_type = 0;
1217       break;
1218    }
1219 
1220    printf("%s%s shader, start=%04x, size=%04x\n", levels[level], type, start,
1221           size);
1222    disasm_a2xx(dwords + 2, sizedwords - 2, level + 2, disasm_type);
1223 
1224    /* dump raw shader: */
1225    if (ext)
1226       dump_shader(ext, dwords + 2, (sizedwords - 2) * 4);
1227 }
1228 
1229 static void
cp_wide_reg_write(uint32_t * dwords,uint32_t sizedwords,int level)1230 cp_wide_reg_write(uint32_t *dwords, uint32_t sizedwords, int level)
1231 {
1232    uint32_t reg = dwords[0] & 0xffff;
1233    struct regacc r = regacc(NULL);
1234    for (int i = 1; i < sizedwords; i++) {
1235       if (regacc_push(&r, reg, dwords[i]))
1236          dump_register(&r, level + 1);
1237       reg_set(reg, dwords[i]);
1238       reg++;
1239    }
1240 }
1241 
1242 enum state_t {
1243    TEX_SAMP = 1,
1244    TEX_CONST,
1245    TEX_MIPADDR, /* a3xx only */
1246    SHADER_PROG,
1247    SHADER_CONST,
1248 
1249    // image/ssbo state:
1250    SSBO_0,
1251    SSBO_1,
1252    SSBO_2,
1253 
1254    UBO,
1255 
1256    // unknown things, just to hexdumps:
1257    UNKNOWN_DWORDS,
1258    UNKNOWN_2DWORDS,
1259    UNKNOWN_4DWORDS,
1260 };
1261 
1262 enum adreno_state_block {
1263    SB_VERT_TEX = 0,
1264    SB_VERT_MIPADDR = 1,
1265    SB_FRAG_TEX = 2,
1266    SB_FRAG_MIPADDR = 3,
1267    SB_VERT_SHADER = 4,
1268    SB_GEOM_SHADER = 5,
1269    SB_FRAG_SHADER = 6,
1270    SB_COMPUTE_SHADER = 7,
1271 };
1272 
1273 /* TODO there is probably a clever way to let rnndec parse things so
1274  * we don't have to care about packet format differences across gens
1275  */
1276 
1277 static void
a3xx_get_state_type(uint32_t * dwords,gl_shader_stage * stage,enum state_t * state,enum state_src_t * src)1278 a3xx_get_state_type(uint32_t *dwords, gl_shader_stage *stage,
1279                     enum state_t *state, enum state_src_t *src)
1280 {
1281    unsigned state_block_id = (dwords[0] >> 19) & 0x7;
1282    unsigned state_type = dwords[1] & 0x3;
1283    static const struct {
1284       gl_shader_stage stage;
1285       enum state_t state;
1286    } lookup[0xf][0x3] = {
1287       [SB_VERT_TEX][0] = {MESA_SHADER_VERTEX, TEX_SAMP},
1288       [SB_VERT_TEX][1] = {MESA_SHADER_VERTEX, TEX_CONST},
1289       [SB_FRAG_TEX][0] = {MESA_SHADER_FRAGMENT, TEX_SAMP},
1290       [SB_FRAG_TEX][1] = {MESA_SHADER_FRAGMENT, TEX_CONST},
1291       [SB_VERT_SHADER][0] = {MESA_SHADER_VERTEX, SHADER_PROG},
1292       [SB_VERT_SHADER][1] = {MESA_SHADER_VERTEX, SHADER_CONST},
1293       [SB_FRAG_SHADER][0] = {MESA_SHADER_FRAGMENT, SHADER_PROG},
1294       [SB_FRAG_SHADER][1] = {MESA_SHADER_FRAGMENT, SHADER_CONST},
1295    };
1296 
1297    *stage = lookup[state_block_id][state_type].stage;
1298    *state = lookup[state_block_id][state_type].state;
1299    unsigned state_src = (dwords[0] >> 16) & 0x7;
1300    if (state_src == 0 /* SS_DIRECT */)
1301       *src = STATE_SRC_DIRECT;
1302    else
1303       *src = STATE_SRC_INDIRECT;
1304 }
1305 
1306 static enum state_src_t
_get_state_src(unsigned dword0)1307 _get_state_src(unsigned dword0)
1308 {
1309    switch ((dword0 >> 16) & 0x3) {
1310    case 0: /* SS4_DIRECT / SS6_DIRECT */
1311       return STATE_SRC_DIRECT;
1312    case 2: /* SS4_INDIRECT / SS6_INDIRECT */
1313       return STATE_SRC_INDIRECT;
1314    case 1: /* SS6_BINDLESS */
1315       return STATE_SRC_BINDLESS;
1316    default:
1317       return STATE_SRC_DIRECT;
1318    }
1319 }
1320 
1321 static void
_get_state_type(unsigned state_block_id,unsigned state_type,gl_shader_stage * stage,enum state_t * state)1322 _get_state_type(unsigned state_block_id, unsigned state_type,
1323                 gl_shader_stage *stage, enum state_t *state)
1324 {
1325    static const struct {
1326       gl_shader_stage stage;
1327       enum state_t state;
1328    } lookup[0x10][0x4] = {
1329       // SB4_VS_TEX:
1330       [0x0][0] = {MESA_SHADER_VERTEX, TEX_SAMP},
1331       [0x0][1] = {MESA_SHADER_VERTEX, TEX_CONST},
1332       [0x0][2] = {MESA_SHADER_VERTEX, UBO},
1333       // SB4_HS_TEX:
1334       [0x1][0] = {MESA_SHADER_TESS_CTRL, TEX_SAMP},
1335       [0x1][1] = {MESA_SHADER_TESS_CTRL, TEX_CONST},
1336       [0x1][2] = {MESA_SHADER_TESS_CTRL, UBO},
1337       // SB4_DS_TEX:
1338       [0x2][0] = {MESA_SHADER_TESS_EVAL, TEX_SAMP},
1339       [0x2][1] = {MESA_SHADER_TESS_EVAL, TEX_CONST},
1340       [0x2][2] = {MESA_SHADER_TESS_EVAL, UBO},
1341       // SB4_GS_TEX:
1342       [0x3][0] = {MESA_SHADER_GEOMETRY, TEX_SAMP},
1343       [0x3][1] = {MESA_SHADER_GEOMETRY, TEX_CONST},
1344       [0x3][2] = {MESA_SHADER_GEOMETRY, UBO},
1345       // SB4_FS_TEX:
1346       [0x4][0] = {MESA_SHADER_FRAGMENT, TEX_SAMP},
1347       [0x4][1] = {MESA_SHADER_FRAGMENT, TEX_CONST},
1348       [0x4][2] = {MESA_SHADER_FRAGMENT, UBO},
1349       // SB4_CS_TEX:
1350       [0x5][0] = {MESA_SHADER_COMPUTE, TEX_SAMP},
1351       [0x5][1] = {MESA_SHADER_COMPUTE, TEX_CONST},
1352       [0x5][2] = {MESA_SHADER_COMPUTE, UBO},
1353       // SB4_VS_SHADER:
1354       [0x8][0] = {MESA_SHADER_VERTEX, SHADER_PROG},
1355       [0x8][1] = {MESA_SHADER_VERTEX, SHADER_CONST},
1356       [0x8][2] = {MESA_SHADER_VERTEX, UBO},
1357       // SB4_HS_SHADER
1358       [0x9][0] = {MESA_SHADER_TESS_CTRL, SHADER_PROG},
1359       [0x9][1] = {MESA_SHADER_TESS_CTRL, SHADER_CONST},
1360       [0x9][2] = {MESA_SHADER_TESS_CTRL, UBO},
1361       // SB4_DS_SHADER
1362       [0xa][0] = {MESA_SHADER_TESS_EVAL, SHADER_PROG},
1363       [0xa][1] = {MESA_SHADER_TESS_EVAL, SHADER_CONST},
1364       [0xa][2] = {MESA_SHADER_TESS_EVAL, UBO},
1365       // SB4_GS_SHADER
1366       [0xb][0] = {MESA_SHADER_GEOMETRY, SHADER_PROG},
1367       [0xb][1] = {MESA_SHADER_GEOMETRY, SHADER_CONST},
1368       [0xb][2] = {MESA_SHADER_GEOMETRY, UBO},
1369       // SB4_FS_SHADER:
1370       [0xc][0] = {MESA_SHADER_FRAGMENT, SHADER_PROG},
1371       [0xc][1] = {MESA_SHADER_FRAGMENT, SHADER_CONST},
1372       [0xc][2] = {MESA_SHADER_FRAGMENT, UBO},
1373       // SB4_CS_SHADER:
1374       [0xd][0] = {MESA_SHADER_COMPUTE, SHADER_PROG},
1375       [0xd][1] = {MESA_SHADER_COMPUTE, SHADER_CONST},
1376       [0xd][2] = {MESA_SHADER_COMPUTE, UBO},
1377       [0xd][3] = {MESA_SHADER_COMPUTE, SSBO_0}, /* a6xx location */
1378       // SB4_SSBO (shared across all stages)
1379       [0xe][0] = {0, SSBO_0}, /* a5xx (and a4xx?) location */
1380       [0xe][1] = {0, SSBO_1},
1381       [0xe][2] = {0, SSBO_2},
1382       // SB4_CS_SSBO
1383       [0xf][0] = {MESA_SHADER_COMPUTE, SSBO_0},
1384       [0xf][1] = {MESA_SHADER_COMPUTE, SSBO_1},
1385       [0xf][2] = {MESA_SHADER_COMPUTE, SSBO_2},
1386       // unknown things
1387       /* This looks like combined UBO state for 3d stages (a5xx and
1388        * before??  I think a6xx has UBO state per shader stage:
1389        */
1390       [0x6][2] = {0, UBO},
1391       [0x7][1] = {0, UNKNOWN_2DWORDS},
1392    };
1393 
1394    *stage = lookup[state_block_id][state_type].stage;
1395    *state = lookup[state_block_id][state_type].state;
1396 }
1397 
1398 static void
a4xx_get_state_type(uint32_t * dwords,gl_shader_stage * stage,enum state_t * state,enum state_src_t * src)1399 a4xx_get_state_type(uint32_t *dwords, gl_shader_stage *stage,
1400                     enum state_t *state, enum state_src_t *src)
1401 {
1402    unsigned state_block_id = (dwords[0] >> 18) & 0xf;
1403    unsigned state_type = dwords[1] & 0x3;
1404    _get_state_type(state_block_id, state_type, stage, state);
1405    *src = _get_state_src(dwords[0]);
1406 }
1407 
1408 static void
a6xx_get_state_type(uint32_t * dwords,gl_shader_stage * stage,enum state_t * state,enum state_src_t * src)1409 a6xx_get_state_type(uint32_t *dwords, gl_shader_stage *stage,
1410                     enum state_t *state, enum state_src_t *src)
1411 {
1412    unsigned state_block_id = (dwords[0] >> 18) & 0xf;
1413    unsigned state_type = (dwords[0] >> 14) & 0x3;
1414    _get_state_type(state_block_id, state_type, stage, state);
1415    *src = _get_state_src(dwords[0]);
1416 }
1417 
1418 static void
dump_tex_samp(uint32_t * texsamp,enum state_src_t src,int num_unit,int level)1419 dump_tex_samp(uint32_t *texsamp, enum state_src_t src, int num_unit, int level)
1420 {
1421    for (int i = 0; i < num_unit; i++) {
1422       /* work-around to reduce noise for opencl blob which always
1423        * writes the max # regardless of # of textures used
1424        */
1425       if ((num_unit == 16) && (texsamp[0] == 0) && (texsamp[1] == 0))
1426          break;
1427 
1428       if (options->info->chip == 3) {
1429          dump_domain(texsamp, 2, level + 2, "A3XX_TEX_SAMP");
1430          dump_hex(texsamp, 2, level + 1);
1431          texsamp += 2;
1432       } else if (options->info->chip == 4) {
1433          dump_domain(texsamp, 2, level + 2, "A4XX_TEX_SAMP");
1434          dump_hex(texsamp, 2, level + 1);
1435          texsamp += 2;
1436       } else if (options->info->chip == 5) {
1437          dump_domain(texsamp, 4, level + 2, "A5XX_TEX_SAMP");
1438          dump_hex(texsamp, 4, level + 1);
1439          texsamp += 4;
1440       } else if ((6 <= options->info->chip) && (options->info->chip < 8)) {
1441          dump_domain(texsamp, 4, level + 2, "A6XX_TEX_SAMP");
1442          dump_hex(texsamp, 4, level + 1);
1443          texsamp += src == STATE_SRC_BINDLESS ? 16 : 4;
1444       }
1445    }
1446 }
1447 
1448 static void
dump_tex_const(uint32_t * texconst,int num_unit,int level)1449 dump_tex_const(uint32_t *texconst, int num_unit, int level)
1450 {
1451    for (int i = 0; i < num_unit; i++) {
1452       /* work-around to reduce noise for opencl blob which always
1453        * writes the max # regardless of # of textures used
1454        */
1455       if ((num_unit == 16) && (texconst[0] == 0) && (texconst[1] == 0) &&
1456           (texconst[2] == 0) && (texconst[3] == 0))
1457          break;
1458 
1459       if (options->info->chip == 3) {
1460          dump_domain(texconst, 4, level + 2, "A3XX_TEX_CONST");
1461          dump_hex(texconst, 4, level + 1);
1462          texconst += 4;
1463       } else if (options->info->chip == 4) {
1464          dump_domain(texconst, 8, level + 2, "A4XX_TEX_CONST");
1465          if (options->dump_textures) {
1466             uint32_t addr = texconst[4] & ~0x1f;
1467             dump_gpuaddr(addr, level - 2);
1468          }
1469          dump_hex(texconst, 8, level + 1);
1470          texconst += 8;
1471       } else if (options->info->chip == 5) {
1472          dump_domain(texconst, 12, level + 2, "A5XX_TEX_CONST");
1473          if (options->dump_textures) {
1474             uint64_t addr =
1475                (((uint64_t)texconst[5] & 0x1ffff) << 32) | texconst[4];
1476             dump_gpuaddr_size(addr, level - 2, hostlen(addr) / 4, 3);
1477          }
1478          dump_hex(texconst, 12, level + 1);
1479          texconst += 12;
1480       } else if ((6 <= options->info->chip) && (options->info->chip < 8)) {
1481          dump_domain(texconst, 16, level + 2, "A6XX_TEX_CONST");
1482          if (options->dump_textures) {
1483             uint64_t addr =
1484                (((uint64_t)texconst[5] & 0x1ffff) << 32) | texconst[4];
1485             dump_gpuaddr_size(addr, level - 2, hostlen(addr) / 4, 3);
1486          }
1487          dump_hex(texconst, 16, level + 1);
1488          texconst += 16;
1489       }
1490    }
1491 }
1492 
1493 static void
cp_load_state(uint32_t * dwords,uint32_t sizedwords,int level)1494 cp_load_state(uint32_t *dwords, uint32_t sizedwords, int level)
1495 {
1496    gl_shader_stage stage;
1497    enum state_t state;
1498    enum state_src_t src;
1499    uint32_t num_unit = (dwords[0] >> 22) & 0x1ff;
1500    uint64_t ext_src_addr;
1501    void *contents;
1502    int i;
1503 
1504    if (quiet(2) && !options->script)
1505       return;
1506 
1507    if (options->info->chip >= 6)
1508       a6xx_get_state_type(dwords, &stage, &state, &src);
1509    else if (options->info->chip >= 4)
1510       a4xx_get_state_type(dwords, &stage, &state, &src);
1511    else
1512       a3xx_get_state_type(dwords, &stage, &state, &src);
1513 
1514    switch (src) {
1515    case STATE_SRC_DIRECT:
1516       ext_src_addr = 0;
1517       break;
1518    case STATE_SRC_INDIRECT:
1519       if (is_64b()) {
1520          ext_src_addr = dwords[1] & 0xfffffffc;
1521          ext_src_addr |= ((uint64_t)dwords[2]) << 32;
1522       } else {
1523          ext_src_addr = dwords[1] & 0xfffffffc;
1524       }
1525 
1526       break;
1527    case STATE_SRC_BINDLESS: {
1528       const unsigned base_reg = stage == MESA_SHADER_COMPUTE
1529                                    ? regbase("HLSQ_CS_BINDLESS_BASE[0].DESCRIPTOR")
1530                                    : regbase("HLSQ_BINDLESS_BASE[0].DESCRIPTOR");
1531 
1532       if (is_64b()) {
1533          const unsigned reg = base_reg + (dwords[1] >> 28) * 2;
1534          ext_src_addr = reg_val(reg) & 0xfffffffc;
1535          ext_src_addr |= ((uint64_t)reg_val(reg + 1)) << 32;
1536       } else {
1537          const unsigned reg = base_reg + (dwords[1] >> 28);
1538          ext_src_addr = reg_val(reg) & 0xfffffffc;
1539       }
1540 
1541       ext_src_addr += 4 * (dwords[1] & 0xffffff);
1542       break;
1543    }
1544    }
1545 
1546    if (ext_src_addr)
1547       contents = hostptr(ext_src_addr);
1548    else
1549       contents = is_64b() ? dwords + 3 : dwords + 2;
1550 
1551    if (!contents)
1552       return;
1553 
1554    switch (state) {
1555    case SHADER_PROG: {
1556       const char *ext = NULL;
1557 
1558       if (quiet(2))
1559          return;
1560 
1561       if (options->info->chip >= 4)
1562          num_unit *= 16;
1563       else if (options->info->chip >= 3)
1564          num_unit *= 4;
1565 
1566       /* shaders:
1567        *
1568        * note: num_unit seems to be # of instruction groups, where
1569        * an instruction group has 4 64bit instructions.
1570        */
1571       if (stage == MESA_SHADER_VERTEX) {
1572          ext = "vo3";
1573       } else if (stage == MESA_SHADER_GEOMETRY) {
1574          ext = "go3";
1575       } else if (stage == MESA_SHADER_COMPUTE) {
1576          ext = "co3";
1577       } else if (stage == MESA_SHADER_FRAGMENT) {
1578          ext = "fo3";
1579       }
1580 
1581       if (contents)
1582          try_disasm_a3xx(contents, num_unit * 2, level + 2, stdout,
1583                          options->info->chip * 100);
1584 
1585       /* dump raw shader: */
1586       if (ext)
1587          dump_shader(ext, contents, num_unit * 2 * 4);
1588 
1589       break;
1590    }
1591    case SHADER_CONST: {
1592       if (quiet(2))
1593          return;
1594 
1595       /* uniforms/consts:
1596        *
1597        * note: num_unit seems to be # of pairs of dwords??
1598        */
1599 
1600       if (options->info->chip >= 4)
1601          num_unit *= 2;
1602 
1603       dump_float(contents, num_unit * 2, level + 1);
1604       dump_hex(contents, num_unit * 2, level + 1);
1605 
1606       break;
1607    }
1608    case TEX_MIPADDR: {
1609       uint32_t *addrs = contents;
1610 
1611       if (quiet(2))
1612          return;
1613 
1614       /* mipmap consts block just appears to be array of num_unit gpu addr's: */
1615       for (i = 0; i < num_unit; i++) {
1616          void *ptr = hostptr(addrs[i]);
1617          printf("%s%2d: %08x\n", levels[level + 1], i, addrs[i]);
1618          if (options->dump_textures) {
1619             printf("base=%08x\n", (uint32_t)gpubaseaddr(addrs[i]));
1620             dump_hex(ptr, hostlen(addrs[i]) / 4, level + 1);
1621          }
1622       }
1623       break;
1624    }
1625    case TEX_SAMP: {
1626       dump_tex_samp(contents, src, num_unit, level);
1627       break;
1628    }
1629    case TEX_CONST: {
1630       dump_tex_const(contents, num_unit, level);
1631       break;
1632    }
1633    case SSBO_0: {
1634       uint32_t *ssboconst = (uint32_t *)contents;
1635 
1636       for (i = 0; i < num_unit; i++) {
1637          int sz = 4;
1638          if (options->info->chip == 4) {
1639             dump_domain(ssboconst, 4, level + 2, "A4XX_SSBO_0");
1640          } else if (options->info->chip == 5) {
1641             dump_domain(ssboconst, 4, level + 2, "A5XX_SSBO_0");
1642          } else if ((6 <= options->info->chip) && (options->info->chip < 8)) {
1643             sz = 16;
1644             dump_domain(ssboconst, 16, level + 2, "A6XX_TEX_CONST");
1645          }
1646          dump_hex(ssboconst, sz, level + 1);
1647          ssboconst += sz;
1648       }
1649       break;
1650    }
1651    case SSBO_1: {
1652       uint32_t *ssboconst = (uint32_t *)contents;
1653 
1654       for (i = 0; i < num_unit; i++) {
1655          if (options->info->chip == 4)
1656             dump_domain(ssboconst, 2, level + 2, "A4XX_SSBO_1");
1657          else if (options->info->chip == 5)
1658             dump_domain(ssboconst, 2, level + 2, "A5XX_SSBO_1");
1659          dump_hex(ssboconst, 2, level + 1);
1660          ssboconst += 2;
1661       }
1662       break;
1663    }
1664    case SSBO_2: {
1665       uint32_t *ssboconst = (uint32_t *)contents;
1666 
1667       for (i = 0; i < num_unit; i++) {
1668          /* TODO a4xx and a5xx might be same: */
1669          if (options->info->chip == 5) {
1670             dump_domain(ssboconst, 2, level + 2, "A5XX_SSBO_2");
1671             dump_hex(ssboconst, 2, level + 1);
1672          }
1673          if (options->dump_textures) {
1674             uint64_t addr =
1675                (((uint64_t)ssboconst[1] & 0x1ffff) << 32) | ssboconst[0];
1676             dump_gpuaddr_size(addr, level - 2, hostlen(addr) / 4, 3);
1677          }
1678          ssboconst += 2;
1679       }
1680       break;
1681    }
1682    case UBO: {
1683       uint32_t *uboconst = (uint32_t *)contents;
1684 
1685       for (i = 0; i < num_unit; i++) {
1686          // TODO probably similar on a4xx..
1687          if (options->info->chip == 5)
1688             dump_domain(uboconst, 2, level + 2, "A5XX_UBO");
1689          else if (options->info->chip == 6)
1690             dump_domain(uboconst, 2, level + 2, "A6XX_UBO");
1691          dump_hex(uboconst, 2, level + 1);
1692          uboconst += src == STATE_SRC_BINDLESS ? 16 : 2;
1693       }
1694       break;
1695    }
1696    case UNKNOWN_DWORDS: {
1697       if (quiet(2))
1698          return;
1699       dump_hex(contents, num_unit, level + 1);
1700       break;
1701    }
1702    case UNKNOWN_2DWORDS: {
1703       if (quiet(2))
1704          return;
1705       dump_hex(contents, num_unit * 2, level + 1);
1706       break;
1707    }
1708    case UNKNOWN_4DWORDS: {
1709       if (quiet(2))
1710          return;
1711       dump_hex(contents, num_unit * 4, level + 1);
1712       break;
1713    }
1714    default:
1715       if (quiet(2))
1716          return;
1717       /* hmm.. */
1718       dump_hex(contents, num_unit, level + 1);
1719       break;
1720    }
1721 }
1722 
1723 static void
cp_set_bin(uint32_t * dwords,uint32_t sizedwords,int level)1724 cp_set_bin(uint32_t *dwords, uint32_t sizedwords, int level)
1725 {
1726    bin_x1 = dwords[1] & 0xffff;
1727    bin_y1 = dwords[1] >> 16;
1728    bin_x2 = dwords[2] & 0xffff;
1729    bin_y2 = dwords[2] >> 16;
1730 }
1731 
1732 static void
dump_a2xx_tex_const(uint32_t * dwords,uint32_t sizedwords,uint32_t val,int level)1733 dump_a2xx_tex_const(uint32_t *dwords, uint32_t sizedwords, uint32_t val,
1734                     int level)
1735 {
1736    uint32_t w, h, p;
1737    uint32_t gpuaddr, flags, mip_gpuaddr, mip_flags;
1738    uint32_t min, mag, swiz, clamp_x, clamp_y, clamp_z;
1739    static const char *filter[] = {
1740       "point",
1741       "bilinear",
1742       "bicubic",
1743    };
1744    static const char *clamp[] = {
1745       "wrap",
1746       "mirror",
1747       "clamp-last-texel",
1748    };
1749    static const char swiznames[] = "xyzw01??";
1750 
1751    /* see sys2gmem_tex_const[] in adreno_a2xxx.c */
1752 
1753    /* Texture, FormatXYZW=Unsigned, ClampXYZ=Wrap/Repeat,
1754     * RFMode=ZeroClamp-1, Dim=1:2d, pitch
1755     */
1756    p = (dwords[0] >> 22) << 5;
1757    clamp_x = (dwords[0] >> 10) & 0x3;
1758    clamp_y = (dwords[0] >> 13) & 0x3;
1759    clamp_z = (dwords[0] >> 16) & 0x3;
1760 
1761    /* Format=6:8888_WZYX, EndianSwap=0:None, ReqSize=0:256bit, DimHi=0,
1762     * NearestClamp=1:OGL Mode
1763     */
1764    parse_dword_addr(dwords[1], &gpuaddr, &flags, 0xfff);
1765 
1766    /* Width, Height, EndianSwap=0:None */
1767    w = (dwords[2] & 0x1fff) + 1;
1768    h = ((dwords[2] >> 13) & 0x1fff) + 1;
1769 
1770    /* NumFormat=0:RF, DstSelXYZW=XYZW, ExpAdj=0, MagFilt=MinFilt=0:Point,
1771     * Mip=2:BaseMap
1772     */
1773    mag = (dwords[3] >> 19) & 0x3;
1774    min = (dwords[3] >> 21) & 0x3;
1775    swiz = (dwords[3] >> 1) & 0xfff;
1776 
1777    /* VolMag=VolMin=0:Point, MinMipLvl=0, MaxMipLvl=1, LodBiasH=V=0,
1778     * Dim3d=0
1779     */
1780    // XXX
1781 
1782    /* BorderColor=0:ABGRBlack, ForceBC=0:diable, TriJuice=0, Aniso=0,
1783     * Dim=1:2d, MipPacking=0
1784     */
1785    parse_dword_addr(dwords[5], &mip_gpuaddr, &mip_flags, 0xfff);
1786 
1787    printf("%sset texture const %04x\n", levels[level], val);
1788    printf("%sclamp x/y/z: %s/%s/%s\n", levels[level + 1], clamp[clamp_x],
1789           clamp[clamp_y], clamp[clamp_z]);
1790    printf("%sfilter min/mag: %s/%s\n", levels[level + 1], filter[min],
1791           filter[mag]);
1792    printf("%sswizzle: %c%c%c%c\n", levels[level + 1],
1793           swiznames[(swiz >> 0) & 0x7], swiznames[(swiz >> 3) & 0x7],
1794           swiznames[(swiz >> 6) & 0x7], swiznames[(swiz >> 9) & 0x7]);
1795    printf("%saddr=%08x (flags=%03x), size=%dx%d, pitch=%d, format=%s\n",
1796           levels[level + 1], gpuaddr, flags, w, h, p,
1797           rnn_enumname(rnn, "a2xx_sq_surfaceformat", flags & 0xf));
1798    printf("%smipaddr=%08x (flags=%03x)\n", levels[level + 1], mip_gpuaddr,
1799           mip_flags);
1800 }
1801 
1802 static void
dump_a2xx_shader_const(uint32_t * dwords,uint32_t sizedwords,uint32_t val,int level)1803 dump_a2xx_shader_const(uint32_t *dwords, uint32_t sizedwords, uint32_t val,
1804                        int level)
1805 {
1806    int i;
1807    printf("%sset shader const %04x\n", levels[level], val);
1808    for (i = 0; i < sizedwords;) {
1809       uint32_t gpuaddr, flags;
1810       parse_dword_addr(dwords[i++], &gpuaddr, &flags, 0xf);
1811       void *addr = hostptr(gpuaddr);
1812       if (addr) {
1813          const char *fmt =
1814             rnn_enumname(rnn, "a2xx_sq_surfaceformat", flags & 0xf);
1815          uint32_t size = dwords[i++];
1816          printf("%saddr=%08x, size=%d, format=%s\n", levels[level + 1], gpuaddr,
1817                 size, fmt);
1818          // TODO maybe dump these as bytes instead of dwords?
1819          size = (size + 3) / 4; // for now convert to dwords
1820          dump_hex(addr, MIN2(size, 64), level + 1);
1821          if (size > MIN2(size, 64))
1822             printf("%s\t\t...\n", levels[level + 1]);
1823          dump_float(addr, MIN2(size, 64), level + 1);
1824          if (size > MIN2(size, 64))
1825             printf("%s\t\t...\n", levels[level + 1]);
1826       }
1827    }
1828 }
1829 
1830 static void
cp_set_const(uint32_t * dwords,uint32_t sizedwords,int level)1831 cp_set_const(uint32_t *dwords, uint32_t sizedwords, int level)
1832 {
1833    uint32_t val = dwords[0] & 0xffff;
1834    switch ((dwords[0] >> 16) & 0xf) {
1835    case 0x0:
1836       dump_float((float *)(dwords + 1), sizedwords - 1, level + 1);
1837       break;
1838    case 0x1:
1839       /* need to figure out how const space is partitioned between
1840        * attributes, textures, etc..
1841        */
1842       if (val < 0x78) {
1843          dump_a2xx_tex_const(dwords + 1, sizedwords - 1, val, level);
1844       } else {
1845          dump_a2xx_shader_const(dwords + 1, sizedwords - 1, val, level);
1846       }
1847       break;
1848    case 0x2:
1849       printf("%sset bool const %04x\n", levels[level], val);
1850       break;
1851    case 0x3:
1852       printf("%sset loop const %04x\n", levels[level], val);
1853       break;
1854    case 0x4:
1855       val += 0x2000;
1856       if (dwords[0] & 0x80000000) {
1857          uint32_t srcreg = dwords[1];
1858          uint32_t dstval = dwords[2];
1859 
1860          /* TODO: not sure what happens w/ payload != 2.. */
1861          assert(sizedwords == 3);
1862          assert(srcreg < ARRAY_SIZE(type0_reg_vals));
1863 
1864          /* note: rnn_regname uses a static buf so we can't do
1865           * two regname() calls for one printf..
1866           */
1867          printf("%s%s = %08x + ", levels[level], regname(val, 1), dstval);
1868          printf("%s (%08x)\n", regname(srcreg, 1), type0_reg_vals[srcreg]);
1869 
1870          dstval += type0_reg_vals[srcreg];
1871 
1872          dump_registers(val, &dstval, 1, level + 1);
1873       } else {
1874          dump_registers(val, dwords + 1, sizedwords - 1, level + 1);
1875       }
1876       break;
1877    }
1878 }
1879 
1880 static void dump_register_summary(int level);
1881 
1882 static void
cp_event_write(uint32_t * dwords,uint32_t sizedwords,int level)1883 cp_event_write(uint32_t *dwords, uint32_t sizedwords, int level)
1884 {
1885    const char *name = rnn_enumname(rnn, "vgt_event_type", dwords[0] & 0xff);
1886    printl(2, "%sevent %s\n", levels[level], name);
1887 
1888    if (name && (options->info->chip > 5)) {
1889       char eventname[64];
1890       snprintf(eventname, sizeof(eventname), "EVENT:%s", name);
1891       if (!strcmp(name, "BLIT") || !strcmp(name, "LRZ_CLEAR")) {
1892          do_query(eventname, 0);
1893          print_mode(level);
1894          dump_register_summary(level);
1895       }
1896    }
1897 }
1898 
1899 static void
dump_register_summary(int level)1900 dump_register_summary(int level)
1901 {
1902    uint32_t i;
1903    bool saved_summary = summary;
1904    summary = false;
1905 
1906    in_summary = true;
1907 
1908    struct regacc r = regacc(NULL);
1909 
1910    /* dump current state of registers: */
1911    printl(2, "%sdraw[%i] register values\n", levels[level], draw_count);
1912 
1913    bool changed = false;
1914    bool written = false;
1915 
1916    for (i = 0; i < regcnt(); i++) {
1917       uint32_t regbase = i;
1918       uint32_t lastval = reg_val(regbase);
1919       /* skip registers that haven't been updated since last draw/blit: */
1920       if (!(options->allregs || reg_rewritten(regbase)))
1921          continue;
1922       if (!reg_written(regbase))
1923          continue;
1924       if (lastval != lastvals[regbase]) {
1925          changed |= true;
1926          lastvals[regbase] = lastval;
1927       }
1928       if (reg_rewritten(regbase)) {
1929          written |= true;
1930       }
1931       if (!quiet(2)) {
1932          if (regacc_push(&r, regbase, lastval)) {
1933             if (changed) {
1934                printl(2, "!");
1935             } else {
1936                printl(2, " ");
1937             }
1938             if (written) {
1939                printl(2, "+");
1940             } else {
1941                printl(2, " ");
1942             }
1943             printl(2, "\t%08"PRIx64, r.value);
1944             dump_register(&r, level);
1945 
1946             changed = written = false;
1947          }
1948       }
1949    }
1950 
1951    clear_rewritten();
1952 
1953    in_summary = false;
1954 
1955    draw_count++;
1956    summary = saved_summary;
1957 }
1958 
1959 static uint32_t
draw_indx_common(uint32_t * dwords,int level)1960 draw_indx_common(uint32_t *dwords, int level)
1961 {
1962    uint32_t prim_type = dwords[1] & 0x1f;
1963    uint32_t source_select = (dwords[1] >> 6) & 0x3;
1964    uint32_t num_indices = dwords[2];
1965    const char *primtype;
1966 
1967    primtype = rnn_enumname(rnn, "pc_di_primtype", prim_type);
1968 
1969    do_query(primtype, num_indices);
1970 
1971    printl(2, "%sdraw:          %d\n", levels[level], draws[ib]);
1972    printl(2, "%sprim_type:     %s (%d)\n", levels[level], primtype, prim_type);
1973    printl(2, "%ssource_select: %s (%d)\n", levels[level],
1974           rnn_enumname(rnn, "pc_di_src_sel", source_select), source_select);
1975    printl(2, "%snum_indices:   %d\n", levels[level], num_indices);
1976 
1977    vertices += num_indices;
1978 
1979    draws[ib]++;
1980 
1981    return num_indices;
1982 }
1983 
/* Index size encodings.  Three of the names share the value 0. */
enum pc_di_index_size {
   INDEX_SIZE_16_BIT = 0,
   INDEX_SIZE_IGN = INDEX_SIZE_16_BIT,
   INDEX_SIZE_INVALID = INDEX_SIZE_16_BIT,
   INDEX_SIZE_32_BIT = 1,
   INDEX_SIZE_8_BIT = 2,
};
1991 
1992 static void
cp_draw_indx(uint32_t * dwords,uint32_t sizedwords,int level)1993 cp_draw_indx(uint32_t *dwords, uint32_t sizedwords, int level)
1994 {
1995    uint32_t num_indices = draw_indx_common(dwords, level);
1996 
1997    assert(!is_64b());
1998 
1999    /* if we have an index buffer, dump that: */
2000    if (sizedwords == 5) {
2001       void *ptr = hostptr(dwords[3]);
2002       printl(2, "%sgpuaddr:       %08x\n", levels[level], dwords[3]);
2003       printl(2, "%sidx_size:      %d\n", levels[level], dwords[4]);
2004       if (ptr) {
2005          enum pc_di_index_size size =
2006             ((dwords[1] >> 11) & 1) | ((dwords[1] >> 12) & 2);
2007          if (!quiet(2)) {
2008             int i;
2009             printf("%sidxs:         ", levels[level]);
2010             if (size == INDEX_SIZE_8_BIT) {
2011                uint8_t *idx = ptr;
2012                for (i = 0; i < dwords[4]; i++)
2013                   printf(" %u", idx[i]);
2014             } else if (size == INDEX_SIZE_16_BIT) {
2015                uint16_t *idx = ptr;
2016                for (i = 0; i < dwords[4] / 2; i++)
2017                   printf(" %u", idx[i]);
2018             } else if (size == INDEX_SIZE_32_BIT) {
2019                uint32_t *idx = ptr;
2020                for (i = 0; i < dwords[4] / 4; i++)
2021                   printf(" %u", idx[i]);
2022             }
2023             printf("\n");
2024             dump_hex(ptr, dwords[4] / 4, level + 1);
2025          }
2026       }
2027    }
2028 
2029    /* don't bother dumping registers for the dummy draw_indx's.. */
2030    if (num_indices > 0)
2031       dump_register_summary(level);
2032 
2033    needs_wfi = true;
2034 }
2035 
2036 static void
cp_draw_indx_2(uint32_t * dwords,uint32_t sizedwords,int level)2037 cp_draw_indx_2(uint32_t *dwords, uint32_t sizedwords, int level)
2038 {
2039    uint32_t num_indices = draw_indx_common(dwords, level);
2040    enum pc_di_index_size size =
2041       ((dwords[1] >> 11) & 1) | ((dwords[1] >> 12) & 2);
2042    void *ptr = &dwords[3];
2043    int sz = 0;
2044 
2045    assert(!is_64b());
2046 
2047    /* CP_DRAW_INDX_2 has embedded/inline idx buffer: */
2048    if (!quiet(2)) {
2049       int i;
2050       printf("%sidxs:         ", levels[level]);
2051       if (size == INDEX_SIZE_8_BIT) {
2052          uint8_t *idx = ptr;
2053          for (i = 0; i < num_indices; i++)
2054             printf(" %u", idx[i]);
2055          sz = num_indices;
2056       } else if (size == INDEX_SIZE_16_BIT) {
2057          uint16_t *idx = ptr;
2058          for (i = 0; i < num_indices; i++)
2059             printf(" %u", idx[i]);
2060          sz = num_indices * 2;
2061       } else if (size == INDEX_SIZE_32_BIT) {
2062          uint32_t *idx = ptr;
2063          for (i = 0; i < num_indices; i++)
2064             printf(" %u", idx[i]);
2065          sz = num_indices * 4;
2066       }
2067       printf("\n");
2068       dump_hex(ptr, sz / 4, level + 1);
2069    }
2070 
2071    /* don't bother dumping registers for the dummy draw_indx's.. */
2072    if (num_indices > 0)
2073       dump_register_summary(level);
2074 }
2075 
2076 static void
cp_draw_indx_offset(uint32_t * dwords,uint32_t sizedwords,int level)2077 cp_draw_indx_offset(uint32_t *dwords, uint32_t sizedwords, int level)
2078 {
2079    uint32_t num_indices = dwords[2];
2080    uint32_t prim_type = dwords[0] & 0x1f;
2081 
2082    do_query(rnn_enumname(rnn, "pc_di_primtype", prim_type), num_indices);
2083    print_mode(level);
2084 
2085    /* don't bother dumping registers for the dummy draw_indx's.. */
2086    if (num_indices > 0)
2087       dump_register_summary(level);
2088 }
2089 
2090 static void
cp_draw_indx_indirect(uint32_t * dwords,uint32_t sizedwords,int level)2091 cp_draw_indx_indirect(uint32_t *dwords, uint32_t sizedwords, int level)
2092 {
2093    uint32_t prim_type = dwords[0] & 0x1f;
2094    uint64_t addr;
2095 
2096    do_query(rnn_enumname(rnn, "pc_di_primtype", prim_type), 0);
2097    print_mode(level);
2098 
2099    if (is_64b())
2100       addr = (((uint64_t)dwords[2] & 0x1ffff) << 32) | dwords[1];
2101    else
2102       addr = dwords[1];
2103    dump_gpuaddr_size(addr, level, 0x10, 2);
2104 
2105    if (is_64b())
2106       addr = (((uint64_t)dwords[5] & 0x1ffff) << 32) | dwords[4];
2107    else
2108       addr = dwords[3];
2109    dump_gpuaddr_size(addr, level, 0x10, 2);
2110 
2111    dump_register_summary(level);
2112 }
2113 
2114 static void
cp_draw_indirect(uint32_t * dwords,uint32_t sizedwords,int level)2115 cp_draw_indirect(uint32_t *dwords, uint32_t sizedwords, int level)
2116 {
2117    uint32_t prim_type = dwords[0] & 0x1f;
2118    uint64_t addr;
2119 
2120    do_query(rnn_enumname(rnn, "pc_di_primtype", prim_type), 0);
2121    print_mode(level);
2122 
2123    addr = (((uint64_t)dwords[2] & 0x1ffff) << 32) | dwords[1];
2124    dump_gpuaddr_size(addr, level, 0x10, 2);
2125 
2126    dump_register_summary(level);
2127 }
2128 
2129 static void
cp_draw_indirect_multi(uint32_t * dwords,uint32_t sizedwords,int level)2130 cp_draw_indirect_multi(uint32_t *dwords, uint32_t sizedwords, int level)
2131 {
2132    uint32_t prim_type = dwords[0] & 0x1f;
2133    uint32_t count = dwords[2];
2134 
2135    do_query(rnn_enumname(rnn, "pc_di_primtype", prim_type), 0);
2136    print_mode(level);
2137 
2138    struct rnndomain *domain = rnn_finddomain(rnn->db, "CP_DRAW_INDIRECT_MULTI");
2139    uint32_t count_dword = rnndec_decodereg(rnn->vc, domain, "INDIRECT_COUNT");
2140    uint32_t addr_dword = rnndec_decodereg(rnn->vc, domain, "INDIRECT");
2141    uint64_t stride_dword = rnndec_decodereg(rnn->vc, domain, "STRIDE");
2142 
2143    if (count_dword) {
2144       uint64_t count_addr =
2145          ((uint64_t)dwords[count_dword + 1] << 32) | dwords[count_dword];
2146       uint32_t *buf = hostptr(count_addr);
2147 
2148       /* Don't print more draws than this if we don't know the indirect
2149        * count. It's possible the user will give ~0 or some other large
2150        * value, expecting the GPU to fill in the draw count, and we don't
2151        * want to print a gazillion draws in that case:
2152        */
2153       const uint32_t max_draw_count = 0x100;
2154 
2155       /* Assume the indirect count is garbage if it's larger than this
2156        * (quite large) value or 0. Hopefully this catches most cases.
2157        */
2158       const uint32_t max_indirect_draw_count = 0x10000;
2159 
2160       if (buf) {
2161          printf("%sindirect count: %u\n", levels[level], *buf);
2162          if (*buf == 0 || *buf > max_indirect_draw_count) {
2163             /* garbage value */
2164             count = MIN2(count, max_draw_count);
2165          } else {
2166             /* not garbage */
2167             count = MIN2(count, *buf);
2168          }
2169       } else {
2170          count = MIN2(count, max_draw_count);
2171       }
2172    }
2173 
2174    if (addr_dword && stride_dword) {
2175       uint64_t addr =
2176          ((uint64_t)dwords[addr_dword + 1] << 32) | dwords[addr_dword];
2177       uint32_t stride = dwords[stride_dword];
2178 
2179       for (unsigned i = 0; i < count; i++, addr += stride) {
2180          printf("%sdraw %d:\n", levels[level], i);
2181          dump_gpuaddr_size(addr, level, 0x10, 2);
2182       }
2183    }
2184 
2185    dump_register_summary(level);
2186 }
2187 
2188 static void
cp_draw_auto(uint32_t * dwords,uint32_t sizedwords,int level)2189 cp_draw_auto(uint32_t *dwords, uint32_t sizedwords, int level)
2190 {
2191    uint32_t prim_type = dwords[0] & 0x1f;
2192 
2193    do_query(rnn_enumname(rnn, "pc_di_primtype", prim_type), 0);
2194    print_mode(level);
2195 
2196    dump_register_summary(level);
2197 }
2198 
/* Report a "COMPUTE" query and dump the register summary; the packet
 * payload itself is not inspected.
 */
static void
cp_run_cl(uint32_t *dwords, uint32_t sizedwords, int level)
{
   (void)dwords;
   (void)sizedwords;

   do_query("COMPUTE", 1);
   dump_register_summary(level);
}
2205 
/*
 * Print the payload as a string: bytes are emitted up to the first NUL or
 * the end of the payload, and bytes outside the ASCII range are dropped.
 */
static void
print_nop_tail_string(uint32_t *dwords, uint32_t sizedwords)
{
   const char *str = (void *)dwords;
   int pos = 0;

   while (pos < 4 * sizedwords) {
      const char c = str[pos++];

      if (c == '\0')
         return;
      if (isascii(c))
         putchar(c);
   }
}
2217 
2218 static void
cp_nop(uint32_t * dwords,uint32_t sizedwords,int level)2219 cp_nop(uint32_t *dwords, uint32_t sizedwords, int level)
2220 {
2221    if (quiet(3))
2222       return;
2223 
2224    /* NOP is used to encode special debug strings by Turnip.
2225     * See tu_cs_emit_debug_magic_strv(...)
2226     */
2227    static int scope_level = 0;
2228    uint32_t identifier = dwords[0];
2229    bool is_special = false;
2230    if (identifier == CP_NOP_MESG) {
2231       printf("### ");
2232       is_special = true;
2233    } else if (identifier == CP_NOP_BEGN) {
2234       printf(">>> #%d: ", ++scope_level);
2235       is_special = true;
2236    } else if (identifier == CP_NOP_END) {
2237       printf("<<< #%d: ", scope_level--);
2238       is_special = true;
2239    }
2240 
2241    if (is_special) {
2242       if (sizedwords > 1) {
2243          print_nop_tail_string(dwords + 1, sizedwords - 1);
2244          printf("\n");
2245       }
2246       return;
2247    }
2248 
2249    // blob doesn't use CP_NOP for string_marker but it does
2250    // use it for things that end up looking like, but aren't
2251    // ascii chars:
2252    if (!options->decode_markers)
2253       return;
2254 
2255    print_nop_tail_string(dwords, sizedwords);
2256    printf("\n");
2257 }
2258 
/*
 * Extract the target address and size from an indirect-buffer packet.
 *
 *   dwords/sizedwords - packet payload
 *   ibaddr            - receives the IB address
 *   ibsize            - receives the IB size
 *
 * Returns a pointer to the first dword after the payload.
 */
uint32_t *
parse_cp_indirect(uint32_t *dwords, uint32_t sizedwords,
                  uint64_t *ibaddr, uint32_t *ibsize)
{
   if (!is_64b()) {
      /* 32b address, then size: */
      assert(sizedwords == 2);

      *ibaddr = dwords[0];
      *ibsize = dwords[1];

      return &dwords[2];
   }

   /* a5xx+.. low 32b of gpu addr, high 32b, then size: */
   assert(sizedwords == 3);

   *ibaddr = ((uint64_t)dwords[1] << 32) | dwords[0];
   *ibsize = dwords[2];

   return &dwords[3];
}
2281 
2282 static void
cp_indirect(uint32_t * dwords,uint32_t sizedwords,int level)2283 cp_indirect(uint32_t *dwords, uint32_t sizedwords, int level)
2284 {
2285    /* traverse indirect buffers */
2286    uint64_t ibaddr;
2287    uint32_t ibsize;
2288    uint32_t *ptr = NULL;
2289 
2290    dwords = parse_cp_indirect(dwords, sizedwords, &ibaddr, &ibsize);
2291 
2292    if (!quiet(3)) {
2293       if (is_64b()) {
2294          printf("%sibaddr:%016" PRIx64 "\n", levels[level], ibaddr);
2295       } else {
2296          printf("%sibaddr:%08x\n", levels[level], (uint32_t)ibaddr);
2297       }
2298       printf("%sibsize:%08x\n", levels[level], ibsize);
2299    }
2300 
2301    if (options->once && has_dumped(ibaddr, enable_mask))
2302       return;
2303 
2304    /* 'query-compare' mode implies 'once' mode, although we need only to
2305     * process the cmdstream for *any* enable_mask mode, since we are
2306     * comparing binning vs draw reg values at the same time, ie. it is
2307     * not useful to process the same draw in both binning and draw pass.
2308     */
2309    if (options->query_compare && has_dumped(ibaddr, MODE_ALL))
2310       return;
2311 
2312    /* map gpuaddr back to hostptr: */
2313    ptr = hostptr(ibaddr);
2314 
2315    if (ptr) {
2316       /* If the GPU hung within the target IB, the trigger point will be
2317        * just after the current CP_INDIRECT_BUFFER.  Because the IB is
2318        * executed but never returns.  Account for this by checking if
2319        * the IB returned:
2320        */
2321       highlight_gpuaddr(gpuaddr(dwords));
2322 
2323       ib++;
2324       ibs[ib].base = ibaddr;
2325       ibs[ib].size = ibsize;
2326 
2327       dump_commands(ptr, ibsize, level);
2328       ib--;
2329    } else {
2330       fprintf(stderr, "could not find: %016" PRIx64 " (%d)\n", ibaddr, ibsize);
2331    }
2332 }
2333 
2334 static void
cp_start_bin(uint32_t * dwords,uint32_t sizedwords,int level)2335 cp_start_bin(uint32_t *dwords, uint32_t sizedwords, int level)
2336 {
2337    uint64_t ibaddr;
2338    uint32_t ibsize;
2339    uint32_t loopcount;
2340    uint32_t *ptr = NULL;
2341 
2342    loopcount = dwords[0];
2343    ibaddr = dwords[1];
2344    ibaddr |= ((uint64_t)dwords[2]) << 32;
2345    ibsize = dwords[3];
2346 
2347    /* map gpuaddr back to hostptr: */
2348    ptr = hostptr(ibaddr);
2349 
2350    if (ptr) {
2351       /* If the GPU hung within the target IB, the trigger point will be
2352        * just after the current CP_START_BIN.  Because the IB is
2353        * executed but never returns.  Account for this by checking if
2354        * the IB returned:
2355        */
2356       highlight_gpuaddr(gpuaddr(&dwords[5]));
2357 
2358       /* TODO: we should duplicate the body of the loop after each bin, so
2359        * that draws get the correct state. We should also figure out if there
2360        * are any registers that can tell us what bin we're in when we hang so
2361        * that crashdec points to the right place.
2362        */
2363       ib++;
2364       for (uint32_t i = 0; i < loopcount; i++) {
2365          ibs[ib].base = ibaddr;
2366          ibs[ib].size = ibsize;
2367          printl(3, "%sbin %u\n", levels[level], i);
2368          dump_commands(ptr, ibsize, level);
2369          ibaddr += ibsize;
2370          ptr += ibsize;
2371       }
2372       ib--;
2373    } else {
2374       fprintf(stderr, "could not find: %016" PRIx64 " (%d)\n", ibaddr, ibsize);
2375    }
2376 }
2377 
2378 static void
cp_fixed_stride_draw_table(uint32_t * dwords,uint32_t sizedwords,int level)2379 cp_fixed_stride_draw_table(uint32_t *dwords, uint32_t sizedwords, int level)
2380 {
2381    uint64_t ibaddr;
2382    uint32_t ibsize;
2383    uint32_t loopcount;
2384    uint32_t *ptr = NULL;
2385 
2386    loopcount = dwords[3];
2387    ibaddr = dwords[0];
2388    ibaddr |= ((uint64_t)dwords[1]) << 32;
2389    ibsize = dwords[2] >> 20;
2390 
2391    /* map gpuaddr back to hostptr: */
2392    ptr = hostptr(ibaddr);
2393 
2394    if (ptr) {
2395       /* If the GPU hung within the target IB, the trigger point will be
2396        * just after the current CP_START_BIN.  Because the IB is
2397        * executed but never returns.  Account for this by checking if
2398        * the IB returned:
2399        */
2400       highlight_gpuaddr(gpuaddr(&dwords[5]));
2401 
2402       ib++;
2403       for (uint32_t i = 0; i < loopcount; i++) {
2404          ibs[ib].base = ibaddr;
2405          ibs[ib].size = ibsize;
2406          printl(3, "%sdraw %u\n", levels[level], i);
2407          dump_commands(ptr, ibsize, level);
2408          ibaddr += ibsize;
2409          ptr += ibsize;
2410       }
2411       ib--;
2412    } else {
2413       fprintf(stderr, "could not find: %016" PRIx64 " (%d)\n", ibaddr, ibsize);
2414    }
2415 }
2416 
2417 static void
cp_wfi(uint32_t * dwords,uint32_t sizedwords,int level)2418 cp_wfi(uint32_t *dwords, uint32_t sizedwords, int level)
2419 {
2420    needs_wfi = false;
2421 }
2422 
2423 static void
cp_mem_write(uint32_t * dwords,uint32_t sizedwords,int level)2424 cp_mem_write(uint32_t *dwords, uint32_t sizedwords, int level)
2425 {
2426    if (quiet(2))
2427       return;
2428 
2429    if (is_64b()) {
2430       uint64_t gpuaddr = dwords[0] | (((uint64_t)dwords[1]) << 32);
2431       printf("%sgpuaddr:%016" PRIx64 "\n", levels[level], gpuaddr);
2432       dump_hex(&dwords[2], sizedwords - 2, level + 1);
2433 
2434       if (pkt_is_type4(dwords[2]) || pkt_is_type7(dwords[2]))
2435          dump_commands(&dwords[2], sizedwords - 2, level + 1);
2436    } else {
2437       uint32_t gpuaddr = dwords[0];
2438       printf("%sgpuaddr:%08x\n", levels[level], gpuaddr);
2439       dump_float((float *)&dwords[1], sizedwords - 1, level + 1);
2440    }
2441 }
2442 
2443 static void
cp_rmw(uint32_t * dwords,uint32_t sizedwords,int level)2444 cp_rmw(uint32_t *dwords, uint32_t sizedwords, int level)
2445 {
2446    uint32_t val = dwords[0] & 0xffff;
2447    uint32_t and = dwords[1];
2448    uint32_t or = dwords[2];
2449    printl(3, "%srmw (%s & 0x%08x) | 0x%08x)\n", levels[level], regname(val, 1),
2450           and, or);
2451    if (needs_wfi)
2452       printl(2, "NEEDS WFI: rmw (%s & 0x%08x) | 0x%08x)\n", regname(val, 1),
2453              and, or);
2454    reg_set(val, (reg_val(val) & and) | or);
2455 }
2456 
2457 static void
cp_reg_mem(uint32_t * dwords,uint32_t sizedwords,int level)2458 cp_reg_mem(uint32_t *dwords, uint32_t sizedwords, int level)
2459 {
2460    uint32_t val = dwords[0] & 0xffff;
2461    printl(3, "%sbase register: %s\n", levels[level], regname(val, 1));
2462 
2463    if (quiet(2))
2464       return;
2465 
2466    uint64_t gpuaddr = dwords[1] | (((uint64_t)dwords[2]) << 32);
2467    printf("%sgpuaddr:%016" PRIx64 "\n", levels[level], gpuaddr);
2468    void *ptr = hostptr(gpuaddr);
2469    if (ptr) {
2470       uint32_t cnt = (dwords[0] >> 19) & 0x3ff;
2471       dump_hex(ptr, cnt, level + 1);
2472    }
2473 }
2474 
/* One draw-state group as last programmed by cp_set_draw_state(). */
struct draw_state {
   uint16_t enable_mask; /* compared against the current enable_mask */
   uint16_t flags;       /* FLAG_* bits from the packet */
   uint32_t count;       /* size of the group's cmdstream; 0 == unused */
   uint64_t addr;        /* gpu address of the group's cmdstream */
};

/* Table of draw-state groups, indexed by group id. */
struct draw_state state[32];

/* Bits of the 4-bit flags field of a draw-state entry: */
#define FLAG_DIRTY              (1 << 0)
#define FLAG_DISABLE            (1 << 1)
#define FLAG_DISABLE_ALL_GROUPS (1 << 2)
#define FLAG_LOAD_IMMED         (1 << 3)

/* Last value written by cp_set_mode(). */
static int draw_mode;
2490 
2491 static void
disable_group(unsigned group_id)2492 disable_group(unsigned group_id)
2493 {
2494    struct draw_state *ds = &state[group_id];
2495    memset(ds, 0, sizeof(*ds));
2496 }
2497 
2498 static void
disable_all_groups(void)2499 disable_all_groups(void)
2500 {
2501    for (unsigned i = 0; i < ARRAY_SIZE(state); i++)
2502       disable_group(i);
2503 }
2504 
2505 static void
load_group(unsigned group_id,int level)2506 load_group(unsigned group_id, int level)
2507 {
2508    struct draw_state *ds = &state[group_id];
2509 
2510    if (!ds->count)
2511       return;
2512 
2513    printl(2, "%sgroup_id: %u\n", levels[level], group_id);
2514    printl(2, "%scount: %d\n", levels[level], ds->count);
2515    printl(2, "%saddr: %016llx\n", levels[level], ds->addr);
2516    printl(2, "%sflags: %x\n", levels[level], ds->flags);
2517 
2518    if (options->info->chip >= 6) {
2519       printl(2, "%senable_mask: 0x%x\n", levels[level], ds->enable_mask);
2520 
2521       if (!(ds->enable_mask & enable_mask)) {
2522          printl(2, "%s\tskipped!\n\n", levels[level]);
2523          return;
2524       }
2525    }
2526 
2527    void *ptr = hostptr(ds->addr);
2528    if (ptr) {
2529       if (!quiet(2))
2530          dump_hex(ptr, ds->count, level + 1);
2531 
2532       ib++;
2533       dump_commands(ptr, ds->count, level + 1);
2534       ib--;
2535    }
2536 }
2537 
2538 static void
load_all_groups(int level)2539 load_all_groups(int level)
2540 {
2541    /* sanity check, we should never recursively hit recursion here, and if
2542     * we do bad things happen:
2543     */
2544    static bool loading_groups = false;
2545    if (loading_groups) {
2546       printf("ERROR: nothing in draw state should trigger recursively loading "
2547              "groups!\n");
2548       return;
2549    }
2550    loading_groups = true;
2551    for (unsigned i = 0; i < ARRAY_SIZE(state); i++)
2552       load_group(i, level);
2553    loading_groups = false;
2554 
2555    /* in 'query-compare' mode, defer disabling all groups until we have a
2556     * chance to process the query:
2557     */
2558    if (!options->query_compare)
2559       disable_all_groups();
2560 }
2561 
2562 static void
cp_set_draw_state(uint32_t * dwords,uint32_t sizedwords,int level)2563 cp_set_draw_state(uint32_t *dwords, uint32_t sizedwords, int level)
2564 {
2565    uint32_t i;
2566 
2567    for (i = 0; i < sizedwords;) {
2568       struct draw_state *ds;
2569       uint32_t count = dwords[i] & 0xffff;
2570       uint32_t group_id = (dwords[i] >> 24) & 0x1f;
2571       uint32_t enable_mask = (dwords[i] >> 20) & 0xf;
2572       uint32_t flags = (dwords[i] >> 16) & 0xf;
2573       uint64_t addr;
2574 
2575       if (is_64b()) {
2576          addr = dwords[i + 1];
2577          addr |= ((uint64_t)dwords[i + 2]) << 32;
2578          i += 3;
2579       } else {
2580          addr = dwords[i + 1];
2581          i += 2;
2582       }
2583 
2584       if (flags & FLAG_DISABLE_ALL_GROUPS) {
2585          disable_all_groups();
2586          continue;
2587       }
2588 
2589       if (flags & FLAG_DISABLE) {
2590          disable_group(group_id);
2591          continue;
2592       }
2593 
2594       assert(group_id < ARRAY_SIZE(state));
2595       disable_group(group_id);
2596 
2597       ds = &state[group_id];
2598 
2599       ds->enable_mask = enable_mask;
2600       ds->flags = flags;
2601       ds->count = count;
2602       ds->addr = addr;
2603 
2604       if (flags & FLAG_LOAD_IMMED) {
2605          load_group(group_id, level);
2606          disable_group(group_id);
2607       }
2608    }
2609 }
2610 
2611 static void
cp_set_mode(uint32_t * dwords,uint32_t sizedwords,int level)2612 cp_set_mode(uint32_t *dwords, uint32_t sizedwords, int level)
2613 {
2614    draw_mode = dwords[0];
2615 }
2616 
/* execute compute shader: report a "compute" query and dump the register
 * summary; the packet payload itself is not inspected.
 */
static void
cp_exec_cs(uint32_t *dwords, uint32_t sizedwords, int level)
{
   (void)dwords;
   (void)sizedwords;

   do_query("compute", 0);
   dump_register_summary(level);
}
2624 
2625 static void
cp_exec_cs_indirect(uint32_t * dwords,uint32_t sizedwords,int level)2626 cp_exec_cs_indirect(uint32_t *dwords, uint32_t sizedwords, int level)
2627 {
2628    uint64_t addr;
2629 
2630    if (is_64b()) {
2631       addr = (((uint64_t)dwords[2] & 0x1ffff) << 32) | dwords[1];
2632    } else {
2633       addr = dwords[1];
2634    }
2635 
2636    printl(3, "%saddr: %016llx\n", levels[level], addr);
2637    dump_gpuaddr_size(addr, level, 0x10, 2);
2638 
2639    do_query("compute", 0);
2640    dump_register_summary(level);
2641 }
2642 
2643 static void
cp_set_marker(uint32_t * dwords,uint32_t sizedwords,int level)2644 cp_set_marker(uint32_t *dwords, uint32_t sizedwords, int level)
2645 {
2646    uint32_t val = dwords[0] & 0xf;
2647    const char *mode = rnn_enumname(rnn, "a6xx_marker", val);
2648 
2649    if (!mode) {
2650       static char buf[8];
2651       sprintf(buf, "0x%x", val);
2652       render_mode = buf;
2653       return;
2654    }
2655 
2656    render_mode = mode;
2657 
2658    if (!strcmp(render_mode, "RM6_BINNING")) {
2659       enable_mask = MODE_BINNING;
2660    } else if (!strcmp(render_mode, "RM6_GMEM")) {
2661       enable_mask = MODE_GMEM;
2662    } else if (!strcmp(render_mode, "RM6_BYPASS")) {
2663       enable_mask = MODE_BYPASS;
2664    }
2665 }
2666 
2667 static void
cp_set_thread_control(uint32_t * dwords,uint32_t sizedwords,int level)2668 cp_set_thread_control(uint32_t *dwords, uint32_t sizedwords, int level)
2669 {
2670    uint32_t val = dwords[0] & 0x3;
2671    thread = rnn_enumname(rnn, "cp_thread", val);
2672 }
2673 
2674 static void
cp_set_render_mode(uint32_t * dwords,uint32_t sizedwords,int level)2675 cp_set_render_mode(uint32_t *dwords, uint32_t sizedwords, int level)
2676 {
2677    uint64_t addr;
2678    uint32_t *ptr, len;
2679 
2680    assert(is_64b());
2681 
2682    /* TODO seems to have two ptrs, 9 dwords total (incl pkt7 hdr)..
2683     * not sure if this can come in different sizes.
2684     *
2685     * First ptr doesn't seem to be cmdstream, second one does.
2686     *
2687     * Comment from downstream kernel:
2688     *
2689     * SRM -- set render mode (ex binning, direct render etc)
2690     * SRM is set by UMD usually at start of IB to tell CP the type of
2691     * preemption.
2692     * KMD needs to set SRM to NULL to indicate CP that rendering is
2693     * done by IB.
2694     * ------------------------------------------------------------------
2695     *
2696     * Seems to always be one of these two:
2697     * 70ec0008 00000001 001c0000 00000000 00000010 00000003 0000000d 001c2000
2698     * 00000000 70ec0008 00000001 001c0000 00000000 00000000 00000003 0000000d
2699     * 001c2000 00000000
2700     *
2701     */
2702 
2703    assert(options->info->chip >= 5);
2704 
2705    render_mode = rnn_enumname(rnn, "render_mode_cmd", dwords[0]);
2706 
2707    if (sizedwords == 1)
2708       return;
2709 
2710    addr = dwords[1];
2711    addr |= ((uint64_t)dwords[2]) << 32;
2712 
2713    mode = dwords[3];
2714 
2715    dump_gpuaddr(addr, level + 1);
2716 
2717    if (sizedwords == 5)
2718       return;
2719 
2720    assert(sizedwords == 8);
2721 
2722    len = dwords[5];
2723    addr = dwords[6];
2724    addr |= ((uint64_t)dwords[7]) << 32;
2725 
2726    printl(3, "%saddr: 0x%016lx\n", levels[level], addr);
2727    printl(3, "%slen:  0x%x\n", levels[level], len);
2728 
2729    ptr = hostptr(addr);
2730 
2731    if (ptr) {
2732       if (!quiet(2)) {
2733          ib++;
2734          dump_commands(ptr, len, level + 1);
2735          ib--;
2736          dump_hex(ptr, len, level + 1);
2737       }
2738    }
2739 }
2740 
2741 static void
cp_compute_checkpoint(uint32_t * dwords,uint32_t sizedwords,int level)2742 cp_compute_checkpoint(uint32_t *dwords, uint32_t sizedwords, int level)
2743 {
2744    uint64_t addr;
2745    uint32_t *ptr, len;
2746 
2747    assert(is_64b());
2748    assert(options->info->chip >= 5);
2749 
2750    if (sizedwords == 8) {
2751       addr = dwords[5];
2752       addr |= ((uint64_t)dwords[6]) << 32;
2753       len = dwords[7];
2754    } else {
2755       addr = dwords[5];
2756       addr |= ((uint64_t)dwords[6]) << 32;
2757       len = dwords[4];
2758    }
2759 
2760    printl(3, "%saddr: 0x%016" PRIx64 "\n", levels[level], addr);
2761    printl(3, "%slen:  0x%x\n", levels[level], len);
2762 
2763    ptr = hostptr(addr);
2764 
2765    if (ptr) {
2766       if (!quiet(2)) {
2767          ib++;
2768          dump_commands(ptr, len, level + 1);
2769          ib--;
2770          dump_hex(ptr, len, level + 1);
2771       }
2772    }
2773 }
2774 
2775 static void
cp_blit(uint32_t * dwords,uint32_t sizedwords,int level)2776 cp_blit(uint32_t *dwords, uint32_t sizedwords, int level)
2777 {
2778    do_query(rnn_enumname(rnn, "cp_blit_cmd", dwords[0]), 0);
2779    print_mode(level);
2780    dump_register_summary(level);
2781 }
2782 
2783 static void
cp_context_reg_bunch(uint32_t * dwords,uint32_t sizedwords,int level)2784 cp_context_reg_bunch(uint32_t *dwords, uint32_t sizedwords, int level)
2785 {
2786    int i;
2787 
2788    /* NOTE: seems to write same reg multiple times.. not sure if different parts
2789     * of these are triggered by the FLUSH_SO_n events?? (if that is what they
2790     * actually are?)
2791     */
2792    bool saved_summary = summary;
2793    summary = false;
2794 
2795    struct regacc r = regacc(NULL);
2796 
2797    for (i = 0; i < sizedwords; i += 2) {
2798       if (regacc_push(&r, dwords[i + 0], dwords[i + 1]))
2799          dump_register(&r, level + 1);
2800       reg_set(dwords[i + 0], dwords[i + 1]);
2801    }
2802 
2803    summary = saved_summary;
2804 }
2805 
2806 /* Looks similar to CP_CONTEXT_REG_BUNCH, but not quite the same...
2807  * discarding first two dwords??
2808  *
2809  *   CP_CONTEXT_REG_BUNCH:
2810  *        0221: 9c1ff606  (rep)(xmov3)mov $usraddr, $data
2811  *        ; mov $data, $data
2812  *        ; mov $usraddr, $data
2813  *        ; mov $data, $data
2814  *        0222: d8000000  waitin
2815  *        0223: 981f0806  mov $01, $data
2816  *
2817  *   CP_UNK5D:
2818  *        0224: 981f0006  mov $00, $data
2819  *        0225: 981f0006  mov $00, $data
2820  *        0226: 9c1ff206  (rep)(xmov1)mov $usraddr, $data
2821  *        ; mov $data, $data
2822  *        0227: d8000000  waitin
2823  *        0228: 981f0806  mov $01, $data
2824  *
2825  */
static void
cp_context_reg_bunch2(uint32_t *dwords, uint32_t sizedwords, int level)
{
   /* The first two payload dwords are skipped and the remainder is decoded
    * exactly like CP_CONTEXT_REG_BUNCH.
    *
    * sizedwords is unsigned, so a truncated packet carrying fewer than two
    * payload dwords has to be rejected up front: subtracting 2 would wrap
    * around to a huge count and send the decoder far out of bounds.
    */
   if (sizedwords < 2)
      return;

   cp_context_reg_bunch(dwords + 2, sizedwords - 2, level);
}
2833 
2834 static void
cp_reg_write(uint32_t * dwords,uint32_t sizedwords,int level)2835 cp_reg_write(uint32_t *dwords, uint32_t sizedwords, int level)
2836 {
2837    uint32_t reg = dwords[1] & 0xffff;
2838 
2839    struct regacc r = regacc(NULL);
2840    if (regacc_push(&r, reg, dwords[2]))
2841       dump_register(&r, level + 1);
2842    reg_set(reg, dwords[2]);
2843 }
2844 
2845 static void
cp_set_ctxswitch_ib(uint32_t * dwords,uint32_t sizedwords,int level)2846 cp_set_ctxswitch_ib(uint32_t *dwords, uint32_t sizedwords, int level)
2847 {
2848    uint64_t addr;
2849    uint32_t size = dwords[2] & 0xffff;
2850    void *ptr;
2851 
2852    addr = dwords[0] | ((uint64_t)dwords[1] << 32);
2853 
2854    if (!quiet(3)) {
2855       printf("%saddr=%" PRIx64 "\n", levels[level], addr);
2856    }
2857 
2858    ptr = hostptr(addr);
2859    if (ptr) {
2860       dump_commands(ptr, size, level + 1);
2861    }
2862 }
2863 
2864 static void
cp_skip_ib2_enable_global(uint32_t * dwords,uint32_t sizedwords,int level)2865 cp_skip_ib2_enable_global(uint32_t *dwords, uint32_t sizedwords, int level)
2866 {
2867    skip_ib2_enable_global = dwords[0];
2868 }
2869 
2870 static void
cp_skip_ib2_enable_local(uint32_t * dwords,uint32_t sizedwords,int level)2871 cp_skip_ib2_enable_local(uint32_t *dwords, uint32_t sizedwords, int level)
2872 {
2873    skip_ib2_enable_local = dwords[0];
2874 }
2875 
2876 #define CP(x, fxn, ...) { "CP_" #x, fxn, ##__VA_ARGS__ }
2877 static const struct type3_op {
2878    const char *name;
2879    void (*fxn)(uint32_t *dwords, uint32_t sizedwords, int level);
2880    struct {
2881       bool load_all_groups;
2882    } options;
2883 } type3_op[] = {
2884    CP(NOP, cp_nop),
2885    CP(INDIRECT_BUFFER, cp_indirect),
2886    CP(INDIRECT_BUFFER_PFD, cp_indirect),
2887    CP(WAIT_FOR_IDLE, cp_wfi),
2888    CP(REG_RMW, cp_rmw),
2889    CP(REG_TO_MEM, cp_reg_mem),
2890    CP(MEM_TO_REG, cp_reg_mem), /* same layout as CP_REG_TO_MEM */
2891    CP(MEM_WRITE, cp_mem_write),
2892    CP(EVENT_WRITE, cp_event_write),
2893    CP(RUN_OPENCL, cp_run_cl),
2894    CP(DRAW_INDX, cp_draw_indx, {.load_all_groups = true}),
2895    CP(DRAW_INDX_2, cp_draw_indx_2, {.load_all_groups = true}),
2896    CP(SET_CONSTANT, cp_set_const),
2897    CP(IM_LOAD_IMMEDIATE, cp_im_loadi),
2898    CP(WIDE_REG_WRITE, cp_wide_reg_write),
2899 
2900    /* for a3xx */
2901    CP(LOAD_STATE, cp_load_state),
2902    CP(SET_BIN, cp_set_bin),
2903 
2904    /* for a4xx */
2905    CP(LOAD_STATE4, cp_load_state),
2906    CP(SET_DRAW_STATE, cp_set_draw_state),
2907    CP(DRAW_INDX_OFFSET, cp_draw_indx_offset, {.load_all_groups = true}),
2908    CP(EXEC_CS, cp_exec_cs, {.load_all_groups = true}),
2909    CP(EXEC_CS_INDIRECT, cp_exec_cs_indirect, {.load_all_groups = true}),
2910 
2911    /* for a5xx */
2912    CP(SET_RENDER_MODE, cp_set_render_mode),
2913    CP(COMPUTE_CHECKPOINT, cp_compute_checkpoint),
2914    CP(BLIT, cp_blit),
2915    CP(CONTEXT_REG_BUNCH, cp_context_reg_bunch),
2916    CP(DRAW_INDIRECT, cp_draw_indirect, {.load_all_groups = true}),
2917    CP(DRAW_INDX_INDIRECT, cp_draw_indx_indirect, {.load_all_groups = true}),
2918    CP(DRAW_INDIRECT_MULTI, cp_draw_indirect_multi, {.load_all_groups = true}),
2919    CP(SKIP_IB2_ENABLE_GLOBAL, cp_skip_ib2_enable_global),
2920    CP(SKIP_IB2_ENABLE_LOCAL, cp_skip_ib2_enable_local),
2921 
2922    /* for a6xx */
2923    CP(LOAD_STATE6_GEOM, cp_load_state),
2924    CP(LOAD_STATE6_FRAG, cp_load_state),
2925    CP(LOAD_STATE6, cp_load_state),
2926    CP(SET_MODE, cp_set_mode),
2927    CP(SET_MARKER, cp_set_marker),
2928    CP(REG_WRITE, cp_reg_write),
2929    CP(DRAW_AUTO, cp_draw_auto, {.load_all_groups = true}),
2930 
2931    CP(SET_CTXSWITCH_IB, cp_set_ctxswitch_ib),
2932 
2933    CP(START_BIN, cp_start_bin),
2934 
2935    CP(FIXED_STRIDE_DRAW_TABLE, cp_fixed_stride_draw_table),
2936 
2937    /* for a7xx */
2938    CP(THREAD_CONTROL, cp_set_thread_control),
2939    CP(CONTEXT_REG_BUNCH2, cp_context_reg_bunch2),
2940    CP(EVENT_WRITE7, cp_event_write),
2941 };
2942 
/* Handler that intentionally does nothing; it serves as the .fxn of the
 * fallback entry returned by get_type3_op() for packets without a
 * dedicated decoder.
 */
static void
noop_fxn(uint32_t *dwords, uint32_t sizedwords, int level)
{
   (void)dwords;
   (void)sizedwords;
   (void)level;
}
2947 
2948 static const struct type3_op *
get_type3_op(unsigned opc)2949 get_type3_op(unsigned opc)
2950 {
2951    static const struct type3_op dummy_op = {
2952       .fxn = noop_fxn,
2953    };
2954    const char *name = pktname(opc);
2955 
2956    if (!name)
2957       return &dummy_op;
2958 
2959    for (unsigned i = 0; i < ARRAY_SIZE(type3_op); i++)
2960       if (!strcmp(name, type3_op[i].name))
2961          return &type3_op[i];
2962 
2963    return &dummy_op;
2964 }
2965 
2966 void
dump_commands(uint32_t * dwords,uint32_t sizedwords,int level)2967 dump_commands(uint32_t *dwords, uint32_t sizedwords, int level)
2968 {
2969    int dwords_left = sizedwords;
2970    uint32_t count = 0; /* dword count including packet header */
2971    uint32_t val;
2972 
2973    //	assert(dwords);
2974    if (!dwords) {
2975       printf("NULL cmd buffer!\n");
2976       return;
2977    }
2978 
2979    assert(ib < ARRAY_SIZE(draws));
2980    draws[ib] = 0;
2981 
2982    while (dwords_left > 0) {
2983 
2984       current_draw_count = draw_count;
2985 
2986       /* hack, this looks like a -1 underflow, in some versions
2987        * when it tries to write zero registers via pkt0
2988        */
2989       //		if ((dwords[0] >> 16) == 0xffff)
2990       //			goto skip;
2991 
2992       if (pkt_is_regwrite(dwords[0], &val, &count)) {
2993          assert(val < regcnt());
2994          printl(3, "%swrite %s (%04x)\n", levels[level + 1], regname(val, 1),
2995                 val);
2996          dump_registers(val, dwords + 1, count - 1, level + 2);
2997          if (!quiet(3))
2998             dump_hex(dwords, count, level + 1);
2999 #if 0
3000       } else if (pkt_is_type1(dwords[0])) {
3001          count = 3;
3002          val = dwords[0] & 0xfff;
3003          printl(3, "%swrite %s\n", levels[level+1], regname(val, 1));
3004          dump_registers(val, dwords+1, 1, level+2);
3005          val = (dwords[0] >> 12) & 0xfff;
3006          printl(3, "%swrite %s\n", levels[level+1], regname(val, 1));
3007          dump_registers(val, dwords+2, 1, level+2);
3008          if (!quiet(3))
3009             dump_hex(dwords, count, level+1);
3010 #endif
3011       } else if (pkt_is_opcode(dwords[0], &val, &count)) {
3012          const struct type3_op *op = get_type3_op(val);
3013          if (op->options.load_all_groups)
3014             load_all_groups(level + 1);
3015          const char *name = pktname(val);
3016          if (!quiet(2)) {
3017             printf("\t%sopcode: %s%s%s (%02x) (%d dwords)\n", levels[level],
3018                    rnn->vc->colors->bctarg, name, rnn->vc->colors->reset, val,
3019                    count);
3020          }
3021          if (name) {
3022             /* special hack for two packets that decode the same way
3023              * on a6xx:
3024              */
3025             if (!strcmp(name, "CP_LOAD_STATE6_FRAG") ||
3026                 !strcmp(name, "CP_LOAD_STATE6_GEOM"))
3027                name = "CP_LOAD_STATE6";
3028             dump_domain(dwords + 1, count - 1, level + 2, name);
3029          }
3030          op->fxn(dwords + 1, count - 1, level + 1);
3031          if (!quiet(2))
3032             dump_hex(dwords, count, level + 1);
3033       } else if (pkt_is_type2(dwords[0])) {
3034          printl(3, "%snop\n", levels[level + 1]);
3035          count = 1;
3036       } else {
3037          printf("bad type! %08x\n", dwords[0]);
3038          /* for 5xx+ we can do a passable job of looking for start of next valid
3039           * packet: */
3040          if (options->info->chip >= 5) {
3041             count = find_next_packet(dwords, dwords_left);
3042          } else {
3043             return;
3044          }
3045       }
3046 
3047       dwords += count;
3048       dwords_left -= count;
3049    }
3050 
3051    if (dwords_left < 0)
3052       printf("**** this ain't right!! dwords_left=%d\n", dwords_left);
3053 }
3054