xref: /aosp_15_r20/external/mesa3d/src/asahi/lib/decode.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright 2017-2019 Alyssa Rosenzweig
3  * Copyright 2017-2019 Connor Abbott
4  * Copyright 2019 Collabora, Ltd.
5  * SPDX-License-Identifier: MIT
6  */
7 
8 #include <ctype.h>
9 #include <memory.h>
10 #include <stdarg.h>
11 #include <stdbool.h>
12 #include <stdio.h>
13 #include <stdlib.h>
14 #include "util/u_dynarray.h"
15 #include "util/u_math.h"
16 #include <sys/mman.h>
17 #include <agx_pack.h>
18 
19 #include "util/u_hexdump.h"
20 #include "decode.h"
21 #include "unstable_asahi_drm.h"
22 #ifdef __APPLE__
23 #include "agx_iokit.h"
24 #endif
25 
26 struct libagxdecode_config lib_config;
27 
/*
 * Placeholder disassembler. Takes a code buffer, the number of valid bytes in
 * it, and an output stream, and deliberately does nothing with any of them.
 */
static void
agx_disassemble(void *_code, size_t maxlen, FILE *fp)
{
   /* stub */
   (void)_code;
   (void)maxlen;
   (void)fp;
}
33 
/* Stream that the decoder writes all of its dump output to */
FILE *agxdecode_dump_stream;

/* NOTE(review): presumably an upper bound on tracked mappings; confirm usage */
#define MAX_MAPPINGS 4096
37 
38 struct agxdecode_ctx {
39    struct util_dynarray mmap_array;
40    uint64_t shader_base;
41 };
42 
43 static uint64_t
decode_usc(struct agxdecode_ctx * ctx,uint64_t addr)44 decode_usc(struct agxdecode_ctx *ctx, uint64_t addr)
45 {
46    return ctx->shader_base + addr;
47 }
48 
49 struct agxdecode_ctx *
agxdecode_new_context(uint64_t shader_base)50 agxdecode_new_context(uint64_t shader_base)
51 {
52    struct agxdecode_ctx *ctx = calloc(1, sizeof(struct agxdecode_ctx));
53    ctx->shader_base = shader_base;
54    return ctx;
55 }
56 
57 void
agxdecode_destroy_context(struct agxdecode_ctx * ctx)58 agxdecode_destroy_context(struct agxdecode_ctx *ctx)
59 {
60    free(ctx);
61 }
62 
63 static struct agx_bo *
agxdecode_find_mapped_gpu_mem_containing(struct agxdecode_ctx * ctx,uint64_t addr)64 agxdecode_find_mapped_gpu_mem_containing(struct agxdecode_ctx *ctx,
65                                          uint64_t addr)
66 {
67    util_dynarray_foreach(&ctx->mmap_array, struct agx_bo, it) {
68       if (it->va && addr >= it->va->addr && (addr - it->va->addr) < it->size)
69          return it;
70    }
71 
72    return NULL;
73 }
74 
75 static struct agx_bo *
agxdecode_find_handle(struct agxdecode_ctx * ctx,unsigned handle,unsigned type)76 agxdecode_find_handle(struct agxdecode_ctx *ctx, unsigned handle, unsigned type)
77 {
78    util_dynarray_foreach(&ctx->mmap_array, struct agx_bo, it) {
79       if (it->handle == handle)
80          return it;
81    }
82 
83    return NULL;
84 }
85 
86 static size_t
__agxdecode_fetch_gpu_mem(struct agxdecode_ctx * ctx,const struct agx_bo * mem,uint64_t gpu_va,size_t size,void * buf,int line,const char * filename)87 __agxdecode_fetch_gpu_mem(struct agxdecode_ctx *ctx, const struct agx_bo *mem,
88                           uint64_t gpu_va, size_t size, void *buf, int line,
89                           const char *filename)
90 {
91    if (lib_config.read_gpu_mem)
92       return lib_config.read_gpu_mem(gpu_va, size, buf);
93 
94    if (!mem)
95       mem = agxdecode_find_mapped_gpu_mem_containing(ctx, gpu_va);
96 
97    if (!mem) {
98       fprintf(stderr, "Access to unknown memory %" PRIx64 " in %s:%d\n", gpu_va,
99               filename, line);
100       fflush(agxdecode_dump_stream);
101       assert(0);
102    }
103 
104    assert(mem);
105 
106    if (size + (gpu_va - mem->va->addr) > mem->size) {
107       fprintf(stderr,
108               "Overflowing to unknown memory %" PRIx64
109               " of size %zu (max size %zu) in %s:%d\n",
110               gpu_va, size, (size_t)(mem->size - (gpu_va - mem->va->addr)),
111               filename, line);
112       fflush(agxdecode_dump_stream);
113       assert(0);
114    }
115 
116    memcpy(buf, mem->map + gpu_va - mem->va->addr, size);
117 
118    return size;
119 }
120 
/* Fetch nbytes of GPU memory at va into dst. The containing mapping is looked
 * up automatically and the call site is recorded for diagnostics.
 */
#define agxdecode_fetch_gpu_mem(decoder, va, nbytes, dst)                      \
   __agxdecode_fetch_gpu_mem(decoder, NULL, va, nbytes, dst, __LINE__,         \
                             __FILE__)

/* Fetch exactly sizeof(arr) bytes at va into arr. arr must be an actual array
 * (including a VLA), not a pointer, for sizeof to give the intended length.
 */
#define agxdecode_fetch_gpu_array(decoder, va, arr)                            \
   agxdecode_fetch_gpu_mem(decoder, va, sizeof(arr), arr)
126 
127 /* Helpers for parsing the cmdstream */
128 
/* Write a raw string to the dump stream */
#define agxdecode_log(text) fputs(text, agxdecode_dump_stream)

/* Write a string to the dump stream prefixed with "// " */
#define agxdecode_msg(text) fprintf(agxdecode_dump_stream, "// %s", text)

/* Print a title followed by an already-unpacked struct of the given type */
#define DUMP_UNPACKED(type, value, title)                                      \
   {                                                                           \
      agxdecode_log(title);                                                    \
      agx_print(agxdecode_dump_stream, type, value, 2);                        \
   }

/* Unpack the packed words at `packed` as the given type into a scoped local
 * named temp, then print it under a title (a newline is appended).
 */
#define DUMP_CL(type, packed, title)                                           \
   {                                                                           \
      agx_unpack(agxdecode_dump_stream, packed, type, temp);                   \
      DUMP_UNPACKED(type, temp, title "\n");                                   \
   }

/* Print "field = <value>" for one member of the object pointed to by obj */
#define DUMP_FIELD(obj, fmt, field)                                            \
   {                                                                           \
      fprintf(agxdecode_dump_stream, #field " = " fmt "\n", obj->field);       \
   }
148 
typedef struct drm_asahi_params_global decoder_params;

/*
 * Abstraction for command stream parsing.
 *
 * A decoder is handed the bytes of the current command and returns either the
 * number of bytes it consumed or one of the STATE_* codes below. When it
 * returns STATE_LINK or STATE_CALL it stores the jump target through
 * link_target. A return of 0 is treated by the driver loop as a decode
 * failure.
 */
typedef unsigned (*decode_cmd)(struct agxdecode_ctx *decoder,
                               const uint8_t *packet, uint64_t *link_target,
                               bool verbose, decoder_params *params,
                               void *user_data);

/* Stop decoding the current stream */
#define STATE_DONE (0xFFFFFFFFu)
/* Continue decoding at the link target */
#define STATE_LINK (0xFFFFFFFEu)
/* Push a return address, then continue at the link target */
#define STATE_CALL (0xFFFFFFFDu)
/* Pop a return address and continue there */
#define STATE_RET  (0xFFFFFFFCu)
160 
161 static void
agxdecode_stateful(struct agxdecode_ctx * ctx,uint64_t va,const char * label,decode_cmd decoder,bool verbose,decoder_params * params,void * data)162 agxdecode_stateful(struct agxdecode_ctx *ctx, uint64_t va, const char *label,
163                    decode_cmd decoder, bool verbose, decoder_params *params,
164                    void *data)
165 {
166    uint64_t stack[16];
167    unsigned sp = 0;
168 
169    uint8_t buf[1024];
170    size_t size = sizeof(buf);
171    if (!lib_config.read_gpu_mem) {
172       struct agx_bo *alloc = agxdecode_find_mapped_gpu_mem_containing(ctx, va);
173       assert(alloc != NULL && "nonexistent object");
174       fprintf(agxdecode_dump_stream, "%s (%" PRIx64 ", handle %u)\n", label, va,
175               alloc->handle);
176       size = MIN2(size, alloc->size - (va - alloc->va->addr));
177    } else {
178       fprintf(agxdecode_dump_stream, "%s (%" PRIx64 ")\n", label, va);
179    }
180    fflush(agxdecode_dump_stream);
181 
182    int len = agxdecode_fetch_gpu_mem(ctx, va, size, buf);
183 
184    int left = len;
185    uint8_t *map = buf;
186    uint64_t link = 0;
187 
188    fflush(agxdecode_dump_stream);
189 
190    while (left) {
191       if (len <= 0) {
192          fprintf(agxdecode_dump_stream, "!! Failed to read GPU memory\n");
193          fflush(agxdecode_dump_stream);
194          return;
195       }
196 
197       unsigned count = decoder(ctx, map, &link, verbose, params, data);
198 
199       /* If we fail to decode, default to a hexdump (don't hang) */
200       if (count == 0) {
201          u_hexdump(agxdecode_dump_stream, map, 8, false);
202          count = 8;
203       }
204 
205       fflush(agxdecode_dump_stream);
206       if (count == STATE_DONE) {
207          break;
208       } else if (count == STATE_LINK) {
209          fprintf(agxdecode_dump_stream, "Linking to 0x%" PRIx64 "\n\n", link);
210          va = link;
211          left = len = agxdecode_fetch_gpu_array(ctx, va, buf);
212          map = buf;
213       } else if (count == STATE_CALL) {
214          fprintf(agxdecode_dump_stream,
215                  "Calling 0x%" PRIx64 " (return = 0x%" PRIx64 ")\n\n", link,
216                  va + 8);
217          assert(sp < ARRAY_SIZE(stack));
218          stack[sp++] = va + 8;
219          va = link;
220          left = len = agxdecode_fetch_gpu_array(ctx, va, buf);
221          map = buf;
222       } else if (count == STATE_RET) {
223          assert(sp > 0);
224          va = stack[--sp];
225          fprintf(agxdecode_dump_stream, "Returning to 0x%" PRIx64 "\n\n", va);
226          left = len = agxdecode_fetch_gpu_array(ctx, va, buf);
227          map = buf;
228       } else {
229          va += count;
230          map += count;
231          left -= count;
232 
233          if (left < 512 && len == sizeof(buf)) {
234             left = len = agxdecode_fetch_gpu_array(ctx, va, buf);
235             map = buf;
236          }
237       }
238    }
239 }
240 
241 static void
agxdecode_texture_pbe(struct agxdecode_ctx * ctx,const void * map)242 agxdecode_texture_pbe(struct agxdecode_ctx *ctx, const void *map)
243 {
244    struct AGX_TEXTURE tex;
245    struct AGX_PBE pbe;
246 
247    bool valid_texture = AGX_TEXTURE_unpack(NULL, map, &tex);
248    bool valid_pbe = AGX_PBE_unpack(NULL, map, &pbe);
249 
250    /* Try to guess if it's texture or PBE */
251    valid_texture &=
252       tex.swizzle_r <= AGX_CHANNEL_0 && tex.swizzle_g <= AGX_CHANNEL_0 &&
253       tex.swizzle_b <= AGX_CHANNEL_0 && tex.swizzle_a <= AGX_CHANNEL_0;
254 
255    if (valid_texture && !valid_pbe) {
256       DUMP_CL(TEXTURE, map, "Texture");
257    } else if (valid_pbe && !valid_texture) {
258       DUMP_CL(PBE, map, "PBE");
259    } else {
260       if (!valid_texture) {
261          assert(!valid_pbe);
262          fprintf(agxdecode_dump_stream, "XXX: invalid texture/PBE\n");
263       }
264 
265       DUMP_CL(TEXTURE, map, "Texture");
266       DUMP_CL(PBE, map, "PBE");
267    }
268 }
269 
270 static unsigned
agxdecode_usc(struct agxdecode_ctx * ctx,const uint8_t * map,UNUSED uint64_t * link,UNUSED bool verbose,decoder_params * params,UNUSED void * data)271 agxdecode_usc(struct agxdecode_ctx *ctx, const uint8_t *map,
272               UNUSED uint64_t *link, UNUSED bool verbose,
273               decoder_params *params, UNUSED void *data)
274 {
275    enum agx_sampler_states *sampler_states = data;
276    enum agx_usc_control type = map[0];
277    uint8_t buf[8192];
278 
279    bool extended_samplers =
280       (sampler_states != NULL) &&
281       (((*sampler_states) == AGX_SAMPLER_STATES_8_EXTENDED) ||
282        ((*sampler_states) == AGX_SAMPLER_STATES_16_EXTENDED));
283 
284 #define USC_CASE(name, human)                                                  \
285    case AGX_USC_CONTROL_##name: {                                              \
286       DUMP_CL(USC_##name, map, human);                                         \
287       return AGX_USC_##name##_LENGTH;                                          \
288    }
289 
290    switch (type) {
291    case AGX_USC_CONTROL_NO_PRESHADER: {
292       DUMP_CL(USC_NO_PRESHADER, map, "No preshader");
293       return STATE_DONE;
294    }
295 
296    case AGX_USC_CONTROL_PRESHADER: {
297       agx_unpack(agxdecode_dump_stream, map, USC_PRESHADER, ctrl);
298       DUMP_UNPACKED(USC_PRESHADER, ctrl, "Preshader\n");
299 
300       agx_disassemble(
301          buf, agxdecode_fetch_gpu_array(ctx, decode_usc(ctx, ctrl.code), buf),
302          agxdecode_dump_stream);
303 
304       return STATE_DONE;
305    }
306 
307    case AGX_USC_CONTROL_SHADER: {
308       agx_unpack(agxdecode_dump_stream, map, USC_SHADER, ctrl);
309       DUMP_UNPACKED(USC_SHADER, ctrl, "Shader\n");
310 
311       agxdecode_log("\n");
312       agx_disassemble(
313          buf, agxdecode_fetch_gpu_array(ctx, decode_usc(ctx, ctrl.code), buf),
314          agxdecode_dump_stream);
315       agxdecode_log("\n");
316 
317       return AGX_USC_SHADER_LENGTH;
318    }
319 
320    case AGX_USC_CONTROL_SAMPLER: {
321       agx_unpack(agxdecode_dump_stream, map, USC_SAMPLER, temp);
322       DUMP_UNPACKED(USC_SAMPLER, temp, "Sampler state\n");
323 
324       size_t stride =
325          AGX_SAMPLER_LENGTH + (extended_samplers ? AGX_BORDER_LENGTH : 0);
326       uint8_t *samp = alloca(stride * temp.count);
327 
328       agxdecode_fetch_gpu_mem(ctx, temp.buffer, stride * temp.count, samp);
329 
330       for (unsigned i = 0; i < temp.count; ++i) {
331          DUMP_CL(SAMPLER, samp, "Sampler");
332          samp += AGX_SAMPLER_LENGTH;
333 
334          if (extended_samplers) {
335             DUMP_CL(BORDER, samp, "Border");
336             samp += AGX_BORDER_LENGTH;
337          }
338       }
339 
340       return AGX_USC_SAMPLER_LENGTH;
341    }
342 
343    case AGX_USC_CONTROL_TEXTURE: {
344       agx_unpack(agxdecode_dump_stream, map, USC_TEXTURE, temp);
345       DUMP_UNPACKED(USC_TEXTURE, temp, "Texture state\n");
346 
347       uint8_t buf[AGX_TEXTURE_LENGTH * temp.count];
348       uint8_t *tex = buf;
349 
350       agxdecode_fetch_gpu_array(ctx, temp.buffer, buf);
351 
352       /* Note: samplers only need 8 byte alignment? */
353       for (unsigned i = 0; i < temp.count; ++i) {
354          fprintf(agxdecode_dump_stream, "ts%u: \n", temp.start + i);
355          agxdecode_texture_pbe(ctx, tex);
356 
357          tex += AGX_TEXTURE_LENGTH;
358       }
359 
360       return AGX_USC_TEXTURE_LENGTH;
361    }
362 
363    case AGX_USC_CONTROL_UNIFORM: {
364       agx_unpack(agxdecode_dump_stream, map, USC_UNIFORM, temp);
365       DUMP_UNPACKED(USC_UNIFORM, temp, "Uniform\n");
366 
367       uint8_t buf[2 * temp.size_halfs];
368       agxdecode_fetch_gpu_array(ctx, temp.buffer, buf);
369       u_hexdump(agxdecode_dump_stream, buf, 2 * temp.size_halfs, false);
370 
371       return AGX_USC_UNIFORM_LENGTH;
372    }
373 
374    case AGX_USC_CONTROL_UNIFORM_HIGH: {
375       agx_unpack(agxdecode_dump_stream, map, USC_UNIFORM_HIGH, temp);
376       DUMP_UNPACKED(USC_UNIFORM_HIGH, temp, "Uniform (high)\n");
377 
378       uint8_t buf[2 * temp.size_halfs];
379       agxdecode_fetch_gpu_array(ctx, temp.buffer, buf);
380       u_hexdump(agxdecode_dump_stream, buf, 2 * temp.size_halfs, false);
381 
382       return AGX_USC_UNIFORM_HIGH_LENGTH;
383    }
384 
385       USC_CASE(FRAGMENT_PROPERTIES, "Fragment properties");
386       USC_CASE(SHARED, "Shared");
387       USC_CASE(REGISTERS, "Registers");
388 
389    default:
390       fprintf(agxdecode_dump_stream, "Unknown USC control type: %u\n", type);
391       u_hexdump(agxdecode_dump_stream, map, 8, false);
392       return 8;
393    }
394 
395 #undef USC_CASE
396 }
397 
398 #define PPP_PRINT(map, header_name, struct_name, human)                        \
399    if (hdr.header_name) {                                                      \
400       if (((map + AGX_##struct_name##_LENGTH) > (base + size))) {              \
401          fprintf(agxdecode_dump_stream, "Buffer overrun in PPP update\n");     \
402          return;                                                               \
403       }                                                                        \
404       DUMP_CL(struct_name, map, human);                                        \
405       map += AGX_##struct_name##_LENGTH;                                       \
406       fflush(agxdecode_dump_stream);                                           \
407    }
408 
409 static void
agxdecode_record(struct agxdecode_ctx * ctx,uint64_t va,size_t size,bool verbose,decoder_params * params)410 agxdecode_record(struct agxdecode_ctx *ctx, uint64_t va, size_t size,
411                  bool verbose, decoder_params *params)
412 {
413    uint8_t buf[size];
414    uint8_t *base = buf;
415    uint8_t *map = base;
416 
417    agxdecode_fetch_gpu_array(ctx, va, buf);
418 
419    agx_unpack(agxdecode_dump_stream, map, PPP_HEADER, hdr);
420    map += AGX_PPP_HEADER_LENGTH;
421 
422    PPP_PRINT(map, fragment_control, FRAGMENT_CONTROL, "Fragment control");
423    PPP_PRINT(map, fragment_control_2, FRAGMENT_CONTROL, "Fragment control 2");
424    PPP_PRINT(map, fragment_front_face, FRAGMENT_FACE, "Front face");
425    PPP_PRINT(map, fragment_front_face_2, FRAGMENT_FACE_2, "Front face 2");
426    PPP_PRINT(map, fragment_front_stencil, FRAGMENT_STENCIL, "Front stencil");
427    PPP_PRINT(map, fragment_back_face, FRAGMENT_FACE, "Back face");
428    PPP_PRINT(map, fragment_back_face_2, FRAGMENT_FACE_2, "Back face 2");
429    PPP_PRINT(map, fragment_back_stencil, FRAGMENT_STENCIL, "Back stencil");
430    PPP_PRINT(map, depth_bias_scissor, DEPTH_BIAS_SCISSOR, "Depth bias/scissor");
431 
432    if (hdr.region_clip) {
433       if (((map + (AGX_REGION_CLIP_LENGTH * hdr.viewport_count)) >
434            (base + size))) {
435          fprintf(agxdecode_dump_stream, "Buffer overrun in PPP update\n");
436          return;
437       }
438 
439       for (unsigned i = 0; i < hdr.viewport_count; ++i) {
440          DUMP_CL(REGION_CLIP, map, "Region clip");
441          map += AGX_REGION_CLIP_LENGTH;
442          fflush(agxdecode_dump_stream);
443       }
444    }
445 
446    if (hdr.viewport) {
447       if (((map + AGX_VIEWPORT_CONTROL_LENGTH +
448             (AGX_VIEWPORT_LENGTH * hdr.viewport_count)) > (base + size))) {
449          fprintf(agxdecode_dump_stream, "Buffer overrun in PPP update\n");
450          return;
451       }
452 
453       DUMP_CL(VIEWPORT_CONTROL, map, "Viewport control");
454       map += AGX_VIEWPORT_CONTROL_LENGTH;
455 
456       for (unsigned i = 0; i < hdr.viewport_count; ++i) {
457          DUMP_CL(VIEWPORT, map, "Viewport");
458          map += AGX_VIEWPORT_LENGTH;
459          fflush(agxdecode_dump_stream);
460       }
461    }
462 
463    PPP_PRINT(map, w_clamp, W_CLAMP, "W clamp");
464    PPP_PRINT(map, output_select, OUTPUT_SELECT, "Output select");
465    PPP_PRINT(map, varying_counts_32, VARYING_COUNTS, "Varying counts 32");
466    PPP_PRINT(map, varying_counts_16, VARYING_COUNTS, "Varying counts 16");
467    PPP_PRINT(map, cull, CULL, "Cull");
468    PPP_PRINT(map, cull_2, CULL_2, "Cull 2");
469 
470    if (hdr.fragment_shader) {
471       agx_unpack(agxdecode_dump_stream, map, FRAGMENT_SHADER_WORD_0, frag_0);
472       agx_unpack(agxdecode_dump_stream, map + 4, FRAGMENT_SHADER_WORD_1,
473                  frag_1);
474       agx_unpack(agxdecode_dump_stream, map + 8, FRAGMENT_SHADER_WORD_2,
475                  frag_2);
476       agxdecode_stateful(ctx, decode_usc(ctx, frag_1.pipeline),
477                          "Fragment pipeline", agxdecode_usc, verbose, params,
478                          &frag_0.sampler_state_register_count);
479 
480       if (frag_2.cf_bindings) {
481          uint8_t buf[128];
482          uint8_t *cf = buf;
483 
484          agxdecode_fetch_gpu_array(ctx, decode_usc(ctx, frag_2.cf_bindings),
485                                    buf);
486          u_hexdump(agxdecode_dump_stream, cf, 128, false);
487 
488          DUMP_CL(CF_BINDING_HEADER, cf, "Coefficient binding header:");
489          cf += AGX_CF_BINDING_HEADER_LENGTH;
490 
491          for (unsigned i = 0; i < frag_0.cf_binding_count; ++i) {
492             DUMP_CL(CF_BINDING, cf, "Coefficient binding:");
493             cf += AGX_CF_BINDING_LENGTH;
494          }
495       }
496 
497       DUMP_CL(FRAGMENT_SHADER_WORD_0, map, "Fragment shader word 0");
498       DUMP_CL(FRAGMENT_SHADER_WORD_1, map + 4, "Fragment shader word 1");
499       DUMP_CL(FRAGMENT_SHADER_WORD_2, map + 8, "Fragment shader word 2");
500       DUMP_CL(FRAGMENT_SHADER_WORD_3, map + 12, "Fragment shader word 3");
501       map += 16;
502    }
503 
504    PPP_PRINT(map, occlusion_query, FRAGMENT_OCCLUSION_QUERY, "Occlusion query");
505    PPP_PRINT(map, occlusion_query_2, FRAGMENT_OCCLUSION_QUERY_2,
506              "Occlusion query 2");
507    PPP_PRINT(map, output_unknown, OUTPUT_UNKNOWN, "Output unknown");
508    PPP_PRINT(map, output_size, OUTPUT_SIZE, "Output size");
509    PPP_PRINT(map, varying_word_2, VARYING_2, "Varying word 2");
510 
511    /* PPP print checks we don't read too much, now check we read enough */
512    assert(map == (base + size) && "invalid size of PPP update");
513 }
514 
515 static unsigned
agxdecode_cdm(struct agxdecode_ctx * ctx,const uint8_t * map,uint64_t * link,bool verbose,decoder_params * params,UNUSED void * data)516 agxdecode_cdm(struct agxdecode_ctx *ctx, const uint8_t *map, uint64_t *link,
517               bool verbose, decoder_params *params, UNUSED void *data)
518 {
519    /* Bits 29-31 contain the block type */
520    enum agx_cdm_block_type block_type = (map[3] >> 5);
521 
522    switch (block_type) {
523    case AGX_CDM_BLOCK_TYPE_LAUNCH: {
524       size_t length =
525          AGX_CDM_LAUNCH_WORD_0_LENGTH + AGX_CDM_LAUNCH_WORD_1_LENGTH;
526 
527 #define CDM_PRINT(STRUCT_NAME, human)                                          \
528    do {                                                                        \
529       DUMP_CL(CDM_##STRUCT_NAME, map, human);                                  \
530       map += AGX_CDM_##STRUCT_NAME##_LENGTH;                                   \
531       length += AGX_CDM_##STRUCT_NAME##_LENGTH;                                \
532    } while (0);
533 
534       agx_unpack(agxdecode_dump_stream, map + 0, CDM_LAUNCH_WORD_0, hdr0);
535       agx_unpack(agxdecode_dump_stream, map + 4, CDM_LAUNCH_WORD_1, hdr1);
536 
537       agxdecode_stateful(ctx, decode_usc(ctx, hdr1.pipeline), "Pipeline",
538                          agxdecode_usc, verbose, params,
539                          &hdr0.sampler_state_register_count);
540       DUMP_UNPACKED(CDM_LAUNCH_WORD_0, hdr0, "Compute\n");
541       DUMP_UNPACKED(CDM_LAUNCH_WORD_1, hdr1, "Compute\n");
542       map += 8;
543 
544       /* Added in G14X */
545       if (params->gpu_generation >= 14 && params->num_clusters_total > 1)
546          CDM_PRINT(UNK_G14X, "Unknown G14X");
547 
548       switch (hdr0.mode) {
549       case AGX_CDM_MODE_DIRECT:
550          CDM_PRINT(GLOBAL_SIZE, "Global size");
551          CDM_PRINT(LOCAL_SIZE, "Local size");
552          break;
553       case AGX_CDM_MODE_INDIRECT_GLOBAL:
554          CDM_PRINT(INDIRECT, "Indirect buffer");
555          CDM_PRINT(LOCAL_SIZE, "Local size");
556          break;
557       case AGX_CDM_MODE_INDIRECT_LOCAL:
558          CDM_PRINT(INDIRECT, "Indirect buffer");
559          break;
560       default:
561          fprintf(agxdecode_dump_stream, "Unknown CDM mode: %u\n", hdr0.mode);
562          break;
563       }
564 
565       return length;
566    }
567 
568    case AGX_CDM_BLOCK_TYPE_STREAM_LINK: {
569       agx_unpack(agxdecode_dump_stream, map, CDM_STREAM_LINK, hdr);
570       DUMP_UNPACKED(CDM_STREAM_LINK, hdr, "Stream Link\n");
571       *link = hdr.target_lo | (((uint64_t)hdr.target_hi) << 32);
572       return STATE_LINK;
573    }
574 
575    case AGX_CDM_BLOCK_TYPE_STREAM_TERMINATE: {
576       DUMP_CL(CDM_STREAM_TERMINATE, map, "Stream Terminate");
577       return STATE_DONE;
578    }
579 
580    case AGX_CDM_BLOCK_TYPE_BARRIER: {
581       DUMP_CL(CDM_BARRIER, map, "Barrier");
582       return AGX_CDM_BARRIER_LENGTH;
583    }
584 
585    default:
586       fprintf(agxdecode_dump_stream, "Unknown CDM block type: %u\n",
587               block_type);
588       u_hexdump(agxdecode_dump_stream, map, 8, false);
589       return 8;
590    }
591 }
592 
593 static unsigned
agxdecode_vdm(struct agxdecode_ctx * ctx,const uint8_t * map,uint64_t * link,bool verbose,decoder_params * params,UNUSED void * data)594 agxdecode_vdm(struct agxdecode_ctx *ctx, const uint8_t *map, uint64_t *link,
595               bool verbose, decoder_params *params, UNUSED void *data)
596 {
597    /* Bits 29-31 contain the block type */
598    enum agx_vdm_block_type block_type = (map[3] >> 5);
599 
600    switch (block_type) {
601    case AGX_VDM_BLOCK_TYPE_BARRIER: {
602       agx_unpack(agxdecode_dump_stream, map, VDM_BARRIER, hdr);
603       DUMP_UNPACKED(VDM_BARRIER, hdr, "Barrier\n");
604       return hdr.returns ? STATE_RET : AGX_VDM_BARRIER_LENGTH;
605    }
606 
607    case AGX_VDM_BLOCK_TYPE_PPP_STATE_UPDATE: {
608       agx_unpack(agxdecode_dump_stream, map, PPP_STATE, cmd);
609 
610       uint64_t address = (((uint64_t)cmd.pointer_hi) << 32) | cmd.pointer_lo;
611 
612       if (!lib_config.read_gpu_mem) {
613          struct agx_bo *mem =
614             agxdecode_find_mapped_gpu_mem_containing(ctx, address);
615 
616          if (!mem) {
617             DUMP_UNPACKED(PPP_STATE, cmd, "Non-existent record (XXX)\n");
618             return AGX_PPP_STATE_LENGTH;
619          }
620       }
621 
622       agxdecode_record(ctx, address, cmd.size_words * 4, verbose, params);
623       return AGX_PPP_STATE_LENGTH;
624    }
625 
626    case AGX_VDM_BLOCK_TYPE_VDM_STATE_UPDATE: {
627       size_t length = AGX_VDM_STATE_LENGTH;
628       agx_unpack(agxdecode_dump_stream, map, VDM_STATE, hdr);
629       map += AGX_VDM_STATE_LENGTH;
630 
631 #define VDM_PRINT(header_name, STRUCT_NAME, human)                             \
632    if (hdr.header_name##_present) {                                            \
633       DUMP_CL(VDM_STATE_##STRUCT_NAME, map, human);                            \
634       map += AGX_VDM_STATE_##STRUCT_NAME##_LENGTH;                             \
635       length += AGX_VDM_STATE_##STRUCT_NAME##_LENGTH;                          \
636    }
637 
638       VDM_PRINT(restart_index, RESTART_INDEX, "Restart index");
639 
640       /* If word 1 is present but word 0 is not, fallback to compact samplers */
641       enum agx_sampler_states sampler_states = 0;
642 
643       if (hdr.vertex_shader_word_0_present) {
644          agx_unpack(agxdecode_dump_stream, map, VDM_STATE_VERTEX_SHADER_WORD_0,
645                     word_0);
646          sampler_states = word_0.sampler_state_register_count;
647       }
648 
649       VDM_PRINT(vertex_shader_word_0, VERTEX_SHADER_WORD_0,
650                 "Vertex shader word 0");
651 
652       if (hdr.vertex_shader_word_1_present) {
653          agx_unpack(agxdecode_dump_stream, map, VDM_STATE_VERTEX_SHADER_WORD_1,
654                     word_1);
655          fprintf(agxdecode_dump_stream, "Pipeline %X\n",
656                  (uint32_t)word_1.pipeline);
657          agxdecode_stateful(ctx, decode_usc(ctx, word_1.pipeline), "Pipeline",
658                             agxdecode_usc, verbose, params, &sampler_states);
659       }
660 
661       VDM_PRINT(vertex_shader_word_1, VERTEX_SHADER_WORD_1,
662                 "Vertex shader word 1");
663       VDM_PRINT(vertex_outputs, VERTEX_OUTPUTS, "Vertex outputs");
664       VDM_PRINT(tessellation, TESSELLATION, "Tessellation");
665       VDM_PRINT(vertex_unknown, VERTEX_UNKNOWN, "Vertex unknown");
666       VDM_PRINT(tessellation_scale, TESSELLATION_SCALE, "Tessellation scale");
667 
668 #undef VDM_PRINT
669       return hdr.tessellation_scale_present ? length : ALIGN_POT(length, 8);
670    }
671 
672    case AGX_VDM_BLOCK_TYPE_INDEX_LIST: {
673       size_t length = AGX_INDEX_LIST_LENGTH;
674       agx_unpack(agxdecode_dump_stream, map, INDEX_LIST, hdr);
675       DUMP_UNPACKED(INDEX_LIST, hdr, "Index List\n");
676       map += AGX_INDEX_LIST_LENGTH;
677 
678 #define IDX_PRINT(header_name, STRUCT_NAME, human)                             \
679    if (hdr.header_name##_present) {                                            \
680       DUMP_CL(INDEX_LIST_##STRUCT_NAME, map, human);                           \
681       map += AGX_INDEX_LIST_##STRUCT_NAME##_LENGTH;                            \
682       length += AGX_INDEX_LIST_##STRUCT_NAME##_LENGTH;                         \
683    }
684 
685       IDX_PRINT(index_buffer, BUFFER_LO, "Index buffer");
686       IDX_PRINT(index_count, COUNT, "Index count");
687       IDX_PRINT(instance_count, INSTANCES, "Instance count");
688       IDX_PRINT(start, START, "Start");
689       IDX_PRINT(indirect_buffer, INDIRECT_BUFFER, "Indirect buffer");
690       IDX_PRINT(index_buffer_size, BUFFER_SIZE, "Index buffer size");
691 
692 #undef IDX_PRINT
693       return length;
694    }
695 
696    case AGX_VDM_BLOCK_TYPE_STREAM_LINK: {
697       agx_unpack(agxdecode_dump_stream, map, VDM_STREAM_LINK, hdr);
698       DUMP_UNPACKED(VDM_STREAM_LINK, hdr, "Stream Link\n");
699       *link = hdr.target_lo | (((uint64_t)hdr.target_hi) << 32);
700       return hdr.with_return ? STATE_CALL : STATE_LINK;
701    }
702 
703    case AGX_VDM_BLOCK_TYPE_STREAM_TERMINATE: {
704       DUMP_CL(VDM_STREAM_TERMINATE, map, "Stream Terminate");
705       return STATE_DONE;
706    }
707 
708    case AGX_VDM_BLOCK_TYPE_TESSELLATE: {
709       size_t length = AGX_VDM_TESSELLATE_LENGTH;
710       agx_unpack(agxdecode_dump_stream, map, VDM_TESSELLATE, hdr);
711       DUMP_UNPACKED(VDM_TESSELLATE, hdr, "Tessellate List\n");
712       map += AGX_VDM_TESSELLATE_LENGTH;
713 
714 #define TESS_PRINT(header_name, STRUCT_NAME, human)                            \
715    if (hdr.header_name##_present) {                                            \
716       DUMP_CL(VDM_TESSELLATE_##STRUCT_NAME, map, human);                       \
717       map += AGX_VDM_TESSELLATE_##STRUCT_NAME##_LENGTH;                        \
718       length += AGX_VDM_TESSELLATE_##STRUCT_NAME##_LENGTH;                     \
719    }
720 
721       TESS_PRINT(factor_buffer, FACTOR_BUFFER, "Factor buffer");
722       TESS_PRINT(patch_count, PATCH_COUNT, "Patch");
723       TESS_PRINT(instance_count, INSTANCE_COUNT, "Instance count");
724       TESS_PRINT(base_patch, BASE_PATCH, "Base patch");
725       TESS_PRINT(base_instance, BASE_INSTANCE, "Base instance");
726       TESS_PRINT(instance_stride, INSTANCE_STRIDE, "Instance stride");
727       TESS_PRINT(indirect, INDIRECT, "Indirect");
728       TESS_PRINT(unknown, UNKNOWN, "Unknown");
729 
730 #undef TESS_PRINT
731       return length;
732    }
733 
734    default:
735       fprintf(agxdecode_dump_stream, "Unknown VDM block type: %u\n",
736               block_type);
737       u_hexdump(agxdecode_dump_stream, map, 8, false);
738       return 8;
739    }
740 }
741 
742 static void
agxdecode_cs(struct agxdecode_ctx * ctx,uint32_t * cmdbuf,uint64_t encoder,bool verbose,decoder_params * params)743 agxdecode_cs(struct agxdecode_ctx *ctx, uint32_t *cmdbuf, uint64_t encoder,
744              bool verbose, decoder_params *params)
745 {
746    agx_unpack(agxdecode_dump_stream, cmdbuf + 16, IOGPU_COMPUTE, cs);
747    DUMP_UNPACKED(IOGPU_COMPUTE, cs, "Compute\n");
748 
749    agxdecode_stateful(ctx, encoder, "Encoder", agxdecode_cdm, verbose, params,
750                       NULL);
751 
752    fprintf(agxdecode_dump_stream, "Context switch program:\n");
753    uint8_t buf[1024];
754    agx_disassemble(buf,
755                    agxdecode_fetch_gpu_array(
756                       ctx, decode_usc(ctx, cs.context_switch_program), buf),
757                    agxdecode_dump_stream);
758 }
759 
760 static void
agxdecode_gfx(struct agxdecode_ctx * ctx,uint32_t * cmdbuf,uint64_t encoder,bool verbose,decoder_params * params)761 agxdecode_gfx(struct agxdecode_ctx *ctx, uint32_t *cmdbuf, uint64_t encoder,
762               bool verbose, decoder_params *params)
763 {
764    agx_unpack(agxdecode_dump_stream, cmdbuf + 16, IOGPU_GRAPHICS, gfx);
765    DUMP_UNPACKED(IOGPU_GRAPHICS, gfx, "Graphics\n");
766 
767    agxdecode_stateful(ctx, encoder, "Encoder", agxdecode_vdm, verbose, params,
768                       NULL);
769 
770    if (gfx.clear_pipeline_unk) {
771       fprintf(agxdecode_dump_stream, "Unk: %X\n", gfx.clear_pipeline_unk);
772       agxdecode_stateful(ctx, decode_usc(ctx, gfx.clear_pipeline),
773                          "Clear pipeline", agxdecode_usc, verbose, params,
774                          NULL);
775    }
776 
777    if (gfx.store_pipeline_unk) {
778       assert(gfx.store_pipeline_unk == 0x4);
779       agxdecode_stateful(ctx, decode_usc(ctx, gfx.store_pipeline),
780                          "Store pipeline", agxdecode_usc, verbose, params,
781                          NULL);
782    }
783 
784    assert((gfx.partial_reload_pipeline_unk & 0xF) == 0x4);
785    if (gfx.partial_reload_pipeline) {
786       agxdecode_stateful(ctx, decode_usc(ctx, gfx.partial_reload_pipeline),
787                          "Partial reload pipeline", agxdecode_usc, verbose,
788                          params, NULL);
789    }
790 
791    if (gfx.partial_store_pipeline) {
792       agxdecode_stateful(ctx, decode_usc(ctx, gfx.partial_store_pipeline),
793                          "Partial store pipeline", agxdecode_usc, verbose,
794                          params, NULL);
795    }
796 }
797 
798 static void
agxdecode_sampler_heap(struct agxdecode_ctx * ctx,uint64_t heap,unsigned count)799 agxdecode_sampler_heap(struct agxdecode_ctx *ctx, uint64_t heap, unsigned count)
800 {
801    if (!heap)
802       return;
803 
804    struct agx_sampler_packed samp[1024];
805    agxdecode_fetch_gpu_array(ctx, heap, samp);
806 
807    for (unsigned i = 0; i < count; ++i) {
808       bool nonzero = false;
809       for (unsigned j = 0; j < ARRAY_SIZE(samp[i].opaque); ++j) {
810          nonzero |= samp[i].opaque[j] != 0;
811       }
812 
813       if (nonzero) {
814          fprintf(agxdecode_dump_stream, "Heap sampler %u\n", i);
815 
816          agx_unpack(agxdecode_dump_stream, samp + i, SAMPLER, temp);
817          agx_print(agxdecode_dump_stream, SAMPLER, temp, 2);
818       }
819    }
820 }
821 
822 void
agxdecode_image_heap(struct agxdecode_ctx * ctx,uint64_t heap,unsigned nr_entries)823 agxdecode_image_heap(struct agxdecode_ctx *ctx, uint64_t heap,
824                      unsigned nr_entries)
825 {
826    agxdecode_dump_file_open();
827 
828    fprintf(agxdecode_dump_stream, "Image heap:\n");
829    struct agx_texture_packed *map = calloc(nr_entries, AGX_TEXTURE_LENGTH);
830    agxdecode_fetch_gpu_mem(ctx, heap, AGX_TEXTURE_LENGTH * nr_entries, map);
831 
832    for (unsigned i = 0; i < nr_entries; ++i) {
833       bool nonzero = false;
834       for (unsigned j = 0; j < ARRAY_SIZE(map[i].opaque); ++j) {
835          nonzero |= map[i].opaque[j] != 0;
836       }
837 
838       if (nonzero) {
839          fprintf(agxdecode_dump_stream, "%u: \n", i);
840          agxdecode_texture_pbe(ctx, map + i);
841          fprintf(agxdecode_dump_stream, "\n");
842       }
843    }
844 
845    free(map);
846 }
847 
848 void
agxdecode_drm_cmd_render(struct agxdecode_ctx * ctx,struct drm_asahi_params_global * params,struct drm_asahi_cmd_render * c,bool verbose)849 agxdecode_drm_cmd_render(struct agxdecode_ctx *ctx,
850                          struct drm_asahi_params_global *params,
851                          struct drm_asahi_cmd_render *c, bool verbose)
852 {
853    agxdecode_dump_file_open();
854 
855    DUMP_FIELD(c, "%llx", flags);
856    DUMP_FIELD(c, "0x%llx", encoder_ptr);
857    agxdecode_stateful(ctx, c->encoder_ptr, "Encoder", agxdecode_vdm, verbose,
858                       params, NULL);
859    DUMP_FIELD(c, "0x%x", encoder_id);
860    DUMP_FIELD(c, "0x%x", cmd_ta_id);
861    DUMP_FIELD(c, "0x%x", cmd_3d_id);
862    DUMP_FIELD(c, "0x%x", ppp_ctrl);
863    DUMP_FIELD(c, "0x%llx", ppp_multisamplectl);
864    DUMP_CL(ZLS_CONTROL, &c->zls_ctrl, "ZLS Control");
865    DUMP_FIELD(c, "0x%llx", depth_buffer_load);
866    DUMP_FIELD(c, "0x%llx", depth_buffer_store);
867    DUMP_FIELD(c, "0x%llx", depth_buffer_partial);
868    DUMP_FIELD(c, "0x%llx", stencil_buffer_load);
869    DUMP_FIELD(c, "0x%llx", stencil_buffer_store);
870    DUMP_FIELD(c, "0x%llx", stencil_buffer_partial);
871    DUMP_FIELD(c, "0x%llx", scissor_array);
872    DUMP_FIELD(c, "0x%llx", depth_bias_array);
873    DUMP_FIELD(c, "%d", fb_width);
874    DUMP_FIELD(c, "%d", fb_height);
875    DUMP_FIELD(c, "%d", layers);
876    DUMP_FIELD(c, "%d", samples);
877    DUMP_FIELD(c, "%d", sample_size);
878    DUMP_FIELD(c, "%d", tib_blocks);
879    DUMP_FIELD(c, "%d", utile_width);
880    DUMP_FIELD(c, "%d", utile_height);
881    DUMP_FIELD(c, "0x%x", load_pipeline);
882    DUMP_FIELD(c, "0x%x", load_pipeline_bind);
883    agxdecode_stateful(ctx, decode_usc(ctx, c->load_pipeline & ~0x7),
884                       "Load pipeline", agxdecode_usc, verbose, params, NULL);
885    DUMP_FIELD(c, "0x%x", store_pipeline);
886    DUMP_FIELD(c, "0x%x", store_pipeline_bind);
887    agxdecode_stateful(ctx, decode_usc(ctx, c->store_pipeline & ~0x7),
888                       "Store pipeline", agxdecode_usc, verbose, params, NULL);
889    DUMP_FIELD(c, "0x%x", partial_reload_pipeline);
890    DUMP_FIELD(c, "0x%x", partial_reload_pipeline_bind);
891    agxdecode_stateful(ctx, decode_usc(ctx, c->partial_reload_pipeline & ~0x7),
892                       "Partial reload pipeline", agxdecode_usc, verbose, params,
893                       NULL);
894    DUMP_FIELD(c, "0x%x", partial_store_pipeline);
895    DUMP_FIELD(c, "0x%x", partial_store_pipeline_bind);
896    agxdecode_stateful(ctx, decode_usc(ctx, c->partial_store_pipeline & ~0x7),
897                       "Partial store pipeline", agxdecode_usc, verbose, params,
898                       NULL);
899 
900    DUMP_FIELD(c, "0x%x", depth_dimensions);
901    DUMP_FIELD(c, "0x%x", isp_bgobjdepth);
902    DUMP_FIELD(c, "0x%x", isp_bgobjvals);
903 
904    agxdecode_sampler_heap(ctx, c->vertex_sampler_array,
905                           c->vertex_sampler_count);
906 
907    /* Linux driver doesn't use this, at least for now */
908    assert(c->fragment_sampler_array == c->vertex_sampler_array);
909    assert(c->fragment_sampler_count == c->vertex_sampler_count);
910 
911    DUMP_FIELD(c, "%d", vertex_attachment_count);
912    struct drm_asahi_attachment *vertex_attachments =
913       (void *)c->vertex_attachments;
914    for (unsigned i = 0; i < c->vertex_attachment_count; i++) {
915       DUMP_FIELD((&vertex_attachments[i]), "0x%x", order);
916       DUMP_FIELD((&vertex_attachments[i]), "0x%llx", size);
917       DUMP_FIELD((&vertex_attachments[i]), "0x%llx", pointer);
918    }
919    DUMP_FIELD(c, "%d", fragment_attachment_count);
920    struct drm_asahi_attachment *fragment_attachments =
921       (void *)c->fragment_attachments;
922    for (unsigned i = 0; i < c->fragment_attachment_count; i++) {
923       DUMP_FIELD((&fragment_attachments[i]), "0x%x", order);
924       DUMP_FIELD((&fragment_attachments[i]), "0x%llx", size);
925       DUMP_FIELD((&fragment_attachments[i]), "0x%llx", pointer);
926    }
927 }
928 
929 void
agxdecode_drm_cmd_compute(struct agxdecode_ctx * ctx,struct drm_asahi_params_global * params,struct drm_asahi_cmd_compute * c,bool verbose)930 agxdecode_drm_cmd_compute(struct agxdecode_ctx *ctx,
931                           struct drm_asahi_params_global *params,
932                           struct drm_asahi_cmd_compute *c, bool verbose)
933 {
934    agxdecode_dump_file_open();
935 
936    DUMP_FIELD(c, "%llx", flags);
937    DUMP_FIELD(c, "0x%llx", encoder_ptr);
938    agxdecode_stateful(ctx, c->encoder_ptr, "Encoder", agxdecode_cdm, verbose,
939                       params, NULL);
940    DUMP_FIELD(c, "0x%x", encoder_id);
941    DUMP_FIELD(c, "0x%x", cmd_id);
942 
943    agxdecode_sampler_heap(ctx, c->sampler_array, c->sampler_count);
944 
945    if (c->helper_program & 1) {
946       fprintf(agxdecode_dump_stream, "Helper program:\n");
947       uint8_t buf[1024];
948       agx_disassemble(buf,
949                       agxdecode_fetch_gpu_array(
950                          ctx, decode_usc(ctx, c->helper_program & ~1), buf),
951                       agxdecode_dump_stream);
952    }
953 }
954 
955 static void
chip_id_to_params(decoder_params * params,uint32_t chip_id)956 chip_id_to_params(decoder_params *params, uint32_t chip_id)
957 {
958    switch (chip_id) {
959    case 0x6000 ... 0x6002:
960       *params = (decoder_params){
961          .gpu_generation = 13,
962          .gpu_variant = "SCD"[chip_id & 15],
963          .chip_id = chip_id,
964          .num_clusters_total = 2 << (chip_id & 15),
965       };
966       break;
967    case 0x6020 ... 0x6022:
968       *params = (decoder_params){
969          .gpu_generation = 14,
970          .gpu_variant = "SCD"[chip_id & 15],
971          .chip_id = chip_id,
972          .num_clusters_total = 2 << (chip_id & 15),
973       };
974       break;
975    case 0x8112:
976       *params = (decoder_params){
977          .gpu_generation = 14,
978          .gpu_variant = 'G',
979          .chip_id = chip_id,
980          .num_clusters_total = 1,
981       };
982       break;
983    case 0x8103:
984    default:
985       *params = (decoder_params){
986          .gpu_generation = 13,
987          .gpu_variant = 'G',
988          .chip_id = chip_id,
989          .num_clusters_total = 1,
990       };
991       break;
992    }
993 }
994 
995 #ifdef __APPLE__
996 
997 void
agxdecode_cmdstream(struct agxdecode_ctx * ctx,unsigned cmdbuf_handle,unsigned map_handle,bool verbose)998 agxdecode_cmdstream(struct agxdecode_ctx *ctx, unsigned cmdbuf_handle,
999                     unsigned map_handle, bool verbose)
1000 {
1001    agxdecode_dump_file_open();
1002 
1003    struct agx_bo *cmdbuf =
1004       agxdecode_find_handle(cmdbuf_handle, AGX_ALLOC_CMDBUF);
1005    struct agx_bo *map = agxdecode_find_handle(map_handle, AGX_ALLOC_MEMMAP);
1006    assert(cmdbuf != NULL && "nonexistent command buffer");
1007    assert(map != NULL && "nonexistent mapping");
1008 
1009    /* Print the IOGPU stuff */
1010    agx_unpack(agxdecode_dump_stream, cmdbuf->map, IOGPU_HEADER, cmd);
1011    DUMP_UNPACKED(IOGPU_HEADER, cmd, "IOGPU Header\n");
1012 
1013    DUMP_CL(IOGPU_ATTACHMENT_COUNT,
1014            ((uint8_t *)cmdbuf->map + cmd.attachment_offset),
1015            "Attachment count");
1016 
1017    uint32_t *attachments =
1018       (uint32_t *)((uint8_t *)cmdbuf->map + cmd.attachment_offset);
1019    unsigned attachment_count = attachments[3];
1020    for (unsigned i = 0; i < attachment_count; ++i) {
1021       uint32_t *ptr = attachments + 4 + (i * AGX_IOGPU_ATTACHMENT_LENGTH / 4);
1022       DUMP_CL(IOGPU_ATTACHMENT, ptr, "Attachment");
1023    }
1024 
1025    struct drm_asahi_params_global params;
1026 
1027    chip_id_to_params(&params, 0x8103);
1028 
1029    if (cmd.unk_5 == 3)
1030       agxdecode_cs((uint32_t *)cmdbuf->map, cmd.encoder, verbose, &params);
1031    else
1032       agxdecode_gfx((uint32_t *)cmdbuf->map, cmd.encoder, verbose, &params);
1033 }
1034 
1035 #endif
1036 
1037 void
agxdecode_track_alloc(struct agxdecode_ctx * ctx,struct agx_bo * alloc)1038 agxdecode_track_alloc(struct agxdecode_ctx *ctx, struct agx_bo *alloc)
1039 {
1040    util_dynarray_foreach(&ctx->mmap_array, struct agx_bo, it) {
1041       bool match = (it->handle == alloc->handle);
1042       assert(!match && "tried to alloc already allocated BO");
1043    }
1044 
1045    util_dynarray_append(&ctx->mmap_array, struct agx_bo, *alloc);
1046 }
1047 
1048 void
agxdecode_track_free(struct agxdecode_ctx * ctx,struct agx_bo * bo)1049 agxdecode_track_free(struct agxdecode_ctx *ctx, struct agx_bo *bo)
1050 {
1051    bool found = false;
1052 
1053    util_dynarray_foreach(&ctx->mmap_array, struct agx_bo, it) {
1054       if (it->handle == bo->handle) {
1055          assert(!found && "mapped multiple times!");
1056          found = true;
1057 
1058          memset(it, 0, sizeof(*it));
1059       }
1060    }
1061 
1062    assert(found && "freed unmapped memory");
1063 }
1064 
1065 static int agxdecode_dump_frame_count = 0;
1066 
1067 void
agxdecode_dump_file_open(void)1068 agxdecode_dump_file_open(void)
1069 {
1070    if (agxdecode_dump_stream)
1071       return;
1072 
1073    /* This does a getenv every frame, so it is possible to use
1074     * setenv to change the base at runtime.
1075     */
1076    const char *dump_file_base =
1077       getenv("AGXDECODE_DUMP_FILE") ?: "agxdecode.dump";
1078    if (!strcmp(dump_file_base, "stderr"))
1079       agxdecode_dump_stream = stderr;
1080    else {
1081       char buffer[1024];
1082       snprintf(buffer, sizeof(buffer), "%s.%04d", dump_file_base,
1083                agxdecode_dump_frame_count);
1084       printf("agxdecode: dump command stream to file %s\n", buffer);
1085       agxdecode_dump_stream = fopen(buffer, "w");
1086       if (!agxdecode_dump_stream) {
1087          fprintf(stderr,
1088                  "agxdecode: failed to open command stream log file %s\n",
1089                  buffer);
1090       }
1091    }
1092 }
1093 
1094 static void
agxdecode_dump_file_close(void)1095 agxdecode_dump_file_close(void)
1096 {
1097    if (agxdecode_dump_stream && agxdecode_dump_stream != stderr) {
1098       fclose(agxdecode_dump_stream);
1099       agxdecode_dump_stream = NULL;
1100    }
1101 }
1102 
1103 void
agxdecode_next_frame(void)1104 agxdecode_next_frame(void)
1105 {
1106    agxdecode_dump_file_close();
1107    agxdecode_dump_frame_count++;
1108 }
1109 
/* Public teardown entry point: closes the dump file, if one is open. */
void
agxdecode_close(void)
{
   agxdecode_dump_file_close();
}
1115 
1116 static ssize_t
libagxdecode_writer(void * cookie,const char * buffer,size_t size)1117 libagxdecode_writer(void *cookie, const char *buffer, size_t size)
1118 {
1119    return lib_config.stream_write(buffer, size);
1120 }
1121 
1122 #ifdef _GNU_SOURCE
1123 static cookie_io_functions_t funcs = {.write = libagxdecode_writer};
1124 #endif
1125 
1126 static decoder_params lib_params;
1127 
1128 void
libagxdecode_init(struct libagxdecode_config * config)1129 libagxdecode_init(struct libagxdecode_config *config)
1130 {
1131 #ifdef _GNU_SOURCE
1132    lib_config = *config;
1133    agxdecode_dump_stream = fopencookie(NULL, "w", funcs);
1134 
1135    chip_id_to_params(&lib_params, config->chip_id);
1136 #else
1137    /* fopencookie is a glibc extension */
1138    unreachable("libagxdecode only available with glibc");
1139 #endif
1140 }
1141 
1142 void
libagxdecode_vdm(struct agxdecode_ctx * ctx,uint64_t addr,const char * label,bool verbose)1143 libagxdecode_vdm(struct agxdecode_ctx *ctx, uint64_t addr, const char *label,
1144                  bool verbose)
1145 {
1146    agxdecode_stateful(ctx, addr, label, agxdecode_vdm, verbose, &lib_params,
1147                       NULL);
1148 }
1149 
1150 void
libagxdecode_cdm(struct agxdecode_ctx * ctx,uint64_t addr,const char * label,bool verbose)1151 libagxdecode_cdm(struct agxdecode_ctx *ctx, uint64_t addr, const char *label,
1152                  bool verbose)
1153 {
1154    agxdecode_stateful(ctx, addr, label, agxdecode_cdm, verbose, &lib_params,
1155                       NULL);
1156 }
1157 void
libagxdecode_usc(struct agxdecode_ctx * ctx,uint64_t addr,const char * label,bool verbose)1158 libagxdecode_usc(struct agxdecode_ctx *ctx, uint64_t addr, const char *label,
1159                  bool verbose)
1160 {
1161    agxdecode_stateful(ctx, addr, label, agxdecode_usc, verbose, &lib_params,
1162                       NULL);
1163 }
/* Library teardown: closes the dump stream, if one is open. */
void
libagxdecode_shutdown(void)
{
   agxdecode_dump_file_close();
}
1169