/*
 * Copyright (C) 2019 Alyssa Rosenzweig
 * Copyright (C) 2017-2018 Lyude Paul
 * Copyright (C) 2019 Collabora, Ltd.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>

#include "util/macros.h"
#include "util/u_debug.h"
#include "util/u_hexdump.h"
#include "lib/pan_props.h"
#include "decode.h"

#include "compiler/bifrost/disassemble.h"
#include "compiler/valhall/disassemble.h"
#include "midgard/disassemble.h"

/* Used to distinguish dumped files, otherwise we would have to print the ctx
 * pointer, which is annoying for the user since it changes with every run */
static int num_ctxs = 0;

#define to_mapped_memory(x)                                                    \
   rb_node_data(struct pandecode_mapped_memory, x, node)

/*
 * Compare a GPU VA to a node, considering a GPU VA to be equal to a node if it
 * is contained in the interval the node represents. This lets us store
 * intervals in our tree.
 */
static int
pandecode_cmp_key(const struct rb_node *lhs, const void *key)
{
   struct pandecode_mapped_memory *mem = to_mapped_memory(lhs);
   uint64_t *gpu_va = (uint64_t *)key;

   if (mem->gpu_va <= *gpu_va && *gpu_va < (mem->gpu_va + mem->length))
      return 0;
   else
      return mem->gpu_va - *gpu_va;
}

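/* Order mappings by their base GPU VA when inserting them into the tree. */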
static int
pandecode_cmp(const struct rb_node *lhs, const struct rb_node *rhs)
{
   return to_mapped_memory(lhs)->gpu_va - to_mapped_memory(rhs)->gpu_va;
}

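/* Look up the mapping containing the given GPU VA without changing its CPU
 * protection. The context lock must be held. */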
static struct pandecode_mapped_memory *
pandecode_find_mapped_gpu_mem_containing_rw(struct pandecode_context *ctx,
                                            uint64_t addr)
{
   simple_mtx_assert_locked(&ctx->lock);

   struct rb_node *node =
      rb_tree_search(&ctx->mmap_tree, &addr, pandecode_cmp_key);

   return to_mapped_memory(node);
}

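/* Look up the mapping containing the given GPU VA and mark it read-only via
 * mprotect, remembering it so pandecode_map_read_write() can restore write
 * access once decoding is done. */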
struct pandecode_mapped_memory *
pandecode_find_mapped_gpu_mem_containing(struct pandecode_context *ctx,
                                         uint64_t addr)
{
   simple_mtx_assert_locked(&ctx->lock);

   struct pandecode_mapped_memory *mem =
      pandecode_find_mapped_gpu_mem_containing_rw(ctx, addr);

   if (mem && mem->addr && !mem->ro) {
      mprotect(mem->addr, mem->length, PROT_READ);
      mem->ro = true;
      util_dynarray_append(&ctx->ro_mappings, struct pandecode_mapped_memory *,
                           mem);
   }

   return mem;
}

/*
 * Validate that the given GPU pointer is backed by a known mapping containing
 * at least sz bytes, logging a warning otherwise. This is a tool for catching
 * GPU-side memory bugs such as null dereferences and buffer overruns.
 */
void
pandecode_validate_buffer(struct pandecode_context *ctx, mali_ptr addr,
                          size_t sz)
{
   if (!addr) {
      pandecode_log(ctx, "// XXX: null pointer deref\n");
      return;
   }

   /* Find a BO */

   struct pandecode_mapped_memory *bo =
      pandecode_find_mapped_gpu_mem_containing(ctx, addr);

   if (!bo) {
      pandecode_log(ctx, "// XXX: invalid memory dereference\n");
      return;
   }

   /* Bounds check */

   unsigned offset = addr - bo->gpu_va;
   unsigned total = offset + sz;

   if (total > bo->length) {
      pandecode_log(ctx,
                    "// XXX: buffer overrun. "
                    "Chunk of size %zu at offset %d in buffer of size %zu. "
                    "Overrun by %zu bytes. \n",
                    sz, offset, bo->length, total - bo->length);
      return;
   }
}

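/* Restore read/write access to every mapping that was made read-only during
 * decoding, then forget about them. */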
void
pandecode_map_read_write(struct pandecode_context *ctx)
{
   simple_mtx_assert_locked(&ctx->lock);

   util_dynarray_foreach(&ctx->ro_mappings, struct pandecode_mapped_memory *,
                         mem) {
      (*mem)->ro = false;
      mprotect((*mem)->addr, (*mem)->length, PROT_READ | PROT_WRITE);
   }
   util_dynarray_clear(&ctx->ro_mappings);
}

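/* Record a human-readable name for a mapping, falling back to a name derived
 * from the GPU VA when none is given. */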
static void
pandecode_add_name(struct pandecode_context *ctx,
                   struct pandecode_mapped_memory *mem, uint64_t gpu_va,
                   const char *name)
{
   simple_mtx_assert_locked(&ctx->lock);

   if (!name) {
      /* If we don't have a name, assign one */

      snprintf(mem->name, sizeof(mem->name) - 1, "memory_%" PRIx64, gpu_va);
   } else {
      assert((strlen(name) + 1) < sizeof(mem->name));
      memcpy(mem->name, name, strlen(name) + 1);
   }
}

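/* Register (or update) a GPU VA -> CPU pointer mapping so the decoder can
 * follow GPU pointers found in command streams. */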
void
pandecode_inject_mmap(struct pandecode_context *ctx, uint64_t gpu_va, void *cpu,
                      unsigned sz, const char *name)
{
   simple_mtx_lock(&ctx->lock);

   /* First, search if we already mapped this and are just updating an address */

   struct pandecode_mapped_memory *existing =
      pandecode_find_mapped_gpu_mem_containing_rw(ctx, gpu_va);

   if (existing && existing->gpu_va == gpu_va) {
      existing->length = sz;
      existing->addr = cpu;
      pandecode_add_name(ctx, existing, gpu_va, name);
   } else {
      /* Otherwise, add a fresh mapping */
      struct pandecode_mapped_memory *mapped_mem = NULL;

      mapped_mem = calloc(1, sizeof(*mapped_mem));
      mapped_mem->gpu_va = gpu_va;
      mapped_mem->length = sz;
      mapped_mem->addr = cpu;
      pandecode_add_name(ctx, mapped_mem, gpu_va, name);

      /* Add it to the tree */
      rb_tree_insert(&ctx->mmap_tree, &mapped_mem->node, pandecode_cmp);
   }

   simple_mtx_unlock(&ctx->lock);
}

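/* Remove a previously injected mapping once the backing memory is freed. */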
void
pandecode_inject_free(struct pandecode_context *ctx, uint64_t gpu_va,
                      unsigned sz)
{
   simple_mtx_lock(&ctx->lock);

   struct pandecode_mapped_memory *mem =
      pandecode_find_mapped_gpu_mem_containing_rw(ctx, gpu_va);

   if (mem) {
      assert(mem->gpu_va == gpu_va);
      assert(mem->length == sz);

      rb_tree_remove(&ctx->mmap_tree, &mem->node);
      free(mem);
   }

   simple_mtx_unlock(&ctx->lock);
}

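/* Format a GPU pointer as "name + offset" when it falls inside a known
 * mapping, or as a raw address otherwise. The caller frees the returned
 * string. */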
char *
pointer_as_memory_reference(struct pandecode_context *ctx, uint64_t ptr)
{
   simple_mtx_assert_locked(&ctx->lock);

   struct pandecode_mapped_memory *mapped;
   char *out = malloc(128);

   /* Try to find the corresponding mapped zone */

   mapped = pandecode_find_mapped_gpu_mem_containing_rw(ctx, ptr);

   if (mapped) {
      snprintf(out, 128, "%s + %d", mapped->name, (int)(ptr - mapped->gpu_va));
      return out;
   }

   /* Just use the raw address if other options are exhausted */

   snprintf(out, 128, "0x%" PRIx64, ptr);
   return out;
}

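/* Open the dump stream for the current frame if it is not already open,
 * honouring the PANDECODE_DUMP_FILE environment variable ("stderr" sends
 * output to stderr). */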
void
pandecode_dump_file_open(struct pandecode_context *ctx)
{
   simple_mtx_assert_locked(&ctx->lock);

   /* This does a getenv every frame, so it is possible to use
    * setenv to change the base at runtime.
    */
   const char *dump_file_base =
      debug_get_option("PANDECODE_DUMP_FILE", "pandecode.dump");
   if (!strcmp(dump_file_base, "stderr"))
      ctx->dump_stream = stderr;
   else if (!ctx->dump_stream) {
      char buffer[1024];
      snprintf(buffer, sizeof(buffer), "%s.ctx-%d.%04d", dump_file_base,
               ctx->id, ctx->dump_frame_count);
      printf("pandecode: dump command stream to file %s\n", buffer);
      ctx->dump_stream = fopen(buffer, "w");
      if (!ctx->dump_stream)
         fprintf(stderr,
                 "pandecode: failed to open command stream log file %s\n",
                 buffer);
   }
}

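/* Close the current dump stream, unless it is stderr. */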
static void
pandecode_dump_file_close(struct pandecode_context *ctx)
{
   simple_mtx_assert_locked(&ctx->lock);

   if (ctx->dump_stream && ctx->dump_stream != stderr) {
      if (fclose(ctx->dump_stream))
         perror("pandecode: dump file");

      ctx->dump_stream = NULL;
   }
}

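/* Allocate and initialize a decoding context. If to_stderr is set, output goes
 * to stderr instead of a dump file. */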
struct pandecode_context *
pandecode_create_context(bool to_stderr)
{
   struct pandecode_context *ctx = calloc(1, sizeof(*ctx));

   /* Not thread safe, but we shouldn't ever hit this, and even if we do, the
    * worst that could happen is having the files dumped with their filenames
    * in a different order. */
   ctx->id = num_ctxs++;

   /* This will be initialized later and can be changed at run time through
    * the PANDECODE_DUMP_FILE environment variable.
    */
   ctx->dump_stream = to_stderr ? stderr : NULL;

   rb_tree_init(&ctx->mmap_tree);
   util_dynarray_init(&ctx->ro_mappings, NULL);

   simple_mtx_t mtx_init = SIMPLE_MTX_INITIALIZER;
   memcpy(&ctx->lock, &mtx_init, sizeof(simple_mtx_t));

   return ctx;
}

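/* Finish the current frame's dump file so the next frame opens a fresh one. */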
void
pandecode_next_frame(struct pandecode_context *ctx)
{
   simple_mtx_lock(&ctx->lock);

   pandecode_dump_file_close(ctx);
   ctx->dump_frame_count++;

   simple_mtx_unlock(&ctx->lock);
}

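/* Tear down a decoding context, freeing every tracked mapping. */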
void
pandecode_destroy_context(struct pandecode_context *ctx)
{
   simple_mtx_lock(&ctx->lock);

   rb_tree_foreach_safe(struct pandecode_mapped_memory, it, &ctx->mmap_tree,
                        node) {
      rb_tree_remove(&ctx->mmap_tree, &it->node);
      free(it);
   }

   util_dynarray_fini(&ctx->ro_mappings);
   pandecode_dump_file_close(ctx);

   simple_mtx_unlock(&ctx->lock);

   free(ctx);
}

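/* Hexdump every mapped buffer to the dump stream. */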
void
pandecode_dump_mappings(struct pandecode_context *ctx)
{
   simple_mtx_lock(&ctx->lock);

   pandecode_dump_file_open(ctx);

   rb_tree_foreach(struct pandecode_mapped_memory, it, &ctx->mmap_tree, node) {
      if (!it->addr || !it->length)
         continue;

      fprintf(ctx->dump_stream, "Buffer: %s gpu %" PRIx64 "\n\n", it->name,
              it->gpu_va);

      u_hexdump(ctx->dump_stream, it->addr, it->length, false);
      fprintf(ctx->dump_stream, "\n");
   }

   fflush(ctx->dump_stream);
   simple_mtx_unlock(&ctx->lock);
}

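/* Dispatch to the per-architecture abort-on-fault handler for the given job
 * chain. */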
void
pandecode_abort_on_fault(struct pandecode_context *ctx, mali_ptr jc_gpu_va,
                         unsigned gpu_id)
{
   simple_mtx_lock(&ctx->lock);

   switch (pan_arch(gpu_id)) {
   case 4:
      pandecode_abort_on_fault_v4(ctx, jc_gpu_va);
      break;
   case 5:
      pandecode_abort_on_fault_v5(ctx, jc_gpu_va);
      break;
   case 6:
      pandecode_abort_on_fault_v6(ctx, jc_gpu_va);
      break;
   case 7:
      pandecode_abort_on_fault_v7(ctx, jc_gpu_va);
      break;
   case 9:
      pandecode_abort_on_fault_v9(ctx, jc_gpu_va);
      break;
   default:
      unreachable("Unsupported architecture");
   }

   simple_mtx_unlock(&ctx->lock);
}

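/* Decode a job chain with the decoder matching the GPU architecture
 * (v4 through v9). */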
void
pandecode_jc(struct pandecode_context *ctx, mali_ptr jc_gpu_va, unsigned gpu_id)
{
   simple_mtx_lock(&ctx->lock);

   switch (pan_arch(gpu_id)) {
   case 4:
      pandecode_jc_v4(ctx, jc_gpu_va, gpu_id);
      break;
   case 5:
      pandecode_jc_v5(ctx, jc_gpu_va, gpu_id);
      break;
   case 6:
      pandecode_jc_v6(ctx, jc_gpu_va, gpu_id);
      break;
   case 7:
      pandecode_jc_v7(ctx, jc_gpu_va, gpu_id);
      break;
   case 9:
      pandecode_jc_v9(ctx, jc_gpu_va, gpu_id);
      break;
   default:
      unreachable("Unsupported architecture");
   }

   simple_mtx_unlock(&ctx->lock);
}

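/* Decode a command stream queue; only v10 command-stream hardware is handled
 * here. */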
void
pandecode_cs(struct pandecode_context *ctx, mali_ptr queue_gpu_va,
             uint32_t size, unsigned gpu_id, uint32_t *regs)
{
   simple_mtx_lock(&ctx->lock);

   switch (pan_arch(gpu_id)) {
   case 10:
      pandecode_cs_v10(ctx, queue_gpu_va, size, gpu_id, regs);
      break;
   default:
      unreachable("Unsupported architecture");
   }

   simple_mtx_unlock(&ctx->lock);
}

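/* Disassemble a shader at the given GPU VA, picking the Midgard, Bifrost or
 * Valhall disassembler based on the GPU architecture. */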
void
pandecode_shader_disassemble(struct pandecode_context *ctx, mali_ptr shader_ptr,
                             unsigned gpu_id)
{
   uint8_t *PANDECODE_PTR_VAR(ctx, code, shader_ptr);

   /* Compute maximum possible size */
   struct pandecode_mapped_memory *mem =
      pandecode_find_mapped_gpu_mem_containing(ctx, shader_ptr);
   size_t sz = mem->length - (shader_ptr - mem->gpu_va);

   /* Print some boilerplate to clearly denote the assembly (which doesn't
    * obey indentation rules), and actually do the disassembly! */

   pandecode_log_cont(ctx, "\nShader %p (GPU VA %" PRIx64 ") sz %" PRId64 "\n",
                      code, shader_ptr, sz);

   if (pan_arch(gpu_id) >= 9) {
      disassemble_valhall(ctx->dump_stream, (const uint64_t *)code, sz, true);
   } else if (pan_arch(gpu_id) >= 6)
      disassemble_bifrost(ctx->dump_stream, code, sz, false);
   else
      disassemble_midgard(ctx->dump_stream, code, sz, gpu_id, true);

   pandecode_log_cont(ctx, "\n\n");
}