/*
 * Copyright (C) 2019 Alyssa Rosenzweig
 * Copyright (C) 2017-2018 Lyude Paul
 * Copyright (C) 2019 Collabora, Ltd.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>

#include "util/macros.h"
#include "util/u_debug.h"
#include "util/u_hexdump.h"
#include "lib/pan_props.h"
#include "decode.h"

#include "compiler/bifrost/disassemble.h"
#include "compiler/valhall/disassemble.h"
#include "midgard/disassemble.h"

/* Used to distinguish dumped files; otherwise we would have to print the ctx
 * pointer, which is annoying for the user since it changes with every run */
static int num_ctxs = 0;

#define to_mapped_memory(x)                                                    \
   rb_node_data(struct pandecode_mapped_memory, x, node)

/*
 * Compare a GPU VA to a node, considering a GPU VA to be equal to a node if it
 * is contained in the interval the node represents. This lets us store
 * intervals in our tree.
 */
static int
pandecode_cmp_key(const struct rb_node *lhs, const void *key)
{
   struct pandecode_mapped_memory *mem = to_mapped_memory(lhs);
   uint64_t *gpu_va = (uint64_t *)key;

   if (mem->gpu_va <= *gpu_va && *gpu_va < (mem->gpu_va + mem->length))
      return 0;

   /* Compare explicitly rather than subtracting: truncating the 64-bit
    * difference to int could flip the sign for widely separated VAs. */
   return (mem->gpu_va > *gpu_va) ? 1 : -1;
}

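/*
 * Illustrative sketch of the interval lookup (addresses hypothetical): with a
 * mapping covering [0x10000, 0x14000), any VA inside the interval compares
 * equal to the node, so interior pointers resolve to their containing buffer:
 *
 *    uint64_t va = 0x10080;
 *    struct rb_node *node =
 *       rb_tree_search(&ctx->mmap_tree, &va, pandecode_cmp_key);
 *    struct pandecode_mapped_memory *mem = to_mapped_memory(node);
 *    // mem->gpu_va == 0x10000, so va sits at offset 0x80 within the buffer
 */
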
static int
pandecode_cmp(const struct rb_node *lhs, const struct rb_node *rhs)
{
   uint64_t lhs_va = to_mapped_memory(lhs)->gpu_va;
   uint64_t rhs_va = to_mapped_memory(rhs)->gpu_va;

   /* Same caveat as pandecode_cmp_key: avoid truncating a 64-bit difference */
   return (lhs_va > rhs_va) - (lhs_va < rhs_va);
}

static struct pandecode_mapped_memory *
pandecode_find_mapped_gpu_mem_containing_rw(struct pandecode_context *ctx,
                                            uint64_t addr)
{
   simple_mtx_assert_locked(&ctx->lock);

   struct rb_node *node =
      rb_tree_search(&ctx->mmap_tree, &addr, pandecode_cmp_key);

   return to_mapped_memory(node);
}

struct pandecode_mapped_memory *
pandecode_find_mapped_gpu_mem_containing(struct pandecode_context *ctx,
                                         uint64_t addr)
{
   simple_mtx_assert_locked(&ctx->lock);

   struct pandecode_mapped_memory *mem =
      pandecode_find_mapped_gpu_mem_containing_rw(ctx, addr);

   if (mem && mem->addr && !mem->ro) {
      mprotect(mem->addr, mem->length, PROT_READ);
      mem->ro = true;
      util_dynarray_append(&ctx->ro_mappings, struct pandecode_mapped_memory *,
                           mem);
   }

   return mem;
}

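/*
 * Usage sketch (desc_va hypothetical): the non-_rw lookup above additionally
 * write-protects the host mirror, so decoding cannot scribble on the dumped
 * memory until pandecode_map_read_write() restores write access:
 *
 *    struct pandecode_mapped_memory *mem =
 *       pandecode_find_mapped_gpu_mem_containing(ctx, desc_va);
 *    if (mem) {
 *       const uint8_t *cpu =
 *          (const uint8_t *)mem->addr + (desc_va - mem->gpu_va);
 *       // ... read-only decoding via cpu ...
 *    }
 */
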
/*
 * Validate that the given pointer into GPU memory is valid and covers at
 * least sz bytes, logging an "XXX" comment otherwise. This is a tool for
 * catching GPU-side memory bugs in decoded command streams.
 */
void
pandecode_validate_buffer(struct pandecode_context *ctx, mali_ptr addr,
                          size_t sz)
{
   if (!addr) {
      pandecode_log(ctx, "// XXX: null pointer deref\n");
      return;
   }

   /* Find a BO */

   struct pandecode_mapped_memory *bo =
      pandecode_find_mapped_gpu_mem_containing(ctx, addr);

   if (!bo) {
      pandecode_log(ctx, "// XXX: invalid memory dereference\n");
      return;
   }

   /* Bounds check */

   unsigned offset = addr - bo->gpu_va;
   unsigned total = offset + sz;

   if (total > bo->length) {
      pandecode_log(ctx,
                    "// XXX: buffer overrun. "
                    "Chunk of size %zu at offset %u in buffer of size %zu. "
                    "Overrun by %zu bytes.\n",
                    sz, offset, bo->length, total - bo->length);
      return;
   }
}

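/*
 * Example call (descriptor size hypothetical): before decoding a 64-byte
 * descriptor at desc_va, sanity-check the pointer first. Problems are logged
 * as "XXX" comments in the dump stream rather than aborting the decode:
 *
 *    pandecode_validate_buffer(ctx, desc_va, 64);
 */
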
void
pandecode_map_read_write(struct pandecode_context *ctx)
{
   simple_mtx_assert_locked(&ctx->lock);

   util_dynarray_foreach(&ctx->ro_mappings, struct pandecode_mapped_memory *,
                         mem) {
      (*mem)->ro = false;
      mprotect((*mem)->addr, (*mem)->length, PROT_READ | PROT_WRITE);
   }
   util_dynarray_clear(&ctx->ro_mappings);
}

static void
pandecode_add_name(struct pandecode_context *ctx,
                   struct pandecode_mapped_memory *mem, uint64_t gpu_va,
                   const char *name)
{
   simple_mtx_assert_locked(&ctx->lock);

   if (!name) {
      /* If we don't have a name, assign one */

      snprintf(mem->name, sizeof(mem->name) - 1, "memory_%" PRIx64, gpu_va);
   } else {
      assert((strlen(name) + 1) < sizeof(mem->name));
      memcpy(mem->name, name, strlen(name) + 1);
   }
}

void
pandecode_inject_mmap(struct pandecode_context *ctx, uint64_t gpu_va, void *cpu,
                      unsigned sz, const char *name)
{
   simple_mtx_lock(&ctx->lock);

   /* First, check whether we already mapped this and are just updating an
    * address */

   struct pandecode_mapped_memory *existing =
      pandecode_find_mapped_gpu_mem_containing_rw(ctx, gpu_va);

   if (existing && existing->gpu_va == gpu_va) {
      existing->length = sz;
      existing->addr = cpu;
      pandecode_add_name(ctx, existing, gpu_va, name);
   } else {
      /* Otherwise, add a fresh mapping */
      struct pandecode_mapped_memory *mapped_mem = NULL;

      mapped_mem = calloc(1, sizeof(*mapped_mem));
      mapped_mem->gpu_va = gpu_va;
      mapped_mem->length = sz;
      mapped_mem->addr = cpu;
      pandecode_add_name(ctx, mapped_mem, gpu_va, name);

      /* Add it to the tree */
      rb_tree_insert(&ctx->mmap_tree, &mapped_mem->node, pandecode_cmp);
   }

   simple_mtx_unlock(&ctx->lock);
}

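/*
 * Driver-side sketch (bo fields hypothetical): drivers typically mirror BO
 * creation and destruction into the decoder so later dumps can resolve
 * pointers by name:
 *
 *    pandecode_inject_mmap(ctx, bo->gpu_va, bo->cpu, bo->size, "scratch");
 *    ...
 *    pandecode_inject_free(ctx, bo->gpu_va, bo->size);
 */
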
void
pandecode_inject_free(struct pandecode_context *ctx, uint64_t gpu_va,
                      unsigned sz)
{
   simple_mtx_lock(&ctx->lock);

   struct pandecode_mapped_memory *mem =
      pandecode_find_mapped_gpu_mem_containing_rw(ctx, gpu_va);

   if (mem) {
      assert(mem->gpu_va == gpu_va);
      assert(mem->length == sz);

      rb_tree_remove(&ctx->mmap_tree, &mem->node);
      free(mem);
   }

   simple_mtx_unlock(&ctx->lock);
}

char *
pointer_as_memory_reference(struct pandecode_context *ctx, uint64_t ptr)
{
   simple_mtx_assert_locked(&ctx->lock);

   struct pandecode_mapped_memory *mapped;
   char *out = malloc(128);

   /* Try to find the corresponding mapped zone */

   mapped = pandecode_find_mapped_gpu_mem_containing_rw(ctx, ptr);

   if (mapped) {
      snprintf(out, 128, "%s + %d", mapped->name, (int)(ptr - mapped->gpu_va));
      return out;
   }

   /* Just use the raw address if other options are exhausted */

   snprintf(out, 128, "0x%" PRIx64, ptr);
   return out;
}

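/*
 * Sketch of the resulting strings (values hypothetical): a pointer at offset
 * 0x80 into a mapping named "memory_10000" prints as "memory_10000 + 128",
 * while an unmapped pointer falls back to the raw "0x10080" form. The caller
 * must hold ctx->lock and owns the returned allocation:
 *
 *    char *ref = pointer_as_memory_reference(ctx, ptr);
 *    fprintf(ctx->dump_stream, "pointer: %s\n", ref);
 *    free(ref);
 */
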
void
pandecode_dump_file_open(struct pandecode_context *ctx)
{
   simple_mtx_assert_locked(&ctx->lock);

   /* This does a getenv every frame, so it is possible to use
    * setenv to change the base at runtime.
    */
   const char *dump_file_base =
      debug_get_option("PANDECODE_DUMP_FILE", "pandecode.dump");
   if (!strcmp(dump_file_base, "stderr"))
      ctx->dump_stream = stderr;
   else if (!ctx->dump_stream) {
      char buffer[1024];
      snprintf(buffer, sizeof(buffer), "%s.ctx-%d.%04d", dump_file_base,
               ctx->id, ctx->dump_frame_count);
      printf("pandecode: dump command stream to file %s\n", buffer);
      ctx->dump_stream = fopen(buffer, "w");
      if (!ctx->dump_stream)
         fprintf(stderr,
                 "pandecode: failed to open command stream log file %s\n",
                 buffer);
   }
}

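/*
 * The generated file names follow "<base>.ctx-<id>.<frame>": with the default
 * base, the first frame of the first context lands in
 * "pandecode.dump.ctx-0.0000". Setting PANDECODE_DUMP_FILE=stderr routes the
 * dump to stderr instead, and the base can be changed between frames via
 * setenv since it is re-read on every open.
 */
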
static void
pandecode_dump_file_close(struct pandecode_context *ctx)
{
   simple_mtx_assert_locked(&ctx->lock);

   if (ctx->dump_stream && ctx->dump_stream != stderr) {
      if (fclose(ctx->dump_stream))
         perror("pandecode: dump file");

      ctx->dump_stream = NULL;
   }
}

struct pandecode_context *
pandecode_create_context(bool to_stderr)
{
   struct pandecode_context *ctx = calloc(1, sizeof(*ctx));

   /* Not thread safe, but we shouldn't ever hit this, and even if we do, the
    * worst that could happen is having the files dumped with their filenames
    * in a different order. */
   ctx->id = num_ctxs++;

   /* This will be initialized later and can be changed at run time through
    * the PANDECODE_DUMP_FILE environment variable.
    */
   ctx->dump_stream = to_stderr ? stderr : NULL;

   rb_tree_init(&ctx->mmap_tree);
   util_dynarray_init(&ctx->ro_mappings, NULL);

   simple_mtx_t mtx_init = SIMPLE_MTX_INITIALIZER;
   memcpy(&ctx->lock, &mtx_init, sizeof(simple_mtx_t));

   return ctx;
}

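/*
 * Lifecycle sketch tying the entry points together (jc_va, cpu_ptr, size and
 * gpu_id hypothetical; pandecode_jc is one of several decode entry points):
 *
 *    struct pandecode_context *ctx = pandecode_create_context(false);
 *    pandecode_inject_mmap(ctx, jc_va, cpu_ptr, size, NULL);
 *    pandecode_jc(ctx, jc_va, gpu_id);
 *    pandecode_next_frame(ctx);
 *    ...
 *    pandecode_destroy_context(ctx);
 */
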
void
pandecode_next_frame(struct pandecode_context *ctx)
{
   simple_mtx_lock(&ctx->lock);

   pandecode_dump_file_close(ctx);
   ctx->dump_frame_count++;

   simple_mtx_unlock(&ctx->lock);
}

void
pandecode_destroy_context(struct pandecode_context *ctx)
{
   simple_mtx_lock(&ctx->lock);

   rb_tree_foreach_safe(struct pandecode_mapped_memory, it, &ctx->mmap_tree,
                        node) {
      rb_tree_remove(&ctx->mmap_tree, &it->node);
      free(it);
   }

   util_dynarray_fini(&ctx->ro_mappings);
   pandecode_dump_file_close(ctx);

   simple_mtx_unlock(&ctx->lock);

   free(ctx);
}

void
pandecode_dump_mappings(struct pandecode_context *ctx)
{
   simple_mtx_lock(&ctx->lock);

   pandecode_dump_file_open(ctx);

   rb_tree_foreach(struct pandecode_mapped_memory, it, &ctx->mmap_tree, node) {
      if (!it->addr || !it->length)
         continue;

      fprintf(ctx->dump_stream, "Buffer: %s gpu %" PRIx64 "\n\n", it->name,
              it->gpu_va);

      u_hexdump(ctx->dump_stream, it->addr, it->length, false);
      fprintf(ctx->dump_stream, "\n");
   }

   fflush(ctx->dump_stream);
   simple_mtx_unlock(&ctx->lock);
}

void
pandecode_abort_on_fault(struct pandecode_context *ctx, mali_ptr jc_gpu_va,
                         unsigned gpu_id)
{
   simple_mtx_lock(&ctx->lock);

   switch (pan_arch(gpu_id)) {
   case 4:
      pandecode_abort_on_fault_v4(ctx, jc_gpu_va);
      break;
   case 5:
      pandecode_abort_on_fault_v5(ctx, jc_gpu_va);
      break;
   case 6:
      pandecode_abort_on_fault_v6(ctx, jc_gpu_va);
      break;
   case 7:
      pandecode_abort_on_fault_v7(ctx, jc_gpu_va);
      break;
   case 9:
      pandecode_abort_on_fault_v9(ctx, jc_gpu_va);
      break;
   default:
      unreachable("Unsupported architecture");
   }

   simple_mtx_unlock(&ctx->lock);
}

void
pandecode_jc(struct pandecode_context *ctx, mali_ptr jc_gpu_va, unsigned gpu_id)
{
   simple_mtx_lock(&ctx->lock);

   switch (pan_arch(gpu_id)) {
   case 4:
      pandecode_jc_v4(ctx, jc_gpu_va, gpu_id);
      break;
   case 5:
      pandecode_jc_v5(ctx, jc_gpu_va, gpu_id);
      break;
   case 6:
      pandecode_jc_v6(ctx, jc_gpu_va, gpu_id);
      break;
   case 7:
      pandecode_jc_v7(ctx, jc_gpu_va, gpu_id);
      break;
   case 9:
      pandecode_jc_v9(ctx, jc_gpu_va, gpu_id);
      break;
   default:
      unreachable("Unsupported architecture");
   }

   simple_mtx_unlock(&ctx->lock);
}

void
pandecode_cs(struct pandecode_context *ctx, mali_ptr queue_gpu_va,
             uint32_t size, unsigned gpu_id, uint32_t *regs)
{
   simple_mtx_lock(&ctx->lock);

   switch (pan_arch(gpu_id)) {
   case 10:
      pandecode_cs_v10(ctx, queue_gpu_va, size, gpu_id, regs);
      break;
   default:
      unreachable("Unsupported architecture");
   }

   simple_mtx_unlock(&ctx->lock);
}

void
pandecode_shader_disassemble(struct pandecode_context *ctx, mali_ptr shader_ptr,
                             unsigned gpu_id)
{
   uint8_t *PANDECODE_PTR_VAR(ctx, code, shader_ptr);

   /* Compute maximum possible size */
   struct pandecode_mapped_memory *mem =
      pandecode_find_mapped_gpu_mem_containing(ctx, shader_ptr);
   size_t sz = mem->length - (shader_ptr - mem->gpu_va);

   /* Print some boilerplate to clearly denote the assembly (which doesn't
    * obey indentation rules), and actually do the disassembly! */

   pandecode_log_cont(ctx, "\nShader %p (GPU VA %" PRIx64 ") sz %zu\n",
                      code, shader_ptr, sz);

   if (pan_arch(gpu_id) >= 9) {
      disassemble_valhall(ctx->dump_stream, (const uint64_t *)code, sz, true);
   } else if (pan_arch(gpu_id) >= 6) {
      disassemble_bifrost(ctx->dump_stream, code, sz, false);
   } else {
      disassemble_midgard(ctx->dump_stream, code, sz, gpu_id, true);
   }

   pandecode_log_cont(ctx, "\n\n");
}