1 /*
2 * Copyright 2021-2022 Alyssa Rosenzweig
3 * SPDX-License-Identifier: MIT
4 */
5 #include <assert.h>
6 #include <dlfcn.h>
7 #include <inttypes.h>
8 #include <stdint.h>
9 #include <stdio.h>
10 #include <unistd.h>
11
12 #include <IOKit/IOKitLib.h>
13 #include <mach/mach.h>
14
15 #include "util/compiler.h"
16 #include "util/u_hexdump.h"
17 #include "agx_iokit.h"
18 #include "decode.h"
19 #include "dyld_interpose.h"
20 #include "util.h"
21
22 /*
23 * Wrap IOKit entrypoints to intercept communication between the AGX kernel
24 * extension and userspace clients. IOKit prototypes are public from the IOKit
25 * source release.
26 */
27
28 mach_port_t metal_connection = 0;
29
30 kern_return_t
wrap_Method(mach_port_t connection,uint32_t selector,const uint64_t * input,uint32_t inputCnt,const void * inputStruct,size_t inputStructCnt,uint64_t * output,uint32_t * outputCnt,void * outputStruct,size_t * outputStructCntP)31 wrap_Method(mach_port_t connection, uint32_t selector, const uint64_t *input,
32 uint32_t inputCnt, const void *inputStruct, size_t inputStructCnt,
33 uint64_t *output, uint32_t *outputCnt, void *outputStruct,
34 size_t *outputStructCntP)
35 {
36 /* Heuristic guess which connection is Metal, skip over I/O from everything
37 * else. This is technically wrong but it works in practice, and reduces the
38 * surface area we need to wrap.
39 */
40 if (selector == AGX_SELECTOR_SET_API) {
41 metal_connection = connection;
42 } else if (metal_connection != connection) {
43 return IOConnectCallMethod(connection, selector, input, inputCnt,
44 inputStruct, inputStructCnt, output, outputCnt,
45 outputStruct, outputStructCntP);
46 }
47
48 printf("Selector %u, %X, %X\n", selector, connection, metal_connection);
49
50 /* Check the arguments make sense */
51 assert((input != NULL) == (inputCnt != 0));
52 assert((inputStruct != NULL) == (inputStructCnt != 0));
53 assert((output != NULL) == (outputCnt != 0));
54 assert((outputStruct != NULL) == (outputStructCntP != 0));
55
56 /* Dump inputs */
57 switch (selector) {
58 case AGX_SELECTOR_SET_API:
59 assert(input == NULL && output == NULL && outputStruct == NULL);
60 assert(inputStruct != NULL && inputStructCnt == 16);
61 assert(((uint8_t *)inputStruct)[15] == 0x0);
62
63 printf("%X: SET_API(%s)\n", connection, (const char *)inputStruct);
64 break;
65
66 case AGX_SELECTOR_ALLOCATE_MEM: {
67 const struct agx_allocate_resource_req *req = inputStruct;
68 struct agx_allocate_resource_req *req2 = (void *)inputStruct;
69 req2->mode = (req->mode & 0x800) | 0x430;
70
71 bool suballocated = req->mode & 0x800;
72
73 printf("Resource allocation:\n");
74 printf(" Mode: 0x%X%s\n", req->mode & ~0x800,
75 suballocated ? " (suballocated) " : "");
76 printf(" CPU fixed: 0x%" PRIx64 "\n", req->cpu_fixed);
77 printf(" CPU fixed (parent): 0x%" PRIx64 "\n", req->cpu_fixed_parent);
78 printf(" Size: 0x%X\n", req->size);
79 printf(" Flags: 0x%X\n", req->flags);
80
81 if (suballocated) {
82 printf(" Parent: %u\n", req->parent);
83 } else {
84 assert(req->parent == 0);
85 }
86
87 for (unsigned i = 0; i < ARRAY_SIZE(req->unk0); ++i) {
88 if (req->unk0[i])
89 printf(" UNK%u: 0x%X\n", 0 + i, req->unk0[i]);
90 }
91
92 for (unsigned i = 0; i < ARRAY_SIZE(req->unk6); ++i) {
93 if (req->unk6[i])
94 printf(" UNK%u: 0x%X\n", 6 + i, req->unk6[i]);
95 }
96
97 if (req->unk17)
98 printf(" UNK17: 0x%X\n", req->unk17);
99
100 if (req->unk19)
101 printf(" UNK19: 0x%X\n", req->unk19);
102
103 for (unsigned i = 0; i < ARRAY_SIZE(req->unk21); ++i) {
104 if (req->unk21[i])
105 printf(" UNK%u: 0x%X\n", 21 + i, req->unk21[i]);
106 }
107
108 break;
109 }
110
111 case AGX_SELECTOR_SUBMIT_COMMAND_BUFFERS:
112 assert(output == NULL && outputStruct == NULL);
113 assert(inputCnt == 1);
114
115 printf("%X: SUBMIT_COMMAND_BUFFERS command queue id:%llx %p\n",
116 connection, input[0], inputStruct);
117
118 const struct IOAccelCommandQueueSubmitArgs_Header *hdr = inputStruct;
119 const struct IOAccelCommandQueueSubmitArgs_Command *cmds =
120 (void *)(hdr + 1);
121
122 for (unsigned i = 0; i < hdr->count; ++i) {
123 const struct IOAccelCommandQueueSubmitArgs_Command *req = &cmds[i];
124 agxdecode_cmdstream(req->command_buffer_shmem_id,
125 req->segment_list_shmem_id, true);
126 if (getenv("ASAHI_DUMP"))
127 agxdecode_dump_mappings(req->segment_list_shmem_id);
128 }
129
130 agxdecode_next_frame();
131 FALLTHROUGH;
132
133 default:
134 printf("%X: call %s (out %p, %zu)", connection,
135 wrap_selector_name(selector), outputStructCntP,
136 outputStructCntP ? *outputStructCntP : 0);
137
138 for (uint64_t u = 0; u < inputCnt; ++u)
139 printf(" %llx", input[u]);
140
141 if (inputStructCnt) {
142 printf(", struct:\n");
143 u_hexdump(stdout, inputStruct, inputStructCnt, true);
144 } else {
145 printf("\n");
146 }
147
148 break;
149 }
150
151 /* Invoke the real method */
152 kern_return_t ret = IOConnectCallMethod(
153 connection, selector, input, inputCnt, inputStruct, inputStructCnt,
154 output, outputCnt, outputStruct, outputStructCntP);
155
156 if (ret != 0)
157 printf("return %u\n", ret);
158
159 /* Track allocations for later analysis (dumping, disassembly, etc) */
160 switch (selector) {
161 case AGX_SELECTOR_CREATE_SHMEM: {
162 assert(inputCnt == 2);
163 assert((*outputStructCntP) == 0x10);
164 uint64_t *inp = (uint64_t *)input;
165
166 uint8_t type = inp[1];
167
168 assert(type <= 2);
169 if (type == 2)
170 printf("(cmdbuf with error reporting)\n");
171
172 uint64_t *ptr = (uint64_t *)outputStruct;
173 uint32_t *words = (uint32_t *)(ptr + 1);
174 bool mmap = inp[1];
175
176 /* Construct a synthetic GEM handle for the shmem */
177 agxdecode_track_alloc(&(struct agx_bo){
178 .handle = words[1] ^ (mmap ? (1u << 30) : (1u << 29)),
179 .ptr.cpu = (void *)*ptr,
180 .size = words[0],
181 });
182
183 break;
184 }
185
186 case AGX_SELECTOR_ALLOCATE_MEM: {
187 assert((*outputStructCntP) == 0x50);
188 const struct agx_allocate_resource_req *req = inputStruct;
189 struct agx_allocate_resource_resp *resp = outputStruct;
190 if (resp->cpu && req->cpu_fixed)
191 assert(resp->cpu == req->cpu_fixed);
192 printf("Response:\n");
193 printf(" GPU VA: 0x%" PRIx64 "\n", resp->gpu_va);
194 printf(" CPU VA: 0x%" PRIx64 "\n", resp->cpu);
195 printf(" Handle: %u\n", resp->handle);
196 printf(" Root size: 0x%" PRIx64 "\n", resp->root_size);
197 printf(" Suballocation size: 0x%" PRIx64 "\n", resp->sub_size);
198 printf(" GUID: 0x%X\n", resp->guid);
199 for (unsigned i = 0; i < ARRAY_SIZE(resp->unk4); ++i) {
200 if (resp->unk4[i])
201 printf(" UNK%u: 0x%X\n", 4 + i, resp->unk4[i]);
202 }
203 for (unsigned i = 0; i < ARRAY_SIZE(resp->unk11); ++i) {
204 if (resp->unk11[i])
205 printf(" UNK%u: 0x%X\n", 11 + i, resp->unk11[i]);
206 }
207
208 if (req->parent)
209 assert(resp->sub_size <= resp->root_size);
210 else
211 assert(resp->sub_size == resp->root_size);
212
213 agxdecode_track_alloc(&(struct agx_bo){
214 .size = resp->sub_size,
215 .handle = resp->handle,
216 .ptr.gpu = resp->gpu_va,
217 .ptr.cpu = (void *)resp->cpu,
218 });
219
220 break;
221 }
222
223 case AGX_SELECTOR_FREE_MEM: {
224 assert(inputCnt == 1);
225 assert(inputStruct == NULL);
226 assert(output == NULL);
227 assert(outputStruct == NULL);
228
229 agxdecode_track_free(&(struct agx_bo){.handle = input[0]});
230
231 break;
232 }
233
234 case AGX_SELECTOR_FREE_SHMEM: {
235 assert(inputCnt == 1);
236 assert(inputStruct == NULL);
237 assert(output == NULL);
238 assert(outputStruct == NULL);
239
240 agxdecode_track_free(&(struct agx_bo){.handle = input[0] ^ (1u << 29)});
241
242 break;
243 }
244
245 default:
246 /* Dump the outputs */
247 if (outputCnt) {
248 printf("%u scalars: ", *outputCnt);
249
250 for (uint64_t u = 0; u < *outputCnt; ++u)
251 printf("%llx ", output[u]);
252
253 printf("\n");
254 }
255
256 if (outputStructCntP) {
257 printf(" struct\n");
258 u_hexdump(stdout, outputStruct, *outputStructCntP, true);
259
260 if (selector == 2) {
261 /* Dump linked buffer as well */
262 void **o = outputStruct;
263 u_hexdump(stdout, *o, 64, true);
264 }
265 }
266
267 printf("\n");
268 break;
269 }
270
271 return ret;
272 }
273
274 kern_return_t
wrap_AsyncMethod(mach_port_t connection,uint32_t selector,mach_port_t wakePort,uint64_t * reference,uint32_t referenceCnt,const uint64_t * input,uint32_t inputCnt,const void * inputStruct,size_t inputStructCnt,uint64_t * output,uint32_t * outputCnt,void * outputStruct,size_t * outputStructCntP)275 wrap_AsyncMethod(mach_port_t connection, uint32_t selector,
276 mach_port_t wakePort, uint64_t *reference,
277 uint32_t referenceCnt, const uint64_t *input,
278 uint32_t inputCnt, const void *inputStruct,
279 size_t inputStructCnt, uint64_t *output, uint32_t *outputCnt,
280 void *outputStruct, size_t *outputStructCntP)
281 {
282 /* Check the arguments make sense */
283 assert((input != NULL) == (inputCnt != 0));
284 assert((inputStruct != NULL) == (inputStructCnt != 0));
285 assert((output != NULL) == (outputCnt != 0));
286 assert((outputStruct != NULL) == (outputStructCntP != 0));
287
288 printf("%X: call %X, wake port %X (out %p, %zu)", connection, selector,
289 wakePort, outputStructCntP, outputStructCntP ? *outputStructCntP : 0);
290
291 for (uint64_t u = 0; u < inputCnt; ++u)
292 printf(" %llx", input[u]);
293
294 if (inputStructCnt) {
295 printf(", struct:\n");
296 u_hexdump(stdout, inputStruct, inputStructCnt, true);
297 } else {
298 printf("\n");
299 }
300
301 printf(", references: ");
302 for (unsigned i = 0; i < referenceCnt; ++i)
303 printf(" %llx", reference[i]);
304 printf("\n");
305
306 kern_return_t ret = IOConnectCallAsyncMethod(
307 connection, selector, wakePort, reference, referenceCnt, input, inputCnt,
308 inputStruct, inputStructCnt, output, outputCnt, outputStruct,
309 outputStructCntP);
310
311 printf("return %u", ret);
312
313 if (outputCnt) {
314 printf("%u scalars: ", *outputCnt);
315
316 for (uint64_t u = 0; u < *outputCnt; ++u)
317 printf("%llx ", output[u]);
318
319 printf("\n");
320 }
321
322 if (outputStructCntP) {
323 printf(" struct\n");
324 u_hexdump(stdout, outputStruct, *outputStructCntP, true);
325
326 if (selector == 2) {
327 /* Dump linked buffer as well */
328 void **o = outputStruct;
329 u_hexdump(stdout, *o, 64, true);
330 }
331 }
332
333 printf("\n");
334 return ret;
335 }
336
337 kern_return_t
wrap_StructMethod(mach_port_t connection,uint32_t selector,const void * inputStruct,size_t inputStructCnt,void * outputStruct,size_t * outputStructCntP)338 wrap_StructMethod(mach_port_t connection, uint32_t selector,
339 const void *inputStruct, size_t inputStructCnt,
340 void *outputStruct, size_t *outputStructCntP)
341 {
342 return wrap_Method(connection, selector, NULL, 0, inputStruct,
343 inputStructCnt, NULL, NULL, outputStruct,
344 outputStructCntP);
345 }
346
347 kern_return_t
wrap_AsyncStructMethod(mach_port_t connection,uint32_t selector,mach_port_t wakePort,uint64_t * reference,uint32_t referenceCnt,const void * inputStruct,size_t inputStructCnt,void * outputStruct,size_t * outputStructCnt)348 wrap_AsyncStructMethod(mach_port_t connection, uint32_t selector,
349 mach_port_t wakePort, uint64_t *reference,
350 uint32_t referenceCnt, const void *inputStruct,
351 size_t inputStructCnt, void *outputStruct,
352 size_t *outputStructCnt)
353 {
354 return wrap_AsyncMethod(connection, selector, wakePort, reference,
355 referenceCnt, NULL, 0, inputStruct, inputStructCnt,
356 NULL, NULL, outputStruct, outputStructCnt);
357 }
358
359 kern_return_t
wrap_ScalarMethod(mach_port_t connection,uint32_t selector,const uint64_t * input,uint32_t inputCnt,uint64_t * output,uint32_t * outputCnt)360 wrap_ScalarMethod(mach_port_t connection, uint32_t selector,
361 const uint64_t *input, uint32_t inputCnt, uint64_t *output,
362 uint32_t *outputCnt)
363 {
364 return wrap_Method(connection, selector, input, inputCnt, NULL, 0, output,
365 outputCnt, NULL, NULL);
366 }
367
368 kern_return_t
wrap_AsyncScalarMethod(mach_port_t connection,uint32_t selector,mach_port_t wakePort,uint64_t * reference,uint32_t referenceCnt,const uint64_t * input,uint32_t inputCnt,uint64_t * output,uint32_t * outputCnt)369 wrap_AsyncScalarMethod(mach_port_t connection, uint32_t selector,
370 mach_port_t wakePort, uint64_t *reference,
371 uint32_t referenceCnt, const uint64_t *input,
372 uint32_t inputCnt, uint64_t *output, uint32_t *outputCnt)
373 {
374 return wrap_AsyncMethod(connection, selector, wakePort, reference,
375 referenceCnt, input, inputCnt, NULL, 0, output,
376 outputCnt, NULL, NULL);
377 }
378
379 mach_port_t
wrap_DataQueueAllocateNotificationPort()380 wrap_DataQueueAllocateNotificationPort()
381 {
382 mach_port_t ret = IODataQueueAllocateNotificationPort();
383 printf("Allocated notif port %X\n", ret);
384 return ret;
385 }
386
387 kern_return_t
wrap_SetNotificationPort(io_connect_t connect,uint32_t type,mach_port_t port,uintptr_t reference)388 wrap_SetNotificationPort(io_connect_t connect, uint32_t type, mach_port_t port,
389 uintptr_t reference)
390 {
391 printf(
392 "Set noficiation port connect=%X, type=%X, port=%X, reference=%" PRIx64
393 "\n",
394 connect, type, port, (uint64_t)reference);
395
396 return IOConnectSetNotificationPort(connect, type, port, reference);
397 }
398
399 IOReturn
wrap_DataQueueWaitForAvailableData(IODataQueueMemory * dataQueue,mach_port_t notificationPort)400 wrap_DataQueueWaitForAvailableData(IODataQueueMemory *dataQueue,
401 mach_port_t notificationPort)
402 {
403 printf("Waiting for data queue at notif port %X\n", notificationPort);
404 IOReturn ret = IODataQueueWaitForAvailableData(dataQueue, notificationPort);
405 printf("ret=%X\n", ret);
406 return ret;
407 }
408
409 IODataQueueEntry *
wrap_DataQueuePeek(IODataQueueMemory * dataQueue)410 wrap_DataQueuePeek(IODataQueueMemory *dataQueue)
411 {
412 printf("Peeking data queue\n");
413 return IODataQueuePeek(dataQueue);
414 }
415
416 IOReturn
wrap_DataQueueDequeue(IODataQueueMemory * dataQueue,void * data,uint32_t * dataSize)417 wrap_DataQueueDequeue(IODataQueueMemory *dataQueue, void *data,
418 uint32_t *dataSize)
419 {
420 printf("Dequeueing (dataQueue=%p, data=%p, buffer %u)\n", dataQueue, data,
421 *dataSize);
422 IOReturn ret = IODataQueueDequeue(dataQueue, data, dataSize);
423 printf("Return \"%s\", got %u bytes\n", mach_error_string(ret), *dataSize);
424
425 uint8_t *data8 = data;
426 for (unsigned i = 0; i < *dataSize; ++i) {
427 printf("%02X ", data8[i]);
428 }
429 printf("\n");
430
431 return ret;
432 }
433
/*
 * Interposition table: pair each wrapper above with the IOKit entrypoint it
 * stands in for. DYLD_INTERPOSE comes from dyld_interpose.h (not shown here);
 * presumably it registers the first argument as the replacement for the
 * second.
 */

/* IOConnectCall*Method family */
DYLD_INTERPOSE(wrap_Method, IOConnectCallMethod);
DYLD_INTERPOSE(wrap_AsyncMethod, IOConnectCallAsyncMethod);
DYLD_INTERPOSE(wrap_StructMethod, IOConnectCallStructMethod);
DYLD_INTERPOSE(wrap_AsyncStructMethod, IOConnectCallAsyncStructMethod);
DYLD_INTERPOSE(wrap_ScalarMethod, IOConnectCallScalarMethod);
DYLD_INTERPOSE(wrap_AsyncScalarMethod, IOConnectCallAsyncScalarMethod);

/* Notification ports */
DYLD_INTERPOSE(wrap_SetNotificationPort, IOConnectSetNotificationPort);
DYLD_INTERPOSE(wrap_DataQueueAllocateNotificationPort,
               IODataQueueAllocateNotificationPort);

/* Data queues */
DYLD_INTERPOSE(wrap_DataQueueWaitForAvailableData,
               IODataQueueWaitForAvailableData);
DYLD_INTERPOSE(wrap_DataQueuePeek, IODataQueuePeek);
DYLD_INTERPOSE(wrap_DataQueueDequeue, IODataQueueDequeue);
447