xref: /aosp_15_r20/trusty/kernel/lib/metrics/metrics.c (revision 344aa361028b423587d4ef3fa52a23d194628137)
1 /*
2  * Copyright (c) 2021, Google Inc. All rights reserved
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining
5  * a copy of this software and associated documentation files
6  * (the "Software"), to deal in the Software without restriction,
7  * including without limitation the rights to use, copy, modify, merge,
8  * publish, distribute, sublicense, and/or sell copies of the Software,
9  * and to permit persons to whom the Software is furnished to do so,
10  * subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice shall be
13  * included in all copies or substantial portions of the Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
18  * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
19  * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
20  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
21  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
22  */
23 
24 #include <assert.h>
25 #include <err.h>
26 #include <interface/metrics/consumer.h>
27 #include <kernel/mutex.h>
28 #include <lib/dpc.h>
29 #include <lib/trusty/handle.h>
30 #include <lib/trusty/ipc.h>
31 #include <lib/trusty/ipc_msg.h>
32 #include <lib/trusty/trusty_app.h>
33 #include <lk/init.h>
34 #include <lk/trace.h>
35 #include <string.h>
36 #include <trusty/uuid.h>
37 
#define LOCAL_TRACE (0)

/*
 * Format of the payload is "<UUID>:<app name>", with neither UUID nor app name
 * being null-terminated. However, unlike APP_NAME_MAX_SIZE, UUID_STR_SIZE
 * counts the null character. Since the UUID is not null-terminated in the
 * payload, that extra byte accounts for the ':' separator instead. Hence, the
 * maximum size of an app name is METRICS_MAX_APP_ID_LEN - UUID_STR_SIZE.
 */
static_assert(UUID_STR_SIZE <= METRICS_MAX_APP_ID_LEN);
#define APP_NAME_MAX_SIZE (METRICS_MAX_APP_ID_LEN - UUID_STR_SIZE)
48 
/**
 * enum chan_state - states of the metrics consumer channel event handler
 * CHAN_STATE_WAITING_CHAN_READY:
 *      Initial state of the channel handler. At this point we are waiting for
 *      an IPC_HANDLE_POLL_READY channel event that signifies that metrics
 *      consumer connection is ready for use. After consuming this event, we
 *      transition to %CHAN_STATE_IDLE state.
 * CHAN_STATE_IDLE:
 *      While in this state we (1) can not consume any events from the channel
 *      and (2) can only send one message over the channel. Once a message is
 *      sent, we transition to either %CHAN_STATE_WAITING_CRASH_RESP or
 *      %CHAN_STATE_WAITING_EXIT_RESP or %CHAN_STATE_WAITING_EVENT_DROP_RESP
 *      depending on what message was sent.
 * CHAN_STATE_WAITING_CRASH_RESP:
 *      In this state we are waiting for a response to a message about an app
 *      crash. After receiving the response message, we transition to
 *      %CHAN_STATE_IDLE state.
 * CHAN_STATE_WAITING_EXIT_RESP:
 *      In this state we are waiting for a response to a message about an app
 *      exit. After receiving the response message, we transition to
 *      %CHAN_STATE_IDLE state.
 * CHAN_STATE_WAITING_EVENT_DROP_RESP:
 *      In this state we are waiting for a response to a message about an event
 *      drop. After receiving the response message, we transition to
 *      %CHAN_STATE_IDLE state.
 */
enum chan_state {
    CHAN_STATE_WAITING_CHAN_READY = 0,
    CHAN_STATE_IDLE = 1,
    CHAN_STATE_WAITING_CRASH_RESP = 2,
    CHAN_STATE_WAITING_EXIT_RESP = 3,
    CHAN_STATE_WAITING_EVENT_DROP_RESP = 4,
};
82 
/**
 * struct metrics_ctx - state of the connection to the metrics consumer
 * @chan:          handle of the metrics consumer channel; reset to NULL after
 *                 the channel has been closed on an error path
 * @chan_state:    current state of the channel event handler, see
 *                 &enum chan_state
 * @event_dropped: set when an app event arrives while the channel is not in
 *                 %CHAN_STATE_IDLE; cleared once the response to the
 *                 event-drop report has been received
 */
struct metrics_ctx {
    struct handle* chan;
    enum chan_state chan_state;
    bool event_dropped;
};
88 
/*
 * Single module-wide context. on_ta_crash() and handle_chan() hold ctx_lock
 * for as long as they read or modify it.
 */
static struct metrics_ctx ctx;
static mutex_t ctx_lock = MUTEX_INITIAL_VALUE(ctx_lock);
91 
recv_resp(struct handle * chan,uint32_t cmd)92 static int recv_resp(struct handle* chan, uint32_t cmd) {
93     int rc;
94     struct ipc_msg_info msg_info;
95     struct metrics_resp resp;
96 
97     rc = ipc_get_msg(chan, &msg_info);
98     if (rc != NO_ERROR) {
99         TRACEF("failed (%d) to get message\n", rc);
100         return rc;
101     }
102 
103     struct iovec_kern iov = {
104             .iov_base = &resp,
105             .iov_len = sizeof(resp),
106     };
107     struct ipc_msg_kern ipc_msg = {
108             .num_iov = 1,
109             .iov = &iov,
110             .num_handles = 0,
111             .handles = NULL,
112     };
113     rc = ipc_read_msg(chan, msg_info.id, 0, &ipc_msg);
114     ipc_put_msg(chan, msg_info.id);
115 
116     if (rc < 0) {
117         TRACEF("failed (%d) ipc_read_msg().\n", rc);
118         return rc;
119     }
120 
121     if (rc != sizeof(resp)) {
122         TRACEF("unexpected number of bytes received: %d.\n", rc);
123         return ERR_BAD_LEN;
124     }
125 
126     if (resp.cmd != (cmd | METRICS_CMD_RESP_BIT)) {
127         TRACEF("unknown command received: %u %u.\n", resp.cmd, cmd);
128         return ERR_CMD_UNKNOWN;
129     }
130 
131     if (resp.status != METRICS_NO_ERROR) {
132         TRACEF("event report failure: %d.\n", resp.status);
133         /* This error is not severe enough to close the connection. */
134     }
135 
136     return NO_ERROR;
137 }
138 
send_req(struct handle * chan,struct ipc_msg_kern * ipc_msg,size_t total_len)139 static int send_req(struct handle* chan,
140                     struct ipc_msg_kern* ipc_msg,
141                     size_t total_len) {
142     int rc = ipc_send_msg(chan, ipc_msg);
143     if (rc < 0) {
144         TRACEF("failed (%d) to send message\n", rc);
145         return rc;
146     }
147 
148     if (rc != (int)total_len) {
149         TRACEF("unexpected number of bytes sent: %d\n", rc);
150         return ERR_BAD_LEN;
151     }
152 
153     return NO_ERROR;
154 }
155 
report_crash(struct handle * chan,struct trusty_app * app,const struct trusty_error_args * error_args)156 static int report_crash(struct handle* chan,
157                         struct trusty_app* app,
158                         const struct trusty_error_args* error_args) {
159     int rc;
160     struct metrics_req req = {};
161     struct metrics_report_crash_req args = {};
162     size_t total_len;
163 
164     DEBUG_ASSERT(is_mutex_held(&ctx_lock));
165 
166     uuid_to_str(&app->props.uuid, args.app_id);
167 
168     req.cmd = METRICS_CMD_REPORT_CRASH;
169     args.crash_reason = error_args->reason;
170     args.far = error_args->far;
171     memcpy(args.far_hash, error_args->far_hash, sizeof(args.far_hash));
172     args.elr = error_args->elr;
173     memcpy(args.elr_hash, error_args->elr_hash, sizeof(args.elr_hash));
174     args.is_hash = error_args->is_hash;
175 
176     struct iovec_kern iovs[] = {
177             {
178                     .iov_base = &req,
179                     .iov_len = sizeof(req),
180             },
181             {
182                     .iov_base = &args,
183                     .iov_len = sizeof(args),
184             },
185     };
186     struct ipc_msg_kern ipc_msg = {
187             .num_iov = countof(iovs),
188             .iov = iovs,
189     };
190 
191     total_len = sizeof(req) + sizeof(args);
192     rc = send_req(chan, &ipc_msg, total_len);
193     if (rc != NO_ERROR) {
194         TRACEF("failed (%d) report app crash\n", rc);
195         return rc;
196     }
197 
198     return NO_ERROR;
199 }
200 
report_exit(struct handle * chan,struct trusty_app * app,const struct trusty_error_args * error_args)201 static int report_exit(struct handle* chan,
202                         struct trusty_app* app,
203                         const struct trusty_error_args* error_args) {
204     int rc;
205     struct metrics_req req = {};
206     struct metrics_report_exit_req args = {};
207     size_t total_len;
208 
209     DEBUG_ASSERT(is_mutex_held(&ctx_lock));
210 
211     uuid_to_str(&app->props.uuid, args.app_id);
212 
213     req.cmd = METRICS_CMD_REPORT_EXIT;
214     args.exit_code = error_args->reason;
215 
216     struct iovec_kern iovs[] = {
217             {
218                     .iov_base = &req,
219                     .iov_len = sizeof(req),
220             },
221             {
222                     .iov_base = &args,
223                     .iov_len = sizeof(args),
224             },
225     };
226     struct ipc_msg_kern ipc_msg = {
227             .num_iov = countof(iovs),
228             .iov = iovs,
229     };
230 
231     total_len = sizeof(req) + sizeof(args);
232     rc = send_req(chan, &ipc_msg, total_len);
233     if (rc != NO_ERROR) {
234         TRACEF("failed (%d) report app exit\n", rc);
235         return rc;
236     }
237 
238     return NO_ERROR;
239 }
240 
report_event_drop(struct handle * chan)241 static int report_event_drop(struct handle* chan) {
242     int rc;
243     struct metrics_req req;
244 
245     DEBUG_ASSERT(is_mutex_held(&ctx_lock));
246 
247     req.cmd = METRICS_CMD_REPORT_EVENT_DROP;
248     req.reserved = 0;
249 
250     struct iovec_kern iov = {
251             .iov_base = &req,
252             .iov_len = sizeof(req),
253     };
254     struct ipc_msg_kern ipc_msg = {
255             .num_iov = 1,
256             .iov = &iov,
257     };
258 
259     rc = send_req(chan, &ipc_msg, sizeof(req));
260     if (rc != NO_ERROR) {
261         TRACEF("failed (%d) report event drop\n", rc);
262         return rc;
263     }
264 
265     return NO_ERROR;
266 }
267 
on_ta_crash(struct trusty_app * app,const struct trusty_error_args * error_args)268 static int on_ta_crash(struct trusty_app* app, const struct trusty_error_args* error_args) {
269     int rc;
270 
271     mutex_acquire(&ctx_lock);
272 
273     if (ctx.chan_state != CHAN_STATE_IDLE) {
274         TRACEF("there is a metrics event still in progress or metrics TA "
275                "is unavailable\n");
276         ctx.event_dropped = true;
277         goto out;
278     }
279 
280     if (!ctx.chan) {
281         TRACEF("failed get metrics consumer channel\n");
282         goto out;
283     }
284 
285     if(error_args->is_crash) {
286         rc = report_crash(ctx.chan, app, error_args);
287         ctx.chan_state = CHAN_STATE_WAITING_CRASH_RESP;
288     }
289     else {
290         rc = report_exit(ctx.chan, app, error_args);
291         ctx.chan_state = CHAN_STATE_WAITING_EXIT_RESP;
292     }
293     if (rc != NO_ERROR) {
294         TRACEF("failed (%d) report app crash\n", rc);
295         goto err;
296     }
297 
298     goto out;
299 
300 err:
301     handle_close(ctx.chan);
302     ctx.chan = NULL;
303 out:
304     mutex_release(&ctx_lock);
305     /*
306      * Returning an error here will bring down the kernel. Metrics reporting
307      * isn't critical. So, we always return NO_ERROR. If something goes wrong,
308      * printing an error should suffice.
309      */
310     return NO_ERROR;
311 }
312 
/*
 * App notifier registered by metrics_init(). Its crash callback,
 * on_ta_crash(), reports either a crash or an exit depending on
 * error_args->is_crash.
 */
static struct trusty_app_notifier notifier = {
        .crash = on_ta_crash,
};
316 
handle_chan(struct dpc * work)317 static void handle_chan(struct dpc* work) {
318     int rc;
319     uint32_t event;
320 
321     mutex_acquire(&ctx_lock);
322 
323     event = ctx.chan->ops->poll(ctx.chan, ~0U, true);
324     if (event & IPC_HANDLE_POLL_HUP) {
325         TRACEF("received IPC_HANDLE_POLL_HUP, closing channel\n");
326         goto err;
327     }
328 
329     switch (ctx.chan_state) {
330     case CHAN_STATE_WAITING_CHAN_READY:
331         if (!(event & IPC_HANDLE_POLL_READY)) {
332             TRACEF("unexpected channel event: 0x%x\n", event);
333             goto err;
334         }
335 
336         ctx.chan_state = CHAN_STATE_IDLE;
337         goto out;
338 
339     case CHAN_STATE_IDLE:
340         TRACEF("unexpected channel event: 0x%x\n", event);
341         goto err;
342 
343     case CHAN_STATE_WAITING_CRASH_RESP:
344         if (!(event & IPC_HANDLE_POLL_MSG)) {
345             TRACEF("unexpected channel event: 0x%x\n", event);
346             goto err;
347         }
348 
349         rc = recv_resp(ctx.chan, METRICS_CMD_REPORT_CRASH);
350         if (rc != NO_ERROR) {
351             TRACEF("failed (%d) receive response\n", rc);
352             goto err;
353         }
354 
355         ctx.chan_state = CHAN_STATE_IDLE;
356 
357         if (ctx.event_dropped) {
358             rc = report_event_drop(ctx.chan);
359             if (rc != NO_ERROR) {
360                 TRACEF("failed (%d) report event drop\n", rc);
361                 goto err;
362             }
363             ctx.chan_state = CHAN_STATE_WAITING_EVENT_DROP_RESP;
364             goto out;
365         }
366 
367         goto out;
368 
369     case CHAN_STATE_WAITING_EXIT_RESP:
370         if (!(event & IPC_HANDLE_POLL_MSG)) {
371             TRACEF("unexpected channel event: 0x%x\n", event);
372             goto err;
373         }
374 
375         rc = recv_resp(ctx.chan, METRICS_CMD_REPORT_EXIT);
376         if (rc != NO_ERROR) {
377             TRACEF("failed (%d) receive response\n", rc);
378             goto err;
379         }
380 
381         ctx.chan_state = CHAN_STATE_IDLE;
382 
383         if (ctx.event_dropped) {
384             rc = report_event_drop(ctx.chan);
385             if (rc != NO_ERROR) {
386                 TRACEF("failed (%d) report event drop\n", rc);
387                 goto err;
388             }
389             ctx.chan_state = CHAN_STATE_WAITING_EVENT_DROP_RESP;
390             goto out;
391         }
392 
393         goto out;
394 
395     case CHAN_STATE_WAITING_EVENT_DROP_RESP:
396         if (!(event & IPC_HANDLE_POLL_MSG)) {
397             TRACEF("unexpected channel event: 0x%x\n", event);
398             goto err;
399         }
400 
401         rc = recv_resp(ctx.chan, METRICS_CMD_REPORT_EVENT_DROP);
402         if (rc != NO_ERROR) {
403             TRACEF("failed (%d) receive response\n", rc);
404             goto err;
405         }
406 
407         ctx.chan_state = CHAN_STATE_IDLE;
408         ctx.event_dropped = false;
409         goto out;
410     }
411 
412 err:
413     handle_close(ctx.chan);
414     ctx.chan = NULL;
415 out:
416     mutex_release(&ctx_lock);
417 }
418 
/*
 * DPC work item queued by on_handle_event(); its callback, handle_chan(),
 * processes the pending channel event.
 */
static struct dpc chan_event_work = {
        .node = LIST_INITIAL_CLEARED_VALUE,
        .cb = handle_chan,
};
423 
on_handle_event(struct handle_waiter * waiter)424 static void on_handle_event(struct handle_waiter* waiter) {
425     int rc = dpc_enqueue_work(NULL, &chan_event_work, false);
426     if (rc != NO_ERROR) {
427         TRACEF("failed (%d) to enqueue dpc work\n", rc);
428     }
429 }
430 
/*
 * Waiter that metrics_init() attaches to the metrics consumer channel;
 * on_handle_event() is its notification callback.
 */
static struct handle_waiter waiter = {
        .node = LIST_INITIAL_CLEARED_VALUE,
        .notify_proc = on_handle_event,
};
435 
/**
 * metrics_init() - connect to the metrics consumer and hook up notifications
 * @level: init level this hook runs at (unused)
 *
 * Starts an asynchronous connection to METRICS_CONSUMER_PORT, registers the
 * app notifier and attaches the waiter to the new channel. Failures are only
 * traced; if the notifier cannot be registered, the channel is closed again.
 */
static void metrics_init(uint level) {
    int ret;

    ret = ipc_port_connect_async(&kernel_uuid, METRICS_CONSUMER_PORT,
                                 IPC_PORT_PATH_MAX, IPC_CONNECT_WAIT_FOR_PORT,
                                 &ctx.chan);
    if (ret) {
        TRACEF("failed (%d) to connect to port\n", ret);
        return;
    }

    ret = trusty_register_app_notifier(&notifier);
    if (ret) {
        TRACEF("failed (%d) to register app notifier\n", ret);
        /* Without the notifier the channel is of no use: release it. */
        handle_close(ctx.chan);
        ctx.chan = NULL;
        return;
    }

    ctx.chan_state = CHAN_STATE_WAITING_CHAN_READY;
    handle_add_waiter(ctx.chan, &waiter);
}
462 
/*
 * Need to init before (LK_INIT_LEVEL_APPS - 1) to register an app notifier,
 * hence the hook runs at LK_INIT_LEVEL_APPS - 2.
 */
LK_INIT_HOOK(metrics, metrics_init, LK_INIT_LEVEL_APPS - 2);
465