xref: /aosp_15_r20/external/bcc/tools/biosnoop.lua (revision 387f9dfdfa2baef462e92476d413c7bc2470293e)
1#!/usr/bin/env bcc-lua
2--[[
3Copyright 2016 GitHub, Inc
4
5Licensed under the Apache License, Version 2.0 (the "License");
6you may not use this file except in compliance with the License.
7You may obtain a copy of the License at
8
9http://www.apache.org/licenses/LICENSE-2.0
10
11Unless required by applicable law or agreed to in writing, software
12distributed under the License is distributed on an "AS IS" BASIS,
13WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14See the License for the specific language governing permissions and
15limitations under the License.
16--]]
17
-- BPF C source handed to bcc (see `BPF:new{text=program}` below).
--
-- It defines three probe handlers:
--   * trace_pid_start      - remembers the current task's id and comm for a
--                            request, keyed by the request pointer.
--   * trace_req_start      - records the time a request was issued.
--   * trace_req_completion - computes the issue-to-completion latency and
--                            emits one `struct data_t` on the `events` perf
--                            output, then forgets the request.
--
-- `struct data_t` must stay in sync (field order and types) with the struct
-- declaration passed to `open_perf_buffer` on the Lua side.
-- TASK_COMM_LEN and DISK_NAME_LEN are not defined here; presumably they come
-- from the kernel headers included by the program.
local program = [[
#include <uapi/linux/ptrace.h>
#include <linux/blkdev.h>

struct val_t {
    u32 pid;
    char name[TASK_COMM_LEN];
};

struct data_t {
    u32 pid;
    u64 rwflag;
    u64 delta;
    u64 sector;
    u64 len;
    u64 ts;
    char disk_name[DISK_NAME_LEN];
    char name[TASK_COMM_LEN];
};

BPF_HASH(start, struct request *);
BPF_HASH(infobyreq, struct request *, struct val_t);
BPF_PERF_OUTPUT(events);

// cache PID and comm by-req
int trace_pid_start(struct pt_regs *ctx, struct request *req)
{
    struct val_t val = {};

    if (bpf_get_current_comm(&val.name, sizeof(val.name)) == 0) {
        val.pid = bpf_get_current_pid_tgid();
        infobyreq.update(&req, &val);
    }
    return 0;
}

// time block I/O
int trace_req_start(struct pt_regs *ctx, struct request *req)
{
    u64 ts;

    ts = bpf_ktime_get_ns();
    start.update(&req, &ts);

    return 0;
}

// output
int trace_req_completion(struct pt_regs *ctx, struct request *req)
{
    u64 *tsp;
    u64 ts;
    struct val_t *valp;
    struct data_t data = {};

    // fetch timestamp and calculate delta
    tsp = start.lookup(&req);
    if (tsp == 0) {
        // missed tracing issue; still drop any cached PID/comm for this
        // request so the entry does not linger in the by-request hash
        infobyreq.delete(&req);
        return 0;
    }
    ts = bpf_ktime_get_ns();
    data.delta = ts - *tsp;     // nanoseconds
    data.ts = ts / 1000;        // microseconds
    data.len = req->__data_len;

    valp = infobyreq.lookup(&req);
    if (valp == 0) {
        // issuing task unknown: report the comm as "?"
        data.name[0] = '?';
        data.name[1] = 0;
    } else {
        data.pid = valp->pid;
        data.sector = req->__sector;
        bpf_probe_read_kernel(&data.name, sizeof(data.name), valp->name);
        bpf_probe_read_kernel(&data.disk_name, sizeof(data.disk_name),
                       req->rq_disk->disk_name);
    }

/*
 * The following deals with a kernel version change (in mainline 4.7, although
 * it may be backported to earlier kernels) with how block request write flags
 * are tested. We handle both pre- and post-change versions here. Please avoid
 * kernel version tests like this as much as possible: they inflate the code,
 * test, and maintenance burden.
 */
#ifdef REQ_WRITE
    data.rwflag = !!(req->cmd_flags & REQ_WRITE);
#elif defined(REQ_OP_SHIFT)
    data.rwflag = !!((req->cmd_flags >> REQ_OP_SHIFT) == REQ_OP_WRITE);
#else
    data.rwflag = !!((req->cmd_flags & REQ_OP_MASK) == REQ_OP_WRITE);
#endif

    events.perf_submit(ctx,&data,sizeof(data));
    start.delete(&req);
    infobyreq.delete(&req);

    return 0;
}
]]
120
121local ffi = require("ffi")
122
--- Entry point called by the bcc-lua runner.
-- Compiles the BPF program, attaches the block-layer kprobes, prints the
-- column header, and then hands control to the perf-buffer poll loop, which
-- prints one line per event submitted by `trace_req_completion`.
-- @param BPF   bcc BPF class; only `BPF:new` is used here
-- @param utils bcc-lua utilities table (not used by this tool)
return function(BPF, utils)
  local bpf = BPF:new{text=program}

  bpf:attach_kprobe{event="blk_account_io_start", fn_name="trace_pid_start"}

  -- The request-issue hook is attached on both the legacy and the multi-queue
  -- entry points. A given kernel may provide only one of them, so a failed
  -- attach on one is tolerated as long as the other succeeds. This assumes
  -- attach_kprobe raises on failure; if it does not, pcall is a no-op wrapper.
  local issue_probes = 0
  for _, event in ipairs{"blk_start_request", "blk_mq_start_request"} do
    local ok = pcall(bpf.attach_kprobe, bpf,
        {event=event, fn_name="trace_req_start"})
    if ok then
      issue_probes = issue_probes + 1
    end
  end
  assert(issue_probes > 0, "failed to attach BPF to any request-issue kprobe")

  bpf:attach_kprobe{event="blk_account_io_done",
      fn_name="trace_req_completion"}

  print("%-14s %-14s %-6s %-7s %-2s %-9s %-7s %7s" % {"TIME(s)", "COMM", "PID",
    "DISK", "T", "SECTOR", "BYTES", "LAT(ms)"})

  -- Timestamp (microseconds) of the first event received; nil until then.
  -- TIME(s) is reported relative to it, so the first line shows 0.
  local first_ts = nil

  --- Perf-buffer callback: format and print a single completed I/O.
  -- @param cpu   CPU the event was delivered on (unused)
  -- @param event cdata matching the struct passed to open_perf_buffer
  local function print_event(cpu, event)
    local comm = ffi.string(event.name)
    local disk = ffi.string(event.disk_name)
    local ts = tonumber(event.ts)

    if first_ts == nil then
      first_ts = ts
    end

    -- rwflag is normalised to 0/1 by the BPF program (`!!`)
    local rw = (tonumber(event.rwflag) == 1) and "W" or "R"

    -- The BPF side sets the comm to exactly "?" when it has no cached task
    -- for the request; in that case no sector was recorded either, so show
    -- -1. Compare for equality rather than searching for "?" so that a real
    -- comm that merely contains a question mark keeps its sector.
    local sector = -1
    if comm ~= "?" then
      sector = tonumber(event.sector)
    end

    -- event.ts is in microseconds and event.delta in nanoseconds, hence the
    -- same divisor yields seconds for TIME(s) and milliseconds for LAT(ms).
    print("%-14.9f %-14.14s %-6s %-7s %-2s %-9s %-7s %7.2f" % {
      (ts - first_ts) / 1000000, comm, event.pid, disk, rw, sector,
      tonumber(event.len), tonumber(event.delta) / 1000000})
  end

  local TASK_COMM_LEN = 16 -- linux/sched.h
  local DISK_NAME_LEN = 32 -- linux/genhd.h

  -- The declaration below mirrors `struct data_t` in the BPF program; the
  -- two `$` placeholders are filled from the array sizes that follow it.
  bpf:get_table("events"):open_perf_buffer(print_event, [[
    struct {
      uint32_t pid;
      uint64_t rwflag;
      uint64_t delta;
      uint64_t sector;
      uint64_t len;
      uint64_t ts;
      char disk_name[$];
      char name[$];
    }
  ]], {DISK_NAME_LEN, TASK_COMM_LEN}, 64)
  bpf:perf_buffer_poll_loop()
end
195