#!/usr/bin/python
#
# oomkill   Trace oom_kill_process(). For Linux, uses BCC, eBPF.
#
# This traces the kernel out-of-memory killer, and prints basic details,
# including the system load averages. This can provide more context on the
# system state at the time of OOM: was it getting busier or steady, based
# on the load averages? This tool may also be useful to customize for
# investigations; for example, by adding other task_struct details at the time
# of OOM.
#
# Copyright 2016 Netflix, Inc.
# Licensed under the Apache License, Version 2.0 (the "License")
#
# 09-Feb-2016   Brendan Gregg   Created this.
from bcc import BPF
from time import strftime
import ctypes as ct

# linux stats
loadavg = "/proc/loadavg"

# define BPF program
#
# The kprobe fills one struct data_t per oom_kill_process() call and pushes it
# to user space through the "events" perf buffer:
#   fpid/fcomm - process that was running when the OOM killer fired
#   tpid/tcomm - task selected to be killed
#   pages      - the totalpages argument of oom_kill_process()
#
# bpf_get_current_pid_tgid() returns (tgid << 32) | pid, where the low half is
# the kernel thread id. The output labels these values "PID", so the upper
# half (tgid) is reported for the triggering task and p->tgid for the victim.
#
# NOTE(review): the argument list below must match the oom_kill_process()
# prototype of the running kernel; newer kernels are believed to use a
# different prototype -- confirm against the target kernel before relying on
# the p/totalpages arguments.
bpf_text = """
#include <uapi/linux/ptrace.h>
#include <linux/oom.h>

struct data_t {
    u64 fpid;
    u64 tpid;
    u64 pages;
    char fcomm[TASK_COMM_LEN];
    char tcomm[TASK_COMM_LEN];
};

BPF_PERF_OUTPUT(events);

void kprobe__oom_kill_process(struct pt_regs *ctx, struct oom_control *oc,
    struct task_struct *p, unsigned int points, unsigned long totalpages)
{
    struct data_t data = {};
    u32 pid = bpf_get_current_pid_tgid() >> 32;
    data.fpid = pid;
    data.tpid = p->tgid;
    data.pages = totalpages;
    bpf_get_current_comm(&data.fcomm, sizeof(data.fcomm));
    bpf_probe_read(&data.tcomm, sizeof(data.tcomm), p->comm);
    events.perf_submit(ctx, &data, sizeof(data));
}
"""

# kernel->user event data: struct data_t
TASK_COMM_LEN = 16  # linux/sched.h


class Data(ct.Structure):
    """ctypes mirror of the BPF program's struct data_t.

    Field order and widths must stay in step with struct data_t in bpf_text,
    because print_event() reinterprets the raw perf-buffer bytes as this type.
    """
    _fields_ = [
        ("fpid", ct.c_ulonglong),
        ("tpid", ct.c_ulonglong),
        ("pages", ct.c_ulonglong),
        ("fcomm", ct.c_char * TASK_COMM_LEN),
        ("tcomm", ct.c_char * TASK_COMM_LEN)
    ]


# process event
def print_event(cpu, data, size):
    """Perf-buffer callback: print one OOM-kill event as a single line.

    cpu and size are part of the callback signature and are not used here.
    data is the raw event pointer, which is cast to a Data structure.
    The current contents of /proc/loadavg are appended to the line.
    """
    event = ct.cast(data, ct.POINTER(Data)).contents
    with open(loadavg) as stats:
        avgline = stats.read().rstrip()
    # comm fields are raw bytes from the kernel; decode leniently so an
    # invalid byte sequence cannot abort the callback.
    print(("%s Triggered by PID %d (\"%s\"), OOM kill of PID %d (\"%s\")"
        ", %d pages, loadavg: %s") % (strftime("%H:%M:%S"), event.fpid,
        event.fcomm.decode('utf-8', 'replace'), event.tpid,
        event.tcomm.decode('utf-8', 'replace'), event.pages, avgline))

# initialize BPF
# Compiling bpf_text also attaches the kprobe__oom_kill_process handler.
b = BPF(text=bpf_text)
print("Tracing OOM kills... Ctrl-C to stop.")
b["events"].open_perf_buffer(print_event)
# Poll until interrupted; each event is delivered to print_event().
while 1:
    try:
        b.perf_buffer_poll()
    except KeyboardInterrupt:
        # Ctrl-C is the advertised way to stop: exit quietly, no traceback.
        exit()