xref: /aosp_15_r20/external/bcc/examples/cpp/pyperf/PyPerfBPFProgram.cc (revision 387f9dfdfa2baef462e92476d413c7bc2470293e)
1 /*
2  * Copyright (c) Facebook, Inc.
3  * Licensed under the Apache License, Version 2.0 (the "License")
4  */
5 
6 #include <string>
7 
8 namespace ebpf {
9 namespace pyperf {
10 
11 extern const std::string PYPERF_BPF_PROGRAM = R"(
12 #include <linux/sched.h>
13 #include <uapi/linux/ptrace.h>
14 
// Number of frames one invocation of read_python_stack attempts to read
// (the bound of its unrolled loop).
15 #define PYTHON_STACK_FRAMES_PER_PROG 25
// Maximum number of read_python_stack invocations chained per sample.
16 #define PYTHON_STACK_PROG_CNT 3
// Capacity of Event.stack: every chained invocation filling its full quota.
17 #define STACK_MAX_LEN (PYTHON_STACK_FRAMES_PER_PROG * PYTHON_STACK_PROG_CNT)
// Buffer sizes, in bytes, of the Symbol.classname / name / file fields.
18 #define CLASS_NAME_LEN 32
19 #define FUNCTION_NAME_LEN 64
20 #define FILE_NAME_LEN 128
// Size of Event.comm, the buffer filled by bpf_get_current_comm.
21 #define TASK_COMM_LEN 16
22 
// Values stored in Event.stack_status.
//   COMPLETE:  the frame pointer was NULL after the walk (also the value
//              on_event pre-initializes before any frame is read).
//   ERROR:     a frame pointer was still pending but the last frame read
//              failed.
//   TRUNCATED: the last frame read succeeded and more frames are pending.
23 enum {
24   STACK_STATUS_COMPLETE = 0,
25   STACK_STATUS_ERROR = 1,
26   STACK_STATUS_TRUNCATED = 2,
27 };
28 
// Values stored in Event.gil_state, produced by get_gil_state.
//   NO_INFO:               gil_locked_addr or gil_last_holder_addr is 0.
//   ERROR:                 e.g. gil_locked could not be read, or held a value
//                          other than -1, 0 or 1.
//   UNINITIALIZED:         gil_locked == -1.
//   NOT_LOCKED:            gil_locked == 0.
//   THIS_THREAD:           last holder equals this thread's PyThreadState.
//   GLOBAL_CURRENT_THREAD: last holder equals the global current thread state.
//   OTHER_THREAD:          last holder is some other non-NULL thread state.
//   NULL:                  last holder pointer is NULL.
29 enum {
30   GIL_STATE_NO_INFO = 0,
31   GIL_STATE_ERROR = 1,
32   GIL_STATE_UNINITIALIZED = 2,
33   GIL_STATE_NOT_LOCKED = 3,
34   GIL_STATE_THIS_THREAD = 4,
35   GIL_STATE_GLOBAL_CURRENT_THREAD = 5,
36   GIL_STATE_OTHER_THREAD = 6,
37   GIL_STATE_NULL = 7,
38 };
39 
// Values stored in Event.thread_state_match, produced by
// get_thread_state_match when comparing the PyThreadState found in this
// thread's TLS with the global current thread state.
// THREAD_STATE_UNKNOWN is never assigned by the code in this program.
40 enum {
41   THREAD_STATE_UNKNOWN = 0,
42   THREAD_STATE_MATCH = 1,
43   THREAD_STATE_MISMATCH = 2,
44   THREAD_STATE_THIS_THREAD_NULL = 3,
45   THREAD_STATE_GLOBAL_CURRENT_THREAD_NULL = 4,
46   THREAD_STATE_BOTH_NULL = 5,
47 };
48 
// Values stored in Event.pthread_id_match, produced by get_pthread_id_match.
//   MATCH / MISMATCH:  the pthread id recorded in the PyThreadState equals /
//                      differs from the id read from the thread's TLS block.
//   THREAD_STATE_NULL: there was no PyThreadState to inspect.
//   NULL:              the id read from the PyThreadState was 0.
//   ERROR:             the id read from the TLS block was 0.
// PTHREAD_ID_UNKNOWN is never assigned by the code in this program.
49 enum {
50   PTHREAD_ID_UNKNOWN = 0,
51   PTHREAD_ID_MATCH = 1,
52   PTHREAD_ID_MISMATCH = 2,
53   PTHREAD_ID_THREAD_STATE_NULL = 3,
54   PTHREAD_ID_NULL = 4,
55   PTHREAD_ID_ERROR = 5,
56 };
57 
// Byte offsets of the interpreter structure members this program reads.
// Each value is added to a user-space pointer before a bpf_probe_read_user
// call (e.g. thread_state + PyThreadState_frame). The values are supplied
// per process through PidData; this program never computes them.
// PyFrameObject_lineno and String_size are not read anywhere in this program.
58 typedef struct {
59   int64_t PyObject_type;
60   int64_t PyTypeObject_name;
61   int64_t PyThreadState_frame;
62   int64_t PyThreadState_thread;
63   int64_t PyFrameObject_back;
64   int64_t PyFrameObject_code;
65   int64_t PyFrameObject_lineno;
66   int64_t PyFrameObject_localsplus;
67   int64_t PyCodeObject_filename;
68   int64_t PyCodeObject_name;
69   int64_t PyCodeObject_varnames;
70   int64_t PyTupleObject_item;
71   int64_t String_data;
72   int64_t String_size;
73 } OffsetConfig;
74 
// Per-process configuration, stored in the pid_config map keyed by pid.
// on_event skips any process that has no entry. get_gil_state treats a zero
// gil_locked_addr or gil_last_holder_addr as no GIL information available.
75 typedef struct {
76   uintptr_t current_state_addr; // virtual address of _PyThreadState_Current
77   uintptr_t tls_key_addr; // virtual address of autoTLSkey for pthreads TLS
78   uintptr_t gil_locked_addr; // virtual address of gil_locked
79   uintptr_t gil_last_holder_addr; // virtual address of gil_last_holder
80   OffsetConfig offsets;
81 } PidData;
82 
// Identity of one Python function as read by get_names. The whole struct is
// the key of the `symbols` hash map, so get_names clears it before filling
// it in: left-over bytes from a previous frame would defeat deduplication.
83 typedef struct {
84   char classname[CLASS_NAME_LEN];
85   char name[FUNCTION_NAME_LEN];
86   char file[FILE_NAME_LEN];
87   // NOTE: PyFrameObject also has line number but it is typically just the
88   // first line of that function and PyCode_Addr2Line needs to be called
89   // to get the actual line
90 } Symbol;
91 
// One sample, submitted through the `events` perf output by submit_sample.
// pid is the upper 32 bits and tid the lower 32 bits of
// bpf_get_current_pid_tgid(). thread_state_match, gil_state, pthread_id_match
// and stack_status hold values of the THREAD_STATE_*, GIL_STATE_*,
// PTHREAD_ID_* and STACK_STATUS_* enums. stack_len counts the valid leading
// entries of stack, in the order the frames were walked.
92 typedef struct {
93   uint32_t pid;
94   uint32_t tid;
95   char comm[TASK_COMM_LEN];
96   uint8_t thread_state_match;
97   uint8_t gil_state;
98   uint8_t pthread_id_match;
99   uint8_t stack_status;
100   // instead of storing symbol name here directly, we add it to another
101   // hashmap with Symbols and only store the ids here
102   int64_t stack_len;
103   int32_t stack[STACK_MAX_LEN];
104 } Event;
105 
// Two-level token pasting so that __COUNTER__ is expanded before being
// concatenated into the typedef name.
106 #define _STR_CONCAT(str1, str2) str1##str2
107 #define STR_CONCAT(str1, str2) _STR_CONCAT(str1, str2)
// Compile-time assertion: when `condition` is true the array size becomes
// 1 - 2 = -1, which is ill-formed and aborts compilation; when false the
// size is 1. __COUNTER__ gives every use a distinct typedef name.
108 #define FAIL_COMPILATION_IF(condition)            \
109   typedef struct {                                \
110     char _condition_check[1 - 2 * !!(condition)]; \
111   } STR_CONCAT(compile_time_condition_check, __COUNTER__);
112 // See the comment on the bpf_perf_prog_read_value call in get_names
113 FAIL_COMPILATION_IF(sizeof(Symbol) == sizeof(struct bpf_perf_event_value))
114 
// Scratch state kept in the single-slot per-CPU array state_heap and shared
// by on_event and the chained read_python_stack invocations.
//   offsets:        copied from the process's PidData by on_event.
//   cur_cpu:        bpf_get_smp_processor_id() at sample time; mixed into
//                   new symbol ids by get_symbol_id.
//   symbol_counter: incremented for every new symbol id; never reset by the
//                   code in this program.
//   frame_ptr:      next PyFrameObject to read; NULL once the walk is done.
//   python_stack_prog_call_cnt: reset by on_event, incremented by each
//                   read_python_stack invocation.
//   event:          the sample being assembled.
115 typedef struct {
116   OffsetConfig offsets;
117   uint64_t cur_cpu;
118   int64_t symbol_counter;
119   void* frame_ptr;
120   int64_t python_stack_prog_call_cnt;
121   Event event;
122 } sample_state_t;
123 
// Single-slot per-CPU array holding the sample_state_t scratch area.
124 BPF_PERCPU_ARRAY(state_heap, sample_state_t, 1);
// Symbol -> symbol id, filled by get_symbol_id. __SYMBOLS_SIZE__ is not
// defined in this program; presumably the loader substitutes it - TODO confirm.
125 BPF_HASH(symbols, Symbol, int32_t, __SYMBOLS_SIZE__);
// pid -> PidData; only processes present here are sampled by on_event.
126 BPF_HASH(pid_config, pid_t, PidData);
// Tail-call table with one slot, invoked with index PYTHON_STACK_PROG_IDX
// (not defined in this program).
127 BPF_PROG_ARRAY(progs, 1);
128 
// Perf output through which submit_sample delivers each Event.
129 BPF_PERF_OUTPUT(events);
130 
131 static inline __attribute__((__always_inline__)) void* get_thread_state(
132     void* tls_base,
133     PidData* pid_data) {
134   // Python sets the thread_state using pthread_setspecific with the key
135   // stored in a global variable autoTLSkey.
136   // We read the value of the key from the global variable and then read
137   // the value in the thread-local storage. This relies on pthread implementation.
138   // This is basically the same as running the following in GDB:
139   //  p *(PyThreadState*)((struct pthread*)pthread_self())->
140   //    specific_1stblock[autoTLSkey]->data
141   int key;
142   bpf_probe_read_user(&key, sizeof(key), (void*)pid_data->tls_key_addr);
143   // This assumes autoTLSkey < 32, which means that the TLS is stored in
144   //   pthread->specific_1stblock[autoTLSkey]
145   // 0x310 is offsetof(struct pthread, specific_1stblock),
146   // 0x10 is sizeof(pthread_key_data)
147   // 0x8 is offsetof(struct pthread_key_data, data)
148   // 'struct pthread' is not in the public API so we have to hardcode
149   // the offsets here
150   void* thread_state;
151   bpf_probe_read_user(
152       &thread_state,
153       sizeof(thread_state),
154       tls_base + 0x310 + key * 0x10 + 0x08);
155   return thread_state;
156 }
157 
158 static inline __attribute__((__always_inline__)) int submit_sample(
159     struct pt_regs* ctx,
160     sample_state_t* state) {
161   events.perf_submit(ctx, &state->event, sizeof(Event));
162   return 0;
163 }
164 
165 // this function is trivial, but we need to do map lookup in separate function,
166 // because BCC doesn't allow direct map calls (including lookups) from inside
167 // a macro (which we want to do in GET_STATE() macro below)
168 static inline __attribute__((__always_inline__)) sample_state_t* get_state() {
169   int zero = 0;
170   return state_heap.lookup(&zero);
171 }
172 
// Declares a local `state` pointing at this CPU's sample_state_t and makes
// the enclosing function return 0 if the lookup fails (which should never
// happen). Only usable in functions returning an integer.
#define GET_STATE()                    \
  sample_state_t* state = get_state(); \
  if (state == 0) {                    \
    return 0;                          \
  }
178 
179 static inline __attribute__((__always_inline__)) int get_thread_state_match(
180     void* this_thread_state,
181     void* global_thread_state) {
182   if (this_thread_state == 0 && global_thread_state == 0) {
183     return THREAD_STATE_BOTH_NULL;
184   }
185   if (this_thread_state == 0) {
186     return THREAD_STATE_THIS_THREAD_NULL;
187   }
188   if (global_thread_state == 0) {
189     return THREAD_STATE_GLOBAL_CURRENT_THREAD_NULL;
190   }
191   if (this_thread_state == global_thread_state) {
192     return THREAD_STATE_MATCH;
193   } else {
194     return THREAD_STATE_MISMATCH;
195   }
196 }
197 
198 static inline __attribute__((__always_inline__)) int get_gil_state(
199     void* this_thread_state,
200     void* global_thread_state,
201     PidData* pid_data) {
202   // Get information of GIL state
203   if (pid_data->gil_locked_addr == 0 || pid_data->gil_last_holder_addr == 0) {
204     return GIL_STATE_NO_INFO;
205   }
206 
207   int gil_locked = 0;
208   void* gil_thread_state = 0;
209   if (bpf_probe_read_user(
210           &gil_locked, sizeof(gil_locked), (void*)pid_data->gil_locked_addr)) {
211     return GIL_STATE_ERROR;
212   }
213 
214   switch (gil_locked) {
215     case -1:
216       return GIL_STATE_UNINITIALIZED;
217     case 0:
218       return GIL_STATE_NOT_LOCKED;
219     case 1:
220       // GIL is held by some Thread
221       bpf_probe_read_user(
222           &gil_thread_state,
223           sizeof(void*),
224           (void*)pid_data->gil_last_holder_addr);
225       if (gil_thread_state == this_thread_state) {
226         return GIL_STATE_THIS_THREAD;
227       } else if (gil_thread_state == global_thread_state) {
228         return GIL_STATE_GLOBAL_CURRENT_THREAD;
229       } else if (gil_thread_state == 0) {
230         return GIL_STATE_NULL;
231       } else {
232         return GIL_STATE_OTHER_THREAD;
233       }
234     default:
235       return GIL_STATE_ERROR;
236   }
237 }
238 
239 static inline __attribute__((__always_inline__)) int
240 get_pthread_id_match(void* thread_state, void* tls_base, PidData* pid_data) {
241   if (thread_state == 0) {
242     return PTHREAD_ID_THREAD_STATE_NULL;
243   }
244 
245   uint64_t pthread_self, pthread_created;
246 
247   bpf_probe_read_user(
248       &pthread_created,
249       sizeof(pthread_created),
250       thread_state + pid_data->offsets.PyThreadState_thread);
251   if (pthread_created == 0) {
252     return PTHREAD_ID_NULL;
253   }
254 
255   // 0x10 = offsetof(struct pthread, header.self)
256   bpf_probe_read_user(&pthread_self, sizeof(pthread_self), tls_base + 0x10);
257   if (pthread_self == 0) {
258     return PTHREAD_ID_ERROR;
259   }
260 
261   if (pthread_self == pthread_created) {
262     return PTHREAD_ID_MATCH;
263   } else {
264     return PTHREAD_ID_MISMATCH;
265   }
266 }
267 
268 int on_event(struct pt_regs* ctx) {
269   uint64_t pid_tgid = bpf_get_current_pid_tgid();
270   pid_t pid = (pid_t)(pid_tgid >> 32);
271   PidData* pid_data = pid_config.lookup(&pid);
272   if (!pid_data) {
273     return 0;
274   }
275 
276   GET_STATE();
277 
278   state->offsets = pid_data->offsets;
279   state->cur_cpu = bpf_get_smp_processor_id();
280   state->python_stack_prog_call_cnt = 0;
281 
282   Event* event = &state->event;
283   event->pid = pid;
284   event->tid = (pid_t)pid_tgid;
285   bpf_get_current_comm(&event->comm, sizeof(event->comm));
286 
287   // Get pointer of global PyThreadState, which should belong to the Thread
288   // currently holds the GIL
289   void* global_current_thread = (void*)0;
290   bpf_probe_read_user(
291       &global_current_thread,
292       sizeof(global_current_thread),
293       (void*)pid_data->current_state_addr);
294 
295   struct task_struct* task = (struct task_struct*)bpf_get_current_task();
296 #if __x86_64__
297 // thread_struct->fs was renamed to fsbase in
298 // https://github.com/torvalds/linux/commit/296f781a4b7801ad9c1c0219f9e87b6c25e196fe
299 // so depending on kernel version, we need to account for that
300 #if LINUX_VERSION_CODE < KERNEL_VERSION(4, 7, 0)
301   void* tls_base = (void*)task->thread.fs;
302 #else
303   void* tls_base = (void*)task->thread.fsbase;
304 #endif
305 #elif __aarch64__
306   void* tls_base = (void*)task->thread.tp_value;
307 #else
308 #error "Unsupported platform"
309 #endif
310 
311   // Read PyThreadState of this Thread from TLS
312   void* thread_state = get_thread_state(tls_base, pid_data);
313 
314   // Check for matching between TLS PyThreadState and
315   // the global _PyThreadState_Current
316   event->thread_state_match =
317       get_thread_state_match(thread_state, global_current_thread);
318 
319   // Read GIL state
320   event->gil_state =
321       get_gil_state(thread_state, global_current_thread, pid_data);
322 
323   // Check for matching between pthread ID created current PyThreadState and
324   // pthread of actual current pthread
325   event->pthread_id_match =
326       get_pthread_id_match(thread_state, tls_base, pid_data);
327 
328   // pre-initialize event struct in case any subprogram below fails
329   event->stack_status = STACK_STATUS_COMPLETE;
330   event->stack_len = 0;
331 
332   if (thread_state != 0) {
333     // Get pointer to top frame from PyThreadState
334     bpf_probe_read_user(
335         &state->frame_ptr,
336         sizeof(void*),
337         thread_state + pid_data->offsets.PyThreadState_frame);
338     // jump to reading first set of Python frames
339     progs.call(ctx, PYTHON_STACK_PROG_IDX);
340     // we won't ever get here
341   }
342 
343   return submit_sample(ctx, state);
344 }
345 
346 static inline __attribute__((__always_inline__)) void get_names(
347     void* cur_frame,
348     void* code_ptr,
349     OffsetConfig* offsets,
350     Symbol* symbol,
351     void* ctx) {
352   // Figure out if we want to parse class name, basically checking the name of
353   // the first argument,
354   //   ((PyTupleObject*)$frame->f_code->co_varnames)->ob_item[0]
355   // If it's 'self', we get the type and it's name, if it's cls, we just get
356   // the name. This is not perfect but there is no better way to figure this
357   // out from the code object.
358   void* args_ptr;
359   bpf_probe_read_user(
360       &args_ptr, sizeof(void*), code_ptr + offsets->PyCodeObject_varnames);
361   bpf_probe_read_user(
362       &args_ptr, sizeof(void*), args_ptr + offsets->PyTupleObject_item);
363   bpf_probe_read_user_str(
364       &symbol->name, sizeof(symbol->name), args_ptr + offsets->String_data);
365 
366   // compare strings as ints to save instructions
367   char self_str[4] = {'s', 'e', 'l', 'f'};
368   char cls_str[4] = {'c', 'l', 's', '\0'};
369   bool first_self = *(int32_t*)symbol->name == *(int32_t*)self_str;
370   bool first_cls = *(int32_t*)symbol->name == *(int32_t*)cls_str;
371 
372   // We re-use the same Symbol instance across loop iterations, which means
373   // we will have left-over data in the struct. Although this won't affect
374   // correctness of the result because we have '\0' at end of the strings read,
375   // it would affect effectiveness of the deduplication.
376   // Helper bpf_perf_prog_read_value clears the buffer on error, so here we
377   // (ab)use this behavior to clear the memory. It requires the size of Symbol
378   // to be different from struct bpf_perf_event_value, which we check at
379   // compilation time using the FAIL_COMPILATION_IF macro.
380   bpf_perf_prog_read_value(ctx, symbol, sizeof(Symbol));
381 
382   // Read class name from $frame->f_localsplus[0]->ob_type->tp_name.
383   if (first_self || first_cls) {
384     void* ptr;
385     bpf_probe_read_user(
386         &ptr, sizeof(void*), cur_frame + offsets->PyFrameObject_localsplus);
387     if (first_self) {
388       // we are working with an instance, first we need to get type
389       bpf_probe_read_user(&ptr, sizeof(void*), ptr + offsets->PyObject_type);
390     }
391     bpf_probe_read_user(&ptr, sizeof(void*), ptr + offsets->PyTypeObject_name);
392     bpf_probe_read_user_str(&symbol->classname, sizeof(symbol->classname), ptr);
393   }
394 
395   void* pystr_ptr;
396   // read PyCodeObject's filename into symbol
397   bpf_probe_read_user(
398       &pystr_ptr, sizeof(void*), code_ptr + offsets->PyCodeObject_filename);
399   bpf_probe_read_user_str(
400       &symbol->file, sizeof(symbol->file), pystr_ptr + offsets->String_data);
401   // read PyCodeObject's name into symbol
402   bpf_probe_read_user(
403       &pystr_ptr, sizeof(void*), code_ptr + offsets->PyCodeObject_name);
404   bpf_probe_read_user_str(
405       &symbol->name, sizeof(symbol->name), pystr_ptr + offsets->String_data);
406 }
407 
408 // get_frame_data reads current PyFrameObject filename/name and updates
409 // stack_info->frame_ptr with pointer to next PyFrameObject
410 static inline __attribute__((__always_inline__)) bool get_frame_data(
411     void** frame_ptr,
412     OffsetConfig* offsets,
413     Symbol* symbol,
414     // ctx is only used to call helper to clear symbol, see documentation below
415     void* ctx) {
416   void* cur_frame = *frame_ptr;
417   if (!cur_frame) {
418     return false;
419   }
420   void* code_ptr;
421   // read PyCodeObject first, if that fails, then no point reading next frame
422   bpf_probe_read_user(
423       &code_ptr, sizeof(void*), cur_frame + offsets->PyFrameObject_code);
424   if (!code_ptr) {
425     return false;
426   }
427 
428   get_names(cur_frame, code_ptr, offsets, symbol, ctx);
429 
430   // read next PyFrameObject pointer, update in place
431   bpf_probe_read_user(
432       frame_ptr, sizeof(void*), cur_frame + offsets->PyFrameObject_back);
433 
434   return true;
435 }
436 
437 // To avoid duplicate ids, every CPU needs to use different ids when inserting
438 // into the hashmap. NUM_CPUS is defined at PyPerf backend side and passed
439 // through CFlag.
440 static inline __attribute__((__always_inline__)) int64_t get_symbol_id(
441     sample_state_t* state,
442     Symbol* sym) {
443   int32_t* symbol_id_ptr = symbols.lookup(sym);
444   if (symbol_id_ptr) {
445     return *symbol_id_ptr;
446   }
447   // the symbol is new, bump the counter
448   int32_t symbol_id = state->symbol_counter * NUM_CPUS + state->cur_cpu;
449   state->symbol_counter++;
450   symbols.update(sym, &symbol_id);
451   return symbol_id;
452 }
453 
454 int read_python_stack(struct pt_regs* ctx) {
455   GET_STATE();
456 
457   state->python_stack_prog_call_cnt++;
458   Event* sample = &state->event;
459 
460   Symbol sym = {};
461   bool last_res = false;
462 #pragma unroll
463   for (int i = 0; i < PYTHON_STACK_FRAMES_PER_PROG; i++) {
464     last_res = get_frame_data(&state->frame_ptr, &state->offsets, &sym, ctx);
465     if (last_res) {
466       uint32_t symbol_id = get_symbol_id(state, &sym);
467       int64_t cur_len = sample->stack_len;
468       if (cur_len >= 0 && cur_len < STACK_MAX_LEN) {
469         sample->stack[cur_len] = symbol_id;
470         sample->stack_len++;
471       }
472     }
473   }
474 
475   if (!state->frame_ptr) {
476     sample->stack_status = STACK_STATUS_COMPLETE;
477   } else {
478     if (!last_res) {
479       sample->stack_status = STACK_STATUS_ERROR;
480     } else {
481       sample->stack_status = STACK_STATUS_TRUNCATED;
482     }
483   }
484 
485   if (sample->stack_status == STACK_STATUS_TRUNCATED &&
486       state->python_stack_prog_call_cnt < PYTHON_STACK_PROG_CNT) {
487     // read next batch of frames
488     progs.call(ctx, PYTHON_STACK_PROG_IDX);
489   }
490 
491   return submit_sample(ctx, state);
492 }
493 )";
494 
495 }
496 }  // namespace ebpf
497