/*
 * Copyright (c) 2015 PLUMgrid, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef _GNU_SOURCE
#define _GNU_SOURCE
#endif

#include "libbpf.h"

#include <arpa/inet.h>
#include <errno.h>
#include <fcntl.h>
#include <inttypes.h>
#include <libgen.h>
#include <limits.h>
#include <linux/bpf.h>
#include <linux/bpf_common.h>
#include <linux/if_alg.h>
#include <linux/if_packet.h>
#include <linux/perf_event.h>
#include <linux/pkt_cls.h>
#include <linux/rtnetlink.h>
#include <linux/sched.h>
#include <linux/types.h>
#include <linux/unistd.h>
#include <linux/version.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <sched.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/resource.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/vfs.h>
#include <unistd.h>

#include "bcc_zip.h"
#include "perf_reader.h"

// TODO: Remove this when CentOS 6 support is not needed anymore
#include "setns.h"

#include "bcc_libbpf_inc.h"

// TODO: remove these defines when linux-libc-dev exports them properly

#ifndef __NR_bpf
#if defined(__powerpc64__)
#define __NR_bpf 361
#elif defined(__s390x__)
#define __NR_bpf 351
#elif defined(__aarch64__)
#define __NR_bpf 280
#else
#define __NR_bpf 321
#endif
#endif

#ifndef SO_ATTACH_BPF
#define SO_ATTACH_BPF 50
#endif

#ifndef PERF_EVENT_IOC_SET_BPF
#define PERF_EVENT_IOC_SET_BPF _IOW('$', 8, __u32)
#endif

#ifndef PERF_FLAG_FD_CLOEXEC
#define PERF_FLAG_FD_CLOEXEC (1UL << 3)
#endif

// TODO: Remove this when CentOS 6 support is not needed anymore
#ifndef AF_ALG
#define AF_ALG 38
#endif

#ifndef min
#define min(x, y) ((x) < (y) ? (x) : (y))
#endif

#define UNUSED(expr) do { (void)(expr); } while (0)

#define PERF_UPROBE_REF_CTR_OFFSET_SHIFT 32

#ifndef BPF_FS_MAGIC
#define BPF_FS_MAGIC		0xcafe4a11
#endif

struct bpf_helper {
  char *name;
  char *required_version;
};

static struct bpf_helper helpers[] = {
  {"map_lookup_elem", "3.19"},
  {"map_update_elem", "3.19"},
  {"map_delete_elem", "3.19"},
  {"probe_read", "4.1"},
  {"ktime_get_ns", "4.1"},
  {"trace_printk", "4.1"},
  {"get_prandom_u32", "4.1"},
  {"get_smp_processor_id", "4.1"},
  {"skb_store_bytes", "4.1"},
  {"l3_csum_replace", "4.1"},
  {"l4_csum_replace", "4.1"},
  {"tail_call", "4.2"},
  {"clone_redirect", "4.2"},
  {"get_current_pid_tgid", "4.2"},
  {"get_current_uid_gid", "4.2"},
  {"get_current_comm", "4.2"},
  {"get_cgroup_classid", "4.3"},
  {"skb_vlan_push", "4.3"},
  {"skb_vlan_pop", "4.3"},
  {"skb_get_tunnel_key", "4.3"},
  {"skb_set_tunnel_key", "4.3"},
  {"perf_event_read", "4.3"},
  {"redirect", "4.4"},
  {"get_route_realm", "4.4"},
  {"perf_event_output", "4.4"},
  {"skb_load_bytes", "4.5"},
  {"get_stackid", "4.6"},
  {"csum_diff", "4.6"},
  {"skb_get_tunnel_opt", "4.6"},
  {"skb_set_tunnel_opt", "4.6"},
  {"skb_change_proto", "4.8"},
  {"skb_change_type", "4.8"},
  {"skb_under_cgroup", "4.8"},
  {"get_hash_recalc", "4.8"},
  {"get_current_task", "4.8"},
  {"probe_write_user", "4.8"},
  {"current_task_under_cgroup", "4.9"},
  {"skb_change_tail", "4.9"},
  {"skb_pull_data", "4.9"},
  {"csum_update", "4.9"},
  {"set_hash_invalid", "4.9"},
  {"get_numa_node_id", "4.10"},
  {"skb_change_head", "4.10"},
  {"xdp_adjust_head", "4.10"},
  {"probe_read_str", "4.11"},
  {"get_socket_cookie", "4.12"},
  {"get_socket_uid", "4.12"},
  {"set_hash", "4.13"},
  {"setsockopt", "4.13"},
  {"skb_adjust_room", "4.13"},
  {"redirect_map", "4.14"},
  {"sk_redirect_map", "4.14"},
  {"sock_map_update", "4.14"},
  {"xdp_adjust_meta", "4.15"},
  {"perf_event_read_value", "4.15"},
  {"perf_prog_read_value", "4.15"},
  {"getsockopt", "4.15"},
  {"override_return", "4.16"},
  {"sock_ops_cb_flags_set", "4.16"},
  {"msg_redirect_map", "4.17"},
  {"msg_apply_bytes", "4.17"},
  {"msg_cork_bytes", "4.17"},
  {"msg_pull_data", "4.17"},
  {"bind", "4.17"},
  {"xdp_adjust_tail", "4.18"},
  {"skb_get_xfrm_state", "4.18"},
  {"get_stack", "4.18"},
  {"skb_load_bytes_relative", "4.18"},
  {"fib_lookup", "4.18"},
  {"sock_hash_update", "4.18"},
  {"msg_redirect_hash", "4.18"},
  {"sk_redirect_hash", "4.18"},
  {"lwt_push_encap", "4.18"},
  {"lwt_seg6_store_bytes", "4.18"},
  {"lwt_seg6_adjust_srh", "4.18"},
  {"lwt_seg6_action", "4.18"},
  {"rc_repeat", "4.18"},
  {"rc_keydown", "4.18"},
  {"skb_cgroup_id", "4.18"},
  {"get_current_cgroup_id", "4.18"},
  {"get_local_storage", "4.19"},
  {"sk_select_reuseport", "4.19"},
  {"skb_ancestor_cgroup_id", "4.19"},
  {"sk_lookup_tcp", "4.20"},
  {"sk_lookup_udp", "4.20"},
  {"sk_release", "4.20"},
  {"map_push_elem", "4.20"},
  {"map_pop_elem", "4.20"},
  {"map_peek_elem", "4.20"},
  {"msg_push_data", "4.20"},
  {"msg_pop_data", "5.0"},
  {"rc_pointer_rel", "5.0"},
  {"spin_lock", "5.1"},
  {"spin_unlock", "5.1"},
  {"sk_fullsock", "5.1"},
  {"tcp_sock", "5.1"},
  {"skb_ecn_set_ce", "5.1"},
  {"get_listener_sock", "5.1"},
  {"skc_lookup_tcp", "5.2"},
  {"tcp_check_syncookie", "5.2"},
  {"sysctl_get_name", "5.2"},
  {"sysctl_get_current_value", "5.2"},
  {"sysctl_get_new_value", "5.2"},
  {"sysctl_set_new_value", "5.2"},
  {"strtol", "5.2"},
  {"strtoul", "5.2"},
  {"sk_storage_get", "5.2"},
  {"sk_storage_delete", "5.2"},
  {"send_signal", "5.3"},
  {"tcp_gen_syncookie", "5.3"},
  {"skb_output", "5.5"},
  {"probe_read_user", "5.5"},
  {"probe_read_kernel", "5.5"},
  {"probe_read_user_str", "5.5"},
  {"probe_read_kernel_str", "5.5"},
  {"tcp_send_ack", "5.5"},
  {"send_signal_thread", "5.5"},
  {"jiffies64", "5.5"},
  {"read_branch_records", "5.6"},
  {"get_ns_current_pid_tgid", "5.6"},
  {"xdp_output", "5.6"},
  {"get_netns_cookie", "5.6"},
  {"get_current_ancestor_cgroup_id", "5.6"},
  {"sk_assign", "5.6"},
  {"ktime_get_boot_ns", "5.7"},
  {"seq_printf", "5.7"},
  {"seq_write", "5.7"},
  {"sk_cgroup_id", "5.7"},
  {"sk_ancestor_cgroup_id", "5.7"},
  {"csum_level", "5.7"},
  {"ringbuf_output", "5.8"},
  {"ringbuf_reserve", "5.8"},
  {"ringbuf_submit", "5.8"},
  {"ringbuf_discard", "5.8"},
  {"ringbuf_query", "5.8"},
  {"skc_to_tcp6_sock", "5.9"},
  {"skc_to_tcp_sock", "5.9"},
  {"skc_to_tcp_timewait_sock", "5.9"},
  {"skc_to_tcp_request_sock", "5.9"},
  {"skc_to_udp6_sock", "5.9"},
  {"get_task_stack", "5.9"},
  {"load_hdr_opt", "5.10"},
  {"store_hdr_opt", "5.10"},
  {"reserve_hdr_opt", "5.10"},
  {"inode_storage_get", "5.10"},
  {"inode_storage_delete", "5.10"},
  {"d_path", "5.10"},
  {"copy_from_user", "5.10"},
  {"snprintf_btf", "5.10"},
  {"seq_printf_btf", "5.10"},
  {"skb_cgroup_classid", "5.10"},
  {"redirect_neigh", "5.10"},
  {"per_cpu_ptr", "5.10"},
  {"this_cpu_ptr", "5.10"},
  {"redirect_peer", "5.10"},
  {"task_storage_get", "5.11"},
  {"task_storage_delete", "5.11"},
  {"get_current_task_btf", "5.11"},
  {"bprm_opts_set", "5.11"},
  {"ktime_get_coarse_ns", "5.11"},
  {"ima_inode_hash", "5.11"},
  {"sock_from_file", "5.11"},
  {"check_mtu", "5.12"},
  {"for_each_map_elem", "5.13"},
  {"snprintf", "5.13"},
  {"sys_bpf", "5.14"},
  {"btf_find_by_name_kind", "5.14"},
  {"sys_close", "5.14"},
  {"timer_init", "5.15"},
  {"timer_set_callback", "5.15"},
  {"timer_start", "5.15"},
  {"timer_cancel", "5.15"},
  {"get_func_ip", "5.15"},
  {"get_attach_cookie", "5.15"},
  {"task_pt_regs", "5.15"},
  {"get_branch_snapshot", "5.16"},
  {"trace_vprintk", "5.16"},
  {"skc_to_unix_sock", "5.16"},
  {"kallsyms_lookup_name", "5.16"},
  {"find_vma", "5.17"},
  {"loop", "5.17"},
  {"strncmp", "5.17"},
  {"get_func_arg", "5.17"},
  {"get_func_ret", "5.17"},
  {"get_func_arg_cnt", "5.17"},
  {"get_retval", "5.18"},
  {"set_retval", "5.18"},
  {"xdp_get_buff_len", "5.18"},
  {"xdp_load_bytes", "5.18"},
  {"xdp_store_bytes", "5.18"},
  {"copy_from_user_task", "5.18"},
  {"skb_set_tstamp", "5.18"},
  {"ima_file_hash", "5.18"},
  {"kptr_xchg", "5.19"},
  {"map_lookup_percpu_elem", "5.19"},
  {"skc_to_mptcp_sock", "5.19"},
  {"dynptr_from_mem", "5.19"},
  {"ringbuf_reserve_dynptr", "5.19"},
  {"ringbuf_submit_dynptr", "5.19"},
  {"ringbuf_discard_dynptr", "5.19"},
  {"dynptr_read", "5.19"},
  {"dynptr_write", "5.19"},
  {"dynptr_data", "5.19"},
  {"tcp_raw_gen_syncookie_ipv4", "6.0"},
  {"tcp_raw_gen_syncookie_ipv6", "6.0"},
  {"tcp_raw_check_syncookie_ipv4", "6.0"},
  {"tcp_raw_check_syncookie_ipv6", "6.0"},
  {"ktime_get_tai_ns", "6.1"},
  {"user_ringbuf_drain", "6.1"},
  {"cgrp_storage_get", "6.2"},
  {"cgrp_storage_delete", "6.2"},
};

static uint64_t ptr_to_u64(void *ptr)
{
  return (uint64_t) (unsigned long) ptr;
}

static int libbpf_bpf_map_create(struct bcc_create_map_attr *create_attr)
{
  LIBBPF_OPTS(bpf_map_create_opts, p);

  p.map_flags = create_attr->map_flags;
  p.numa_node = create_attr->numa_node;
  p.btf_fd = create_attr->btf_fd;
  p.btf_key_type_id = create_attr->btf_key_type_id;
  p.btf_value_type_id = create_attr->btf_value_type_id;
  p.map_ifindex = create_attr->map_ifindex;
  if (create_attr->map_type == BPF_MAP_TYPE_STRUCT_OPS)
    p.btf_vmlinux_value_type_id = create_attr->btf_vmlinux_value_type_id;
  else
    p.inner_map_fd = create_attr->inner_map_fd;

  return bpf_map_create(create_attr->map_type, create_attr->name, create_attr->key_size,
                        create_attr->value_size, create_attr->max_entries, &p);
}

int bcc_create_map_xattr(struct bcc_create_map_attr *attr, bool allow_rlimit)
{
  unsigned name_len = attr->name ? strlen(attr->name) : 0;
  char map_name[BPF_OBJ_NAME_LEN] = {};

  memcpy(map_name, attr->name, min(name_len, BPF_OBJ_NAME_LEN - 1));
  attr->name = map_name;
  int ret = libbpf_bpf_map_create(attr);

  if (ret < 0 && errno == EPERM) {
    if (!allow_rlimit)
      return ret;

    // see note below about the rationale for this retry
    struct rlimit rl = {};
    if (getrlimit(RLIMIT_MEMLOCK, &rl) == 0) {
      rl.rlim_max = RLIM_INFINITY;
      rl.rlim_cur = rl.rlim_max;
      if (setrlimit(RLIMIT_MEMLOCK, &rl) == 0)
        ret = libbpf_bpf_map_create(attr);
    }
  }

  // The kernel supports BTF if loading it succeeded, but this map type
  // may not support pretty printing yet.
  if (ret < 0 && attr->btf_key_type_id && errno == 524 /* ENOTSUPP */) {
    attr->btf_fd = 0;
    attr->btf_key_type_id = 0;
    attr->btf_value_type_id = 0;
    ret = libbpf_bpf_map_create(attr);
  }

  if (ret < 0 && name_len && (errno == E2BIG || errno == EINVAL)) {
    map_name[0] = '\0';
    ret = libbpf_bpf_map_create(attr);
  }

  if (ret < 0 && errno == EPERM) {
    if (!allow_rlimit)
      return ret;

    // see note below about the rationale for this retry
    struct rlimit rl = {};
    if (getrlimit(RLIMIT_MEMLOCK, &rl) == 0) {
      rl.rlim_max = RLIM_INFINITY;
      rl.rlim_cur = rl.rlim_max;
      if (setrlimit(RLIMIT_MEMLOCK, &rl) == 0)
        ret = libbpf_bpf_map_create(attr);
    }
  }
  return ret;
}

int bcc_create_map(enum bpf_map_type map_type, const char *name,
                   int key_size, int value_size,
                   int max_entries, int map_flags)
{
  struct bcc_create_map_attr attr = {};

  attr.map_type = map_type;
  attr.name = name;
  attr.key_size = key_size;
  attr.value_size = value_size;
  attr.max_entries = max_entries;
  attr.map_flags = map_flags;
  return bcc_create_map_xattr(&attr, true);
}

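/*
 * Illustrative sketch (not part of the library): creating a small hash map
 * with bcc_create_map. The map name, key/value sizes, and entry count are
 * arbitrary example values. BCC_LIBBPF_EXAMPLES is a hypothetical guard so
 * the sketch does not affect normal builds.
 */
#ifdef BCC_LIBBPF_EXAMPLES
static int example_create_hash_map(void)
{
  // u32 key -> u64 value, up to 1024 entries, no special flags.
  int map_fd = bcc_create_map(BPF_MAP_TYPE_HASH, "example_map",
                              sizeof(uint32_t), sizeof(uint64_t),
                              1024, 0);
  if (map_fd < 0)
    fprintf(stderr, "bcc_create_map: %s\n", strerror(errno));
  return map_fd;
}
#endif
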
int bpf_update_elem(int fd, void *key, void *value, unsigned long long flags)
{
  return bpf_map_update_elem(fd, key, value, flags);
}

int bpf_lookup_elem(int fd, void *key, void *value)
{
  return bpf_map_lookup_elem(fd, key, value);
}

int bpf_delete_elem(int fd, void *key)
{
  return bpf_map_delete_elem(fd, key);
}

int bpf_lookup_and_delete(int fd, void *key, void *value)
{
  return bpf_map_lookup_and_delete_elem(fd, key, value);
}

int bpf_lookup_batch(int fd, __u32 *in_batch, __u32 *out_batch, void *keys,
                     void *values, __u32 *count)
{
  return bpf_map_lookup_batch(fd, in_batch, out_batch, keys, values, count,
                              NULL);
}

int bpf_delete_batch(int fd, void *keys, __u32 *count)
{
  return bpf_map_delete_batch(fd, keys, count, NULL);
}

int bpf_update_batch(int fd, void *keys, void *values, __u32 *count)
{
  return bpf_map_update_batch(fd, keys, values, count, NULL);
}

int bpf_lookup_and_delete_batch(int fd, __u32 *in_batch, __u32 *out_batch,
                                void *keys, void *values, __u32 *count)
{
  return bpf_map_lookup_and_delete_batch(fd, in_batch, out_batch, keys, values,
                                         count, NULL);
}

int bpf_get_first_key(int fd, void *key, size_t key_size)
{
  int i, res;

  // Kernels 4.12 and above support passing NULL to BPF_MAP_GET_NEXT_KEY
  // to get the first key of the map. On older kernels, the call fails.
  res = bpf_map_get_next_key(fd, 0, key);
  if (res < 0 && errno == EFAULT) {
    // Fall back to trying to find a non-existing key.
    static unsigned char try_values[3] = {0, 0xff, 0x55};
    for (i = 0; i < 3; i++) {
      memset(key, try_values[i], key_size);
      // We want to check the existence of the key, but we don't know the
      // size of the map's value. So we pass an invalid pointer for value,
      // expect the call to fail, and check whether the error is ENOENT,
      // which indicates the key doesn't exist. If we used NULL for the
      // invalid pointer, it might trigger a page fault in the kernel and
      // hurt performance. Hence we use ~0, which fails and returns fast.
      if (bpf_map_lookup_elem(fd, key, (void *)~0) >= 0)
        return -1;
      // This means the key doesn't exist.
      if (errno == ENOENT)
        return bpf_map_get_next_key(fd, (void*)&try_values[i], key);
    }
    return -1;
  } else {
    return res;
  }
}

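/*
 * Illustrative sketch (not part of the library): walking every key of a map
 * with bpf_get_first_key/bpf_get_next_key. Assumes a map with uint32_t keys;
 * map_fd would come from e.g. bcc_create_map above. Guarded by the
 * hypothetical BCC_LIBBPF_EXAMPLES macro.
 */
#ifdef BCC_LIBBPF_EXAMPLES
static void example_iterate_keys(int map_fd)
{
  uint32_t key, next_key;
  int res = bpf_get_first_key(map_fd, &key, sizeof(key));

  while (res == 0) {
    printf("key: %u\n", key);
    res = bpf_get_next_key(map_fd, &key, &next_key);
    key = next_key;  // only meaningful when res == 0
  }
}
#endif
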
int bpf_get_next_key(int fd, void *key, void *next_key)
{
  return bpf_map_get_next_key(fd, key, next_key);
}

static void bpf_print_hints(int ret, char *log)
{
  if (ret < 0)
    fprintf(stderr, "bpf: Failed to load program: %s\n", strerror(errno));
  if (log == NULL)
    return;
  fprintf(stderr, "%s\n", log);

  if (ret >= 0)
    return;

  // The following error strings will need maintenance to match LLVM.

  // stack busting
  if (strstr(log, "invalid stack off=-") != NULL) {
    fprintf(stderr, "HINT: Looks like you exceeded the BPF stack limit. "
      "This can happen if you allocate too much local variable storage. "
      "For example, if you allocated a 1 Kbyte struct (maybe for "
      "BPF_PERF_OUTPUT), busting a max stack of 512 bytes.\n\n");
  }

  // didn't check NULL on map lookup
  if (strstr(log, "invalid mem access 'map_value_or_null'") != NULL) {
    fprintf(stderr, "HINT: The 'map_value_or_null' error can happen if "
      "you dereference a pointer value from a map lookup without first "
      "checking if that pointer is NULL.\n\n");
  }

  // lacking a bpf_probe_read
  if (strstr(log, "invalid mem access 'inv'") != NULL) {
    fprintf(stderr, "HINT: The invalid mem access 'inv' error can happen "
      "if you try to dereference memory without first using "
      "bpf_probe_read_kernel() to copy it to the BPF stack. Sometimes the "
      "bpf_probe_read_kernel() is automatic by the bcc rewriter, other times "
      "you'll need to be explicit.\n\n");
  }

  // referencing global/static variables or read-only data
  if (strstr(log, "unknown opcode") != NULL) {
    fprintf(stderr, "HINT: The 'unknown opcode' error can happen if you "
      "reference a global or static variable, or data in a read-only section. "
      "For example, 'char *p = \"hello\"' will result in p referencing a "
      "read-only section, while 'char p[] = \"hello\"' will have \"hello\" "
      "stored on the stack.\n\n");
  }

  // helper function not found in kernel
  char *helper_str = strstr(log, "invalid func ");
  if (helper_str != NULL) {
    helper_str += strlen("invalid func ");
    char *str = strchr(helper_str, '#');
    if (str != NULL) {
      helper_str = str + 1;
    }
    unsigned int helper_id = atoi(helper_str);
    // helpers[] is indexed by helper ID minus one, so an ID equal to the
    // array length is still valid.
    if (helper_id && helper_id <= sizeof(helpers) / sizeof(struct bpf_helper)) {
      struct bpf_helper helper = helpers[helper_id - 1];
      fprintf(stderr, "HINT: bpf_%s missing (added in Linux %s).\n\n",
              helper.name, helper.required_version);
    }
  }
}
#define ROUND_UP(x, n) (((x) + (n) - 1u) & ~((n) - 1u))

int bpf_obj_get_info(int prog_map_fd, void *info, uint32_t *info_len)
{
  return bpf_obj_get_info_by_fd(prog_map_fd, info, info_len);
}

int bpf_prog_compute_tag(const struct bpf_insn *insns, int prog_len,
                         unsigned long long *ptag)
{
  struct sockaddr_alg alg = {
    .salg_family    = AF_ALG,
    .salg_type      = "hash",
    .salg_name      = "sha1",
  };
  int shafd = socket(AF_ALG, SOCK_SEQPACKET | SOCK_CLOEXEC, 0);
  if (shafd < 0) {
    fprintf(stderr, "sha1 socket not available %s\n", strerror(errno));
    return -1;
  }
  int ret = bind(shafd, (struct sockaddr *)&alg, sizeof(alg));
  if (ret < 0) {
    fprintf(stderr, "sha1 bind fail %s\n", strerror(errno));
    close(shafd);
    return ret;
  }
  int shafd2 = accept4(shafd, NULL, 0, SOCK_CLOEXEC);
  if (shafd2 < 0) {
    fprintf(stderr, "sha1 accept fail %s\n", strerror(errno));
    close(shafd);
    return -1;
  }
  struct bpf_insn prog[prog_len / 8];
  bool map_ld_seen = false;
  int i;
  for (i = 0; i < prog_len / 8; i++) {
    prog[i] = insns[i];
    if (insns[i].code == (BPF_LD | BPF_DW | BPF_IMM) &&
        insns[i].src_reg == BPF_PSEUDO_MAP_FD &&
        !map_ld_seen) {
      prog[i].imm = 0;
      map_ld_seen = true;
    } else if (insns[i].code == 0 && map_ld_seen) {
      prog[i].imm = 0;
      map_ld_seen = false;
    } else {
      map_ld_seen = false;
    }
  }
  ret = write(shafd2, prog, prog_len);
  if (ret != prog_len) {
    fprintf(stderr, "sha1 write fail %s\n", strerror(errno));
    close(shafd2);
    close(shafd);
    return -1;
  }

  union {
    unsigned char sha[20];
    unsigned long long tag;
  } u = {};
  ret = read(shafd2, u.sha, 20);
  if (ret != 20) {
    fprintf(stderr, "sha1 read fail %s\n", strerror(errno));
    close(shafd2);
    close(shafd);
    return -1;
  }
  *ptag = __builtin_bswap64(u.tag);
  close(shafd2);
  close(shafd);
  return 0;
}

int bpf_prog_get_tag(int fd, unsigned long long *ptag)
{
  char fmt[64];
  snprintf(fmt, sizeof(fmt), "/proc/self/fdinfo/%d", fd);
  FILE * f = fopen(fmt, "re");
  if (!f) {
/*    fprintf(stderr, "failed to open fdinfo %s\n", strerror(errno));*/
    return -1;
  }
  unsigned long long tag = 0;
  // prog_tag: can appear in different lines
  while (fgets(fmt, sizeof(fmt), f)) {
    if (sscanf(fmt, "prog_tag:%llx", &tag) == 1) {
      *ptag = tag;
      fclose(f);
      return 0;
    }
  }
  fclose(f);
  return -2;
}

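/*
 * Illustrative sketch (not part of the library): after loading a program,
 * the tag computed from its instructions should match the tag the kernel
 * reports via fdinfo. prog_fd, insns, and prog_len are assumed to come from
 * the caller's load path; BCC_LIBBPF_EXAMPLES is a hypothetical guard.
 */
#ifdef BCC_LIBBPF_EXAMPLES
static void example_check_tag(int prog_fd, const struct bpf_insn *insns,
                              int prog_len)
{
  unsigned long long computed = 0, kernel = 0;

  if (bpf_prog_compute_tag(insns, prog_len, &computed) == 0 &&
      bpf_prog_get_tag(prog_fd, &kernel) == 0 &&
      computed != kernel)
    fprintf(stderr, "tag mismatch: %llx vs %llx\n", computed, kernel);
}
#endif
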
static int libbpf_bpf_prog_load(enum bpf_prog_type prog_type,
                                const char *prog_name, const char *license,
                                const struct bpf_insn *insns, size_t insn_cnt,
                                struct bpf_prog_load_opts *opts,
                                char *log_buf, size_t log_buf_sz)
{
  LIBBPF_OPTS(bpf_prog_load_opts, p);

  if (!opts || !log_buf != !log_buf_sz) {
    errno = EINVAL;
    return -EINVAL;
  }

  p.expected_attach_type = opts->expected_attach_type;
  switch (prog_type) {
  case BPF_PROG_TYPE_STRUCT_OPS:
  case BPF_PROG_TYPE_LSM:
    p.attach_btf_id = opts->attach_btf_id;
    break;
  case BPF_PROG_TYPE_TRACING:
  case BPF_PROG_TYPE_EXT:
    p.attach_btf_id = opts->attach_btf_id;
    p.attach_prog_fd = opts->attach_prog_fd;
    p.attach_btf_obj_fd = opts->attach_btf_obj_fd;
    break;
  default:
    p.prog_ifindex = opts->prog_ifindex;
    p.kern_version = opts->kern_version;
  }
  p.log_level = opts->log_level;
  p.log_buf = log_buf;
  p.log_size = log_buf_sz;
  p.prog_btf_fd = opts->prog_btf_fd;
  p.func_info_rec_size = opts->func_info_rec_size;
  p.func_info_cnt = opts->func_info_cnt;
  p.func_info = opts->func_info;
  p.line_info_rec_size = opts->line_info_rec_size;
  p.line_info_cnt = opts->line_info_cnt;
  p.line_info = opts->line_info;
  p.prog_flags = opts->prog_flags;

  return bpf_prog_load(prog_type, prog_name, license,
                       insns, insn_cnt, &p);
}

#ifndef MINIMAL_LIBBPF
static int find_btf_id(const char *module_name, const char *func_name,
                       enum bpf_attach_type expected_attach_type, int *btf_fd)
{
  struct btf *vmlinux_btf = NULL, *module_btf = NULL;
  struct bpf_btf_info info;
  int err, fd, btf_id;
  __u32 id = 0, len;
  char name[64];

  if (!module_name[0] || !strcmp(module_name, "vmlinux"))
    return libbpf_find_vmlinux_btf_id(func_name, expected_attach_type);

  while (true) {
    err = bpf_btf_get_next_id(id, &id);
    if (err) {
      fprintf(stderr, "bpf_btf_get_next_id failed: %d\n", err);
      return err;
    }

    fd = bpf_btf_get_fd_by_id(id);
    if (fd < 0) {
      err = fd;
      fprintf(stderr, "bpf_btf_get_fd_by_id failed: %d\n", err);
      return err;
    }

    len = sizeof(info);
    memset(&info, 0, sizeof(info));
    info.name = ptr_to_u64(name);
    info.name_len = sizeof(name);

    err = bpf_btf_get_info_by_fd(fd, &info, &len);
    if (err) {
      fprintf(stderr, "bpf_btf_get_info_by_fd failed: %d\n", err);
      goto err_out;
    }

    if (!info.kernel_btf || strcmp(name, module_name)) {
      close(fd);
      continue;
    }

    vmlinux_btf = btf__load_vmlinux_btf();
    err = libbpf_get_error(vmlinux_btf);
    if (err) {
      fprintf(stderr, "btf__load_vmlinux_btf failed: %d\n", err);
      goto err_out;
    }

    module_btf = btf__load_module_btf(module_name, vmlinux_btf);
    err = libbpf_get_error(module_btf);
    if (err) {
      fprintf(stderr, "btf__load_module_btf failed: %d\n", err);
      goto err_out;
    }

    btf_id = btf__find_by_name_kind(module_btf, func_name, BTF_KIND_FUNC);
    if (btf_id < 0) {
      err = btf_id;
      fprintf(stderr, "btf__find_by_name_kind failed: %d\n", err);
      goto err_out;
    }

    btf__free(module_btf);
    btf__free(vmlinux_btf);

    *btf_fd = fd;
    return btf_id;

err_out:
    btf__free(module_btf);
    btf__free(vmlinux_btf);
    close(fd);
    *btf_fd = -1;
    return err;
  }

  return -1;
}
#endif

int bcc_prog_load_xattr(enum bpf_prog_type prog_type, const char *prog_name,
                        const char *license, const struct bpf_insn *insns,
                        struct bpf_prog_load_opts *opts, int prog_len,
                        char *log_buf, unsigned log_buf_size, bool allow_rlimit)
{
  unsigned name_len = prog_name ? strlen(prog_name) : 0;
  char *tmp_log_buf = NULL, *opts_log_buf = NULL;
  unsigned tmp_log_buf_size = 0, opts_log_buf_size = 0;
  int ret = 0, name_offset = 0, expected_attach_type = 0;
  char new_prog_name[BPF_OBJ_NAME_LEN] = {};
  char mod_name[64] = {};
  char *mod_end;
  int mod_len;
  int fd = -1;

  unsigned insns_cnt = prog_len / sizeof(struct bpf_insn);

  if (opts->log_level > 0) {
    if (log_buf_size > 0) {
      // Use the user-provided log buffer if available.
      log_buf[0] = 0;
      opts_log_buf = log_buf;
      opts_log_buf_size = log_buf_size;
    } else {
      // Create and use a temporary log buffer if the user didn't provide one.
      tmp_log_buf_size = LOG_BUF_SIZE;
      tmp_log_buf = malloc(tmp_log_buf_size);
      if (!tmp_log_buf) {
        fprintf(stderr, "bpf: Failed to allocate temporary log buffer: %s\n\n",
                strerror(errno));
        opts->log_level = 0;
      } else {
        tmp_log_buf[0] = 0;
        opts_log_buf = tmp_log_buf;
        opts_log_buf_size = tmp_log_buf_size;
      }
    }
  }

  if (name_len) {
    if (strncmp(prog_name, "kprobe__", 8) == 0)
      name_offset = 8;
    else if (strncmp(prog_name, "kretprobe__", 11) == 0)
      name_offset = 11;
    else if (strncmp(prog_name, "tracepoint__", 12) == 0)
      name_offset = 12;
    else if (strncmp(prog_name, "raw_tracepoint__", 16) == 0)
      name_offset = 16;
    else if (strncmp(prog_name, "kfunc__", 7) == 0) {
      // e.g. kfunc__vmlinux__vfs_read
      mod_end = strstr(prog_name + 7, "__");
      mod_len = mod_end - prog_name - 7;
      strncpy(mod_name, prog_name + 7, mod_len);
      name_offset = 7 + mod_len + 2;
      expected_attach_type = BPF_TRACE_FENTRY;
    } else if (strncmp(prog_name, "kmod_ret__", 10) == 0) {
      name_offset = 10;
      expected_attach_type = BPF_MODIFY_RETURN;
    } else if (strncmp(prog_name, "kretfunc__", 10) == 0) {
      // e.g. kretfunc__vmlinux__vfs_read
      mod_end = strstr(prog_name + 10, "__");
      mod_len = mod_end - prog_name - 10;
      strncpy(mod_name, prog_name + 10, mod_len);
      name_offset = 10 + mod_len + 2;
      expected_attach_type = BPF_TRACE_FEXIT;
    } else if (strncmp(prog_name, "lsm__", 5) == 0) {
      name_offset = 5;
      expected_attach_type = BPF_LSM_MAC;
    } else if (strncmp(prog_name, "bpf_iter__", 10) == 0) {
      name_offset = 10;
      expected_attach_type = BPF_TRACE_ITER;
    }

    if (prog_type == BPF_PROG_TYPE_TRACING ||
        prog_type == BPF_PROG_TYPE_LSM) {
#ifdef MINIMAL_LIBBPF
      fprintf(stderr, "vmlinux BTF not supported in this build of libbpf\n");
      return -1;
#else
      ret = find_btf_id(mod_name, prog_name + name_offset,
                        expected_attach_type, &fd);
      if (ret == -EINVAL) {
        fprintf(stderr, "bpf: %s BTF is not found\n", mod_name);
        return ret;
      } else if (ret < 0) {
        fprintf(stderr, "bpf: %s is not found in %s BTF\n",
                prog_name + name_offset, mod_name);
        return ret;
      }

      opts->attach_btf_obj_fd = fd == -1 ? 0 : fd;
      opts->attach_btf_id = ret;
      opts->expected_attach_type = expected_attach_type;
#endif
    }

    memcpy(new_prog_name, prog_name + name_offset,
           min(name_len - name_offset, BPF_OBJ_NAME_LEN - 1));
  }

  ret = libbpf_bpf_prog_load(prog_type, new_prog_name, license, insns, insns_cnt, opts, opts_log_buf, opts_log_buf_size);

  // func_info/line_info may not be supported in old kernels.
  if (ret < 0 && opts->func_info && errno == EINVAL) {
    opts->prog_btf_fd = 0;
    opts->func_info = NULL;
    opts->func_info_cnt = 0;
    opts->func_info_rec_size = 0;
    opts->line_info = NULL;
    opts->line_info_cnt = 0;
    opts->line_info_rec_size = 0;
    ret = libbpf_bpf_prog_load(prog_type, new_prog_name, license, insns, insns_cnt, opts, opts_log_buf, opts_log_buf_size);
  }

  // The BPF object name is not supported on older kernels.
  // If we failed due to this, clear the name and try again.
  if (ret < 0 && name_len && (errno == E2BIG || errno == EINVAL)) {
    new_prog_name[0] = '\0';
    ret = libbpf_bpf_prog_load(prog_type, new_prog_name, license, insns, insns_cnt, opts, opts_log_buf, opts_log_buf_size);
  }

  if (ret < 0 && errno == EPERM) {
    if (!allow_rlimit)
      return ret;

    // When EPERM is returned, two reasons are possible:
    //  1. user has no permissions for bpf()
    //  2. user has insufficient rlimit for locked memory
    // Unfortunately, there is no api to inspect the current usage of locked
    // mem for the user, so accurately calculating how much memory to lock
    // for this new program is difficult. As a hack, bump the limit to
    // unlimited. If program load fails again, return the error.
    struct rlimit rl = {};
    if (getrlimit(RLIMIT_MEMLOCK, &rl) == 0) {
      rl.rlim_max = RLIM_INFINITY;
      rl.rlim_cur = rl.rlim_max;
      if (setrlimit(RLIMIT_MEMLOCK, &rl) == 0)
        ret = libbpf_bpf_prog_load(prog_type, new_prog_name, license, insns, insns_cnt, opts, opts_log_buf, opts_log_buf_size);
    }
  }

  if (ret < 0 && errno == E2BIG) {
    fprintf(stderr,
            "bpf: %s. Program %s too large (%u insns), at most %d insns\n\n",
            strerror(errno), new_prog_name, insns_cnt, BPF_MAXINSNS);
    return -1;
  }

  // The load has failed. Handle the log message.
  if (ret < 0) {
    // User has provided a log buffer.
    if (log_buf_size) {
      // If logging is not already enabled, enable it and do the syscall again.
      if (opts->log_level == 0) {
        opts->log_level = 1;
        ret = libbpf_bpf_prog_load(prog_type, new_prog_name, license, insns, insns_cnt, opts, log_buf, log_buf_size);
      }
      // Print the log message and return.
      bpf_print_hints(ret, log_buf);
      if (errno == ENOSPC)
        fprintf(stderr, "bpf: log_buf size may be insufficient\n");
      goto return_result;
    }

    // User did not provide a log buffer. We will try to increase the size of
    // our temporary log buffer to get the full error message.
    if (tmp_log_buf)
      free(tmp_log_buf);
    tmp_log_buf_size = LOG_BUF_SIZE;
    if (opts->log_level == 0)
      opts->log_level = 1;
    for (;;) {
      tmp_log_buf = malloc(tmp_log_buf_size);
      if (!tmp_log_buf) {
        fprintf(stderr, "bpf: Failed to allocate temporary log buffer: %s\n\n",
                strerror(errno));
        goto return_result;
      }
      tmp_log_buf[0] = 0;
      ret = libbpf_bpf_prog_load(prog_type, new_prog_name, license, insns, insns_cnt, opts, tmp_log_buf, tmp_log_buf_size);
      if (ret < 0 && errno == ENOSPC) {
        // Temporary buffer size is not enough. Double it and try again.
        free(tmp_log_buf);
        tmp_log_buf = NULL;
        tmp_log_buf_size <<= 1;
      } else {
        break;
      }
    }
  }

  // Print the log message if log_level is not 0, either because the user
  // specified it or because we set it due to an error.
  if (opts->log_level > 0) {
    // Don't print if the user enabled logging and provided a log buffer,
    // but there is no error.
    if (log_buf && ret < 0)
      bpf_print_hints(ret, log_buf);
    else if (tmp_log_buf)
      bpf_print_hints(ret, tmp_log_buf);
  }

return_result:
  if (fd >= 0)
    close(fd);
  if (tmp_log_buf)
    free(tmp_log_buf);
  return ret;
}

int bcc_prog_load(enum bpf_prog_type prog_type, const char *name,
                  const struct bpf_insn *insns, int prog_len,
                  const char *license, unsigned kern_version,
                  int log_level, char *log_buf, unsigned log_buf_size)
{
  struct bpf_prog_load_opts opts = {};

  if (prog_type != BPF_PROG_TYPE_TRACING && prog_type != BPF_PROG_TYPE_EXT)
    opts.kern_version = kern_version;
  opts.log_level = log_level;
  return bcc_prog_load_xattr(prog_type, name, license, insns, &opts, prog_len, log_buf, log_buf_size, true);
}

int bpf_open_raw_sock(const char *name)
{
  struct sockaddr_ll sll;
  int sock;

  sock = socket(PF_PACKET, SOCK_RAW | SOCK_NONBLOCK | SOCK_CLOEXEC, htons(ETH_P_ALL));
  if (sock < 0) {
    fprintf(stderr, "cannot create raw socket\n");
    return -1;
  }

  /* Do not bind on empty interface names */
  if (!name || *name == '\0')
    return sock;

  memset(&sll, 0, sizeof(sll));
  sll.sll_family = AF_PACKET;
  sll.sll_ifindex = if_nametoindex(name);
  if (sll.sll_ifindex == 0) {
    fprintf(stderr, "bpf: Resolving device name to index: %s\n", strerror(errno));
    close(sock);
    return -1;
  }
  sll.sll_protocol = htons(ETH_P_ALL);
  if (bind(sock, (struct sockaddr *)&sll, sizeof(sll)) < 0) {
    fprintf(stderr, "bind to %s: %s\n", name, strerror(errno));
    close(sock);
    return -1;
  }

  return sock;
}

int bpf_attach_socket(int sock, int prog) {
  return setsockopt(sock, SOL_SOCKET, SO_ATTACH_BPF, &prog, sizeof(prog));
}

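/*
 * Illustrative sketch (not part of the library): load a minimal socket
 * filter with bcc_prog_load and attach it to a raw socket on "lo". The
 * program returns 0, which truncates every packet to zero bytes
 * (effectively dropping them) -- enough to show the load/attach flow.
 * Requires CAP_NET_RAW; BCC_LIBBPF_EXAMPLES is a hypothetical guard.
 */
#ifdef BCC_LIBBPF_EXAMPLES
static int example_attach_socket_filter(void)
{
  struct bpf_insn prog[] = {
    { .code = BPF_ALU64 | BPF_MOV | BPF_K, .dst_reg = BPF_REG_0, .imm = 0 },
    { .code = BPF_JMP | BPF_EXIT },
  };
  int prog_fd = bcc_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, "example_filter",
                              prog, sizeof(prog), "GPL",
                              0 /* kern_version */, 0 /* log_level */,
                              NULL, 0);
  if (prog_fd < 0)
    return -1;
  int sock = bpf_open_raw_sock("lo");
  if (sock < 0 || bpf_attach_socket(sock, prog_fd) < 0)
    return -1;
  return sock;
}
#endif
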
#define PMU_TYPE_FILE "/sys/bus/event_source/devices/%s/type"
static int bpf_find_probe_type(const char *event_type)
{
  int fd;
  int ret;
  char buf[PATH_MAX];

  ret = snprintf(buf, sizeof(buf), PMU_TYPE_FILE, event_type);
  if (ret < 0 || ret >= (int)sizeof(buf))
    return -1;

  fd = open(buf, O_RDONLY | O_CLOEXEC);
  if (fd < 0)
    return -1;
  ret = read(fd, buf, sizeof(buf));
  close(fd);
  if (ret < 0 || ret >= (int)sizeof(buf))
    return -1;
  buf[ret] = '\0';  // read() does not NUL-terminate
  errno = 0;
  ret = (int)strtol(buf, NULL, 10);
  return errno ? -1 : ret;
}

#define PMU_RETPROBE_FILE "/sys/bus/event_source/devices/%s/format/retprobe"
static int bpf_get_retprobe_bit(const char *event_type)
{
  int fd;
  int ret;
  char buf[PATH_MAX];

  ret = snprintf(buf, sizeof(buf), PMU_RETPROBE_FILE, event_type);
  if (ret < 0 || ret >= (int)sizeof(buf))
    return -1;

  fd = open(buf, O_RDONLY | O_CLOEXEC);
  if (fd < 0)
    return -1;
  ret = read(fd, buf, sizeof(buf));
  close(fd);
  if (ret < 0 || ret >= (int)sizeof(buf))
    return -1;
  buf[ret] = '\0';  // read() does not NUL-terminate
  if (strncmp(buf, "config:", strlen("config:")))
    return -1;
  errno = 0;
  ret = (int)strtol(buf + strlen("config:"), NULL, 10);
  return errno ? -1 : ret;
}

/*
 * Kernel API with e12f03d ("perf/core: Implement the 'perf_kprobe' PMU") allows
 * creating a [k,u]probe with perf_event_open, which makes it easier to clean up
 * the [k,u]probe. This function tries to create pfd with the perf_kprobe PMU.
 */
static int bpf_try_perf_event_open_with_probe(const char *name, uint64_t offs,
             int pid, const char *event_type, int is_return,
             uint64_t ref_ctr_offset)
{
  struct perf_event_attr attr = {};
  int type = bpf_find_probe_type(event_type);
  int is_return_bit = bpf_get_retprobe_bit(event_type);
  int cpu = 0;

  if (type < 0 || is_return_bit < 0)
    return -1;
  attr.sample_period = 1;
  attr.wakeup_events = 1;
  if (is_return)
    attr.config |= 1 << is_return_bit;
  attr.config |= (ref_ctr_offset << PERF_UPROBE_REF_CTR_OFFSET_SHIFT);

  /*
   * struct perf_event_attr in the latest perf_event.h has the following
   * extension to config1 and config2. To keep bcc compatible with
   * older perf_event.h, we use config1 and config2 here instead of
   * kprobe_func, uprobe_path, kprobe_addr, and probe_offset.
   *
   * union {
   *  __u64 bp_addr;
   *  __u64 kprobe_func;
   *  __u64 uprobe_path;
   *  __u64 config1;
   * };
   * union {
   *   __u64 bp_len;
   *   __u64 kprobe_addr;
   *   __u64 probe_offset;
   *   __u64 config2;
   * };
   */
  attr.config2 = offs;  /* config2 here is kprobe_addr or probe_offset */
  attr.size = sizeof(attr);
  attr.type = type;
  /* config1 here is kprobe_func or uprobe_path */
  attr.config1 = ptr_to_u64((void *)name);
  // PID filtering is only possible for uprobe events.
  if (pid < 0)
    pid = -1;
  // The perf_event_open API doesn't allow both pid and cpu to be -1.
  // So only set cpu to -1 when PID is not -1.
  // Tracing events do not do CPU filtering in any case.
  if (pid != -1)
    cpu = -1;
  return syscall(__NR_perf_event_open, &attr, pid, cpu, -1 /* group_fd */,
                 PERF_FLAG_FD_CLOEXEC);
}

#define DEBUGFS_TRACEFS "/sys/kernel/debug/tracing"
#define TRACEFS "/sys/kernel/tracing"

static const char *get_tracefs_path()
{
  if (access(DEBUGFS_TRACEFS, F_OK) == 0) {
    return DEBUGFS_TRACEFS;
  }
  return TRACEFS;
}

// When a valid Perf Event FD is provided through pfd, it will be used to
// enable and attach the BPF program to the event, and event_path will be
// ignored. Otherwise, event_path is expected to contain the path to the
// event in tracefs, and it will be used to open the Perf Event FD.
// In either case, if the attach partially fails (such as an issue with the
// ioctl operations), the **caller** needs to clean up the Perf Event FD,
// whether it was provided by the caller or opened here.
static int bpf_attach_tracing_event(int progfd, const char *event_path, int pid,
                                    int *pfd)
{
  int efd, cpu = 0;
  ssize_t bytes;
  char buf[PATH_MAX];
  struct perf_event_attr attr = {};
  // Caller did not provide a valid Perf Event FD. Create one with the tracefs
  // event path provided.
  if (*pfd < 0) {
    snprintf(buf, sizeof(buf), "%s/id", event_path);
    efd = open(buf, O_RDONLY | O_CLOEXEC, 0);
    if (efd < 0) {
      fprintf(stderr, "open(%s): %s\n", buf, strerror(errno));
      return -1;
    }

    bytes = read(efd, buf, sizeof(buf));
    if (bytes <= 0 || bytes >= (int)sizeof(buf)) {
      fprintf(stderr, "read(%s): %s\n", buf, strerror(errno));
      close(efd);
      return -1;
    }
    close(efd);
    buf[bytes] = '\0';
    attr.config = strtol(buf, NULL, 0);
    attr.type = PERF_TYPE_TRACEPOINT;
    attr.sample_period = 1;
    attr.wakeup_events = 1;
    // PID filtering is only possible for uprobe events.
    if (pid < 0)
      pid = -1;
    // The perf_event_open API doesn't allow both pid and cpu to be -1.
    // So only set cpu to -1 when PID is not -1.
    // Tracing events do not do CPU filtering in any case.
    if (pid != -1)
      cpu = -1;
    *pfd = syscall(__NR_perf_event_open, &attr, pid, cpu, -1 /* group_fd */, PERF_FLAG_FD_CLOEXEC);
    if (*pfd < 0) {
      fprintf(stderr, "perf_event_open(%s/id): %s\n", event_path, strerror(errno));
      return -1;
    }
  }

  if (ioctl(*pfd, PERF_EVENT_IOC_SET_BPF, progfd) < 0) {
    perror("ioctl(PERF_EVENT_IOC_SET_BPF)");
    return -1;
  }
  if (ioctl(*pfd, PERF_EVENT_IOC_ENABLE, 0) < 0) {
    perror("ioctl(PERF_EVENT_IOC_ENABLE)");
    return -1;
  }

  return 0;
}

/* Creates a [k,u]probe using tracefs.
 * On success, the path to the probe is placed in buf (which is assumed to be of size PATH_MAX).
 */
static int create_probe_event(char *buf, const char *ev_name,
                              enum bpf_probe_attach_type attach_type,
                              const char *config1, uint64_t offset,
                              const char *event_type, pid_t pid, int maxactive)
{
  int kfd = -1, res = -1;
  char ev_alias[256];
  bool is_kprobe = strncmp("kprobe", event_type, 6) == 0;

  snprintf(buf, PATH_MAX, "%s/%s_events", get_tracefs_path(), event_type);
  kfd = open(buf, O_WRONLY | O_APPEND | O_CLOEXEC, 0);
  if (kfd < 0) {
    snprintf(buf, PATH_MAX, "/sys/kernel/debug/tracing/%s_events", event_type);
    kfd = open(buf, O_WRONLY | O_APPEND | O_CLOEXEC, 0);
    if (kfd < 0) {
      fprintf(stderr, "%s: open(%s): %s\n", __func__, buf,
              strerror(errno));
      return -1;
    }
  }

  res = snprintf(ev_alias, sizeof(ev_alias), "%s_bcc_%d", ev_name, getpid());
  if (res < 0 || res >= (int)sizeof(ev_alias)) {
    fprintf(stderr, "Event name (%s) is too long for buffer\n", ev_name);
    close(kfd);
    goto error;
  }

  if (is_kprobe) {
    if (offset > 0 && attach_type == BPF_PROBE_ENTRY)
      snprintf(buf, PATH_MAX, "p:kprobes/%s %s+%"PRIu64,
               ev_alias, config1, offset);
    else if (maxactive > 0 && attach_type == BPF_PROBE_RETURN)
      snprintf(buf, PATH_MAX, "r%d:kprobes/%s %s",
               maxactive, ev_alias, config1);
    else
      snprintf(buf, PATH_MAX, "%c:kprobes/%s %s",
               attach_type == BPF_PROBE_ENTRY ? 'p' : 'r',
               ev_alias, config1);
  } else {
    res = snprintf(buf, PATH_MAX, "%c:%ss/%s %s:0x%lx", attach_type==BPF_PROBE_ENTRY ? 'p' : 'r',
                   event_type, ev_alias, config1, (unsigned long)offset);
    if (res < 0 || res >= PATH_MAX) {
      fprintf(stderr, "Event alias (%s) too long for buffer\n", ev_alias);
      close(kfd);
      return -1;
    }
  }

  if (write(kfd, buf, strlen(buf)) < 0) {
    if (errno == ENOENT)
      fprintf(stderr, "cannot attach %s, probe entry may not exist\n", event_type);
    else
      fprintf(stderr, "cannot attach %s, %s\n", event_type, strerror(errno));
    close(kfd);
    goto error;
  }
  close(kfd);
  snprintf(buf, PATH_MAX, "%s/events/%ss/%s", get_tracefs_path(),
           event_type, ev_alias);
  return 0;
error:
  return -1;
}

// config1 could be either kprobe_func or uprobe_path,
// see bpf_try_perf_event_open_with_probe().
static int bpf_attach_probe(int progfd, enum bpf_probe_attach_type attach_type,
                            const char *ev_name, const char *config1, const char* event_type,
                            uint64_t offset, pid_t pid, int maxactive,
                            uint32_t ref_ctr_offset)
{
  int kfd, pfd = -1;
  char buf[PATH_MAX], fname[256], kprobe_events[PATH_MAX];
  bool is_kprobe = strncmp("kprobe", event_type, 6) == 0;

  if (maxactive <= 0)
    // Try to create the [k,u]probe Perf Event with the perf_event_open API.
    pfd = bpf_try_perf_event_open_with_probe(config1, offset, pid, event_type,
                                             attach_type != BPF_PROBE_ENTRY,
                                             ref_ctr_offset);

  // If that failed, most likely the kernel doesn't support the perf_kprobe PMU
  // (e12f03d "perf/core: Implement the 'perf_kprobe' PMU") yet.
  // Try to create the event using tracefs.
  if (pfd < 0) {
    if (create_probe_event(buf, ev_name, attach_type, config1, offset,
                           event_type, pid, maxactive) < 0)
      goto error;

    // If we're using maxactive, we need to check that the event was created
    // under the expected name.  If tracefs doesn't support maxactive yet
    // (kernel < 4.12), the event is created under a different name; we need to
    // delete that event and start again without maxactive.
    if (is_kprobe && maxactive > 0 && attach_type == BPF_PROBE_RETURN) {
      if (snprintf(fname, sizeof(fname), "%s/id", buf) >= (int)sizeof(fname)) {
        fprintf(stderr, "filename (%s) is too long for buffer\n", buf);
        goto error;
      }
      if (access(fname, F_OK) == -1) {
        snprintf(kprobe_events, PATH_MAX, "%s/kprobe_events", get_tracefs_path());
        // Delete the kprobe event with the incorrect name.
        kfd = open(kprobe_events, O_WRONLY | O_APPEND | O_CLOEXEC, 0);
        if (kfd < 0) {
          fprintf(stderr, "open(%s): %s\n", kprobe_events, strerror(errno));
          return -1;
        }
        snprintf(fname, sizeof(fname), "-:kprobes/%s_0", ev_name);
        if (write(kfd, fname, strlen(fname)) < 0) {
          if (errno == ENOENT)
            fprintf(stderr, "cannot detach kprobe, probe entry may not exist\n");
          else
            fprintf(stderr, "cannot detach kprobe, %s\n", strerror(errno));
          close(kfd);
          goto error;
        }
        close(kfd);

        // Re-create the kprobe event without maxactive.
        if (create_probe_event(buf, ev_name, attach_type, config1,
                               offset, event_type, pid, 0) < 0)
          goto error;
      }
    }
  }
  // If perf_event_open succeeded, bpf_attach_tracing_event will use the created
  // Perf Event FD directly, and buf will be empty and unused.
  // Otherwise it will read the event ID from the path in buf, create the
  // Perf Event using that ID, and update the value of pfd.
  if (bpf_attach_tracing_event(progfd, buf, pid, &pfd) == 0)
    return pfd;

error:
  bpf_close_perf_event_fd(pfd);
  return -1;
}

int bpf_attach_kprobe(int progfd, enum bpf_probe_attach_type attach_type,
                      const char *ev_name, const char *fn_name,
                      uint64_t fn_offset, int maxactive)
{
  return bpf_attach_probe(progfd, attach_type,
                          ev_name, fn_name, "kprobe",
                          fn_offset, -1, maxactive, 0);
}

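/*
 * Illustrative sketch (not part of the library): attaching an already-loaded
 * kprobe program to vfs_read and detaching it again. prog_fd is assumed to
 * come from bcc_prog_load with BPF_PROG_TYPE_KPROBE, and the event name
 * "p_vfs_read" is arbitrary. BCC_LIBBPF_EXAMPLES is a hypothetical guard.
 */
#ifdef BCC_LIBBPF_EXAMPLES
static int example_kprobe_vfs_read(int prog_fd)
{
  int pfd = bpf_attach_kprobe(prog_fd, BPF_PROBE_ENTRY, "p_vfs_read",
                              "vfs_read", 0 /* fn_offset */, 0 /* maxactive */);
  if (pfd < 0)
    return -1;
  // ... trace for a while, then tear down:
  bpf_close_perf_event_fd(pfd);
  return bpf_detach_kprobe("p_vfs_read");
}
#endif
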
static int _find_archive_path_and_offset(const char *entry_path,
                                         char out_path[PATH_MAX],
                                         uint64_t *offset) {
  const char *separator = strstr(entry_path, "!/");
  if (separator == NULL || (separator - entry_path) >= PATH_MAX) {
    return -1;
  }

  struct bcc_zip_entry entry;
  struct bcc_zip_archive *archive =
      bcc_zip_archive_open_and_find(entry_path, &entry);
  if (archive == NULL) {
    return -1;
  }
  if (entry.compression) {
    bcc_zip_archive_close(archive);
    return -1;
  }

  strncpy(out_path, entry_path, separator - entry_path);
  out_path[separator - entry_path] = 0;
  *offset += entry.data_offset;

  bcc_zip_archive_close(archive);
  return 0;
}

int bpf_attach_uprobe(int progfd, enum bpf_probe_attach_type attach_type,
                      const char *ev_name, const char *binary_path,
                      uint64_t offset, pid_t pid, uint32_t ref_ctr_offset)
{
  char archive_path[PATH_MAX];
  if (access(binary_path, F_OK) != 0 &&
      _find_archive_path_and_offset(binary_path, archive_path, &offset) == 0) {
    binary_path = archive_path;
  }

  return bpf_attach_probe(progfd, attach_type,
                          ev_name, binary_path, "uprobe",
                          offset, pid, -1, ref_ctr_offset);
}

static int bpf_detach_probe(const char *ev_name, const char *event_type)
{
  int kfd = -1, res;
  char buf[PATH_MAX];
  int found_event = 0;
  size_t bufsize = 0;
  char *cptr = NULL;
  FILE *fp;

  /*
   * For [k,u]probes created with perf_event_open (on newer kernels), it is
   * not necessary to clean them up in [k,u]probe_events. We first look up
   * the %s_bcc_%d line in [k,u]probe_events. If the event is not found,
   * it is safe to skip the cleanup process (writing -:... to the file).
   */
  snprintf(buf, sizeof(buf), "%s/%s_events", get_tracefs_path(), event_type);
  fp = fopen(buf, "re");
  if (!fp) {
    snprintf(buf, sizeof(buf), "/sys/kernel/debug/tracing/%s_events", event_type);
    fp = fopen(buf, "re");
    if (!fp) {
      fprintf(stderr, "open(%s): %s\n", buf, strerror(errno));
      goto error;
    }
  }

  res = snprintf(buf, sizeof(buf), "%ss/%s_bcc_%d", event_type, ev_name, getpid());
  if (res < 0 || res >= (int)sizeof(buf)) {
    fprintf(stderr, "snprintf(%s): %d\n", ev_name, res);
    goto error;
  }

  while (getline(&cptr, &bufsize, fp) != -1)
    if (strstr(cptr, buf) != NULL) {
      found_event = 1;
      break;
    }
  free(cptr);
  fclose(fp);
  fp = NULL;

  if (!found_event)
    return 0;

  snprintf(buf, sizeof(buf), "%s/%s_events", get_tracefs_path(), event_type);
  kfd = open(buf, O_WRONLY | O_APPEND | O_CLOEXEC, 0);
  if (kfd < 0) {
    fprintf(stderr, "open(%s): %s\n", buf, strerror(errno));
    goto error;
  }

  res = snprintf(buf, sizeof(buf), "-:%ss/%s_bcc_%d", event_type, ev_name, getpid());
  if (res < 0 || res >= (int)sizeof(buf)) {
    fprintf(stderr, "snprintf(%s): %d\n", ev_name, res);
    goto error;
  }
  if (write(kfd, buf, strlen(buf)) < 0) {
    fprintf(stderr, "write(%s): %s\n", buf, strerror(errno));
    goto error;
  }

  close(kfd);
  return 0;

error:
  if (kfd >= 0)
    close(kfd);
  if (fp)
    fclose(fp);
  return -1;
}

int bpf_detach_kprobe(const char *ev_name)
{
  return bpf_detach_probe(ev_name, "kprobe");
}

int bpf_detach_uprobe(const char *ev_name)
{
  return bpf_detach_probe(ev_name, "uprobe");
}

int bpf_attach_tracepoint(int progfd, const char *tp_category,
                          const char *tp_name)
{
  char buf[256];
  int pfd = -1;

  snprintf(buf, sizeof(buf), "%s/events/%s/%s", get_tracefs_path(), tp_category, tp_name);
  if (bpf_attach_tracing_event(progfd, buf, -1 /* PID */, &pfd) == 0)
    return pfd;

  bpf_close_perf_event_fd(pfd);
  return -1;
}

int bpf_detach_tracepoint(const char *tp_category, const char *tp_name) {
  UNUSED(tp_category);
  UNUSED(tp_name);
  // Right now, there is nothing to do, but it's a good idea to encourage
  // callers to detach anything they attach.
  return 0;
}

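/*
 * Usage sketch (editor's addition, hypothetical BCC_USAGE_EXAMPLES guard):
 * attach to a syscall tracepoint, then release the perf fd and detach.
 */
#ifdef BCC_USAGE_EXAMPLES
static void example_tracepoint(int progfd)
{
  int pfd = bpf_attach_tracepoint(progfd, "syscalls", "sys_enter_openat");
  if (pfd < 0)
    return;
  /* ... consume events ... */
  bpf_close_perf_event_fd(pfd);
  bpf_detach_tracepoint("syscalls", "sys_enter_openat");
}
#endif
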
int bpf_attach_raw_tracepoint(int progfd, const char *tp_name)
{
  int ret;

  ret = bpf_raw_tracepoint_open(tp_name, progfd);
  if (ret < 0)
    fprintf(stderr, "bpf_attach_raw_tracepoint (%s): %s\n", tp_name, strerror(errno));
  return ret;
}

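/*
 * Usage sketch (editor's addition, hypothetical BCC_USAGE_EXAMPLES guard):
 * the returned value is a link fd; closing it detaches the program.
 */
#ifdef BCC_USAGE_EXAMPLES
static void example_raw_tracepoint(int progfd)
{
  int link_fd = bpf_attach_raw_tracepoint(progfd, "sched_switch");
  if (link_fd >= 0)
    close(link_fd);  /* detaches immediately; real callers keep it open */
}
#endif
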
#ifndef MINIMAL_LIBBPF
bool bpf_has_kernel_btf(void)
{
  struct btf *btf;
  int err;

  btf = btf__parse_raw("/sys/kernel/btf/vmlinux");
  err = libbpf_get_error(btf);
  if (err)
    return false;

  btf__free(btf);
  return true;
}

/* Return 1 if the struct/union btf_type has a member named field_name,
 * descending into anonymous struct/union members; 0 otherwise. */
static int find_member_by_name(struct btf *btf, const struct btf_type *btf_type, const char *field_name) {
  const struct btf_member *btf_member = btf_members(btf_type);
  int i;

  for (i = 0; i < btf_vlen(btf_type); i++, btf_member++) {
    const char *name = btf__name_by_offset(btf, btf_member->name_off);
    if (!strcmp(name, field_name)) {
      return 1;
    } else if (name[0] == '\0') {
      /* Anonymous member: search its own members recursively. */
      if (find_member_by_name(btf, btf__type_by_id(btf, btf_member->type), field_name))
        return 1;
    }
  }
  return 0;
}

/* Return 1 if struct_name has field_name, 0 if not, and -1 if kernel BTF
 * could not be loaded or the struct was not found. */
int kernel_struct_has_field(const char *struct_name, const char *field_name)
{
  const struct btf_type *btf_type;
  struct btf *btf;
  int ret, btf_id;

  btf = btf__load_vmlinux_btf();
  ret = libbpf_get_error(btf);
  if (ret)
    return -1;

  btf_id = btf__find_by_name_kind(btf, struct_name, BTF_KIND_STRUCT);
  if (btf_id < 0) {
    ret = -1;
    goto cleanup;
  }

  btf_type = btf__type_by_id(btf, btf_id);
  ret = find_member_by_name(btf, btf_type, field_name);

cleanup:
  btf__free(btf);
  return ret;
}
#endif

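/*
 * Usage sketch (editor's addition, hypothetical BCC_USAGE_EXAMPLES guard;
 * also assumes a non-MINIMAL_LIBBPF build): feature-detect the 5.14 rename
 * of task_struct.state to __state.
 */
#ifdef BCC_USAGE_EXAMPLES
static void example_struct_field_probe(void)
{
  if (kernel_struct_has_field("task_struct", "__state") == 1)
    puts("kernel uses task_struct.__state");
  else
    puts("older task_struct.state naming, or no kernel BTF");
}
#endif
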
int bpf_attach_kfunc(int prog_fd)
{
  int ret;

  /* fentry/fexit programs carry their attach target in BTF, so the raw
   * tracepoint name is NULL. */
  ret = bpf_raw_tracepoint_open(NULL, prog_fd);
  if (ret < 0)
    fprintf(stderr, "bpf_attach_raw_tracepoint (kfunc): %s\n", strerror(errno));
  return ret;
}

int bpf_attach_lsm(int prog_fd)
{
  int ret;

  /* Same mechanism as kfunc attachment: the LSM hook is named via BTF. */
  ret = bpf_raw_tracepoint_open(NULL, prog_fd);
  if (ret < 0)
    fprintf(stderr, "bpf_attach_raw_tracepoint (lsm): %s\n", strerror(errno));
  return ret;
}

void * bpf_open_perf_buffer(perf_reader_raw_cb raw_cb,
                            perf_reader_lost_cb lost_cb, void *cb_cookie,
                            int pid, int cpu, int page_cnt)
{
  struct bcc_perf_buffer_opts opts = {
    .pid = pid,
    .cpu = cpu,
    .wakeup_events = 1,
  };

  return bpf_open_perf_buffer_opts(raw_cb, lost_cb, cb_cookie, page_cnt, &opts);
}

void * bpf_open_perf_buffer_opts(perf_reader_raw_cb raw_cb,
                            perf_reader_lost_cb lost_cb, void *cb_cookie,
                            int page_cnt, struct bcc_perf_buffer_opts *opts)
{
  int pfd, pid = opts->pid, cpu = opts->cpu;
  struct perf_event_attr attr = {};
  struct perf_reader *reader = NULL;

  reader = perf_reader_new(raw_cb, lost_cb, cb_cookie, page_cnt);
  if (!reader)
    goto error;

  /* Hard-coded 10 is PERF_COUNT_SW_BPF_OUTPUT; older linux/perf_event.h
   * headers do not define the enum value yet. */
  attr.config = 10;
  attr.type = PERF_TYPE_SOFTWARE;
  attr.sample_type = PERF_SAMPLE_RAW;
  attr.sample_period = 1;
  attr.wakeup_events = opts->wakeup_events;
  pfd = syscall(__NR_perf_event_open, &attr, pid, cpu, -1, PERF_FLAG_FD_CLOEXEC);
  if (pfd < 0) {
    fprintf(stderr, "perf_event_open: %s\n", strerror(errno));
    fprintf(stderr, "   (check your kernel for PERF_COUNT_SW_BPF_OUTPUT support, 4.4 or newer)\n");
    goto error;
  }
  perf_reader_set_fd(reader, pfd);

  if (perf_reader_mmap(reader) < 0)
    goto error;

  if (ioctl(pfd, PERF_EVENT_IOC_ENABLE, 0) < 0) {
    perror("ioctl(PERF_EVENT_IOC_ENABLE)");
    goto error;
  }

  return reader;

error:
  if (reader)
    perf_reader_free(reader);

  return NULL;
}

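/*
 * Usage sketch (editor's addition, hypothetical BCC_USAGE_EXAMPLES guard):
 * open a reader for CPU 0 across all pids with 8 mmap pages. Real callers
 * open one reader per CPU and poll them together via the perf_reader.h API.
 */
#ifdef BCC_USAGE_EXAMPLES
static void example_raw_cb(void *cb_cookie, void *raw, int raw_size)
{
  /* raw points at the payload emitted by bpf_perf_event_output(). */
  UNUSED(cb_cookie); UNUSED(raw); UNUSED(raw_size);
}

static void example_perf_buffer(void)
{
  struct perf_reader *reader =
      bpf_open_perf_buffer(example_raw_cb, NULL /* lost_cb */, NULL,
                           -1 /* pid */, 0 /* cpu */, 8 /* pages */);
  if (!reader)
    return;
  /* ... poll, then tear down ... */
  perf_reader_free(reader);
}
#endif
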
static int invalid_perf_config(uint32_t type, uint64_t config) {
  switch (type) {
  case PERF_TYPE_HARDWARE:
    if (config >= PERF_COUNT_HW_MAX) {
      fprintf(stderr, "HARDWARE perf event config out of range\n");
      goto is_invalid;
    }
    return 0;
  case PERF_TYPE_SOFTWARE:
    if (config >= PERF_COUNT_SW_MAX) {
      fprintf(stderr, "SOFTWARE perf event config out of range\n");
      goto is_invalid;
    } else if (config == 10 /* PERF_COUNT_SW_BPF_OUTPUT */) {
      fprintf(stderr, "Unable to open or attach perf event for BPF_OUTPUT\n");
      goto is_invalid;
    }
    return 0;
  case PERF_TYPE_HW_CACHE:
    if (((config >> 16) >= PERF_COUNT_HW_CACHE_RESULT_MAX) ||
        (((config >> 8) & 0xff) >= PERF_COUNT_HW_CACHE_OP_MAX) ||
        ((config & 0xff) >= PERF_COUNT_HW_CACHE_MAX)) {
      fprintf(stderr, "HW_CACHE perf event config out of range\n");
      goto is_invalid;
    }
    return 0;
  case PERF_TYPE_TRACEPOINT:
  case PERF_TYPE_BREAKPOINT:
    fprintf(stderr,
            "Unable to open or attach TRACEPOINT or BREAKPOINT events\n");
    goto is_invalid;
  default:
    return 0;
  }
is_invalid:
  fprintf(stderr, "Invalid perf event type %" PRIu32 " config %" PRIu64 "\n",
          type, config);
  return 1;
}
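
/*
 * Editor's note on the PERF_TYPE_HW_CACHE checks above: the perf ABI packs
 * cache configs as
 *   config = cache_id | (op_id << 8) | (result_id << 16)
 * e.g. L1 data-cache read misses:
 *   PERF_COUNT_HW_CACHE_L1D |
 *   (PERF_COUNT_HW_CACHE_OP_READ << 8) |
 *   (PERF_COUNT_HW_CACHE_RESULT_MISS << 16)
 */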

int bpf_open_perf_event(uint32_t type, uint64_t config, int pid, int cpu) {
  int fd;
  struct perf_event_attr attr = {};

  if (invalid_perf_config(type, config)) {
    return -1;
  }

  /* Huge sample period: the event is meant to be read as a counter
   * (e.g. via bpf_perf_event_read), not to generate samples. */
  attr.sample_period = LONG_MAX;
  attr.type = type;
  attr.config = config;

  fd = syscall(__NR_perf_event_open, &attr, pid, cpu, -1, PERF_FLAG_FD_CLOEXEC);
  if (fd < 0) {
    fprintf(stderr, "perf_event_open: %s\n", strerror(errno));
    return -1;
  }

  if (ioctl(fd, PERF_EVENT_IOC_ENABLE, 0) < 0) {
    perror("ioctl(PERF_EVENT_IOC_ENABLE)");
    close(fd);
    return -1;
  }

  return fd;
}

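/*
 * Usage sketch (editor's addition, hypothetical BCC_USAGE_EXAMPLES guard):
 * open a cycles counter on CPU 0 for all tasks, typically stored in a
 * BPF_MAP_TYPE_PERF_EVENT_ARRAY and read from BPF with bpf_perf_event_read().
 */
#ifdef BCC_USAGE_EXAMPLES
static void example_counter(void)
{
  int fd = bpf_open_perf_event(PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES,
                               -1 /* pid */, 0 /* cpu */);
  if (fd >= 0)
    bpf_close_perf_event_fd(fd);
}
#endif
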
int bpf_attach_xdp(const char *dev_name, int progfd, uint32_t flags) {
  int ifindex = if_nametoindex(dev_name);
  char err_buf[256];
  int ret = -1;

  if (ifindex == 0) {
    fprintf(stderr, "bpf: Resolving device name to index: %s\n", strerror(errno));
    return -1;
  }

  ret = bpf_xdp_attach(ifindex, progfd, flags, NULL);
  if (ret) {
    libbpf_strerror(ret, err_buf, sizeof(err_buf));
    fprintf(stderr, "bpf: Attaching prog to %s: %s\n", dev_name, err_buf);
    return -1;
  }

  return 0;
}

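/*
 * Usage sketch (editor's addition, hypothetical BCC_USAGE_EXAMPLES guard):
 * "eth0" is illustrative. Passing a progfd of -1 detaches the current
 * program, mirroring how bcc's frontends remove XDP programs.
 */
#ifdef BCC_USAGE_EXAMPLES
static void example_xdp(int progfd)
{
  if (bpf_attach_xdp("eth0", progfd, 0 /* flags */) != 0)
    return;
  /* ... */
  bpf_attach_xdp("eth0", -1, 0);
}
#endif
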
int bpf_attach_perf_event_raw(int progfd, void *perf_event_attr, pid_t pid,
                              int cpu, int group_fd, unsigned long extra_flags) {
  int fd = syscall(__NR_perf_event_open, perf_event_attr, pid, cpu, group_fd,
                   PERF_FLAG_FD_CLOEXEC | extra_flags);
  if (fd < 0) {
    perror("perf_event_open failed");
    return -1;
  }
  if (ioctl(fd, PERF_EVENT_IOC_SET_BPF, progfd) != 0) {
    perror("ioctl(PERF_EVENT_IOC_SET_BPF) failed");
    close(fd);
    return -1;
  }
  if (ioctl(fd, PERF_EVENT_IOC_ENABLE, 0) != 0) {
    perror("ioctl(PERF_EVENT_IOC_ENABLE) failed");
    close(fd);
    return -1;
  }

  return fd;
}

int bpf_attach_perf_event(int progfd, uint32_t ev_type, uint32_t ev_config,
                          uint64_t sample_period, uint64_t sample_freq,
                          pid_t pid, int cpu, int group_fd) {
  if (invalid_perf_config(ev_type, ev_config)) {
    return -1;
  }
  if (!((sample_period > 0) ^ (sample_freq > 0))) {
    fprintf(
      stderr, "Exactly one of sample_period / sample_freq should be set\n"
    );
    return -1;
  }

  struct perf_event_attr attr = {};
  attr.type = ev_type;
  attr.config = ev_config;
  if (pid > 0)
    attr.inherit = 1;
  if (sample_freq > 0) {
    attr.freq = 1;
    attr.sample_freq = sample_freq;
  } else {
    attr.sample_period = sample_period;
  }

  return bpf_attach_perf_event_raw(progfd, &attr, pid, cpu, group_fd, 0);
}

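/*
 * Usage sketch (editor's addition, hypothetical BCC_USAGE_EXAMPLES guard):
 * profile at 49 Hz on every CPU. A real caller would keep the returned fds
 * and close them with bpf_close_perf_event_fd() when done.
 */
#ifdef BCC_USAGE_EXAMPLES
static int example_profile_all_cpus(int progfd, int ncpu)
{
  int cpu;

  for (cpu = 0; cpu < ncpu; cpu++) {
    int fd = bpf_attach_perf_event(progfd, PERF_TYPE_SOFTWARE,
                                   PERF_COUNT_SW_CPU_CLOCK,
                                   0 /* period */, 49 /* freq */,
                                   -1 /* pid */, cpu, -1 /* group_fd */);
    if (fd < 0)
      return -1;
  }
  return 0;
}
#endif
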
int bpf_close_perf_event_fd(int fd) {
  int res, error = 0;
  if (fd >= 0) {
    res = ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);
    if (res != 0) {
      perror("ioctl(PERF_EVENT_IOC_DISABLE) failed");
      error = res;
    }
    res = close(fd);
    if (res != 0) {
      perror("close perf event FD failed");
      if (!error)
        error = res;
    }
  }
  return error;
}

/* Create a new ringbuf manager to manage the ringbuf associated with
 * map_fd, associating it with callback sample_cb. */
void * bpf_new_ringbuf(int map_fd, ring_buffer_sample_fn sample_cb, void *ctx) {
    return ring_buffer__new(map_fd, sample_cb, ctx, NULL);
}

/* Free the ringbuf manager rb and all ring buffers associated with it. */
void bpf_free_ringbuf(struct ring_buffer *rb) {
    ring_buffer__free(rb);
}

/* Add a new ring buffer associated with map_fd to the ring buffer manager rb,
 * associating it with callback sample_cb. */
int bpf_add_ringbuf(struct ring_buffer *rb, int map_fd,
                    ring_buffer_sample_fn sample_cb, void *ctx) {
    return ring_buffer__add(rb, map_fd, sample_cb, ctx);
}

/* Poll for available data and consume it, if any. Returns the number of
 * records consumed, or a negative number if any callback returned an
 * error. */
int bpf_poll_ringbuf(struct ring_buffer *rb, int timeout_ms) {
    return ring_buffer__poll(rb, timeout_ms);
}

/* Consume available data _without_ polling. Useful when low latency matters
 * more than CPU overhead. Returns the number of records consumed, or a
 * negative number if any callback returned an error. */
int bpf_consume_ringbuf(struct ring_buffer *rb) {
    return ring_buffer__consume(rb);
}

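/*
 * Usage sketch (editor's addition, hypothetical BCC_USAGE_EXAMPLES guard):
 * typical lifecycle of a ring buffer manager. map_fd must refer to a
 * BPF_MAP_TYPE_RINGBUF map.
 */
#ifdef BCC_USAGE_EXAMPLES
static int example_sample_cb(void *ctx, void *data, size_t size)
{
  /* data/size describe one record; returning non-zero stops consumption. */
  UNUSED(ctx); UNUSED(data); UNUSED(size);
  return 0;
}

static void example_ringbuf(int map_fd)
{
  struct ring_buffer *rb = bpf_new_ringbuf(map_fd, example_sample_cb, NULL);
  if (!rb)
    return;
  /* Callbacks run inside the poll; loop until an error occurs. */
  while (bpf_poll_ringbuf(rb, 100 /* ms */) >= 0)
    ;
  bpf_free_ringbuf(rb);
}
#endif
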
int bcc_iter_attach(int prog_fd, union bpf_iter_link_info *link_info,
                    uint32_t link_info_len)
{
    DECLARE_LIBBPF_OPTS(bpf_link_create_opts, link_create_opts);

    link_create_opts.iter_info = link_info;
    link_create_opts.iter_info_len = link_info_len;
    return bpf_link_create(prog_fd, 0, BPF_TRACE_ITER, &link_create_opts);
}

int bcc_iter_create(int link_fd)
{
    return bpf_iter_create(link_fd);
}

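/*
 * Usage sketch (editor's addition, hypothetical BCC_USAGE_EXAMPLES guard):
 * for iterators that need no parameters (e.g. a task iterator), link_info
 * may be NULL; reading the iterator fd returns the program's text output.
 */
#ifdef BCC_USAGE_EXAMPLES
static void example_iter(int prog_fd)
{
  char buf[4096];
  int link_fd = bcc_iter_attach(prog_fd, NULL, 0);
  if (link_fd < 0)
    return;
  int iter_fd = bcc_iter_create(link_fd);
  if (iter_fd >= 0) {
    while (read(iter_fd, buf, sizeof(buf)) > 0)
      ;
    close(iter_fd);
  }
  close(link_fd);
}
#endif
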
/* Create the immediate parent directory of path (non-recursive); an already
 * existing directory is not an error. */
int bcc_make_parent_dir(const char *path) {
  int err = 0;
  char *dname, *dir;

  dname = strdup(path);
  if (dname == NULL)
    return -ENOMEM;

  dir = dirname(dname);
  if (mkdir(dir, 0700) && errno != EEXIST)
    err = -errno;

  free(dname);
  if (err)
    fprintf(stderr, "failed to mkdir %s: %s\n", path, strerror(-err));

  return err;
}

/* Verify that the parent directory of path resides on a BPF filesystem,
 * as required for pinning BPF objects. */
int bcc_check_bpffs_path(const char *path) {
  struct statfs st_fs;
  char  *dname, *dir;
  int    err = 0;

  if (path == NULL)
    return -EINVAL;

  dname = strdup(path);
  if (dname == NULL)
    return -ENOMEM;

  dir = dirname(dname);
  if (statfs(dir, &st_fs)) {
    err = -errno;
    fprintf(stderr, "failed to statfs %s: %s\n", path, strerror(-err));
  }

  free(dname);
  if (!err && st_fs.f_type != BPF_FS_MAGIC) {
    err = -EINVAL;
    fprintf(stderr, "specified path %s is not on BPF FS\n", path);
  }

  return err;
}
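
/*
 * Usage sketch (editor's addition, hypothetical BCC_USAGE_EXAMPLES guard):
 * create the pin location, confirm it lives on bpffs, then pin with
 * libbpf's bpf_obj_pin(). The path is illustrative.
 */
#ifdef BCC_USAGE_EXAMPLES
static int example_pin_map(int map_fd)
{
  const char *path = "/sys/fs/bpf/example_map";

  if (bcc_make_parent_dir(path))
    return -1;
  if (bcc_check_bpffs_path(path))
    return -1;
  return bpf_obj_pin(map_fd, path);
}
#endif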