1 // SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
2
3 /*
4 * Common eBPF ELF object loading operations.
5 *
6 * Copyright (C) 2013-2015 Alexei Starovoitov <[email protected]>
7 * Copyright (C) 2015 Wang Nan <[email protected]>
8 * Copyright (C) 2015 Huawei Inc.
9 * Copyright (C) 2017 Nicira, Inc.
10 * Copyright (C) 2019 Isovalent, Inc.
11 */
12
13 #ifndef _GNU_SOURCE
14 #define _GNU_SOURCE
15 #endif
16 #include <stdlib.h>
17 #include <stdio.h>
18 #include <stdarg.h>
19 #include <libgen.h>
20 #include <inttypes.h>
21 #include <limits.h>
22 #include <string.h>
23 #include <unistd.h>
24 #include <endian.h>
25 #include <fcntl.h>
26 #include <errno.h>
27 #include <ctype.h>
28 #include <asm/unistd.h>
29 #include <linux/err.h>
30 #include <linux/kernel.h>
31 #include <linux/bpf.h>
32 #include <linux/btf.h>
33 #include <linux/filter.h>
34 #include <linux/limits.h>
35 #include <linux/perf_event.h>
36 #include <linux/bpf_perf_event.h>
37 #include <linux/ring_buffer.h>
38 #include <sys/epoll.h>
39 #include <sys/ioctl.h>
40 #include <sys/mman.h>
41 #include <sys/stat.h>
42 #include <sys/types.h>
43 #include <sys/vfs.h>
44 #include <sys/utsname.h>
45 #include <sys/resource.h>
46 #include <libelf.h>
47 #include <gelf.h>
48 #include <zlib.h>
49
50 #include "libbpf.h"
51 #include "bpf.h"
52 #include "btf.h"
53 #include "str_error.h"
54 #include "libbpf_internal.h"
55 #include "hashmap.h"
56 #include "bpf_gen_internal.h"
57 #include "zip.h"
58
59 #ifndef BPF_FS_MAGIC
60 #define BPF_FS_MAGIC 0xcafe4a11
61 #endif
62
63 #define BPF_FS_DEFAULT_PATH "/sys/fs/bpf"
64
65 #define BPF_INSN_SZ (sizeof(struct bpf_insn))
66
67 /* vfprintf() in __base_pr() uses a nonliteral format string. It may break
68 * compilation if user enables corresponding warning. Disable it explicitly.
69 */
70 #pragma GCC diagnostic ignored "-Wformat-nonliteral"
71
72 #define __printf(a, b) __attribute__((format(printf, a, b)))
73
74 static struct bpf_map *bpf_object__add_map(struct bpf_object *obj);
75 static bool prog_is_subprog(const struct bpf_object *obj, const struct bpf_program *prog);
76 static int map_set_def_max_entries(struct bpf_map *map);
77
78 static const char * const attach_type_name[] = {
79 [BPF_CGROUP_INET_INGRESS] = "cgroup_inet_ingress",
80 [BPF_CGROUP_INET_EGRESS] = "cgroup_inet_egress",
81 [BPF_CGROUP_INET_SOCK_CREATE] = "cgroup_inet_sock_create",
82 [BPF_CGROUP_INET_SOCK_RELEASE] = "cgroup_inet_sock_release",
83 [BPF_CGROUP_SOCK_OPS] = "cgroup_sock_ops",
84 [BPF_CGROUP_DEVICE] = "cgroup_device",
85 [BPF_CGROUP_INET4_BIND] = "cgroup_inet4_bind",
86 [BPF_CGROUP_INET6_BIND] = "cgroup_inet6_bind",
87 [BPF_CGROUP_INET4_CONNECT] = "cgroup_inet4_connect",
88 [BPF_CGROUP_INET6_CONNECT] = "cgroup_inet6_connect",
89 [BPF_CGROUP_UNIX_CONNECT] = "cgroup_unix_connect",
90 [BPF_CGROUP_INET4_POST_BIND] = "cgroup_inet4_post_bind",
91 [BPF_CGROUP_INET6_POST_BIND] = "cgroup_inet6_post_bind",
92 [BPF_CGROUP_INET4_GETPEERNAME] = "cgroup_inet4_getpeername",
93 [BPF_CGROUP_INET6_GETPEERNAME] = "cgroup_inet6_getpeername",
94 [BPF_CGROUP_UNIX_GETPEERNAME] = "cgroup_unix_getpeername",
95 [BPF_CGROUP_INET4_GETSOCKNAME] = "cgroup_inet4_getsockname",
96 [BPF_CGROUP_INET6_GETSOCKNAME] = "cgroup_inet6_getsockname",
97 [BPF_CGROUP_UNIX_GETSOCKNAME] = "cgroup_unix_getsockname",
98 [BPF_CGROUP_UDP4_SENDMSG] = "cgroup_udp4_sendmsg",
99 [BPF_CGROUP_UDP6_SENDMSG] = "cgroup_udp6_sendmsg",
100 [BPF_CGROUP_UNIX_SENDMSG] = "cgroup_unix_sendmsg",
101 [BPF_CGROUP_SYSCTL] = "cgroup_sysctl",
102 [BPF_CGROUP_UDP4_RECVMSG] = "cgroup_udp4_recvmsg",
103 [BPF_CGROUP_UDP6_RECVMSG] = "cgroup_udp6_recvmsg",
104 [BPF_CGROUP_UNIX_RECVMSG] = "cgroup_unix_recvmsg",
105 [BPF_CGROUP_GETSOCKOPT] = "cgroup_getsockopt",
106 [BPF_CGROUP_SETSOCKOPT] = "cgroup_setsockopt",
107 [BPF_SK_SKB_STREAM_PARSER] = "sk_skb_stream_parser",
108 [BPF_SK_SKB_STREAM_VERDICT] = "sk_skb_stream_verdict",
109 [BPF_SK_SKB_VERDICT] = "sk_skb_verdict",
110 [BPF_SK_MSG_VERDICT] = "sk_msg_verdict",
111 [BPF_LIRC_MODE2] = "lirc_mode2",
112 [BPF_FLOW_DISSECTOR] = "flow_dissector",
113 [BPF_TRACE_RAW_TP] = "trace_raw_tp",
114 [BPF_TRACE_FENTRY] = "trace_fentry",
115 [BPF_TRACE_FEXIT] = "trace_fexit",
116 [BPF_MODIFY_RETURN] = "modify_return",
117 [BPF_LSM_MAC] = "lsm_mac",
118 [BPF_LSM_CGROUP] = "lsm_cgroup",
119 [BPF_SK_LOOKUP] = "sk_lookup",
120 [BPF_TRACE_ITER] = "trace_iter",
121 [BPF_XDP_DEVMAP] = "xdp_devmap",
122 [BPF_XDP_CPUMAP] = "xdp_cpumap",
123 [BPF_XDP] = "xdp",
124 [BPF_SK_REUSEPORT_SELECT] = "sk_reuseport_select",
125 [BPF_SK_REUSEPORT_SELECT_OR_MIGRATE] = "sk_reuseport_select_or_migrate",
126 [BPF_PERF_EVENT] = "perf_event",
127 [BPF_TRACE_KPROBE_MULTI] = "trace_kprobe_multi",
128 [BPF_STRUCT_OPS] = "struct_ops",
129 [BPF_NETFILTER] = "netfilter",
130 [BPF_TCX_INGRESS] = "tcx_ingress",
131 [BPF_TCX_EGRESS] = "tcx_egress",
132 [BPF_TRACE_UPROBE_MULTI] = "trace_uprobe_multi",
133 [BPF_NETKIT_PRIMARY] = "netkit_primary",
134 [BPF_NETKIT_PEER] = "netkit_peer",
135 };
136
137 static const char * const link_type_name[] = {
138 [BPF_LINK_TYPE_UNSPEC] = "unspec",
139 [BPF_LINK_TYPE_RAW_TRACEPOINT] = "raw_tracepoint",
140 [BPF_LINK_TYPE_TRACING] = "tracing",
141 [BPF_LINK_TYPE_CGROUP] = "cgroup",
142 [BPF_LINK_TYPE_ITER] = "iter",
143 [BPF_LINK_TYPE_NETNS] = "netns",
144 [BPF_LINK_TYPE_XDP] = "xdp",
145 [BPF_LINK_TYPE_PERF_EVENT] = "perf_event",
146 [BPF_LINK_TYPE_KPROBE_MULTI] = "kprobe_multi",
147 [BPF_LINK_TYPE_STRUCT_OPS] = "struct_ops",
148 [BPF_LINK_TYPE_NETFILTER] = "netfilter",
149 [BPF_LINK_TYPE_TCX] = "tcx",
150 [BPF_LINK_TYPE_UPROBE_MULTI] = "uprobe_multi",
151 [BPF_LINK_TYPE_NETKIT] = "netkit",
152 };
153
154 static const char * const map_type_name[] = {
155 [BPF_MAP_TYPE_UNSPEC] = "unspec",
156 [BPF_MAP_TYPE_HASH] = "hash",
157 [BPF_MAP_TYPE_ARRAY] = "array",
158 [BPF_MAP_TYPE_PROG_ARRAY] = "prog_array",
159 [BPF_MAP_TYPE_PERF_EVENT_ARRAY] = "perf_event_array",
160 [BPF_MAP_TYPE_PERCPU_HASH] = "percpu_hash",
161 [BPF_MAP_TYPE_PERCPU_ARRAY] = "percpu_array",
162 [BPF_MAP_TYPE_STACK_TRACE] = "stack_trace",
163 [BPF_MAP_TYPE_CGROUP_ARRAY] = "cgroup_array",
164 [BPF_MAP_TYPE_LRU_HASH] = "lru_hash",
165 [BPF_MAP_TYPE_LRU_PERCPU_HASH] = "lru_percpu_hash",
166 [BPF_MAP_TYPE_LPM_TRIE] = "lpm_trie",
167 [BPF_MAP_TYPE_ARRAY_OF_MAPS] = "array_of_maps",
168 [BPF_MAP_TYPE_HASH_OF_MAPS] = "hash_of_maps",
169 [BPF_MAP_TYPE_DEVMAP] = "devmap",
170 [BPF_MAP_TYPE_DEVMAP_HASH] = "devmap_hash",
171 [BPF_MAP_TYPE_SOCKMAP] = "sockmap",
172 [BPF_MAP_TYPE_CPUMAP] = "cpumap",
173 [BPF_MAP_TYPE_XSKMAP] = "xskmap",
174 [BPF_MAP_TYPE_SOCKHASH] = "sockhash",
175 [BPF_MAP_TYPE_CGROUP_STORAGE] = "cgroup_storage",
176 [BPF_MAP_TYPE_REUSEPORT_SOCKARRAY] = "reuseport_sockarray",
177 [BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE] = "percpu_cgroup_storage",
178 [BPF_MAP_TYPE_QUEUE] = "queue",
179 [BPF_MAP_TYPE_STACK] = "stack",
180 [BPF_MAP_TYPE_SK_STORAGE] = "sk_storage",
181 [BPF_MAP_TYPE_STRUCT_OPS] = "struct_ops",
182 [BPF_MAP_TYPE_RINGBUF] = "ringbuf",
183 [BPF_MAP_TYPE_INODE_STORAGE] = "inode_storage",
184 [BPF_MAP_TYPE_TASK_STORAGE] = "task_storage",
185 [BPF_MAP_TYPE_BLOOM_FILTER] = "bloom_filter",
186 [BPF_MAP_TYPE_USER_RINGBUF] = "user_ringbuf",
187 [BPF_MAP_TYPE_CGRP_STORAGE] = "cgrp_storage",
188 [BPF_MAP_TYPE_ARENA] = "arena",
189 };
190
191 static const char * const prog_type_name[] = {
192 [BPF_PROG_TYPE_UNSPEC] = "unspec",
193 [BPF_PROG_TYPE_SOCKET_FILTER] = "socket_filter",
194 [BPF_PROG_TYPE_KPROBE] = "kprobe",
195 [BPF_PROG_TYPE_SCHED_CLS] = "sched_cls",
196 [BPF_PROG_TYPE_SCHED_ACT] = "sched_act",
197 [BPF_PROG_TYPE_TRACEPOINT] = "tracepoint",
198 [BPF_PROG_TYPE_XDP] = "xdp",
199 [BPF_PROG_TYPE_PERF_EVENT] = "perf_event",
200 [BPF_PROG_TYPE_CGROUP_SKB] = "cgroup_skb",
201 [BPF_PROG_TYPE_CGROUP_SOCK] = "cgroup_sock",
202 [BPF_PROG_TYPE_LWT_IN] = "lwt_in",
203 [BPF_PROG_TYPE_LWT_OUT] = "lwt_out",
204 [BPF_PROG_TYPE_LWT_XMIT] = "lwt_xmit",
205 [BPF_PROG_TYPE_SOCK_OPS] = "sock_ops",
206 [BPF_PROG_TYPE_SK_SKB] = "sk_skb",
207 [BPF_PROG_TYPE_CGROUP_DEVICE] = "cgroup_device",
208 [BPF_PROG_TYPE_SK_MSG] = "sk_msg",
209 [BPF_PROG_TYPE_RAW_TRACEPOINT] = "raw_tracepoint",
210 [BPF_PROG_TYPE_CGROUP_SOCK_ADDR] = "cgroup_sock_addr",
211 [BPF_PROG_TYPE_LWT_SEG6LOCAL] = "lwt_seg6local",
212 [BPF_PROG_TYPE_LIRC_MODE2] = "lirc_mode2",
213 [BPF_PROG_TYPE_SK_REUSEPORT] = "sk_reuseport",
214 [BPF_PROG_TYPE_FLOW_DISSECTOR] = "flow_dissector",
215 [BPF_PROG_TYPE_CGROUP_SYSCTL] = "cgroup_sysctl",
216 [BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE] = "raw_tracepoint_writable",
217 [BPF_PROG_TYPE_CGROUP_SOCKOPT] = "cgroup_sockopt",
218 [BPF_PROG_TYPE_TRACING] = "tracing",
219 [BPF_PROG_TYPE_STRUCT_OPS] = "struct_ops",
220 [BPF_PROG_TYPE_EXT] = "ext",
221 [BPF_PROG_TYPE_LSM] = "lsm",
222 [BPF_PROG_TYPE_SK_LOOKUP] = "sk_lookup",
223 [BPF_PROG_TYPE_SYSCALL] = "syscall",
224 [BPF_PROG_TYPE_NETFILTER] = "netfilter",
225 };
226
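/* Default print callback: debug-level messages are dropped, everything else goes to stderr. */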
227 static int __base_pr(enum libbpf_print_level level, const char *format,
228 va_list args)
229 {
230 if (level == LIBBPF_DEBUG)
231 return 0;
232
233 return vfprintf(stderr, format, args);
234 }
235
236 static libbpf_print_fn_t __libbpf_pr = __base_pr;
237
238 libbpf_print_fn_t libbpf_set_print(libbpf_print_fn_t fn)
239 {
240 libbpf_print_fn_t old_print_fn;
241
242 old_print_fn = __atomic_exchange_n(&__libbpf_pr, fn, __ATOMIC_RELAXED);
243
244 return old_print_fn;
245 }
246
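/* Route a message through the currently registered print callback, preserving the caller's errno. */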
247 __printf(2, 3)
248 void libbpf_print(enum libbpf_print_level level, const char *format, ...)
249 {
250 va_list args;
251 int old_errno;
252 libbpf_print_fn_t print_fn;
253
254 print_fn = __atomic_load_n(&__libbpf_pr, __ATOMIC_RELAXED);
255 if (!print_fn)
256 return;
257
258 old_errno = errno;
259
260 va_start(args, format);
261 __libbpf_pr(level, format, args);
262 va_end(args);
263
264 errno = old_errno;
265 }
266
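/* On -EPERM while running as root, hint that RLIMIT_MEMLOCK ('ulimit -l') is likely too low. */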
267 static void pr_perm_msg(int err)
268 {
269 struct rlimit limit;
270 char buf[100];
271
272 if (err != -EPERM || geteuid() != 0)
273 return;
274
275 err = getrlimit(RLIMIT_MEMLOCK, &limit);
276 if (err)
277 return;
278
279 if (limit.rlim_cur == RLIM_INFINITY)
280 return;
281
282 if (limit.rlim_cur < 1024)
283 snprintf(buf, sizeof(buf), "%zu bytes", (size_t)limit.rlim_cur);
284 else if (limit.rlim_cur < 1024*1024)
285 snprintf(buf, sizeof(buf), "%.1f KiB", (double)limit.rlim_cur / 1024);
286 else
287 snprintf(buf, sizeof(buf), "%.1f MiB", (double)limit.rlim_cur / (1024*1024));
288
289 pr_warn("permission error while running as root; try raising 'ulimit -l'? current value: %s\n",
290 buf);
291 }
292
293 #define STRERR_BUFSIZE 128
294
295 /* Copied from tools/perf/util/util.h */
296 #ifndef zfree
297 # define zfree(ptr) ({ free(*ptr); *ptr = NULL; })
298 #endif
299
300 #ifndef zclose
301 # define zclose(fd) ({ \
302 int ___err = 0; \
303 if ((fd) >= 0) \
304 ___err = close((fd)); \
305 fd = -1; \
306 ___err; })
307 #endif
308
309 static inline __u64 ptr_to_u64(const void *ptr)
310 {
311 return (__u64) (unsigned long) ptr;
312 }
313
314 int libbpf_set_strict_mode(enum libbpf_strict_mode mode)
315 {
316 /* as of v1.0 libbpf_set_strict_mode() is a no-op */
317 return 0;
318 }
319
320 __u32 libbpf_major_version(void)
321 {
322 return LIBBPF_MAJOR_VERSION;
323 }
324
325 __u32 libbpf_minor_version(void)
326 {
327 return LIBBPF_MINOR_VERSION;
328 }
329
330 const char *libbpf_version_string(void)
331 {
332 #define __S(X) #X
333 #define _S(X) __S(X)
334 return "v" _S(LIBBPF_MAJOR_VERSION) "." _S(LIBBPF_MINOR_VERSION);
335 #undef _S
336 #undef __S
337 }
338
339 enum reloc_type {
340 RELO_LD64,
341 RELO_CALL,
342 RELO_DATA,
343 RELO_EXTERN_LD64,
344 RELO_EXTERN_CALL,
345 RELO_SUBPROG_ADDR,
346 RELO_CORE,
347 };
348
349 struct reloc_desc {
350 enum reloc_type type;
351 int insn_idx;
352 union {
353 const struct bpf_core_relo *core_relo; /* used when type == RELO_CORE */
354 struct {
355 int map_idx;
356 int sym_off;
357 int ext_idx;
358 };
359 };
360 };
361
362 /* stored as sec_def->cookie for all libbpf-supported SEC()s */
363 enum sec_def_flags {
364 SEC_NONE = 0,
365 /* expected_attach_type is optional, if kernel doesn't support that */
366 SEC_EXP_ATTACH_OPT = 1,
367 /* legacy, only used by libbpf_get_type_names() and
368 * libbpf_attach_type_by_name(), not used by libbpf itself at all.
369 * This used to be associated with cgroup (and a few other) BPF programs
370 * that were attachable through BPF_PROG_ATTACH command. Pretty
371 * meaningless nowadays, though.
372 */
373 SEC_ATTACHABLE = 2,
374 SEC_ATTACHABLE_OPT = SEC_ATTACHABLE | SEC_EXP_ATTACH_OPT,
375 /* attachment target is specified through BTF ID in either kernel or
376 * other BPF program's BTF object
377 */
378 SEC_ATTACH_BTF = 4,
379 /* BPF program type allows sleeping/blocking in kernel */
380 SEC_SLEEPABLE = 8,
381 /* BPF program supports non-linear XDP buffers */
382 SEC_XDP_FRAGS = 16,
383 /* Setup proper attach type for usdt probes. */
384 SEC_USDT = 32,
385 };
386
387 struct bpf_sec_def {
388 char *sec;
389 enum bpf_prog_type prog_type;
390 enum bpf_attach_type expected_attach_type;
391 long cookie;
392 int handler_id;
393
394 libbpf_prog_setup_fn_t prog_setup_fn;
395 libbpf_prog_prepare_load_fn_t prog_prepare_load_fn;
396 libbpf_prog_attach_fn_t prog_attach_fn;
397 };
398
399 /*
400 * bpf_prog should be a better name but it has been used in
401 * linux/filter.h.
402 */
403 struct bpf_program {
404 char *name;
405 char *sec_name;
406 size_t sec_idx;
407 const struct bpf_sec_def *sec_def;
408 /* this program's instruction offset (in number of instructions)
409 * within its containing ELF section
410 */
411 size_t sec_insn_off;
412 /* number of original instructions in ELF section belonging to this
413 * program, not taking into account subprogram instructions possibly
414 * appended later during relocation
415 */
416 size_t sec_insn_cnt;
417 /* Offset (in number of instructions) of the start of instruction
418 * belonging to this BPF program within its containing main BPF
419 * program. For the entry-point (main) BPF program, this is always
420 * zero. For a sub-program, this gets reset before each of the main BPF
421 * programs is processed and relocated, and is used to determine
422 * whether sub-program was already appended to the main program, and
423 * if yes, at which instruction offset.
424 */
425 size_t sub_insn_off;
426
427 /* instructions that belong to BPF program; insns[0] is located at
428 * sec_insn_off instruction within its ELF section in ELF file, so
429 * when mapping ELF file instruction index to the local instruction,
430 * one needs to subtract sec_insn_off; and vice versa.
431 */
432 struct bpf_insn *insns;
433 /* actual number of instructions in this BPF program's image; for
434 * entry-point BPF programs this includes the size of main program
435 * itself plus all the used sub-programs, appended at the end
436 */
437 size_t insns_cnt;
438
439 struct reloc_desc *reloc_desc;
440 int nr_reloc;
441
442 /* BPF verifier log settings */
443 char *log_buf;
444 size_t log_size;
445 __u32 log_level;
446
447 struct bpf_object *obj;
448
449 int fd;
450 bool autoload;
451 bool autoattach;
452 bool sym_global;
453 bool mark_btf_static;
454 enum bpf_prog_type type;
455 enum bpf_attach_type expected_attach_type;
456 int exception_cb_idx;
457
458 int prog_ifindex;
459 __u32 attach_btf_obj_fd;
460 __u32 attach_btf_id;
461 __u32 attach_prog_fd;
462
463 void *func_info;
464 __u32 func_info_rec_size;
465 __u32 func_info_cnt;
466
467 void *line_info;
468 __u32 line_info_rec_size;
469 __u32 line_info_cnt;
470 __u32 prog_flags;
471 };
472
473 struct bpf_struct_ops {
474 const char *tname;
475 const struct btf_type *type;
476 struct bpf_program **progs;
477 __u32 *kern_func_off;
478 /* e.g. struct tcp_congestion_ops in bpf_prog's btf format */
479 void *data;
480 /* e.g. struct bpf_struct_ops_tcp_congestion_ops in
481 * btf_vmlinux's format.
482 * struct bpf_struct_ops_tcp_congestion_ops {
483 * [... some other kernel fields ...]
484 * struct tcp_congestion_ops data;
485 * }
486 * kern_vdata-size == sizeof(struct bpf_struct_ops_tcp_congestion_ops)
487 * bpf_map__init_kern_struct_ops() will populate the "kern_vdata"
488 * from "data".
489 */
490 void *kern_vdata;
491 __u32 type_id;
492 };
493
494 #define DATA_SEC ".data"
495 #define BSS_SEC ".bss"
496 #define RODATA_SEC ".rodata"
497 #define KCONFIG_SEC ".kconfig"
498 #define KSYMS_SEC ".ksyms"
499 #define STRUCT_OPS_SEC ".struct_ops"
500 #define STRUCT_OPS_LINK_SEC ".struct_ops.link"
501 #define ARENA_SEC ".addr_space.1"
502
503 enum libbpf_map_type {
504 LIBBPF_MAP_UNSPEC,
505 LIBBPF_MAP_DATA,
506 LIBBPF_MAP_BSS,
507 LIBBPF_MAP_RODATA,
508 LIBBPF_MAP_KCONFIG,
509 };
510
511 struct bpf_map_def {
512 unsigned int type;
513 unsigned int key_size;
514 unsigned int value_size;
515 unsigned int max_entries;
516 unsigned int map_flags;
517 };
518
519 struct bpf_map {
520 struct bpf_object *obj;
521 char *name;
522 /* real_name is defined for special internal maps (.rodata*,
523 * .data*, .bss, .kconfig) and preserves their original ELF section
524 * name. This is important to be able to find corresponding BTF
525 * DATASEC information.
526 */
527 char *real_name;
528 int fd;
529 int sec_idx;
530 size_t sec_offset;
531 int map_ifindex;
532 int inner_map_fd;
533 struct bpf_map_def def;
534 __u32 numa_node;
535 __u32 btf_var_idx;
536 int mod_btf_fd;
537 __u32 btf_key_type_id;
538 __u32 btf_value_type_id;
539 __u32 btf_vmlinux_value_type_id;
540 enum libbpf_map_type libbpf_type;
541 void *mmaped;
542 struct bpf_struct_ops *st_ops;
543 struct bpf_map *inner_map;
544 void **init_slots;
545 int init_slots_sz;
546 char *pin_path;
547 bool pinned;
548 bool reused;
549 bool autocreate;
550 __u64 map_extra;
551 };
552
553 enum extern_type {
554 EXT_UNKNOWN,
555 EXT_KCFG,
556 EXT_KSYM,
557 };
558
559 enum kcfg_type {
560 KCFG_UNKNOWN,
561 KCFG_CHAR,
562 KCFG_BOOL,
563 KCFG_INT,
564 KCFG_TRISTATE,
565 KCFG_CHAR_ARR,
566 };
567
568 struct extern_desc {
569 enum extern_type type;
570 int sym_idx;
571 int btf_id;
572 int sec_btf_id;
573 const char *name;
574 char *essent_name;
575 bool is_set;
576 bool is_weak;
577 union {
578 struct {
579 enum kcfg_type type;
580 int sz;
581 int align;
582 int data_off;
583 bool is_signed;
584 } kcfg;
585 struct {
586 unsigned long long addr;
587
588 /* target btf_id of the corresponding kernel var. */
589 int kernel_btf_obj_fd;
590 int kernel_btf_id;
591
592 /* local btf_id of the ksym extern's type. */
593 __u32 type_id;
594 /* BTF fd index to be patched in for insn->off, this is
595 * 0 for vmlinux BTF, index in obj->fd_array for module
596 * BTF
597 */
598 __s16 btf_fd_idx;
599 } ksym;
600 };
601 };
602
603 struct module_btf {
604 struct btf *btf;
605 char *name;
606 __u32 id;
607 int fd;
608 int fd_array_idx;
609 };
610
611 enum sec_type {
612 SEC_UNUSED = 0,
613 SEC_RELO,
614 SEC_BSS,
615 SEC_DATA,
616 SEC_RODATA,
617 SEC_ST_OPS,
618 };
619
620 struct elf_sec_desc {
621 enum sec_type sec_type;
622 Elf64_Shdr *shdr;
623 Elf_Data *data;
624 };
625
626 struct elf_state {
627 int fd;
628 const void *obj_buf;
629 size_t obj_buf_sz;
630 Elf *elf;
631 Elf64_Ehdr *ehdr;
632 Elf_Data *symbols;
633 Elf_Data *arena_data;
634 size_t shstrndx; /* section index for section name strings */
635 size_t strtabidx;
636 struct elf_sec_desc *secs;
637 size_t sec_cnt;
638 int btf_maps_shndx;
639 __u32 btf_maps_sec_btf_id;
640 int text_shndx;
641 int symbols_shndx;
642 bool has_st_ops;
643 int arena_data_shndx;
644 };
645
646 struct usdt_manager;
647
648 struct bpf_object {
649 char name[BPF_OBJ_NAME_LEN];
650 char license[64];
651 __u32 kern_version;
652
653 struct bpf_program *programs;
654 size_t nr_programs;
655 struct bpf_map *maps;
656 size_t nr_maps;
657 size_t maps_cap;
658
659 char *kconfig;
660 struct extern_desc *externs;
661 int nr_extern;
662 int kconfig_map_idx;
663
664 bool loaded;
665 bool has_subcalls;
666 bool has_rodata;
667
668 struct bpf_gen *gen_loader;
669
670 /* Information when doing ELF related work. Only valid if efile.elf is not NULL */
671 struct elf_state efile;
672
673 struct btf *btf;
674 struct btf_ext *btf_ext;
675
676 /* Parse and load BTF vmlinux if any of the programs in the object need
677 * it at load time.
678 */
679 struct btf *btf_vmlinux;
680 /* Path to the custom BTF to be used for BPF CO-RE relocations as an
681 * override for vmlinux BTF.
682 */
683 char *btf_custom_path;
684 /* vmlinux BTF override for CO-RE relocations */
685 struct btf *btf_vmlinux_override;
686 /* Lazily initialized kernel module BTFs */
687 struct module_btf *btf_modules;
688 bool btf_modules_loaded;
689 size_t btf_module_cnt;
690 size_t btf_module_cap;
691
692 /* optional log settings passed to BPF_BTF_LOAD and BPF_PROG_LOAD commands */
693 char *log_buf;
694 size_t log_size;
695 __u32 log_level;
696
697 int *fd_array;
698 size_t fd_array_cap;
699 size_t fd_array_cnt;
700
701 struct usdt_manager *usdt_man;
702
703 struct bpf_map *arena_map;
704 void *arena_data;
705 size_t arena_data_sz;
706
707 struct kern_feature_cache *feat_cache;
708 char *token_path;
709 int token_fd;
710
711 char path[];
712 };
713
714 static const char *elf_sym_str(const struct bpf_object *obj, size_t off);
715 static const char *elf_sec_str(const struct bpf_object *obj, size_t off);
716 static Elf_Scn *elf_sec_by_idx(const struct bpf_object *obj, size_t idx);
717 static Elf_Scn *elf_sec_by_name(const struct bpf_object *obj, const char *name);
718 static Elf64_Shdr *elf_sec_hdr(const struct bpf_object *obj, Elf_Scn *scn);
719 static const char *elf_sec_name(const struct bpf_object *obj, Elf_Scn *scn);
720 static Elf_Data *elf_sec_data(const struct bpf_object *obj, Elf_Scn *scn);
721 static Elf64_Sym *elf_sym_by_idx(const struct bpf_object *obj, size_t idx);
722 static Elf64_Rel *elf_rel_by_idx(Elf_Data *data, size_t idx);
723
724 void bpf_program__unload(struct bpf_program *prog)
725 {
726 if (!prog)
727 return;
728
729 zclose(prog->fd);
730
731 zfree(&prog->func_info);
732 zfree(&prog->line_info);
733 }
734
735 static void bpf_program__exit(struct bpf_program *prog)
736 {
737 if (!prog)
738 return;
739
740 bpf_program__unload(prog);
741 zfree(&prog->name);
742 zfree(&prog->sec_name);
743 zfree(&prog->insns);
744 zfree(&prog->reloc_desc);
745
746 prog->nr_reloc = 0;
747 prog->insns_cnt = 0;
748 prog->sec_idx = -1;
749 }
750
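/* true if insn is a BPF_CALL to another BPF subprogram (BPF_PSEUDO_CALL), as opposed to a helper call */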
751 static bool insn_is_subprog_call(const struct bpf_insn *insn)
752 {
753 return BPF_CLASS(insn->code) == BPF_JMP &&
754 BPF_OP(insn->code) == BPF_CALL &&
755 BPF_SRC(insn->code) == BPF_K &&
756 insn->src_reg == BPF_PSEUDO_CALL &&
757 insn->dst_reg == 0 &&
758 insn->off == 0;
759 }
760
761 static bool is_call_insn(const struct bpf_insn *insn)
762 {
763 return insn->code == (BPF_JMP | BPF_CALL);
764 }
765
766 static bool insn_is_pseudo_func(struct bpf_insn *insn)
767 {
768 return is_ldimm64_insn(insn) && insn->src_reg == BPF_PSEUDO_FUNC;
769 }
770
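/* Initialize a single bpf_program from insn_data_sz bytes of instructions found at sec_off within its ELF section. */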
771 static int
772 bpf_object__init_prog(struct bpf_object *obj, struct bpf_program *prog,
773 const char *name, size_t sec_idx, const char *sec_name,
774 size_t sec_off, void *insn_data, size_t insn_data_sz)
775 {
776 if (insn_data_sz == 0 || insn_data_sz % BPF_INSN_SZ || sec_off % BPF_INSN_SZ) {
777 pr_warn("sec '%s': corrupted program '%s', offset %zu, size %zu\n",
778 sec_name, name, sec_off, insn_data_sz);
779 return -EINVAL;
780 }
781
782 memset(prog, 0, sizeof(*prog));
783 prog->obj = obj;
784
785 prog->sec_idx = sec_idx;
786 prog->sec_insn_off = sec_off / BPF_INSN_SZ;
787 prog->sec_insn_cnt = insn_data_sz / BPF_INSN_SZ;
788 /* insns_cnt can later be increased by appending used subprograms */
789 prog->insns_cnt = prog->sec_insn_cnt;
790
791 prog->type = BPF_PROG_TYPE_UNSPEC;
792 prog->fd = -1;
793 prog->exception_cb_idx = -1;
794
795 /* libbpf's convention for SEC("?abc...") is that it's just like
796 * SEC("abc...") but the corresponding bpf_program starts out with
797 * autoload set to false.
798 */
799 if (sec_name[0] == '?') {
800 prog->autoload = false;
801 /* from now on forget there was ? in section name */
802 sec_name++;
803 } else {
804 prog->autoload = true;
805 }
806
807 prog->autoattach = true;
808
809 /* inherit object's log_level */
810 prog->log_level = obj->log_level;
811
812 prog->sec_name = strdup(sec_name);
813 if (!prog->sec_name)
814 goto errout;
815
816 prog->name = strdup(name);
817 if (!prog->name)
818 goto errout;
819
820 prog->insns = malloc(insn_data_sz);
821 if (!prog->insns)
822 goto errout;
823 memcpy(prog->insns, insn_data, insn_data_sz);
824
825 return 0;
826 errout:
827 pr_warn("sec '%s': failed to allocate memory for prog '%s'\n", sec_name, name);
828 bpf_program__exit(prog);
829 return -ENOMEM;
830 }
831
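/* Walk FUNC symbols belonging to an ELF section and create one bpf_program for each such function. */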
832 static int
833 bpf_object__add_programs(struct bpf_object *obj, Elf_Data *sec_data,
834 const char *sec_name, int sec_idx)
835 {
836 Elf_Data *symbols = obj->efile.symbols;
837 struct bpf_program *prog, *progs;
838 void *data = sec_data->d_buf;
839 size_t sec_sz = sec_data->d_size, sec_off, prog_sz, nr_syms;
840 int nr_progs, err, i;
841 const char *name;
842 Elf64_Sym *sym;
843
844 progs = obj->programs;
845 nr_progs = obj->nr_programs;
846 nr_syms = symbols->d_size / sizeof(Elf64_Sym);
847
848 for (i = 0; i < nr_syms; i++) {
849 sym = elf_sym_by_idx(obj, i);
850
851 if (sym->st_shndx != sec_idx)
852 continue;
853 if (ELF64_ST_TYPE(sym->st_info) != STT_FUNC)
854 continue;
855
856 prog_sz = sym->st_size;
857 sec_off = sym->st_value;
858
859 name = elf_sym_str(obj, sym->st_name);
860 if (!name) {
861 pr_warn("sec '%s': failed to get symbol name for offset %zu\n",
862 sec_name, sec_off);
863 return -LIBBPF_ERRNO__FORMAT;
864 }
865
866 if (sec_off + prog_sz > sec_sz) {
867 pr_warn("sec '%s': program at offset %zu crosses section boundary\n",
868 sec_name, sec_off);
869 return -LIBBPF_ERRNO__FORMAT;
870 }
871
872 if (sec_idx != obj->efile.text_shndx && ELF64_ST_BIND(sym->st_info) == STB_LOCAL) {
873 pr_warn("sec '%s': program '%s' is static and not supported\n", sec_name, name);
874 return -ENOTSUP;
875 }
876
877 pr_debug("sec '%s': found program '%s' at insn offset %zu (%zu bytes), code size %zu insns (%zu bytes)\n",
878 sec_name, name, sec_off / BPF_INSN_SZ, sec_off, prog_sz / BPF_INSN_SZ, prog_sz);
879
880 progs = libbpf_reallocarray(progs, nr_progs + 1, sizeof(*progs));
881 if (!progs) {
882 /*
883 * In this case the original obj->programs
884 * is still valid, so there is no need for special
885 * treatment in bpf_object__close().
886 */
887 pr_warn("sec '%s': failed to alloc memory for new program '%s'\n",
888 sec_name, name);
889 return -ENOMEM;
890 }
891 obj->programs = progs;
892
893 prog = &progs[nr_progs];
894
895 err = bpf_object__init_prog(obj, prog, name, sec_idx, sec_name,
896 sec_off, data + sec_off, prog_sz);
897 if (err)
898 return err;
899
900 if (ELF64_ST_BIND(sym->st_info) != STB_LOCAL)
901 prog->sym_global = true;
902
903 /* if function is a global/weak symbol, but has restricted
904 * (STV_HIDDEN or STV_INTERNAL) visibility, mark its BTF FUNC
905 * as static to enable more permissive BPF verification mode
906 * with more outside context available to BPF verifier
907 */
908 if (prog->sym_global && (ELF64_ST_VISIBILITY(sym->st_other) == STV_HIDDEN
909 || ELF64_ST_VISIBILITY(sym->st_other) == STV_INTERNAL))
910 prog->mark_btf_static = true;
911
912 nr_progs++;
913 obj->nr_programs = nr_progs;
914 }
915
916 return 0;
917 }
918
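/* Find a struct/union member located at the given bit offset. */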
919 static const struct btf_member *
920 find_member_by_offset(const struct btf_type *t, __u32 bit_offset)
921 {
922 struct btf_member *m;
923 int i;
924
925 for (i = 0, m = btf_members(t); i < btf_vlen(t); i++, m++) {
926 if (btf_member_bit_offset(t, i) == bit_offset)
927 return m;
928 }
929
930 return NULL;
931 }
932
933 static const struct btf_member *
934 find_member_by_name(const struct btf *btf, const struct btf_type *t,
935 const char *name)
936 {
937 struct btf_member *m;
938 int i;
939
940 for (i = 0, m = btf_members(t); i < btf_vlen(t); i++, m++) {
941 if (!strcmp(btf__name_by_offset(btf, m->name_off), name))
942 return m;
943 }
944
945 return NULL;
946 }
947
948 static int find_ksym_btf_id(struct bpf_object *obj, const char *ksym_name,
949 __u16 kind, struct btf **res_btf,
950 struct module_btf **res_mod_btf);
951
952 #define STRUCT_OPS_VALUE_PREFIX "bpf_struct_ops_"
953 static int find_btf_by_prefix_kind(const struct btf *btf, const char *prefix,
954 const char *name, __u32 kind);
955
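/* For a struct_ops type name, look up the kernel's struct type, its
 * STRUCT_OPS_VALUE_PREFIX-ed "map value" wrapper type, and the wrapper
 * member that embeds the struct_ops data.
 */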
956 static int
957 find_struct_ops_kern_types(struct bpf_object *obj, const char *tname_raw,
958 struct module_btf **mod_btf,
959 const struct btf_type **type, __u32 *type_id,
960 const struct btf_type **vtype, __u32 *vtype_id,
961 const struct btf_member **data_member)
962 {
963 const struct btf_type *kern_type, *kern_vtype;
964 const struct btf_member *kern_data_member;
965 struct btf *btf;
966 __s32 kern_vtype_id, kern_type_id;
967 char tname[256];
968 __u32 i;
969
970 snprintf(tname, sizeof(tname), "%.*s",
971 (int)bpf_core_essential_name_len(tname_raw), tname_raw);
972
973 kern_type_id = find_ksym_btf_id(obj, tname, BTF_KIND_STRUCT,
974 &btf, mod_btf);
975 if (kern_type_id < 0) {
976 pr_warn("struct_ops init_kern: struct %s is not found in kernel BTF\n",
977 tname);
978 return kern_type_id;
979 }
980 kern_type = btf__type_by_id(btf, kern_type_id);
981
982 /* Find the corresponding "map_value" type that will be used
983 * in map_update(BPF_MAP_TYPE_STRUCT_OPS). For example,
984 * find "struct bpf_struct_ops_tcp_congestion_ops" from the
985 * btf_vmlinux.
986 */
987 kern_vtype_id = find_btf_by_prefix_kind(btf, STRUCT_OPS_VALUE_PREFIX,
988 tname, BTF_KIND_STRUCT);
989 if (kern_vtype_id < 0) {
990 pr_warn("struct_ops init_kern: struct %s%s is not found in kernel BTF\n",
991 STRUCT_OPS_VALUE_PREFIX, tname);
992 return kern_vtype_id;
993 }
994 kern_vtype = btf__type_by_id(btf, kern_vtype_id);
995
996 /* Find "struct tcp_congestion_ops" from
997 * struct bpf_struct_ops_tcp_congestion_ops {
998 * [ ... ]
999 * struct tcp_congestion_ops data;
1000 * }
1001 */
1002 kern_data_member = btf_members(kern_vtype);
1003 for (i = 0; i < btf_vlen(kern_vtype); i++, kern_data_member++) {
1004 if (kern_data_member->type == kern_type_id)
1005 break;
1006 }
1007 if (i == btf_vlen(kern_vtype)) {
1008 pr_warn("struct_ops init_kern: struct %s data is not found in struct %s%s\n",
1009 tname, STRUCT_OPS_VALUE_PREFIX, tname);
1010 return -EINVAL;
1011 }
1012
1013 *type = kern_type;
1014 *type_id = kern_type_id;
1015 *vtype = kern_vtype;
1016 *vtype_id = kern_vtype_id;
1017 *data_member = kern_data_member;
1018
1019 return 0;
1020 }
1021
1022 static bool bpf_map__is_struct_ops(const struct bpf_map *map)
1023 {
1024 return map->def.type == BPF_MAP_TYPE_STRUCT_OPS;
1025 }
1026
1027 static bool is_valid_st_ops_program(struct bpf_object *obj,
1028 const struct bpf_program *prog)
1029 {
1030 int i;
1031
1032 for (i = 0; i < obj->nr_programs; i++) {
1033 if (&obj->programs[i] == prog)
1034 return prog->type == BPF_PROG_TYPE_STRUCT_OPS;
1035 }
1036
1037 return false;
1038 }
1039
1040 /* For each struct_ops program P, referenced from some struct_ops map M,
1041 * enable P.autoload if there are Ms for which M.autocreate is true,
1042 * disable P.autoload if for all Ms M.autocreate is false.
1043 * Don't change P.autoload for programs that are not referenced from any maps.
1044 */
1045 static int bpf_object_adjust_struct_ops_autoload(struct bpf_object *obj)
1046 {
1047 struct bpf_program *prog, *slot_prog;
1048 struct bpf_map *map;
1049 int i, j, k, vlen;
1050
1051 for (i = 0; i < obj->nr_programs; ++i) {
1052 int should_load = false;
1053 int use_cnt = 0;
1054
1055 prog = &obj->programs[i];
1056 if (prog->type != BPF_PROG_TYPE_STRUCT_OPS)
1057 continue;
1058
1059 for (j = 0; j < obj->nr_maps; ++j) {
1060 map = &obj->maps[j];
1061 if (!bpf_map__is_struct_ops(map))
1062 continue;
1063
1064 vlen = btf_vlen(map->st_ops->type);
1065 for (k = 0; k < vlen; ++k) {
1066 slot_prog = map->st_ops->progs[k];
1067 if (prog != slot_prog)
1068 continue;
1069
1070 use_cnt++;
1071 if (map->autocreate)
1072 should_load = true;
1073 }
1074 }
1075 if (use_cnt)
1076 prog->autoload = should_load;
1077 }
1078
1079 return 0;
1080 }
1081
1082 /* Init the map's fields that depend on kern_btf */
1083 static int bpf_map__init_kern_struct_ops(struct bpf_map *map)
1084 {
1085 const struct btf_member *member, *kern_member, *kern_data_member;
1086 const struct btf_type *type, *kern_type, *kern_vtype;
1087 __u32 i, kern_type_id, kern_vtype_id, kern_data_off;
1088 struct bpf_object *obj = map->obj;
1089 const struct btf *btf = obj->btf;
1090 struct bpf_struct_ops *st_ops;
1091 const struct btf *kern_btf;
1092 struct module_btf *mod_btf;
1093 void *data, *kern_data;
1094 const char *tname;
1095 int err;
1096
1097 st_ops = map->st_ops;
1098 type = st_ops->type;
1099 tname = st_ops->tname;
1100 err = find_struct_ops_kern_types(obj, tname, &mod_btf,
1101 &kern_type, &kern_type_id,
1102 &kern_vtype, &kern_vtype_id,
1103 &kern_data_member);
1104 if (err)
1105 return err;
1106
1107 kern_btf = mod_btf ? mod_btf->btf : obj->btf_vmlinux;
1108
1109 pr_debug("struct_ops init_kern %s: type_id:%u kern_type_id:%u kern_vtype_id:%u\n",
1110 map->name, st_ops->type_id, kern_type_id, kern_vtype_id);
1111
1112 map->mod_btf_fd = mod_btf ? mod_btf->fd : -1;
1113 map->def.value_size = kern_vtype->size;
1114 map->btf_vmlinux_value_type_id = kern_vtype_id;
1115
1116 st_ops->kern_vdata = calloc(1, kern_vtype->size);
1117 if (!st_ops->kern_vdata)
1118 return -ENOMEM;
1119
1120 data = st_ops->data;
1121 kern_data_off = kern_data_member->offset / 8;
1122 kern_data = st_ops->kern_vdata + kern_data_off;
1123
1124 member = btf_members(type);
1125 for (i = 0; i < btf_vlen(type); i++, member++) {
1126 const struct btf_type *mtype, *kern_mtype;
1127 __u32 mtype_id, kern_mtype_id;
1128 void *mdata, *kern_mdata;
1129 struct bpf_program *prog;
1130 __s64 msize, kern_msize;
1131 __u32 moff, kern_moff;
1132 __u32 kern_member_idx;
1133 const char *mname;
1134
1135 mname = btf__name_by_offset(btf, member->name_off);
1136 moff = member->offset / 8;
1137 mdata = data + moff;
1138 msize = btf__resolve_size(btf, member->type);
1139 if (msize < 0) {
1140 pr_warn("struct_ops init_kern %s: failed to resolve the size of member %s\n",
1141 map->name, mname);
1142 return msize;
1143 }
1144
1145 kern_member = find_member_by_name(kern_btf, kern_type, mname);
1146 if (!kern_member) {
1147 if (!libbpf_is_mem_zeroed(mdata, msize)) {
1148 pr_warn("struct_ops init_kern %s: Cannot find member %s in kernel BTF\n",
1149 map->name, mname);
1150 return -ENOTSUP;
1151 }
1152
1153 if (st_ops->progs[i]) {
1154 /* If we had declaratively set struct_ops callback, we need to
1155 * force its autoload to false, because it doesn't have
1156 * a chance of succeeding from POV of the current struct_ops map.
1157 * If this program is still referenced somewhere else, though,
1158 * then bpf_object_adjust_struct_ops_autoload() will update its
1159 * autoload accordingly.
1160 */
1161 st_ops->progs[i]->autoload = false;
1162 st_ops->progs[i] = NULL;
1163 }
1164
1165 /* Skip all-zero/NULL fields if they are not present in the kernel BTF */
1166 pr_info("struct_ops %s: member %s not found in kernel, skipping it as it's set to zero\n",
1167 map->name, mname);
1168 continue;
1169 }
1170
1171 kern_member_idx = kern_member - btf_members(kern_type);
1172 if (btf_member_bitfield_size(type, i) ||
1173 btf_member_bitfield_size(kern_type, kern_member_idx)) {
1174 pr_warn("struct_ops init_kern %s: bitfield %s is not supported\n",
1175 map->name, mname);
1176 return -ENOTSUP;
1177 }
1178
1179 kern_moff = kern_member->offset / 8;
1180 kern_mdata = kern_data + kern_moff;
1181
1182 mtype = skip_mods_and_typedefs(btf, member->type, &mtype_id);
1183 kern_mtype = skip_mods_and_typedefs(kern_btf, kern_member->type,
1184 &kern_mtype_id);
1185 if (BTF_INFO_KIND(mtype->info) !=
1186 BTF_INFO_KIND(kern_mtype->info)) {
1187 pr_warn("struct_ops init_kern %s: Unmatched member type %s %u != %u(kernel)\n",
1188 map->name, mname, BTF_INFO_KIND(mtype->info),
1189 BTF_INFO_KIND(kern_mtype->info));
1190 return -ENOTSUP;
1191 }
1192
1193 if (btf_is_ptr(mtype)) {
1194 prog = *(void **)mdata;
1195 /* just like for !kern_member case above, reset declaratively
1196 * set (at compile time) program's autoload to false,
1197 * if user replaced it with another program or NULL
1198 */
1199 if (st_ops->progs[i] && st_ops->progs[i] != prog)
1200 st_ops->progs[i]->autoload = false;
1201
1202 /* Update the value from the shadow type */
1203 st_ops->progs[i] = prog;
1204 if (!prog)
1205 continue;
1206
1207 if (!is_valid_st_ops_program(obj, prog)) {
1208 pr_warn("struct_ops init_kern %s: member %s is not a struct_ops program\n",
1209 map->name, mname);
1210 return -ENOTSUP;
1211 }
1212
1213 kern_mtype = skip_mods_and_typedefs(kern_btf,
1214 kern_mtype->type,
1215 &kern_mtype_id);
1216
1217 /* mtype->type must be a func_proto which was
1218 * guaranteed in bpf_object__collect_st_ops_relos(),
1219 * so only check kern_mtype for func_proto here.
1220 */
1221 if (!btf_is_func_proto(kern_mtype)) {
1222 pr_warn("struct_ops init_kern %s: kernel member %s is not a func ptr\n",
1223 map->name, mname);
1224 return -ENOTSUP;
1225 }
1226
1227 if (mod_btf)
1228 prog->attach_btf_obj_fd = mod_btf->fd;
1229
1230 /* if we haven't yet processed this BPF program, record proper
1231 * attach_btf_id and member_idx
1232 */
1233 if (!prog->attach_btf_id) {
1234 prog->attach_btf_id = kern_type_id;
1235 prog->expected_attach_type = kern_member_idx;
1236 }
1237
1238 /* struct_ops BPF prog can be re-used between multiple
1239 * .struct_ops & .struct_ops.link as long as it's the
1240 * same struct_ops struct definition and the same
1241 * function pointer field
1242 */
1243 if (prog->attach_btf_id != kern_type_id) {
1244 pr_warn("struct_ops init_kern %s func ptr %s: invalid reuse of prog %s in sec %s with type %u: attach_btf_id %u != kern_type_id %u\n",
1245 map->name, mname, prog->name, prog->sec_name, prog->type,
1246 prog->attach_btf_id, kern_type_id);
1247 return -EINVAL;
1248 }
1249 if (prog->expected_attach_type != kern_member_idx) {
1250 pr_warn("struct_ops init_kern %s func ptr %s: invalid reuse of prog %s in sec %s with type %u: expected_attach_type %u != kern_member_idx %u\n",
1251 map->name, mname, prog->name, prog->sec_name, prog->type,
1252 prog->expected_attach_type, kern_member_idx);
1253 return -EINVAL;
1254 }
1255
1256 st_ops->kern_func_off[i] = kern_data_off + kern_moff;
1257
1258 pr_debug("struct_ops init_kern %s: func ptr %s is set to prog %s from data(+%u) to kern_data(+%u)\n",
1259 map->name, mname, prog->name, moff,
1260 kern_moff);
1261
1262 continue;
1263 }
1264
1265 kern_msize = btf__resolve_size(kern_btf, kern_mtype_id);
1266 if (kern_msize < 0 || msize != kern_msize) {
1267 pr_warn("struct_ops init_kern %s: Error in size of member %s: %zd != %zd(kernel)\n",
1268 map->name, mname, (ssize_t)msize,
1269 (ssize_t)kern_msize);
1270 return -ENOTSUP;
1271 }
1272
1273 pr_debug("struct_ops init_kern %s: copy %s %u bytes from data(+%u) to kern_data(+%u)\n",
1274 map->name, mname, (unsigned int)msize,
1275 moff, kern_moff);
1276 memcpy(kern_mdata, mdata, msize);
1277 }
1278
1279 return 0;
1280 }
1281
1282 static int bpf_object__init_kern_struct_ops_maps(struct bpf_object *obj)
1283 {
1284 struct bpf_map *map;
1285 size_t i;
1286 int err;
1287
1288 for (i = 0; i < obj->nr_maps; i++) {
1289 map = &obj->maps[i];
1290
1291 if (!bpf_map__is_struct_ops(map))
1292 continue;
1293
1294 if (!map->autocreate)
1295 continue;
1296
1297 err = bpf_map__init_kern_struct_ops(map);
1298 if (err)
1299 return err;
1300 }
1301
1302 return 0;
1303 }
1304
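/* Create one BPF_MAP_TYPE_STRUCT_OPS map for each variable found in the struct_ops DATASEC. */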
1305 static int init_struct_ops_maps(struct bpf_object *obj, const char *sec_name,
1306 int shndx, Elf_Data *data)
1307 {
1308 const struct btf_type *type, *datasec;
1309 const struct btf_var_secinfo *vsi;
1310 struct bpf_struct_ops *st_ops;
1311 const char *tname, *var_name;
1312 __s32 type_id, datasec_id;
1313 const struct btf *btf;
1314 struct bpf_map *map;
1315 __u32 i;
1316
1317 if (shndx == -1)
1318 return 0;
1319
1320 btf = obj->btf;
1321 datasec_id = btf__find_by_name_kind(btf, sec_name,
1322 BTF_KIND_DATASEC);
1323 if (datasec_id < 0) {
1324 pr_warn("struct_ops init: DATASEC %s not found\n",
1325 sec_name);
1326 return -EINVAL;
1327 }
1328
1329 datasec = btf__type_by_id(btf, datasec_id);
1330 vsi = btf_var_secinfos(datasec);
1331 for (i = 0; i < btf_vlen(datasec); i++, vsi++) {
1332 type = btf__type_by_id(obj->btf, vsi->type);
1333 var_name = btf__name_by_offset(obj->btf, type->name_off);
1334
1335 type_id = btf__resolve_type(obj->btf, vsi->type);
1336 if (type_id < 0) {
1337 pr_warn("struct_ops init: Cannot resolve var type_id %u in DATASEC %s\n",
1338 vsi->type, sec_name);
1339 return -EINVAL;
1340 }
1341
1342 type = btf__type_by_id(obj->btf, type_id);
1343 tname = btf__name_by_offset(obj->btf, type->name_off);
1344 if (!tname[0]) {
1345 pr_warn("struct_ops init: anonymous type is not supported\n");
1346 return -ENOTSUP;
1347 }
1348 if (!btf_is_struct(type)) {
1349 pr_warn("struct_ops init: %s is not a struct\n", tname);
1350 return -EINVAL;
1351 }
1352
1353 map = bpf_object__add_map(obj);
1354 if (IS_ERR(map))
1355 return PTR_ERR(map);
1356
1357 map->sec_idx = shndx;
1358 map->sec_offset = vsi->offset;
1359 map->name = strdup(var_name);
1360 if (!map->name)
1361 return -ENOMEM;
1362 map->btf_value_type_id = type_id;
1363
1364 /* Follow same convention as for programs autoload:
1365 * SEC("?.struct_ops") means map is not created by default.
1366 */
1367 if (sec_name[0] == '?') {
1368 map->autocreate = false;
1369 /* from now on forget there was ? in section name */
1370 sec_name++;
1371 }
1372
1373 map->def.type = BPF_MAP_TYPE_STRUCT_OPS;
1374 map->def.key_size = sizeof(int);
1375 map->def.value_size = type->size;
1376 map->def.max_entries = 1;
1377 map->def.map_flags = strcmp(sec_name, STRUCT_OPS_LINK_SEC) == 0 ? BPF_F_LINK : 0;
1378
1379 map->st_ops = calloc(1, sizeof(*map->st_ops));
1380 if (!map->st_ops)
1381 return -ENOMEM;
1382 st_ops = map->st_ops;
1383 st_ops->data = malloc(type->size);
1384 st_ops->progs = calloc(btf_vlen(type), sizeof(*st_ops->progs));
1385 st_ops->kern_func_off = malloc(btf_vlen(type) *
1386 sizeof(*st_ops->kern_func_off));
1387 if (!st_ops->data || !st_ops->progs || !st_ops->kern_func_off)
1388 return -ENOMEM;
1389
1390 if (vsi->offset + type->size > data->d_size) {
1391 pr_warn("struct_ops init: var %s is beyond the end of DATASEC %s\n",
1392 var_name, sec_name);
1393 return -EINVAL;
1394 }
1395
1396 memcpy(st_ops->data,
1397 data->d_buf + vsi->offset,
1398 type->size);
1399 st_ops->tname = tname;
1400 st_ops->type = type;
1401 st_ops->type_id = type_id;
1402
1403 pr_debug("struct_ops init: struct %s(type_id=%u) %s found at offset %u\n",
1404 tname, type_id, var_name, vsi->offset);
1405 }
1406
1407 return 0;
1408 }
1409
1410 static int bpf_object_init_struct_ops(struct bpf_object *obj)
1411 {
1412 const char *sec_name;
1413 int sec_idx, err;
1414
1415 for (sec_idx = 0; sec_idx < obj->efile.sec_cnt; ++sec_idx) {
1416 struct elf_sec_desc *desc = &obj->efile.secs[sec_idx];
1417
1418 if (desc->sec_type != SEC_ST_OPS)
1419 continue;
1420
1421 sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, sec_idx));
1422 if (!sec_name)
1423 return -LIBBPF_ERRNO__FORMAT;
1424
1425 err = init_struct_ops_maps(obj, sec_name, sec_idx, desc->data);
1426 if (err)
1427 return err;
1428 }
1429
1430 return 0;
1431 }
1432
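/* Allocate and minimally initialize a bpf_object for the given path or in-memory ELF buffer. */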
1433 static struct bpf_object *bpf_object__new(const char *path,
1434 const void *obj_buf,
1435 size_t obj_buf_sz,
1436 const char *obj_name)
1437 {
1438 struct bpf_object *obj;
1439 char *end;
1440
1441 obj = calloc(1, sizeof(struct bpf_object) + strlen(path) + 1);
1442 if (!obj) {
1443 pr_warn("alloc memory failed for %s\n", path);
1444 return ERR_PTR(-ENOMEM);
1445 }
1446
1447 strcpy(obj->path, path);
1448 if (obj_name) {
1449 libbpf_strlcpy(obj->name, obj_name, sizeof(obj->name));
1450 } else {
1451 /* Using basename() GNU version which doesn't modify arg. */
1452 libbpf_strlcpy(obj->name, basename((void *)path), sizeof(obj->name));
1453 end = strchr(obj->name, '.');
1454 if (end)
1455 *end = 0;
1456 }
1457
1458 obj->efile.fd = -1;
1459 /*
1460 * Caller of this function should also call
1461 * bpf_object__elf_finish() after data collection to return
1462 * obj_buf to the user. If not, we should duplicate the buffer to
1463 * avoid the user freeing it before ELF processing is finished.
1464 */
1465 obj->efile.obj_buf = obj_buf;
1466 obj->efile.obj_buf_sz = obj_buf_sz;
1467 obj->efile.btf_maps_shndx = -1;
1468 obj->kconfig_map_idx = -1;
1469
1470 obj->kern_version = get_kernel_version();
1471 obj->loaded = false;
1472
1473 return obj;
1474 }
1475
1476 static void bpf_object__elf_finish(struct bpf_object *obj)
1477 {
1478 if (!obj->efile.elf)
1479 return;
1480
1481 elf_end(obj->efile.elf);
1482 obj->efile.elf = NULL;
1483 obj->efile.symbols = NULL;
1484 obj->efile.arena_data = NULL;
1485
1486 zfree(&obj->efile.secs);
1487 obj->efile.sec_cnt = 0;
1488 zclose(obj->efile.fd);
1489 obj->efile.obj_buf = NULL;
1490 obj->efile.obj_buf_sz = 0;
1491 }
1492
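/* Open the object's ELF (from file or memory buffer) and verify it is a 64-bit relocatable eBPF object. */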
1493 static int bpf_object__elf_init(struct bpf_object *obj)
1494 {
1495 Elf64_Ehdr *ehdr;
1496 int err = 0;
1497 Elf *elf;
1498
1499 if (obj->efile.elf) {
1500 pr_warn("elf: init internal error\n");
1501 return -LIBBPF_ERRNO__LIBELF;
1502 }
1503
1504 if (obj->efile.obj_buf_sz > 0) {
1505 /* obj_buf should have been validated by bpf_object__open_mem(). */
1506 elf = elf_memory((char *)obj->efile.obj_buf, obj->efile.obj_buf_sz);
1507 } else {
1508 obj->efile.fd = open(obj->path, O_RDONLY | O_CLOEXEC);
1509 if (obj->efile.fd < 0) {
1510 char errmsg[STRERR_BUFSIZE], *cp;
1511
1512 err = -errno;
1513 cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
1514 pr_warn("elf: failed to open %s: %s\n", obj->path, cp);
1515 return err;
1516 }
1517
1518 elf = elf_begin(obj->efile.fd, ELF_C_READ_MMAP, NULL);
1519 }
1520
1521 if (!elf) {
1522 pr_warn("elf: failed to open %s as ELF file: %s\n", obj->path, elf_errmsg(-1));
1523 err = -LIBBPF_ERRNO__LIBELF;
1524 goto errout;
1525 }
1526
1527 obj->efile.elf = elf;
1528
1529 if (elf_kind(elf) != ELF_K_ELF) {
1530 err = -LIBBPF_ERRNO__FORMAT;
1531 pr_warn("elf: '%s' is not a proper ELF object\n", obj->path);
1532 goto errout;
1533 }
1534
1535 if (gelf_getclass(elf) != ELFCLASS64) {
1536 err = -LIBBPF_ERRNO__FORMAT;
1537 pr_warn("elf: '%s' is not a 64-bit ELF object\n", obj->path);
1538 goto errout;
1539 }
1540
1541 obj->efile.ehdr = ehdr = elf64_getehdr(elf);
1542 if (!obj->efile.ehdr) {
1543 pr_warn("elf: failed to get ELF header from %s: %s\n", obj->path, elf_errmsg(-1));
1544 err = -LIBBPF_ERRNO__FORMAT;
1545 goto errout;
1546 }
1547
1548 if (elf_getshdrstrndx(elf, &obj->efile.shstrndx)) {
1549 pr_warn("elf: failed to get section names section index for %s: %s\n",
1550 obj->path, elf_errmsg(-1));
1551 err = -LIBBPF_ERRNO__FORMAT;
1552 goto errout;
1553 }
1554
1555 /* ELF is corrupted/truncated, avoid calling elf_strptr. */
1556 if (!elf_rawdata(elf_getscn(elf, obj->efile.shstrndx), NULL)) {
1557 pr_warn("elf: failed to get section names strings from %s: %s\n",
1558 obj->path, elf_errmsg(-1));
1559 err = -LIBBPF_ERRNO__FORMAT;
1560 goto errout;
1561 }
1562
1563 /* Old LLVM set e_machine to EM_NONE */
1564 if (ehdr->e_type != ET_REL || (ehdr->e_machine && ehdr->e_machine != EM_BPF)) {
1565 pr_warn("elf: %s is not a valid eBPF object file\n", obj->path);
1566 err = -LIBBPF_ERRNO__FORMAT;
1567 goto errout;
1568 }
1569
1570 return 0;
1571 errout:
1572 bpf_object__elf_finish(obj);
1573 return err;
1574 }
1575
1576 static int bpf_object__check_endianness(struct bpf_object *obj)
1577 {
1578 #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
1579 if (obj->efile.ehdr->e_ident[EI_DATA] == ELFDATA2LSB)
1580 return 0;
1581 #elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
1582 if (obj->efile.ehdr->e_ident[EI_DATA] == ELFDATA2MSB)
1583 return 0;
1584 #else
1585 # error "Unrecognized __BYTE_ORDER__"
1586 #endif
1587 pr_warn("elf: endianness mismatch in %s.\n", obj->path);
1588 return -LIBBPF_ERRNO__ENDIAN;
1589 }
1590
1591 static int
1592 bpf_object__init_license(struct bpf_object *obj, void *data, size_t size)
1593 {
1594 if (!data) {
1595 pr_warn("invalid license section in %s\n", obj->path);
1596 return -LIBBPF_ERRNO__FORMAT;
1597 }
1598 /* libbpf_strlcpy() only copies first N - 1 bytes, so size + 1 won't
1599 * go over allowed ELF data section buffer
1600 */
1601 libbpf_strlcpy(obj->license, data, min(size + 1, sizeof(obj->license)));
1602 pr_debug("license of %s is %s\n", obj->path, obj->license);
1603 return 0;
1604 }
1605
1606 static int
1607 bpf_object__init_kversion(struct bpf_object *obj, void *data, size_t size)
1608 {
1609 __u32 kver;
1610
1611 if (!data || size != sizeof(kver)) {
1612 pr_warn("invalid kver section in %s\n", obj->path);
1613 return -LIBBPF_ERRNO__FORMAT;
1614 }
1615 memcpy(&kver, data, sizeof(kver));
1616 obj->kern_version = kver;
1617 pr_debug("kernel version of %s is %x\n", obj->path, obj->kern_version);
1618 return 0;
1619 }
1620
1621 static bool bpf_map_type__is_map_in_map(enum bpf_map_type type)
1622 {
1623 if (type == BPF_MAP_TYPE_ARRAY_OF_MAPS ||
1624 type == BPF_MAP_TYPE_HASH_OF_MAPS)
1625 return true;
1626 return false;
1627 }
1628
1629 static int find_elf_sec_sz(const struct bpf_object *obj, const char *name, __u32 *size)
1630 {
1631 Elf_Data *data;
1632 Elf_Scn *scn;
1633
1634 if (!name)
1635 return -EINVAL;
1636
1637 scn = elf_sec_by_name(obj, name);
1638 data = elf_sec_data(obj, scn);
1639 if (data) {
1640 *size = data->d_size;
1641 return 0; /* found it */
1642 }
1643
1644 return -ENOENT;
1645 }
1646
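/* Find the global or weak STT_OBJECT symbol with the given name in the ELF symbol table. */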
1647 static Elf64_Sym *find_elf_var_sym(const struct bpf_object *obj, const char *name)
1648 {
1649 Elf_Data *symbols = obj->efile.symbols;
1650 const char *sname;
1651 size_t si;
1652
1653 for (si = 0; si < symbols->d_size / sizeof(Elf64_Sym); si++) {
1654 Elf64_Sym *sym = elf_sym_by_idx(obj, si);
1655
1656 if (ELF64_ST_TYPE(sym->st_info) != STT_OBJECT)
1657 continue;
1658
1659 if (ELF64_ST_BIND(sym->st_info) != STB_GLOBAL &&
1660 ELF64_ST_BIND(sym->st_info) != STB_WEAK)
1661 continue;
1662
1663 sname = elf_sym_str(obj, sym->st_name);
1664 if (!sname) {
1665 pr_warn("failed to get sym name string for var %s\n", name);
1666 return ERR_PTR(-EIO);
1667 }
1668 if (strcmp(name, sname) == 0)
1669 return sym;
1670 }
1671
1672 return ERR_PTR(-ENOENT);
1673 }
1674
1675 /* Some versions of Android don't provide memfd_create() in their libc
1676 * implementation, so avoid complications and just go straight to Linux
1677 * syscall.
1678 */
1679 static int sys_memfd_create(const char *name, unsigned flags)
1680 {
1681 return syscall(__NR_memfd_create, name, flags);
1682 }
1683
1684 #ifndef MFD_CLOEXEC
1685 #define MFD_CLOEXEC 0x0001U
1686 #endif
1687
1688 static int create_placeholder_fd(void)
1689 {
1690 int fd;
1691
1692 fd = ensure_good_fd(sys_memfd_create("libbpf-placeholder-fd", MFD_CLOEXEC));
1693 if (fd < 0)
1694 return -errno;
1695 return fd;
1696 }
1697
1698 static struct bpf_map *bpf_object__add_map(struct bpf_object *obj)
1699 {
1700 struct bpf_map *map;
1701 int err;
1702
1703 err = libbpf_ensure_mem((void **)&obj->maps, &obj->maps_cap,
1704 sizeof(*obj->maps), obj->nr_maps + 1);
1705 if (err)
1706 return ERR_PTR(err);
1707
1708 map = &obj->maps[obj->nr_maps++];
1709 map->obj = obj;
1710 /* Preallocate map FD without actually creating BPF map just yet.
1711 * These map FD "placeholders" will be reused later without changing
1712 * FD value when map is actually created in the kernel.
1713 *
1714 * This is useful to be able to perform BPF program relocations
1715 * without having to create BPF maps before that step. This allows us
1716 * to finalize and load BTF very late in BPF object's loading phase,
1717 * right before BPF maps have to be created and BPF programs have to
1718 * be loaded. By having these map FD placeholders we can perform all
1719 * the sanitizations, relocations, and any other adjustments before we
1720 * start creating actual BPF kernel objects (BTF, maps, progs).
1721 */
1722 map->fd = create_placeholder_fd();
1723 if (map->fd < 0)
1724 return ERR_PTR(map->fd);
1725 map->inner_map_fd = -1;
1726 map->autocreate = true;
1727
1728 return map;
1729 }
1730
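/* mmap() size for an ARRAY map: value size rounded up to 8 bytes, times max_entries, rounded up to page size. */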
1731 static size_t array_map_mmap_sz(unsigned int value_sz, unsigned int max_entries)
1732 {
1733 const long page_sz = sysconf(_SC_PAGE_SIZE);
1734 size_t map_sz;
1735
1736 map_sz = (size_t)roundup(value_sz, 8) * max_entries;
1737 map_sz = roundup(map_sz, page_sz);
1738 return map_sz;
1739 }
1740
1741 static size_t bpf_map_mmap_sz(const struct bpf_map *map)
1742 {
1743 const long page_sz = sysconf(_SC_PAGE_SIZE);
1744
1745 switch (map->def.type) {
1746 case BPF_MAP_TYPE_ARRAY:
1747 return array_map_mmap_sz(map->def.value_size, map->def.max_entries);
1748 case BPF_MAP_TYPE_ARENA:
1749 return page_sz * map->def.max_entries;
1750 default:
1751 return 0; /* not supported */
1752 }
1753 }
1754
1755 static int bpf_map_mmap_resize(struct bpf_map *map, size_t old_sz, size_t new_sz)
1756 {
1757 void *mmaped;
1758
1759 if (!map->mmaped)
1760 return -EINVAL;
1761
1762 if (old_sz == new_sz)
1763 return 0;
1764
1765 mmaped = mmap(NULL, new_sz, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0);
1766 if (mmaped == MAP_FAILED)
1767 return -errno;
1768
1769 memcpy(mmaped, map->mmaped, min(old_sz, new_sz));
1770 munmap(map->mmaped, old_sz);
1771 map->mmaped = mmaped;
1772 return 0;
1773 }
1774
1775 static char *internal_map_name(struct bpf_object *obj, const char *real_name)
1776 {
1777 char map_name[BPF_OBJ_NAME_LEN], *p;
1778 int pfx_len, sfx_len = max((size_t)7, strlen(real_name));
1779
1780 /* This is one of the more confusing parts of libbpf for various
1781 * reasons, some of which are historical. The original idea for naming
1782 * internal names was to include as much of BPF object name prefix as
1783 * possible, so that it can be distinguished from similar internal
1784 * maps of a different BPF object.
1785 * As an example, let's say we have bpf_object named 'my_object_name'
1786 * and internal map corresponding to '.rodata' ELF section. The final
1787 * map name advertised to user and to the kernel will be
1788 * 'my_objec.rodata', taking first 8 characters of object name and
1789 * entire 7 characters of '.rodata'.
1790 * Somewhat confusingly, if internal map ELF section name is shorter
1791 * than 7 characters, e.g., '.bss', we still reserve 7 characters
1792 * for the suffix, even though we only have 4 actual characters, and
1793 * resulting map will be called 'my_objec.bss', not even using all 15
1794 * characters allowed by the kernel. Oh well, at least the truncated
1795 * object name is somewhat consistent in this case. But if the map
1796 * name is '.kconfig', we'll still have entirety of '.kconfig' added
1797 * (8 chars) and thus will be left with only first 7 characters of the
1798 * object name ('my_obje'). Happy guessing, user, that the final map
1799 * name will be "my_obje.kconfig".
1800 * Now, with libbpf starting to support arbitrarily named .rodata.*
1801 * and .data.* data sections, it's possible that ELF section name is
1802 * longer than allowed 15 chars, so we now need to be careful to take
1803 * only up to 15 first characters of ELF name, taking no BPF object
1804 * name characters at all. So '.rodata.abracadabra' will result in
1805 * '.rodata.abracad' kernel and user-visible name.
1806 * We need to keep this convoluted logic intact for .data, .bss and
1807 * .rodata maps, but for new custom .data.custom and .rodata.custom
1808 * maps we use their ELF names as is, not prepending bpf_object name
1809 * in front. We still need to truncate them to 15 characters for the
1810 * kernel. Full name can be recovered for such maps by using DATASEC
1811 * BTF type associated with such map's value type, though.
1812 */
1813 if (sfx_len >= BPF_OBJ_NAME_LEN)
1814 sfx_len = BPF_OBJ_NAME_LEN - 1;
1815
1816 /* if there are two or more dots in map name, it's a custom dot map */
1817 if (strchr(real_name + 1, '.') != NULL)
1818 pfx_len = 0;
1819 else
1820 pfx_len = min((size_t)BPF_OBJ_NAME_LEN - sfx_len - 1, strlen(obj->name));
1821
1822 snprintf(map_name, sizeof(map_name), "%.*s%.*s", pfx_len, obj->name,
1823 sfx_len, real_name);
1824
1825 /* sanitise map name to characters allowed by kernel */
1826 for (p = map_name; *p && p < map_name + sizeof(map_name); p++)
1827 if (!isalnum(*p) && *p != '_' && *p != '.')
1828 *p = '_';
1829
1830 return strdup(map_name);
1831 }
1832
1833 static int
1834 map_fill_btf_type_info(struct bpf_object *obj, struct bpf_map *map);
1835
1836 /* Internal BPF map is mmap()'able only if at least one of corresponding
1837 * DATASEC's VARs are to be exposed through BPF skeleton. I.e., it's a GLOBAL
1838 * variable and it's not marked as __hidden (which turns it into, effectively,
1839 * a STATIC variable).
1840 */
1841 static bool map_is_mmapable(struct bpf_object *obj, struct bpf_map *map)
1842 {
1843 const struct btf_type *t, *vt;
1844 struct btf_var_secinfo *vsi;
1845 int i, n;
1846
1847 if (!map->btf_value_type_id)
1848 return false;
1849
1850 t = btf__type_by_id(obj->btf, map->btf_value_type_id);
1851 if (!btf_is_datasec(t))
1852 return false;
1853
1854 vsi = btf_var_secinfos(t);
1855 for (i = 0, n = btf_vlen(t); i < n; i++, vsi++) {
1856 vt = btf__type_by_id(obj->btf, vsi->type);
1857 if (!btf_is_var(vt))
1858 continue;
1859
1860 if (btf_var(vt)->linkage != BTF_VAR_STATIC)
1861 return true;
1862 }
1863
1864 return false;
1865 }
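
/* Illustrative BPF-side example for the rule above, assuming bpf_helpers.h's
 * __hidden macro (__attribute__((visibility("hidden")))):
 *
 *	int exposed_cnt;		// GLOBAL VAR -> datasec map is mmap()'able
 *	__hidden int internal_cnt;	// hidden -> treated as effectively static
 *	static int private_cnt;		// static -> does not make the map mmap()'able
 *
 * Only 'exposed_cnt' would cause the containing .data/.bss map to get
 * BPF_F_MMAPABLE and be exposed through a generated skeleton.
 */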
1866
1867 static int
1868 bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type,
1869 const char *real_name, int sec_idx, void *data, size_t data_sz)
1870 {
1871 struct bpf_map_def *def;
1872 struct bpf_map *map;
1873 size_t mmap_sz;
1874 int err;
1875
1876 map = bpf_object__add_map(obj);
1877 if (IS_ERR(map))
1878 return PTR_ERR(map);
1879
1880 map->libbpf_type = type;
1881 map->sec_idx = sec_idx;
1882 map->sec_offset = 0;
1883 map->real_name = strdup(real_name);
1884 map->name = internal_map_name(obj, real_name);
1885 if (!map->real_name || !map->name) {
1886 zfree(&map->real_name);
1887 zfree(&map->name);
1888 return -ENOMEM;
1889 }
1890
1891 def = &map->def;
1892 def->type = BPF_MAP_TYPE_ARRAY;
1893 def->key_size = sizeof(int);
1894 def->value_size = data_sz;
1895 def->max_entries = 1;
1896 def->map_flags = type == LIBBPF_MAP_RODATA || type == LIBBPF_MAP_KCONFIG
1897 ? BPF_F_RDONLY_PROG : 0;
1898
1899 /* failures are fine because of maps like .rodata.str1.1 */
1900 (void) map_fill_btf_type_info(obj, map);
1901
1902 if (map_is_mmapable(obj, map))
1903 def->map_flags |= BPF_F_MMAPABLE;
1904
1905 pr_debug("map '%s' (global data): at sec_idx %d, offset %zu, flags %x.\n",
1906 map->name, map->sec_idx, map->sec_offset, def->map_flags);
1907
1908 mmap_sz = bpf_map_mmap_sz(map);
1909 map->mmaped = mmap(NULL, mmap_sz, PROT_READ | PROT_WRITE,
1910 MAP_SHARED | MAP_ANONYMOUS, -1, 0);
1911 if (map->mmaped == MAP_FAILED) {
1912 err = -errno;
1913 map->mmaped = NULL;
1914 pr_warn("failed to alloc map '%s' content buffer: %d\n",
1915 map->name, err);
1916 zfree(&map->real_name);
1917 zfree(&map->name);
1918 return err;
1919 }
1920
1921 if (data)
1922 memcpy(map->mmaped, data, data_sz);
1923
1924 pr_debug("map %td is \"%s\"\n", map - obj->maps, map->name);
1925 return 0;
1926 }
1927
1928 static int bpf_object__init_global_data_maps(struct bpf_object *obj)
1929 {
1930 struct elf_sec_desc *sec_desc;
1931 const char *sec_name;
1932 int err = 0, sec_idx;
1933
1934 /*
1935 * Populate obj->maps with libbpf internal maps.
1936 */
1937 for (sec_idx = 1; sec_idx < obj->efile.sec_cnt; sec_idx++) {
1938 sec_desc = &obj->efile.secs[sec_idx];
1939
1940 /* Skip recognized sections with size 0. */
1941 if (!sec_desc->data || sec_desc->data->d_size == 0)
1942 continue;
1943
1944 switch (sec_desc->sec_type) {
1945 case SEC_DATA:
1946 sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, sec_idx));
1947 err = bpf_object__init_internal_map(obj, LIBBPF_MAP_DATA,
1948 sec_name, sec_idx,
1949 sec_desc->data->d_buf,
1950 sec_desc->data->d_size);
1951 break;
1952 case SEC_RODATA:
1953 obj->has_rodata = true;
1954 sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, sec_idx));
1955 err = bpf_object__init_internal_map(obj, LIBBPF_MAP_RODATA,
1956 sec_name, sec_idx,
1957 sec_desc->data->d_buf,
1958 sec_desc->data->d_size);
1959 break;
1960 case SEC_BSS:
1961 sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, sec_idx));
1962 err = bpf_object__init_internal_map(obj, LIBBPF_MAP_BSS,
1963 sec_name, sec_idx,
1964 NULL,
1965 sec_desc->data->d_size);
1966 break;
1967 default:
1968 /* skip */
1969 break;
1970 }
1971 if (err)
1972 return err;
1973 }
1974 return 0;
1975 }
1976
1977
1978 static struct extern_desc *find_extern_by_name(const struct bpf_object *obj,
1979 const void *name)
1980 {
1981 int i;
1982
1983 for (i = 0; i < obj->nr_extern; i++) {
1984 if (strcmp(obj->externs[i].name, name) == 0)
1985 return &obj->externs[i];
1986 }
1987 return NULL;
1988 }
1989
1990 static int set_kcfg_value_tri(struct extern_desc *ext, void *ext_val,
1991 char value)
1992 {
1993 switch (ext->kcfg.type) {
1994 case KCFG_BOOL:
1995 if (value == 'm') {
1996 pr_warn("extern (kcfg) '%s': value '%c' implies tristate or char type\n",
1997 ext->name, value);
1998 return -EINVAL;
1999 }
2000 *(bool *)ext_val = value == 'y' ? true : false;
2001 break;
2002 case KCFG_TRISTATE:
2003 if (value == 'y')
2004 *(enum libbpf_tristate *)ext_val = TRI_YES;
2005 else if (value == 'm')
2006 *(enum libbpf_tristate *)ext_val = TRI_MODULE;
2007 else /* value == 'n' */
2008 *(enum libbpf_tristate *)ext_val = TRI_NO;
2009 break;
2010 case KCFG_CHAR:
2011 *(char *)ext_val = value;
2012 break;
2013 case KCFG_UNKNOWN:
2014 case KCFG_INT:
2015 case KCFG_CHAR_ARR:
2016 default:
2017 pr_warn("extern (kcfg) '%s': value '%c' implies bool, tristate, or char type\n",
2018 ext->name, value);
2019 return -EINVAL;
2020 }
2021 ext->is_set = true;
2022 return 0;
2023 }
2024
2025 static int set_kcfg_value_str(struct extern_desc *ext, char *ext_val,
2026 const char *value)
2027 {
2028 size_t len;
2029
2030 if (ext->kcfg.type != KCFG_CHAR_ARR) {
2031 pr_warn("extern (kcfg) '%s': value '%s' implies char array type\n",
2032 ext->name, value);
2033 return -EINVAL;
2034 }
2035
2036 len = strlen(value);
2037 if (value[len - 1] != '"') {
2038 pr_warn("extern (kcfg) '%s': invalid string config '%s'\n",
2039 ext->name, value);
2040 return -EINVAL;
2041 }
2042
2043 /* strip quotes */
2044 len -= 2;
2045 if (len >= ext->kcfg.sz) {
2046 pr_warn("extern (kcfg) '%s': long string '%s' of (%zu bytes) truncated to %d bytes\n",
2047 ext->name, value, len, ext->kcfg.sz - 1);
2048 len = ext->kcfg.sz - 1;
2049 }
2050 memcpy(ext_val, value + 1, len);
2051 ext_val[len] = '\0';
2052 ext->is_set = true;
2053 return 0;
2054 }
2055
2056 static int parse_u64(const char *value, __u64 *res)
2057 {
2058 char *value_end;
2059 int err;
2060
2061 errno = 0;
2062 *res = strtoull(value, &value_end, 0);
2063 if (errno) {
2064 err = -errno;
2065 pr_warn("failed to parse '%s' as integer: %d\n", value, err);
2066 return err;
2067 }
2068 if (*value_end) {
2069 pr_warn("failed to parse '%s' as integer completely\n", value);
2070 return -EINVAL;
2071 }
2072 return 0;
2073 }
2074
2075 static bool is_kcfg_value_in_range(const struct extern_desc *ext, __u64 v)
2076 {
2077 int bit_sz = ext->kcfg.sz * 8;
2078
2079 if (ext->kcfg.sz == 8)
2080 return true;
2081
2082 	/* Validate that value stored in u64 fits in integer of `ext->kcfg.sz`
2083 * bytes size without any loss of information. If the target integer
2084 * is signed, we rely on the following limits of integer type of
2085 * Y bits and subsequent transformation:
2086 *
2087 * -2^(Y-1) <= X <= 2^(Y-1) - 1
2088 * 0 <= X + 2^(Y-1) <= 2^Y - 1
2089 * 0 <= X + 2^(Y-1) < 2^Y
2090 *
2091 * For unsigned target integer, check that all the (64 - Y) bits are
2092 * zero.
2093 */
2094 if (ext->kcfg.is_signed)
2095 return v + (1ULL << (bit_sz - 1)) < (1ULL << bit_sz);
2096 else
2097 return (v >> bit_sz) == 0;
2098 }
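
/* Worked example for the check above: for a signed 1-byte kcfg extern
 * (bit_sz = 8), any in-range value X in [-128, 127] stored in the u64
 * satisfies X + 2^7 in [0, 255] < 2^8, while e.g. 128 yields 128 + 128 = 256,
 * which is not < 256 and is rejected. For an unsigned 2-byte extern,
 * (v >> 16) == 0 accepts 0..65535 and rejects anything larger.
 */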
2099
2100 static int set_kcfg_value_num(struct extern_desc *ext, void *ext_val,
2101 __u64 value)
2102 {
2103 if (ext->kcfg.type != KCFG_INT && ext->kcfg.type != KCFG_CHAR &&
2104 ext->kcfg.type != KCFG_BOOL) {
2105 pr_warn("extern (kcfg) '%s': value '%llu' implies integer, char, or boolean type\n",
2106 ext->name, (unsigned long long)value);
2107 return -EINVAL;
2108 }
2109 if (ext->kcfg.type == KCFG_BOOL && value > 1) {
2110 pr_warn("extern (kcfg) '%s': value '%llu' isn't boolean compatible\n",
2111 ext->name, (unsigned long long)value);
2112 return -EINVAL;
2114 	}
2115 if (!is_kcfg_value_in_range(ext, value)) {
2116 pr_warn("extern (kcfg) '%s': value '%llu' doesn't fit in %d bytes\n",
2117 ext->name, (unsigned long long)value, ext->kcfg.sz);
2118 return -ERANGE;
2119 }
2120 switch (ext->kcfg.sz) {
2121 case 1:
2122 *(__u8 *)ext_val = value;
2123 break;
2124 case 2:
2125 *(__u16 *)ext_val = value;
2126 break;
2127 case 4:
2128 *(__u32 *)ext_val = value;
2129 break;
2130 case 8:
2131 *(__u64 *)ext_val = value;
2132 break;
2133 default:
2134 return -EINVAL;
2135 }
2136 ext->is_set = true;
2137 return 0;
2138 }
2139
2140 static int bpf_object__process_kconfig_line(struct bpf_object *obj,
2141 char *buf, void *data)
2142 {
2143 struct extern_desc *ext;
2144 char *sep, *value;
2145 int len, err = 0;
2146 void *ext_val;
2147 __u64 num;
2148
2149 if (!str_has_pfx(buf, "CONFIG_"))
2150 return 0;
2151
2152 sep = strchr(buf, '=');
2153 if (!sep) {
2154 pr_warn("failed to parse '%s': no separator\n", buf);
2155 return -EINVAL;
2156 }
2157
2158 /* Trim ending '\n' */
2159 len = strlen(buf);
2160 if (buf[len - 1] == '\n')
2161 buf[len - 1] = '\0';
2162 /* Split on '=' and ensure that a value is present. */
2163 *sep = '\0';
2164 if (!sep[1]) {
2165 *sep = '=';
2166 pr_warn("failed to parse '%s': no value\n", buf);
2167 return -EINVAL;
2168 }
2169
2170 ext = find_extern_by_name(obj, buf);
2171 if (!ext || ext->is_set)
2172 return 0;
2173
2174 ext_val = data + ext->kcfg.data_off;
2175 value = sep + 1;
2176
2177 switch (*value) {
2178 case 'y': case 'n': case 'm':
2179 err = set_kcfg_value_tri(ext, ext_val, *value);
2180 break;
2181 case '"':
2182 err = set_kcfg_value_str(ext, ext_val, value);
2183 break;
2184 default:
2185 /* assume integer */
2186 err = parse_u64(value, &num);
2187 if (err) {
2188 pr_warn("extern (kcfg) '%s': value '%s' isn't a valid integer\n", ext->name, value);
2189 return err;
2190 }
2191 if (ext->kcfg.type != KCFG_INT && ext->kcfg.type != KCFG_CHAR) {
2192 pr_warn("extern (kcfg) '%s': value '%s' implies integer type\n", ext->name, value);
2193 return -EINVAL;
2194 }
2195 err = set_kcfg_value_num(ext, ext_val, num);
2196 break;
2197 }
2198 if (err)
2199 return err;
2200 pr_debug("extern (kcfg) '%s': set to %s\n", ext->name, value);
2201 return 0;
2202 }
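
/* Illustrative BPF-side usage, assuming bpf_helpers.h's __kconfig attribute;
 * the externs below would be filled in from Kconfig lines such as
 * "CONFIG_HZ=250", "CONFIG_MODULES=y" or "CONFIG_DEFAULT_HOSTNAME=\"(none)\""
 * handled by this function:
 *
 *	extern int CONFIG_HZ __kconfig;
 *	extern enum libbpf_tristate CONFIG_MODULES __kconfig __weak;
 *	extern char CONFIG_DEFAULT_HOSTNAME[8] __kconfig __weak;
 */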
2203
2204 static int bpf_object__read_kconfig_file(struct bpf_object *obj, void *data)
2205 {
2206 char buf[PATH_MAX];
2207 struct utsname uts;
2208 int len, err = 0;
2209 gzFile file;
2210
2211 uname(&uts);
2212 len = snprintf(buf, PATH_MAX, "/boot/config-%s", uts.release);
2213 if (len < 0)
2214 return -EINVAL;
2215 else if (len >= PATH_MAX)
2216 return -ENAMETOOLONG;
2217
2218 /* gzopen also accepts uncompressed files. */
2219 file = gzopen(buf, "re");
2220 if (!file)
2221 file = gzopen("/proc/config.gz", "re");
2222
2223 if (!file) {
2224 pr_warn("failed to open system Kconfig\n");
2225 return -ENOENT;
2226 }
2227
2228 while (gzgets(file, buf, sizeof(buf))) {
2229 err = bpf_object__process_kconfig_line(obj, buf, data);
2230 if (err) {
2231 pr_warn("error parsing system Kconfig line '%s': %d\n",
2232 buf, err);
2233 goto out;
2234 }
2235 }
2236
2237 out:
2238 gzclose(file);
2239 return err;
2240 }
2241
2242 static int bpf_object__read_kconfig_mem(struct bpf_object *obj,
2243 const char *config, void *data)
2244 {
2245 char buf[PATH_MAX];
2246 int err = 0;
2247 FILE *file;
2248
2249 file = fmemopen((void *)config, strlen(config), "r");
2250 if (!file) {
2251 err = -errno;
2252 pr_warn("failed to open in-memory Kconfig: %d\n", err);
2253 return err;
2254 }
2255
2256 while (fgets(buf, sizeof(buf), file)) {
2257 err = bpf_object__process_kconfig_line(obj, buf, data);
2258 if (err) {
2259 pr_warn("error parsing in-memory Kconfig line '%s': %d\n",
2260 buf, err);
2261 break;
2262 }
2263 }
2264
2265 fclose(file);
2266 return err;
2267 }
2268
2269 static int bpf_object__init_kconfig_map(struct bpf_object *obj)
2270 {
2271 struct extern_desc *last_ext = NULL, *ext;
2272 size_t map_sz;
2273 int i, err;
2274
2275 for (i = 0; i < obj->nr_extern; i++) {
2276 ext = &obj->externs[i];
2277 if (ext->type == EXT_KCFG)
2278 last_ext = ext;
2279 }
2280
2281 if (!last_ext)
2282 return 0;
2283
2284 map_sz = last_ext->kcfg.data_off + last_ext->kcfg.sz;
2285 err = bpf_object__init_internal_map(obj, LIBBPF_MAP_KCONFIG,
2286 ".kconfig", obj->efile.symbols_shndx,
2287 NULL, map_sz);
2288 if (err)
2289 return err;
2290
2291 obj->kconfig_map_idx = obj->nr_maps - 1;
2292
2293 return 0;
2294 }
2295
2296 const struct btf_type *
2297 skip_mods_and_typedefs(const struct btf *btf, __u32 id, __u32 *res_id)
2298 {
2299 const struct btf_type *t = btf__type_by_id(btf, id);
2300
2301 if (res_id)
2302 *res_id = id;
2303
2304 while (btf_is_mod(t) || btf_is_typedef(t)) {
2305 if (res_id)
2306 *res_id = t->type;
2307 t = btf__type_by_id(btf, t->type);
2308 }
2309
2310 return t;
2311 }
2312
2313 static const struct btf_type *
2314 resolve_func_ptr(const struct btf *btf, __u32 id, __u32 *res_id)
2315 {
2316 const struct btf_type *t;
2317
2318 t = skip_mods_and_typedefs(btf, id, NULL);
2319 if (!btf_is_ptr(t))
2320 return NULL;
2321
2322 t = skip_mods_and_typedefs(btf, t->type, res_id);
2323
2324 return btf_is_func_proto(t) ? t : NULL;
2325 }
2326
2327 static const char *__btf_kind_str(__u16 kind)
2328 {
2329 switch (kind) {
2330 case BTF_KIND_UNKN: return "void";
2331 case BTF_KIND_INT: return "int";
2332 case BTF_KIND_PTR: return "ptr";
2333 case BTF_KIND_ARRAY: return "array";
2334 case BTF_KIND_STRUCT: return "struct";
2335 case BTF_KIND_UNION: return "union";
2336 case BTF_KIND_ENUM: return "enum";
2337 case BTF_KIND_FWD: return "fwd";
2338 case BTF_KIND_TYPEDEF: return "typedef";
2339 case BTF_KIND_VOLATILE: return "volatile";
2340 case BTF_KIND_CONST: return "const";
2341 case BTF_KIND_RESTRICT: return "restrict";
2342 case BTF_KIND_FUNC: return "func";
2343 case BTF_KIND_FUNC_PROTO: return "func_proto";
2344 case BTF_KIND_VAR: return "var";
2345 case BTF_KIND_DATASEC: return "datasec";
2346 case BTF_KIND_FLOAT: return "float";
2347 case BTF_KIND_DECL_TAG: return "decl_tag";
2348 case BTF_KIND_TYPE_TAG: return "type_tag";
2349 case BTF_KIND_ENUM64: return "enum64";
2350 default: return "unknown";
2351 }
2352 }
2353
2354 const char *btf_kind_str(const struct btf_type *t)
2355 {
2356 return __btf_kind_str(btf_kind(t));
2357 }
2358
2359 /*
2360 * Fetch integer attribute of BTF map definition. Such attributes are
2361 * represented using a pointer to an array, in which dimensionality of array
2362 * encodes specified integer value. E.g., int (*type)[BPF_MAP_TYPE_ARRAY];
2363 * encodes `type => BPF_MAP_TYPE_ARRAY` key/value pair completely using BTF
2364 * type definition, while using only sizeof(void *) space in ELF data section.
2365 */
2366 static bool get_map_field_int(const char *map_name, const struct btf *btf,
2367 const struct btf_member *m, __u32 *res)
2368 {
2369 const struct btf_type *t = skip_mods_and_typedefs(btf, m->type, NULL);
2370 const char *name = btf__name_by_offset(btf, m->name_off);
2371 const struct btf_array *arr_info;
2372 const struct btf_type *arr_t;
2373
2374 if (!btf_is_ptr(t)) {
2375 pr_warn("map '%s': attr '%s': expected PTR, got %s.\n",
2376 map_name, name, btf_kind_str(t));
2377 return false;
2378 }
2379
2380 arr_t = btf__type_by_id(btf, t->type);
2381 if (!arr_t) {
2382 pr_warn("map '%s': attr '%s': type [%u] not found.\n",
2383 map_name, name, t->type);
2384 return false;
2385 }
2386 if (!btf_is_array(arr_t)) {
2387 pr_warn("map '%s': attr '%s': expected ARRAY, got %s.\n",
2388 map_name, name, btf_kind_str(arr_t));
2389 return false;
2390 }
2391 arr_info = btf_array(arr_t);
2392 *res = arr_info->nelems;
2393 return true;
2394 }
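
/* Concrete example of the encoding described above: bpf_helpers.h defines
 *
 *	#define __uint(name, val) int (*name)[val]
 *
 * so a map member declared as __uint(max_entries, 256) becomes
 * 'int (*max_entries)[256]', and this function recovers 256 from the
 * pointed-to array's nelems while the member itself takes up only
 * sizeof(void *) bytes in the .maps section.
 */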
2395
2396 static bool get_map_field_long(const char *map_name, const struct btf *btf,
2397 const struct btf_member *m, __u64 *res)
2398 {
2399 const struct btf_type *t = skip_mods_and_typedefs(btf, m->type, NULL);
2400 const char *name = btf__name_by_offset(btf, m->name_off);
2401
2402 if (btf_is_ptr(t)) {
2403 __u32 res32;
2404 bool ret;
2405
2406 ret = get_map_field_int(map_name, btf, m, &res32);
2407 if (ret)
2408 *res = (__u64)res32;
2409 return ret;
2410 }
2411
2412 if (!btf_is_enum(t) && !btf_is_enum64(t)) {
2413 pr_warn("map '%s': attr '%s': expected ENUM or ENUM64, got %s.\n",
2414 map_name, name, btf_kind_str(t));
2415 return false;
2416 }
2417
2418 if (btf_vlen(t) != 1) {
2419 pr_warn("map '%s': attr '%s': invalid __ulong\n",
2420 map_name, name);
2421 return false;
2422 }
2423
2424 if (btf_is_enum(t)) {
2425 const struct btf_enum *e = btf_enum(t);
2426
2427 *res = e->val;
2428 } else {
2429 const struct btf_enum64 *e = btf_enum64(t);
2430
2431 *res = btf_enum64_value(e);
2432 }
2433 return true;
2434 }
2435
2436 static int pathname_concat(char *buf, size_t buf_sz, const char *path, const char *name)
2437 {
2438 int len;
2439
2440 len = snprintf(buf, buf_sz, "%s/%s", path, name);
2441 if (len < 0)
2442 return -EINVAL;
2443 if (len >= buf_sz)
2444 return -ENAMETOOLONG;
2445
2446 return 0;
2447 }
2448
2449 static int build_map_pin_path(struct bpf_map *map, const char *path)
2450 {
2451 char buf[PATH_MAX];
2452 int err;
2453
2454 if (!path)
2455 path = BPF_FS_DEFAULT_PATH;
2456
2457 err = pathname_concat(buf, sizeof(buf), path, bpf_map__name(map));
2458 if (err)
2459 return err;
2460
2461 return bpf_map__set_pin_path(map, buf);
2462 }
2463
2464 /* should match definition in bpf_helpers.h */
2465 enum libbpf_pin_type {
2466 LIBBPF_PIN_NONE,
2467 /* PIN_BY_NAME: pin maps by name (in /sys/fs/bpf by default) */
2468 LIBBPF_PIN_BY_NAME,
2469 };
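
/* Illustrative map definition requesting pinning by name; unless
 * pin_root_path is overridden at open time, build_map_pin_path() above would
 * pin it at /sys/fs/bpf/pinned_map:
 *
 *	struct {
 *		__uint(type, BPF_MAP_TYPE_HASH);
 *		__uint(max_entries, 1024);
 *		__type(key, __u32);
 *		__type(value, __u64);
 *		__uint(pinning, LIBBPF_PIN_BY_NAME);
 *	} pinned_map SEC(".maps");
 */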
2470
2471 int parse_btf_map_def(const char *map_name, struct btf *btf,
2472 const struct btf_type *def_t, bool strict,
2473 struct btf_map_def *map_def, struct btf_map_def *inner_def)
2474 {
2475 const struct btf_type *t;
2476 const struct btf_member *m;
2477 bool is_inner = inner_def == NULL;
2478 int vlen, i;
2479
2480 vlen = btf_vlen(def_t);
2481 m = btf_members(def_t);
2482 for (i = 0; i < vlen; i++, m++) {
2483 const char *name = btf__name_by_offset(btf, m->name_off);
2484
2485 if (!name) {
2486 pr_warn("map '%s': invalid field #%d.\n", map_name, i);
2487 return -EINVAL;
2488 }
2489 if (strcmp(name, "type") == 0) {
2490 if (!get_map_field_int(map_name, btf, m, &map_def->map_type))
2491 return -EINVAL;
2492 map_def->parts |= MAP_DEF_MAP_TYPE;
2493 } else if (strcmp(name, "max_entries") == 0) {
2494 if (!get_map_field_int(map_name, btf, m, &map_def->max_entries))
2495 return -EINVAL;
2496 map_def->parts |= MAP_DEF_MAX_ENTRIES;
2497 } else if (strcmp(name, "map_flags") == 0) {
2498 if (!get_map_field_int(map_name, btf, m, &map_def->map_flags))
2499 return -EINVAL;
2500 map_def->parts |= MAP_DEF_MAP_FLAGS;
2501 } else if (strcmp(name, "numa_node") == 0) {
2502 if (!get_map_field_int(map_name, btf, m, &map_def->numa_node))
2503 return -EINVAL;
2504 map_def->parts |= MAP_DEF_NUMA_NODE;
2505 } else if (strcmp(name, "key_size") == 0) {
2506 __u32 sz;
2507
2508 if (!get_map_field_int(map_name, btf, m, &sz))
2509 return -EINVAL;
2510 if (map_def->key_size && map_def->key_size != sz) {
2511 pr_warn("map '%s': conflicting key size %u != %u.\n",
2512 map_name, map_def->key_size, sz);
2513 return -EINVAL;
2514 }
2515 map_def->key_size = sz;
2516 map_def->parts |= MAP_DEF_KEY_SIZE;
2517 } else if (strcmp(name, "key") == 0) {
2518 __s64 sz;
2519
2520 t = btf__type_by_id(btf, m->type);
2521 if (!t) {
2522 pr_warn("map '%s': key type [%d] not found.\n",
2523 map_name, m->type);
2524 return -EINVAL;
2525 }
2526 if (!btf_is_ptr(t)) {
2527 pr_warn("map '%s': key spec is not PTR: %s.\n",
2528 map_name, btf_kind_str(t));
2529 return -EINVAL;
2530 }
2531 sz = btf__resolve_size(btf, t->type);
2532 if (sz < 0) {
2533 pr_warn("map '%s': can't determine key size for type [%u]: %zd.\n",
2534 map_name, t->type, (ssize_t)sz);
2535 return sz;
2536 }
2537 if (map_def->key_size && map_def->key_size != sz) {
2538 pr_warn("map '%s': conflicting key size %u != %zd.\n",
2539 map_name, map_def->key_size, (ssize_t)sz);
2540 return -EINVAL;
2541 }
2542 map_def->key_size = sz;
2543 map_def->key_type_id = t->type;
2544 map_def->parts |= MAP_DEF_KEY_SIZE | MAP_DEF_KEY_TYPE;
2545 } else if (strcmp(name, "value_size") == 0) {
2546 __u32 sz;
2547
2548 if (!get_map_field_int(map_name, btf, m, &sz))
2549 return -EINVAL;
2550 if (map_def->value_size && map_def->value_size != sz) {
2551 pr_warn("map '%s': conflicting value size %u != %u.\n",
2552 map_name, map_def->value_size, sz);
2553 return -EINVAL;
2554 }
2555 map_def->value_size = sz;
2556 map_def->parts |= MAP_DEF_VALUE_SIZE;
2557 } else if (strcmp(name, "value") == 0) {
2558 __s64 sz;
2559
2560 t = btf__type_by_id(btf, m->type);
2561 if (!t) {
2562 pr_warn("map '%s': value type [%d] not found.\n",
2563 map_name, m->type);
2564 return -EINVAL;
2565 }
2566 if (!btf_is_ptr(t)) {
2567 pr_warn("map '%s': value spec is not PTR: %s.\n",
2568 map_name, btf_kind_str(t));
2569 return -EINVAL;
2570 }
2571 sz = btf__resolve_size(btf, t->type);
2572 if (sz < 0) {
2573 pr_warn("map '%s': can't determine value size for type [%u]: %zd.\n",
2574 map_name, t->type, (ssize_t)sz);
2575 return sz;
2576 }
2577 if (map_def->value_size && map_def->value_size != sz) {
2578 pr_warn("map '%s': conflicting value size %u != %zd.\n",
2579 map_name, map_def->value_size, (ssize_t)sz);
2580 return -EINVAL;
2581 }
2582 map_def->value_size = sz;
2583 map_def->value_type_id = t->type;
2584 map_def->parts |= MAP_DEF_VALUE_SIZE | MAP_DEF_VALUE_TYPE;
2585 }
2586 else if (strcmp(name, "values") == 0) {
2587 bool is_map_in_map = bpf_map_type__is_map_in_map(map_def->map_type);
2588 bool is_prog_array = map_def->map_type == BPF_MAP_TYPE_PROG_ARRAY;
2589 const char *desc = is_map_in_map ? "map-in-map inner" : "prog-array value";
2590 char inner_map_name[128];
2591 int err;
2592
2593 if (is_inner) {
2594 pr_warn("map '%s': multi-level inner maps not supported.\n",
2595 map_name);
2596 return -ENOTSUP;
2597 }
2598 if (i != vlen - 1) {
2599 pr_warn("map '%s': '%s' member should be last.\n",
2600 map_name, name);
2601 return -EINVAL;
2602 }
2603 if (!is_map_in_map && !is_prog_array) {
2604 pr_warn("map '%s': should be map-in-map or prog-array.\n",
2605 map_name);
2606 return -ENOTSUP;
2607 }
2608 if (map_def->value_size && map_def->value_size != 4) {
2609 pr_warn("map '%s': conflicting value size %u != 4.\n",
2610 map_name, map_def->value_size);
2611 return -EINVAL;
2612 }
2613 map_def->value_size = 4;
2614 t = btf__type_by_id(btf, m->type);
2615 if (!t) {
2616 pr_warn("map '%s': %s type [%d] not found.\n",
2617 map_name, desc, m->type);
2618 return -EINVAL;
2619 }
2620 if (!btf_is_array(t) || btf_array(t)->nelems) {
2621 pr_warn("map '%s': %s spec is not a zero-sized array.\n",
2622 map_name, desc);
2623 return -EINVAL;
2624 }
2625 t = skip_mods_and_typedefs(btf, btf_array(t)->type, NULL);
2626 if (!btf_is_ptr(t)) {
2627 pr_warn("map '%s': %s def is of unexpected kind %s.\n",
2628 map_name, desc, btf_kind_str(t));
2629 return -EINVAL;
2630 }
2631 t = skip_mods_and_typedefs(btf, t->type, NULL);
2632 if (is_prog_array) {
2633 if (!btf_is_func_proto(t)) {
2634 pr_warn("map '%s': prog-array value def is of unexpected kind %s.\n",
2635 map_name, btf_kind_str(t));
2636 return -EINVAL;
2637 }
2638 continue;
2639 }
2640 if (!btf_is_struct(t)) {
2641 pr_warn("map '%s': map-in-map inner def is of unexpected kind %s.\n",
2642 map_name, btf_kind_str(t));
2643 return -EINVAL;
2644 }
2645
2646 snprintf(inner_map_name, sizeof(inner_map_name), "%s.inner", map_name);
2647 err = parse_btf_map_def(inner_map_name, btf, t, strict, inner_def, NULL);
2648 if (err)
2649 return err;
2650
2651 map_def->parts |= MAP_DEF_INNER_MAP;
2652 } else if (strcmp(name, "pinning") == 0) {
2653 __u32 val;
2654
2655 if (is_inner) {
2656 pr_warn("map '%s': inner def can't be pinned.\n", map_name);
2657 return -EINVAL;
2658 }
2659 if (!get_map_field_int(map_name, btf, m, &val))
2660 return -EINVAL;
2661 if (val != LIBBPF_PIN_NONE && val != LIBBPF_PIN_BY_NAME) {
2662 pr_warn("map '%s': invalid pinning value %u.\n",
2663 map_name, val);
2664 return -EINVAL;
2665 }
2666 map_def->pinning = val;
2667 map_def->parts |= MAP_DEF_PINNING;
2668 } else if (strcmp(name, "map_extra") == 0) {
2669 __u64 map_extra;
2670
2671 if (!get_map_field_long(map_name, btf, m, &map_extra))
2672 return -EINVAL;
2673 map_def->map_extra = map_extra;
2674 map_def->parts |= MAP_DEF_MAP_EXTRA;
2675 } else {
2676 if (strict) {
2677 pr_warn("map '%s': unknown field '%s'.\n", map_name, name);
2678 return -ENOTSUP;
2679 }
2680 pr_debug("map '%s': ignoring unknown field '%s'.\n", map_name, name);
2681 }
2682 }
2683
2684 if (map_def->map_type == BPF_MAP_TYPE_UNSPEC) {
2685 pr_warn("map '%s': map type isn't specified.\n", map_name);
2686 return -EINVAL;
2687 }
2688
2689 return 0;
2690 }
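
/* Illustrative map-in-map definition handled by the 'values' branch above,
 * assuming bpf_helpers.h's __array macro (a zero-sized array of pointers to
 * the inner definition). The implicit inner map is named 'outer.inner':
 *
 *	struct inner {
 *		__uint(type, BPF_MAP_TYPE_ARRAY);
 *		__uint(max_entries, 1);
 *		__type(key, __u32);
 *		__type(value, __u64);
 *	};
 *
 *	struct {
 *		__uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
 *		__uint(max_entries, 4);
 *		__type(key, __u32);
 *		__array(values, struct inner);
 *	} outer SEC(".maps");
 */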
2691
2692 static size_t adjust_ringbuf_sz(size_t sz)
2693 {
2694 __u32 page_sz = sysconf(_SC_PAGE_SIZE);
2695 __u32 mul;
2696
2697 /* if user forgot to set any size, make sure they see error */
2698 if (sz == 0)
2699 return 0;
2700 /* Kernel expects BPF_MAP_TYPE_RINGBUF's max_entries to be
2701 * a power-of-2 multiple of kernel's page size. If user diligently
2702 	 * satisfied these conditions, pass the size through.
2703 */
2704 if ((sz % page_sz) == 0 && is_pow_of_2(sz / page_sz))
2705 return sz;
2706
2707 /* Otherwise find closest (page_sz * power_of_2) product bigger than
2708 * user-set size to satisfy both user size request and kernel
2709 * requirements and substitute correct max_entries for map creation.
2710 */
2711 for (mul = 1; mul <= UINT_MAX / page_sz; mul <<= 1) {
2712 if (mul * page_sz > sz)
2713 return mul * page_sz;
2714 }
2715
2716 /* if it's impossible to satisfy the conditions (i.e., user size is
2717 * very close to UINT_MAX but is not a power-of-2 multiple of
2718 * page_size) then just return original size and let kernel reject it
2719 */
2720 return sz;
2721 }
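
/* Worked examples, assuming a 4096-byte page: a requested size of 1000 is
 * rounded up to 4096; 12288 (3 pages, not a power-of-2 multiple) becomes
 * 16384; 8192 already satisfies both conditions and passes through unchanged;
 * 0 is returned as-is so that map creation fails visibly in the kernel.
 */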
2722
2723 static bool map_is_ringbuf(const struct bpf_map *map)
2724 {
2725 return map->def.type == BPF_MAP_TYPE_RINGBUF ||
2726 map->def.type == BPF_MAP_TYPE_USER_RINGBUF;
2727 }
2728
2729 static void fill_map_from_def(struct bpf_map *map, const struct btf_map_def *def)
2730 {
2731 map->def.type = def->map_type;
2732 map->def.key_size = def->key_size;
2733 map->def.value_size = def->value_size;
2734 map->def.max_entries = def->max_entries;
2735 map->def.map_flags = def->map_flags;
2736 map->map_extra = def->map_extra;
2737
2738 map->numa_node = def->numa_node;
2739 map->btf_key_type_id = def->key_type_id;
2740 map->btf_value_type_id = def->value_type_id;
2741
2742 /* auto-adjust BPF ringbuf map max_entries to be a multiple of page size */
2743 if (map_is_ringbuf(map))
2744 map->def.max_entries = adjust_ringbuf_sz(map->def.max_entries);
2745
2746 if (def->parts & MAP_DEF_MAP_TYPE)
2747 pr_debug("map '%s': found type = %u.\n", map->name, def->map_type);
2748
2749 if (def->parts & MAP_DEF_KEY_TYPE)
2750 pr_debug("map '%s': found key [%u], sz = %u.\n",
2751 map->name, def->key_type_id, def->key_size);
2752 else if (def->parts & MAP_DEF_KEY_SIZE)
2753 pr_debug("map '%s': found key_size = %u.\n", map->name, def->key_size);
2754
2755 if (def->parts & MAP_DEF_VALUE_TYPE)
2756 pr_debug("map '%s': found value [%u], sz = %u.\n",
2757 map->name, def->value_type_id, def->value_size);
2758 else if (def->parts & MAP_DEF_VALUE_SIZE)
2759 pr_debug("map '%s': found value_size = %u.\n", map->name, def->value_size);
2760
2761 if (def->parts & MAP_DEF_MAX_ENTRIES)
2762 pr_debug("map '%s': found max_entries = %u.\n", map->name, def->max_entries);
2763 if (def->parts & MAP_DEF_MAP_FLAGS)
2764 pr_debug("map '%s': found map_flags = 0x%x.\n", map->name, def->map_flags);
2765 if (def->parts & MAP_DEF_MAP_EXTRA)
2766 pr_debug("map '%s': found map_extra = 0x%llx.\n", map->name,
2767 (unsigned long long)def->map_extra);
2768 if (def->parts & MAP_DEF_PINNING)
2769 pr_debug("map '%s': found pinning = %u.\n", map->name, def->pinning);
2770 if (def->parts & MAP_DEF_NUMA_NODE)
2771 pr_debug("map '%s': found numa_node = %u.\n", map->name, def->numa_node);
2772
2773 if (def->parts & MAP_DEF_INNER_MAP)
2774 pr_debug("map '%s': found inner map definition.\n", map->name);
2775 }
2776
2777 static const char *btf_var_linkage_str(__u32 linkage)
2778 {
2779 switch (linkage) {
2780 case BTF_VAR_STATIC: return "static";
2781 case BTF_VAR_GLOBAL_ALLOCATED: return "global";
2782 case BTF_VAR_GLOBAL_EXTERN: return "extern";
2783 default: return "unknown";
2784 }
2785 }
2786
2787 static int bpf_object__init_user_btf_map(struct bpf_object *obj,
2788 const struct btf_type *sec,
2789 int var_idx, int sec_idx,
2790 const Elf_Data *data, bool strict,
2791 const char *pin_root_path)
2792 {
2793 struct btf_map_def map_def = {}, inner_def = {};
2794 const struct btf_type *var, *def;
2795 const struct btf_var_secinfo *vi;
2796 const struct btf_var *var_extra;
2797 const char *map_name;
2798 struct bpf_map *map;
2799 int err;
2800
2801 vi = btf_var_secinfos(sec) + var_idx;
2802 var = btf__type_by_id(obj->btf, vi->type);
2803 var_extra = btf_var(var);
2804 map_name = btf__name_by_offset(obj->btf, var->name_off);
2805
2806 if (map_name == NULL || map_name[0] == '\0') {
2807 pr_warn("map #%d: empty name.\n", var_idx);
2808 return -EINVAL;
2809 }
2810 if ((__u64)vi->offset + vi->size > data->d_size) {
2811 pr_warn("map '%s' BTF data is corrupted.\n", map_name);
2812 return -EINVAL;
2813 }
2814 if (!btf_is_var(var)) {
2815 pr_warn("map '%s': unexpected var kind %s.\n",
2816 map_name, btf_kind_str(var));
2817 return -EINVAL;
2818 }
2819 if (var_extra->linkage != BTF_VAR_GLOBAL_ALLOCATED) {
2820 pr_warn("map '%s': unsupported map linkage %s.\n",
2821 map_name, btf_var_linkage_str(var_extra->linkage));
2822 return -EOPNOTSUPP;
2823 }
2824
2825 def = skip_mods_and_typedefs(obj->btf, var->type, NULL);
2826 if (!btf_is_struct(def)) {
2827 pr_warn("map '%s': unexpected def kind %s.\n",
2828 map_name, btf_kind_str(var));
2829 return -EINVAL;
2830 }
2831 if (def->size > vi->size) {
2832 pr_warn("map '%s': invalid def size.\n", map_name);
2833 return -EINVAL;
2834 }
2835
2836 map = bpf_object__add_map(obj);
2837 if (IS_ERR(map))
2838 return PTR_ERR(map);
2839 map->name = strdup(map_name);
2840 if (!map->name) {
2841 pr_warn("map '%s': failed to alloc map name.\n", map_name);
2842 return -ENOMEM;
2843 }
2844 map->libbpf_type = LIBBPF_MAP_UNSPEC;
2845 map->def.type = BPF_MAP_TYPE_UNSPEC;
2846 map->sec_idx = sec_idx;
2847 map->sec_offset = vi->offset;
2848 map->btf_var_idx = var_idx;
2849 pr_debug("map '%s': at sec_idx %d, offset %zu.\n",
2850 map_name, map->sec_idx, map->sec_offset);
2851
2852 err = parse_btf_map_def(map->name, obj->btf, def, strict, &map_def, &inner_def);
2853 if (err)
2854 return err;
2855
2856 fill_map_from_def(map, &map_def);
2857
2858 if (map_def.pinning == LIBBPF_PIN_BY_NAME) {
2859 err = build_map_pin_path(map, pin_root_path);
2860 if (err) {
2861 pr_warn("map '%s': couldn't build pin path.\n", map->name);
2862 return err;
2863 }
2864 }
2865
2866 if (map_def.parts & MAP_DEF_INNER_MAP) {
2867 map->inner_map = calloc(1, sizeof(*map->inner_map));
2868 if (!map->inner_map)
2869 return -ENOMEM;
2870 map->inner_map->fd = create_placeholder_fd();
2871 if (map->inner_map->fd < 0)
2872 return map->inner_map->fd;
2873 map->inner_map->sec_idx = sec_idx;
2874 map->inner_map->name = malloc(strlen(map_name) + sizeof(".inner") + 1);
2875 if (!map->inner_map->name)
2876 return -ENOMEM;
2877 sprintf(map->inner_map->name, "%s.inner", map_name);
2878
2879 fill_map_from_def(map->inner_map, &inner_def);
2880 }
2881
2882 err = map_fill_btf_type_info(obj, map);
2883 if (err)
2884 return err;
2885
2886 return 0;
2887 }
2888
2889 static int init_arena_map_data(struct bpf_object *obj, struct bpf_map *map,
2890 const char *sec_name, int sec_idx,
2891 void *data, size_t data_sz)
2892 {
2893 const long page_sz = sysconf(_SC_PAGE_SIZE);
2894 size_t mmap_sz;
2895
2896 mmap_sz = bpf_map_mmap_sz(obj->arena_map);
2897 if (roundup(data_sz, page_sz) > mmap_sz) {
2898 pr_warn("elf: sec '%s': declared ARENA map size (%zu) is too small to hold global __arena variables of size %zu\n",
2899 sec_name, mmap_sz, data_sz);
2900 return -E2BIG;
2901 }
2902
2903 obj->arena_data = malloc(data_sz);
2904 if (!obj->arena_data)
2905 return -ENOMEM;
2906 memcpy(obj->arena_data, data, data_sz);
2907 obj->arena_data_sz = data_sz;
2908
2909 /* make bpf_map__init_value() work for ARENA maps */
2910 map->mmaped = obj->arena_data;
2911
2912 return 0;
2913 }
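
/* Illustrative BPF-side usage; '__arena' here stands for the
 * address_space(1) attribute typically provided by the user's arena header
 * (an assumption, not something defined in this file). Global __arena
 * variables land in the arena data section and are backed by the single
 * declared ARENA map:
 *
 *	struct {
 *		__uint(type, BPF_MAP_TYPE_ARENA);
 *		__uint(map_flags, BPF_F_MMAPABLE);
 *		__uint(max_entries, 100);	// number of arena pages
 *	} arena SEC(".maps");
 *
 *	int __arena shared_counter;	// stored in the arena, shared with user space
 */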
2914
2915 static int bpf_object__init_user_btf_maps(struct bpf_object *obj, bool strict,
2916 const char *pin_root_path)
2917 {
2918 const struct btf_type *sec = NULL;
2919 int nr_types, i, vlen, err;
2920 const struct btf_type *t;
2921 const char *name;
2922 Elf_Data *data;
2923 Elf_Scn *scn;
2924
2925 if (obj->efile.btf_maps_shndx < 0)
2926 return 0;
2927
2928 scn = elf_sec_by_idx(obj, obj->efile.btf_maps_shndx);
2929 data = elf_sec_data(obj, scn);
2930 if (!scn || !data) {
2931 pr_warn("elf: failed to get %s map definitions for %s\n",
2932 MAPS_ELF_SEC, obj->path);
2933 return -EINVAL;
2934 }
2935
2936 nr_types = btf__type_cnt(obj->btf);
2937 for (i = 1; i < nr_types; i++) {
2938 t = btf__type_by_id(obj->btf, i);
2939 if (!btf_is_datasec(t))
2940 continue;
2941 name = btf__name_by_offset(obj->btf, t->name_off);
2942 if (strcmp(name, MAPS_ELF_SEC) == 0) {
2943 sec = t;
2944 obj->efile.btf_maps_sec_btf_id = i;
2945 break;
2946 }
2947 }
2948
2949 if (!sec) {
2950 pr_warn("DATASEC '%s' not found.\n", MAPS_ELF_SEC);
2951 return -ENOENT;
2952 }
2953
2954 vlen = btf_vlen(sec);
2955 for (i = 0; i < vlen; i++) {
2956 err = bpf_object__init_user_btf_map(obj, sec, i,
2957 obj->efile.btf_maps_shndx,
2958 data, strict,
2959 pin_root_path);
2960 if (err)
2961 return err;
2962 }
2963
2964 for (i = 0; i < obj->nr_maps; i++) {
2965 struct bpf_map *map = &obj->maps[i];
2966
2967 if (map->def.type != BPF_MAP_TYPE_ARENA)
2968 continue;
2969
2970 if (obj->arena_map) {
2971 pr_warn("map '%s': only single ARENA map is supported (map '%s' is also ARENA)\n",
2972 map->name, obj->arena_map->name);
2973 return -EINVAL;
2974 }
2975 obj->arena_map = map;
2976
2977 if (obj->efile.arena_data) {
2978 err = init_arena_map_data(obj, map, ARENA_SEC, obj->efile.arena_data_shndx,
2979 obj->efile.arena_data->d_buf,
2980 obj->efile.arena_data->d_size);
2981 if (err)
2982 return err;
2983 }
2984 }
2985 if (obj->efile.arena_data && !obj->arena_map) {
2986 pr_warn("elf: sec '%s': to use global __arena variables the ARENA map should be explicitly declared in SEC(\".maps\")\n",
2987 ARENA_SEC);
2988 return -ENOENT;
2989 }
2990
2991 return 0;
2992 }
2993
2994 static int bpf_object__init_maps(struct bpf_object *obj,
2995 const struct bpf_object_open_opts *opts)
2996 {
2997 const char *pin_root_path;
2998 bool strict;
2999 int err = 0;
3000
3001 strict = !OPTS_GET(opts, relaxed_maps, false);
3002 pin_root_path = OPTS_GET(opts, pin_root_path, NULL);
3003
3004 err = bpf_object__init_user_btf_maps(obj, strict, pin_root_path);
3005 err = err ?: bpf_object__init_global_data_maps(obj);
3006 err = err ?: bpf_object__init_kconfig_map(obj);
3007 err = err ?: bpf_object_init_struct_ops(obj);
3008
3009 return err;
3010 }
3011
3012 static bool section_have_execinstr(struct bpf_object *obj, int idx)
3013 {
3014 Elf64_Shdr *sh;
3015
3016 sh = elf_sec_hdr(obj, elf_sec_by_idx(obj, idx));
3017 if (!sh)
3018 return false;
3019
3020 return sh->sh_flags & SHF_EXECINSTR;
3021 }
3022
3023 static bool starts_with_qmark(const char *s)
3024 {
3025 return s && s[0] == '?';
3026 }
3027
3028 static bool btf_needs_sanitization(struct bpf_object *obj)
3029 {
3030 bool has_func_global = kernel_supports(obj, FEAT_BTF_GLOBAL_FUNC);
3031 bool has_datasec = kernel_supports(obj, FEAT_BTF_DATASEC);
3032 bool has_float = kernel_supports(obj, FEAT_BTF_FLOAT);
3033 bool has_func = kernel_supports(obj, FEAT_BTF_FUNC);
3034 bool has_decl_tag = kernel_supports(obj, FEAT_BTF_DECL_TAG);
3035 bool has_type_tag = kernel_supports(obj, FEAT_BTF_TYPE_TAG);
3036 bool has_enum64 = kernel_supports(obj, FEAT_BTF_ENUM64);
3037 bool has_qmark_datasec = kernel_supports(obj, FEAT_BTF_QMARK_DATASEC);
3038
3039 return !has_func || !has_datasec || !has_func_global || !has_float ||
3040 !has_decl_tag || !has_type_tag || !has_enum64 || !has_qmark_datasec;
3041 }
3042
3043 static int bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *btf)
3044 {
3045 bool has_func_global = kernel_supports(obj, FEAT_BTF_GLOBAL_FUNC);
3046 bool has_datasec = kernel_supports(obj, FEAT_BTF_DATASEC);
3047 bool has_float = kernel_supports(obj, FEAT_BTF_FLOAT);
3048 bool has_func = kernel_supports(obj, FEAT_BTF_FUNC);
3049 bool has_decl_tag = kernel_supports(obj, FEAT_BTF_DECL_TAG);
3050 bool has_type_tag = kernel_supports(obj, FEAT_BTF_TYPE_TAG);
3051 bool has_enum64 = kernel_supports(obj, FEAT_BTF_ENUM64);
3052 bool has_qmark_datasec = kernel_supports(obj, FEAT_BTF_QMARK_DATASEC);
3053 int enum64_placeholder_id = 0;
3054 struct btf_type *t;
3055 int i, j, vlen;
3056
3057 for (i = 1; i < btf__type_cnt(btf); i++) {
3058 t = (struct btf_type *)btf__type_by_id(btf, i);
3059
3060 if ((!has_datasec && btf_is_var(t)) || (!has_decl_tag && btf_is_decl_tag(t))) {
3061 /* replace VAR/DECL_TAG with INT */
3062 t->info = BTF_INFO_ENC(BTF_KIND_INT, 0, 0);
3063 /*
3064 * using size = 1 is the safest choice, 4 will be too
3065 * big and cause kernel BTF validation failure if
3066 * original variable took less than 4 bytes
3067 */
3068 t->size = 1;
3069 *(int *)(t + 1) = BTF_INT_ENC(0, 0, 8);
3070 } else if (!has_datasec && btf_is_datasec(t)) {
3071 /* replace DATASEC with STRUCT */
3072 const struct btf_var_secinfo *v = btf_var_secinfos(t);
3073 struct btf_member *m = btf_members(t);
3074 struct btf_type *vt;
3075 char *name;
3076
3077 name = (char *)btf__name_by_offset(btf, t->name_off);
3078 while (*name) {
3079 if (*name == '.' || *name == '?')
3080 *name = '_';
3081 name++;
3082 }
3083
3084 vlen = btf_vlen(t);
3085 t->info = BTF_INFO_ENC(BTF_KIND_STRUCT, 0, vlen);
3086 for (j = 0; j < vlen; j++, v++, m++) {
3087 /* order of field assignments is important */
3088 m->offset = v->offset * 8;
3089 m->type = v->type;
3090 /* preserve variable name as member name */
3091 vt = (void *)btf__type_by_id(btf, v->type);
3092 m->name_off = vt->name_off;
3093 }
3094 } else if (!has_qmark_datasec && btf_is_datasec(t) &&
3095 starts_with_qmark(btf__name_by_offset(btf, t->name_off))) {
3096 /* replace '?' prefix with '_' for DATASEC names */
3097 char *name;
3098
3099 name = (char *)btf__name_by_offset(btf, t->name_off);
3100 if (name[0] == '?')
3101 name[0] = '_';
3102 } else if (!has_func && btf_is_func_proto(t)) {
3103 /* replace FUNC_PROTO with ENUM */
3104 vlen = btf_vlen(t);
3105 t->info = BTF_INFO_ENC(BTF_KIND_ENUM, 0, vlen);
3106 t->size = sizeof(__u32); /* kernel enforced */
3107 } else if (!has_func && btf_is_func(t)) {
3108 /* replace FUNC with TYPEDEF */
3109 t->info = BTF_INFO_ENC(BTF_KIND_TYPEDEF, 0, 0);
3110 } else if (!has_func_global && btf_is_func(t)) {
3111 /* replace BTF_FUNC_GLOBAL with BTF_FUNC_STATIC */
3112 t->info = BTF_INFO_ENC(BTF_KIND_FUNC, 0, 0);
3113 } else if (!has_float && btf_is_float(t)) {
3114 /* replace FLOAT with an equally-sized empty STRUCT;
3115 * since C compilers do not accept e.g. "float" as a
3116 * valid struct name, make it anonymous
3117 */
3118 t->name_off = 0;
3119 t->info = BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 0);
3120 } else if (!has_type_tag && btf_is_type_tag(t)) {
3121 /* replace TYPE_TAG with a CONST */
3122 t->name_off = 0;
3123 t->info = BTF_INFO_ENC(BTF_KIND_CONST, 0, 0);
3124 } else if (!has_enum64 && btf_is_enum(t)) {
3125 /* clear the kflag */
3126 t->info = btf_type_info(btf_kind(t), btf_vlen(t), false);
3127 } else if (!has_enum64 && btf_is_enum64(t)) {
3128 /* replace ENUM64 with a union */
3129 struct btf_member *m;
3130
3131 if (enum64_placeholder_id == 0) {
3132 enum64_placeholder_id = btf__add_int(btf, "enum64_placeholder", 1, 0);
3133 if (enum64_placeholder_id < 0)
3134 return enum64_placeholder_id;
3135
3136 t = (struct btf_type *)btf__type_by_id(btf, i);
3137 }
3138
3139 m = btf_members(t);
3140 vlen = btf_vlen(t);
3141 t->info = BTF_INFO_ENC(BTF_KIND_UNION, 0, vlen);
3142 for (j = 0; j < vlen; j++, m++) {
3143 m->type = enum64_placeholder_id;
3144 m->offset = 0;
3145 }
3146 }
3147 }
3148
3149 return 0;
3150 }
3151
3152 static bool libbpf_needs_btf(const struct bpf_object *obj)
3153 {
3154 return obj->efile.btf_maps_shndx >= 0 ||
3155 obj->efile.has_st_ops ||
3156 obj->nr_extern > 0;
3157 }
3158
3159 static bool kernel_needs_btf(const struct bpf_object *obj)
3160 {
3161 return obj->efile.has_st_ops;
3162 }
3163
3164 static int bpf_object__init_btf(struct bpf_object *obj,
3165 Elf_Data *btf_data,
3166 Elf_Data *btf_ext_data)
3167 {
3168 int err = -ENOENT;
3169
3170 if (btf_data) {
3171 obj->btf = btf__new(btf_data->d_buf, btf_data->d_size);
3172 err = libbpf_get_error(obj->btf);
3173 if (err) {
3174 obj->btf = NULL;
3175 pr_warn("Error loading ELF section %s: %d.\n", BTF_ELF_SEC, err);
3176 goto out;
3177 }
3178 /* enforce 8-byte pointers for BPF-targeted BTFs */
3179 btf__set_pointer_size(obj->btf, 8);
3180 }
3181 if (btf_ext_data) {
3182 struct btf_ext_info *ext_segs[3];
3183 int seg_num, sec_num;
3184
3185 if (!obj->btf) {
3186 pr_debug("Ignore ELF section %s because its depending ELF section %s is not found.\n",
3187 BTF_EXT_ELF_SEC, BTF_ELF_SEC);
3188 goto out;
3189 }
3190 obj->btf_ext = btf_ext__new(btf_ext_data->d_buf, btf_ext_data->d_size);
3191 err = libbpf_get_error(obj->btf_ext);
3192 if (err) {
3193 pr_warn("Error loading ELF section %s: %d. Ignored and continue.\n",
3194 BTF_EXT_ELF_SEC, err);
3195 obj->btf_ext = NULL;
3196 goto out;
3197 }
3198
3199 /* setup .BTF.ext to ELF section mapping */
3200 ext_segs[0] = &obj->btf_ext->func_info;
3201 ext_segs[1] = &obj->btf_ext->line_info;
3202 ext_segs[2] = &obj->btf_ext->core_relo_info;
3203 for (seg_num = 0; seg_num < ARRAY_SIZE(ext_segs); seg_num++) {
3204 struct btf_ext_info *seg = ext_segs[seg_num];
3205 const struct btf_ext_info_sec *sec;
3206 const char *sec_name;
3207 Elf_Scn *scn;
3208
3209 if (seg->sec_cnt == 0)
3210 continue;
3211
3212 seg->sec_idxs = calloc(seg->sec_cnt, sizeof(*seg->sec_idxs));
3213 if (!seg->sec_idxs) {
3214 err = -ENOMEM;
3215 goto out;
3216 }
3217
3218 sec_num = 0;
3219 for_each_btf_ext_sec(seg, sec) {
3220 /* preventively increment index to avoid doing
3221 * this before every continue below
3222 */
3223 sec_num++;
3224
3225 sec_name = btf__name_by_offset(obj->btf, sec->sec_name_off);
3226 if (str_is_empty(sec_name))
3227 continue;
3228 scn = elf_sec_by_name(obj, sec_name);
3229 if (!scn)
3230 continue;
3231
3232 seg->sec_idxs[sec_num - 1] = elf_ndxscn(scn);
3233 }
3234 }
3235 }
3236 out:
3237 if (err && libbpf_needs_btf(obj)) {
3238 pr_warn("BTF is required, but is missing or corrupted.\n");
3239 return err;
3240 }
3241 return 0;
3242 }
3243
3244 static int compare_vsi_off(const void *_a, const void *_b)
3245 {
3246 const struct btf_var_secinfo *a = _a;
3247 const struct btf_var_secinfo *b = _b;
3248
3249 return a->offset - b->offset;
3250 }
3251
3252 static int btf_fixup_datasec(struct bpf_object *obj, struct btf *btf,
3253 struct btf_type *t)
3254 {
3255 __u32 size = 0, i, vars = btf_vlen(t);
3256 const char *sec_name = btf__name_by_offset(btf, t->name_off);
3257 struct btf_var_secinfo *vsi;
3258 bool fixup_offsets = false;
3259 int err;
3260
3261 if (!sec_name) {
3262 pr_debug("No name found in string section for DATASEC kind.\n");
3263 return -ENOENT;
3264 }
3265
3266 /* Extern-backing datasecs (.ksyms, .kconfig) have their size and
3267 * variable offsets set at the previous step. Further, not every
3268 * extern BTF VAR has corresponding ELF symbol preserved, so we skip
3269 * all fixups altogether for such sections and go straight to sorting
3270 * VARs within their DATASEC.
3271 */
3272 if (strcmp(sec_name, KCONFIG_SEC) == 0 || strcmp(sec_name, KSYMS_SEC) == 0)
3273 goto sort_vars;
3274
3275 /* Clang leaves DATASEC size and VAR offsets as zeroes, so we need to
3276 * fix this up. But BPF static linker already fixes this up and fills
3277 * all the sizes and offsets during static linking. So this step has
3278 * to be optional. But the STV_HIDDEN handling is non-optional for any
3279 * non-extern DATASEC, so the variable fixup loop below handles both
3280 * functions at the same time, paying the cost of BTF VAR <-> ELF
3281 * symbol matching just once.
3282 */
3283 if (t->size == 0) {
3284 err = find_elf_sec_sz(obj, sec_name, &size);
3285 if (err || !size) {
3286 pr_debug("sec '%s': failed to determine size from ELF: size %u, err %d\n",
3287 sec_name, size, err);
3288 return -ENOENT;
3289 }
3290
3291 t->size = size;
3292 fixup_offsets = true;
3293 }
3294
3295 for (i = 0, vsi = btf_var_secinfos(t); i < vars; i++, vsi++) {
3296 const struct btf_type *t_var;
3297 struct btf_var *var;
3298 const char *var_name;
3299 Elf64_Sym *sym;
3300
3301 t_var = btf__type_by_id(btf, vsi->type);
3302 if (!t_var || !btf_is_var(t_var)) {
3303 pr_debug("sec '%s': unexpected non-VAR type found\n", sec_name);
3304 return -EINVAL;
3305 }
3306
3307 var = btf_var(t_var);
3308 if (var->linkage == BTF_VAR_STATIC || var->linkage == BTF_VAR_GLOBAL_EXTERN)
3309 continue;
3310
3311 var_name = btf__name_by_offset(btf, t_var->name_off);
3312 if (!var_name) {
3313 pr_debug("sec '%s': failed to find name of DATASEC's member #%d\n",
3314 sec_name, i);
3315 return -ENOENT;
3316 }
3317
3318 sym = find_elf_var_sym(obj, var_name);
3319 if (IS_ERR(sym)) {
3320 pr_debug("sec '%s': failed to find ELF symbol for VAR '%s'\n",
3321 sec_name, var_name);
3322 return -ENOENT;
3323 }
3324
3325 if (fixup_offsets)
3326 vsi->offset = sym->st_value;
3327
3328 /* if variable is a global/weak symbol, but has restricted
3329 * (STV_HIDDEN or STV_INTERNAL) visibility, mark its BTF VAR
3330 * as static. This follows similar logic for functions (BPF
3331 * subprogs) and influences libbpf's further decisions about
3332 * whether to make global data BPF array maps as
3333 * BPF_F_MMAPABLE.
3334 */
3335 if (ELF64_ST_VISIBILITY(sym->st_other) == STV_HIDDEN
3336 || ELF64_ST_VISIBILITY(sym->st_other) == STV_INTERNAL)
3337 var->linkage = BTF_VAR_STATIC;
3338 }
3339
3340 sort_vars:
3341 qsort(btf_var_secinfos(t), vars, sizeof(*vsi), compare_vsi_off);
3342 return 0;
3343 }
3344
3345 static int bpf_object_fixup_btf(struct bpf_object *obj)
3346 {
3347 int i, n, err = 0;
3348
3349 if (!obj->btf)
3350 return 0;
3351
3352 n = btf__type_cnt(obj->btf);
3353 for (i = 1; i < n; i++) {
3354 struct btf_type *t = btf_type_by_id(obj->btf, i);
3355
3356 /* Loader needs to fix up some of the things compiler
3357 * couldn't get its hands on while emitting BTF. This
3358 * is section size and global variable offset. We use
3359 * the info from the ELF itself for this purpose.
3360 */
3361 if (btf_is_datasec(t)) {
3362 err = btf_fixup_datasec(obj, obj->btf, t);
3363 if (err)
3364 return err;
3365 }
3366 }
3367
3368 return 0;
3369 }
3370
3371 static bool prog_needs_vmlinux_btf(struct bpf_program *prog)
3372 {
3373 if (prog->type == BPF_PROG_TYPE_STRUCT_OPS ||
3374 prog->type == BPF_PROG_TYPE_LSM)
3375 return true;
3376
3377 /* BPF_PROG_TYPE_TRACING programs which do not attach to other programs
3378 * also need vmlinux BTF
3379 */
3380 if (prog->type == BPF_PROG_TYPE_TRACING && !prog->attach_prog_fd)
3381 return true;
3382
3383 return false;
3384 }
3385
3386 static bool map_needs_vmlinux_btf(struct bpf_map *map)
3387 {
3388 return bpf_map__is_struct_ops(map);
3389 }
3390
3391 static bool obj_needs_vmlinux_btf(const struct bpf_object *obj)
3392 {
3393 struct bpf_program *prog;
3394 struct bpf_map *map;
3395 int i;
3396
3397 /* CO-RE relocations need kernel BTF, only when btf_custom_path
3398 * is not specified
3399 */
3400 if (obj->btf_ext && obj->btf_ext->core_relo_info.len && !obj->btf_custom_path)
3401 return true;
3402
3403 /* Support for typed ksyms needs kernel BTF */
3404 for (i = 0; i < obj->nr_extern; i++) {
3405 const struct extern_desc *ext;
3406
3407 ext = &obj->externs[i];
3408 if (ext->type == EXT_KSYM && ext->ksym.type_id)
3409 return true;
3410 }
3411
3412 bpf_object__for_each_program(prog, obj) {
3413 if (!prog->autoload)
3414 continue;
3415 if (prog_needs_vmlinux_btf(prog))
3416 return true;
3417 }
3418
3419 bpf_object__for_each_map(map, obj) {
3420 if (map_needs_vmlinux_btf(map))
3421 return true;
3422 }
3423
3424 return false;
3425 }
3426
3427 static int bpf_object__load_vmlinux_btf(struct bpf_object *obj, bool force)
3428 {
3429 int err;
3430
3431 /* btf_vmlinux could be loaded earlier */
3432 if (obj->btf_vmlinux || obj->gen_loader)
3433 return 0;
3434
3435 if (!force && !obj_needs_vmlinux_btf(obj))
3436 return 0;
3437
3438 obj->btf_vmlinux = btf__load_vmlinux_btf();
3439 err = libbpf_get_error(obj->btf_vmlinux);
3440 if (err) {
3441 pr_warn("Error loading vmlinux BTF: %d\n", err);
3442 obj->btf_vmlinux = NULL;
3443 return err;
3444 }
3445 return 0;
3446 }
3447
3448 static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj)
3449 {
3450 struct btf *kern_btf = obj->btf;
3451 bool btf_mandatory, sanitize;
3452 int i, err = 0;
3453
3454 if (!obj->btf)
3455 return 0;
3456
3457 if (!kernel_supports(obj, FEAT_BTF)) {
3458 if (kernel_needs_btf(obj)) {
3459 err = -EOPNOTSUPP;
3460 goto report;
3461 }
3462 pr_debug("Kernel doesn't support BTF, skipping uploading it.\n");
3463 return 0;
3464 }
3465
3466 /* Even though some subprogs are global/weak, user might prefer more
3467 * permissive BPF verification process that BPF verifier performs for
3468 * static functions, taking into account more context from the caller
3469 * functions. In such case, they need to mark such subprogs with
3470 * __attribute__((visibility("hidden"))) and libbpf will adjust
3471 * corresponding FUNC BTF type to be marked as static and trigger more
3472 * involved BPF verification process.
3473 */
3474 for (i = 0; i < obj->nr_programs; i++) {
3475 struct bpf_program *prog = &obj->programs[i];
3476 struct btf_type *t;
3477 const char *name;
3478 int j, n;
3479
3480 if (!prog->mark_btf_static || !prog_is_subprog(obj, prog))
3481 continue;
3482
3483 n = btf__type_cnt(obj->btf);
3484 for (j = 1; j < n; j++) {
3485 t = btf_type_by_id(obj->btf, j);
3486 if (!btf_is_func(t) || btf_func_linkage(t) != BTF_FUNC_GLOBAL)
3487 continue;
3488
3489 name = btf__str_by_offset(obj->btf, t->name_off);
3490 if (strcmp(name, prog->name) != 0)
3491 continue;
3492
3493 t->info = btf_type_info(BTF_KIND_FUNC, BTF_FUNC_STATIC, 0);
3494 break;
3495 }
3496 }
3497
3498 sanitize = btf_needs_sanitization(obj);
3499 if (sanitize) {
3500 const void *raw_data;
3501 __u32 sz;
3502
3503 /* clone BTF to sanitize a copy and leave the original intact */
3504 raw_data = btf__raw_data(obj->btf, &sz);
3505 kern_btf = btf__new(raw_data, sz);
3506 err = libbpf_get_error(kern_btf);
3507 if (err)
3508 return err;
3509
3510 /* enforce 8-byte pointers for BPF-targeted BTFs */
3511 btf__set_pointer_size(obj->btf, 8);
3512 err = bpf_object__sanitize_btf(obj, kern_btf);
3513 if (err)
3514 return err;
3515 }
3516
3517 if (obj->gen_loader) {
3518 __u32 raw_size = 0;
3519 const void *raw_data = btf__raw_data(kern_btf, &raw_size);
3520
3521 if (!raw_data)
3522 return -ENOMEM;
3523 bpf_gen__load_btf(obj->gen_loader, raw_data, raw_size);
3524 /* Pretend to have valid FD to pass various fd >= 0 checks.
3525 * This fd == 0 will not be used with any syscall and will be reset to -1 eventually.
3526 */
3527 btf__set_fd(kern_btf, 0);
3528 } else {
3529 /* currently BPF_BTF_LOAD only supports log_level 1 */
3530 err = btf_load_into_kernel(kern_btf, obj->log_buf, obj->log_size,
3531 obj->log_level ? 1 : 0, obj->token_fd);
3532 }
3533 if (sanitize) {
3534 if (!err) {
3535 /* move fd to libbpf's BTF */
3536 btf__set_fd(obj->btf, btf__fd(kern_btf));
3537 btf__set_fd(kern_btf, -1);
3538 }
3539 btf__free(kern_btf);
3540 }
3541 report:
3542 if (err) {
3543 btf_mandatory = kernel_needs_btf(obj);
3544 pr_warn("Error loading .BTF into kernel: %d. %s\n", err,
3545 btf_mandatory ? "BTF is mandatory, can't proceed."
3546 : "BTF is optional, ignoring.");
3547 if (!btf_mandatory)
3548 err = 0;
3549 }
3550 return err;
3551 }
3552
3553 static const char *elf_sym_str(const struct bpf_object *obj, size_t off)
3554 {
3555 const char *name;
3556
3557 name = elf_strptr(obj->efile.elf, obj->efile.strtabidx, off);
3558 if (!name) {
3559 pr_warn("elf: failed to get section name string at offset %zu from %s: %s\n",
3560 off, obj->path, elf_errmsg(-1));
3561 return NULL;
3562 }
3563
3564 return name;
3565 }
3566
3567 static const char *elf_sec_str(const struct bpf_object *obj, size_t off)
3568 {
3569 const char *name;
3570
3571 name = elf_strptr(obj->efile.elf, obj->efile.shstrndx, off);
3572 if (!name) {
3573 pr_warn("elf: failed to get section name string at offset %zu from %s: %s\n",
3574 off, obj->path, elf_errmsg(-1));
3575 return NULL;
3576 }
3577
3578 return name;
3579 }
3580
3581 static Elf_Scn *elf_sec_by_idx(const struct bpf_object *obj, size_t idx)
3582 {
3583 Elf_Scn *scn;
3584
3585 scn = elf_getscn(obj->efile.elf, idx);
3586 if (!scn) {
3587 pr_warn("elf: failed to get section(%zu) from %s: %s\n",
3588 idx, obj->path, elf_errmsg(-1));
3589 return NULL;
3590 }
3591 return scn;
3592 }
3593
3594 static Elf_Scn *elf_sec_by_name(const struct bpf_object *obj, const char *name)
3595 {
3596 Elf_Scn *scn = NULL;
3597 Elf *elf = obj->efile.elf;
3598 const char *sec_name;
3599
3600 while ((scn = elf_nextscn(elf, scn)) != NULL) {
3601 sec_name = elf_sec_name(obj, scn);
3602 if (!sec_name)
3603 return NULL;
3604
3605 if (strcmp(sec_name, name) != 0)
3606 continue;
3607
3608 return scn;
3609 }
3610 return NULL;
3611 }
3612
3613 static Elf64_Shdr *elf_sec_hdr(const struct bpf_object *obj, Elf_Scn *scn)
3614 {
3615 Elf64_Shdr *shdr;
3616
3617 if (!scn)
3618 return NULL;
3619
3620 shdr = elf64_getshdr(scn);
3621 if (!shdr) {
3622 pr_warn("elf: failed to get section(%zu) header from %s: %s\n",
3623 elf_ndxscn(scn), obj->path, elf_errmsg(-1));
3624 return NULL;
3625 }
3626
3627 return shdr;
3628 }
3629
3630 static const char *elf_sec_name(const struct bpf_object *obj, Elf_Scn *scn)
3631 {
3632 const char *name;
3633 Elf64_Shdr *sh;
3634
3635 if (!scn)
3636 return NULL;
3637
3638 sh = elf_sec_hdr(obj, scn);
3639 if (!sh)
3640 return NULL;
3641
3642 name = elf_sec_str(obj, sh->sh_name);
3643 if (!name) {
3644 pr_warn("elf: failed to get section(%zu) name from %s: %s\n",
3645 elf_ndxscn(scn), obj->path, elf_errmsg(-1));
3646 return NULL;
3647 }
3648
3649 return name;
3650 }
3651
3652 static Elf_Data *elf_sec_data(const struct bpf_object *obj, Elf_Scn *scn)
3653 {
3654 Elf_Data *data;
3655
3656 if (!scn)
3657 return NULL;
3658
3659 data = elf_getdata(scn, 0);
3660 if (!data) {
3661 pr_warn("elf: failed to get section(%zu) %s data from %s: %s\n",
3662 elf_ndxscn(scn), elf_sec_name(obj, scn) ?: "<?>",
3663 obj->path, elf_errmsg(-1));
3664 return NULL;
3665 }
3666
3667 return data;
3668 }
3669
3670 static Elf64_Sym *elf_sym_by_idx(const struct bpf_object *obj, size_t idx)
3671 {
3672 if (idx >= obj->efile.symbols->d_size / sizeof(Elf64_Sym))
3673 return NULL;
3674
3675 return (Elf64_Sym *)obj->efile.symbols->d_buf + idx;
3676 }
3677
3678 static Elf64_Rel *elf_rel_by_idx(Elf_Data *data, size_t idx)
3679 {
3680 if (idx >= data->d_size / sizeof(Elf64_Rel))
3681 return NULL;
3682
3683 return (Elf64_Rel *)data->d_buf + idx;
3684 }
3685
3686 static bool is_sec_name_dwarf(const char *name)
3687 {
3688 /* approximation, but the actual list is too long */
3689 return str_has_pfx(name, ".debug_");
3690 }
3691
3692 static bool ignore_elf_section(Elf64_Shdr *hdr, const char *name)
3693 {
3694 /* no special handling of .strtab */
3695 if (hdr->sh_type == SHT_STRTAB)
3696 return true;
3697
3698 /* ignore .llvm_addrsig section as well */
3699 if (hdr->sh_type == SHT_LLVM_ADDRSIG)
3700 return true;
3701
3702 /* no subprograms will lead to an empty .text section, ignore it */
3703 if (hdr->sh_type == SHT_PROGBITS && hdr->sh_size == 0 &&
3704 strcmp(name, ".text") == 0)
3705 return true;
3706
3707 /* DWARF sections */
3708 if (is_sec_name_dwarf(name))
3709 return true;
3710
3711 if (str_has_pfx(name, ".rel")) {
3712 name += sizeof(".rel") - 1;
3713 /* DWARF section relocations */
3714 if (is_sec_name_dwarf(name))
3715 return true;
3716
3717 /* .BTF and .BTF.ext don't need relocations */
3718 if (strcmp(name, BTF_ELF_SEC) == 0 ||
3719 strcmp(name, BTF_EXT_ELF_SEC) == 0)
3720 return true;
3721 }
3722
3723 return false;
3724 }
3725
3726 static int cmp_progs(const void *_a, const void *_b)
3727 {
3728 const struct bpf_program *a = _a;
3729 const struct bpf_program *b = _b;
3730
3731 if (a->sec_idx != b->sec_idx)
3732 return a->sec_idx < b->sec_idx ? -1 : 1;
3733
3734 /* sec_insn_off can't be the same within the section */
3735 return a->sec_insn_off < b->sec_insn_off ? -1 : 1;
3736 }
3737
3738 static int bpf_object__elf_collect(struct bpf_object *obj)
3739 {
3740 struct elf_sec_desc *sec_desc;
3741 Elf *elf = obj->efile.elf;
3742 Elf_Data *btf_ext_data = NULL;
3743 Elf_Data *btf_data = NULL;
3744 int idx = 0, err = 0;
3745 const char *name;
3746 Elf_Data *data;
3747 Elf_Scn *scn;
3748 Elf64_Shdr *sh;
3749
3750 /* ELF section indices are 0-based, but sec #0 is special "invalid"
3751 * section. Since section count retrieved by elf_getshdrnum() does
3752 * include sec #0, it is already the necessary size of an array to keep
3753 * all the sections.
3754 */
3755 if (elf_getshdrnum(obj->efile.elf, &obj->efile.sec_cnt)) {
3756 pr_warn("elf: failed to get the number of sections for %s: %s\n",
3757 obj->path, elf_errmsg(-1));
3758 return -LIBBPF_ERRNO__FORMAT;
3759 }
3760 obj->efile.secs = calloc(obj->efile.sec_cnt, sizeof(*obj->efile.secs));
3761 if (!obj->efile.secs)
3762 return -ENOMEM;
3763
3764 /* a bunch of ELF parsing functionality depends on processing symbols,
3765 * so do the first pass and find the symbol table
3766 */
3767 scn = NULL;
3768 while ((scn = elf_nextscn(elf, scn)) != NULL) {
3769 sh = elf_sec_hdr(obj, scn);
3770 if (!sh)
3771 return -LIBBPF_ERRNO__FORMAT;
3772
3773 if (sh->sh_type == SHT_SYMTAB) {
3774 if (obj->efile.symbols) {
3775 pr_warn("elf: multiple symbol tables in %s\n", obj->path);
3776 return -LIBBPF_ERRNO__FORMAT;
3777 }
3778
3779 data = elf_sec_data(obj, scn);
3780 if (!data)
3781 return -LIBBPF_ERRNO__FORMAT;
3782
3783 idx = elf_ndxscn(scn);
3784
3785 obj->efile.symbols = data;
3786 obj->efile.symbols_shndx = idx;
3787 obj->efile.strtabidx = sh->sh_link;
3788 }
3789 }
3790
3791 if (!obj->efile.symbols) {
3792 pr_warn("elf: couldn't find symbol table in %s, stripped object file?\n",
3793 obj->path);
3794 return -ENOENT;
3795 }
3796
3797 scn = NULL;
3798 while ((scn = elf_nextscn(elf, scn)) != NULL) {
3799 idx = elf_ndxscn(scn);
3800 sec_desc = &obj->efile.secs[idx];
3801
3802 sh = elf_sec_hdr(obj, scn);
3803 if (!sh)
3804 return -LIBBPF_ERRNO__FORMAT;
3805
3806 name = elf_sec_str(obj, sh->sh_name);
3807 if (!name)
3808 return -LIBBPF_ERRNO__FORMAT;
3809
3810 if (ignore_elf_section(sh, name))
3811 continue;
3812
3813 data = elf_sec_data(obj, scn);
3814 if (!data)
3815 return -LIBBPF_ERRNO__FORMAT;
3816
3817 pr_debug("elf: section(%d) %s, size %ld, link %d, flags %lx, type=%d\n",
3818 idx, name, (unsigned long)data->d_size,
3819 (int)sh->sh_link, (unsigned long)sh->sh_flags,
3820 (int)sh->sh_type);
3821
3822 if (strcmp(name, "license") == 0) {
3823 err = bpf_object__init_license(obj, data->d_buf, data->d_size);
3824 if (err)
3825 return err;
3826 } else if (strcmp(name, "version") == 0) {
3827 err = bpf_object__init_kversion(obj, data->d_buf, data->d_size);
3828 if (err)
3829 return err;
3830 } else if (strcmp(name, "maps") == 0) {
3831 pr_warn("elf: legacy map definitions in 'maps' section are not supported by libbpf v1.0+\n");
3832 return -ENOTSUP;
3833 } else if (strcmp(name, MAPS_ELF_SEC) == 0) {
3834 obj->efile.btf_maps_shndx = idx;
3835 } else if (strcmp(name, BTF_ELF_SEC) == 0) {
3836 if (sh->sh_type != SHT_PROGBITS)
3837 return -LIBBPF_ERRNO__FORMAT;
3838 btf_data = data;
3839 } else if (strcmp(name, BTF_EXT_ELF_SEC) == 0) {
3840 if (sh->sh_type != SHT_PROGBITS)
3841 return -LIBBPF_ERRNO__FORMAT;
3842 btf_ext_data = data;
3843 } else if (sh->sh_type == SHT_SYMTAB) {
3844 /* already processed during the first pass above */
3845 } else if (sh->sh_type == SHT_PROGBITS && data->d_size > 0) {
3846 if (sh->sh_flags & SHF_EXECINSTR) {
3847 if (strcmp(name, ".text") == 0)
3848 obj->efile.text_shndx = idx;
3849 err = bpf_object__add_programs(obj, data, name, idx);
3850 if (err)
3851 return err;
3852 } else if (strcmp(name, DATA_SEC) == 0 ||
3853 str_has_pfx(name, DATA_SEC ".")) {
3854 sec_desc->sec_type = SEC_DATA;
3855 sec_desc->shdr = sh;
3856 sec_desc->data = data;
3857 } else if (strcmp(name, RODATA_SEC) == 0 ||
3858 str_has_pfx(name, RODATA_SEC ".")) {
3859 sec_desc->sec_type = SEC_RODATA;
3860 sec_desc->shdr = sh;
3861 sec_desc->data = data;
3862 } else if (strcmp(name, STRUCT_OPS_SEC) == 0 ||
3863 strcmp(name, STRUCT_OPS_LINK_SEC) == 0 ||
3864 strcmp(name, "?" STRUCT_OPS_SEC) == 0 ||
3865 strcmp(name, "?" STRUCT_OPS_LINK_SEC) == 0) {
3866 sec_desc->sec_type = SEC_ST_OPS;
3867 sec_desc->shdr = sh;
3868 sec_desc->data = data;
3869 obj->efile.has_st_ops = true;
3870 } else if (strcmp(name, ARENA_SEC) == 0) {
3871 obj->efile.arena_data = data;
3872 obj->efile.arena_data_shndx = idx;
3873 } else {
3874 pr_info("elf: skipping unrecognized data section(%d) %s\n",
3875 idx, name);
3876 }
3877 } else if (sh->sh_type == SHT_REL) {
3878 int targ_sec_idx = sh->sh_info; /* points to other section */
3879
3880 if (sh->sh_entsize != sizeof(Elf64_Rel) ||
3881 targ_sec_idx >= obj->efile.sec_cnt)
3882 return -LIBBPF_ERRNO__FORMAT;
3883
3884 /* Only do relo for section with exec instructions */
3885 if (!section_have_execinstr(obj, targ_sec_idx) &&
3886 strcmp(name, ".rel" STRUCT_OPS_SEC) &&
3887 strcmp(name, ".rel" STRUCT_OPS_LINK_SEC) &&
3888 strcmp(name, ".rel?" STRUCT_OPS_SEC) &&
3889 strcmp(name, ".rel?" STRUCT_OPS_LINK_SEC) &&
3890 strcmp(name, ".rel" MAPS_ELF_SEC)) {
3891 pr_info("elf: skipping relo section(%d) %s for section(%d) %s\n",
3892 idx, name, targ_sec_idx,
3893 elf_sec_name(obj, elf_sec_by_idx(obj, targ_sec_idx)) ?: "<?>");
3894 continue;
3895 }
3896
3897 sec_desc->sec_type = SEC_RELO;
3898 sec_desc->shdr = sh;
3899 sec_desc->data = data;
3900 } else if (sh->sh_type == SHT_NOBITS && (strcmp(name, BSS_SEC) == 0 ||
3901 str_has_pfx(name, BSS_SEC "."))) {
3902 sec_desc->sec_type = SEC_BSS;
3903 sec_desc->shdr = sh;
3904 sec_desc->data = data;
3905 } else {
3906 pr_info("elf: skipping section(%d) %s (size %zu)\n", idx, name,
3907 (size_t)sh->sh_size);
3908 }
3909 }
3910
3911 if (!obj->efile.strtabidx || obj->efile.strtabidx > idx) {
3912 pr_warn("elf: symbol strings section missing or invalid in %s\n", obj->path);
3913 return -LIBBPF_ERRNO__FORMAT;
3914 }
3915
3916 /* sort BPF programs by section name and in-section instruction offset
3917 * for faster search
3918 */
3919 if (obj->nr_programs)
3920 qsort(obj->programs, obj->nr_programs, sizeof(*obj->programs), cmp_progs);
3921
3922 return bpf_object__init_btf(obj, btf_data, btf_ext_data);
3923 }
3924
3925 static bool sym_is_extern(const Elf64_Sym *sym)
3926 {
3927 int bind = ELF64_ST_BIND(sym->st_info);
3928 /* externs are symbols w/ type=NOTYPE, bind=GLOBAL|WEAK, section=UND */
3929 return sym->st_shndx == SHN_UNDEF &&
3930 (bind == STB_GLOBAL || bind == STB_WEAK) &&
3931 ELF64_ST_TYPE(sym->st_info) == STT_NOTYPE;
3932 }
3933
3934 static bool sym_is_subprog(const Elf64_Sym *sym, int text_shndx)
3935 {
3936 int bind = ELF64_ST_BIND(sym->st_info);
3937 int type = ELF64_ST_TYPE(sym->st_info);
3938
3939 /* in .text section */
3940 if (sym->st_shndx != text_shndx)
3941 return false;
3942
3943 /* local function */
3944 if (bind == STB_LOCAL && type == STT_SECTION)
3945 return true;
3946
3947 /* global function */
3948 return bind == STB_GLOBAL && type == STT_FUNC;
3949 }
3950
3951 static int find_extern_btf_id(const struct btf *btf, const char *ext_name)
3952 {
3953 const struct btf_type *t;
3954 const char *tname;
3955 int i, n;
3956
3957 if (!btf)
3958 return -ESRCH;
3959
3960 n = btf__type_cnt(btf);
3961 for (i = 1; i < n; i++) {
3962 t = btf__type_by_id(btf, i);
3963
3964 if (!btf_is_var(t) && !btf_is_func(t))
3965 continue;
3966
3967 tname = btf__name_by_offset(btf, t->name_off);
3968 if (strcmp(tname, ext_name))
3969 continue;
3970
3971 if (btf_is_var(t) &&
3972 btf_var(t)->linkage != BTF_VAR_GLOBAL_EXTERN)
3973 return -EINVAL;
3974
3975 if (btf_is_func(t) && btf_func_linkage(t) != BTF_FUNC_EXTERN)
3976 return -EINVAL;
3977
3978 return i;
3979 }
3980
3981 return -ENOENT;
3982 }
3983
3984 static int find_extern_sec_btf_id(struct btf *btf, int ext_btf_id) {
3985 const struct btf_var_secinfo *vs;
3986 const struct btf_type *t;
3987 int i, j, n;
3988
3989 if (!btf)
3990 return -ESRCH;
3991
3992 n = btf__type_cnt(btf);
3993 for (i = 1; i < n; i++) {
3994 t = btf__type_by_id(btf, i);
3995
3996 if (!btf_is_datasec(t))
3997 continue;
3998
3999 vs = btf_var_secinfos(t);
4000 for (j = 0; j < btf_vlen(t); j++, vs++) {
4001 if (vs->type == ext_btf_id)
4002 return i;
4003 }
4004 }
4005
4006 return -ENOENT;
4007 }
4008
4009 static enum kcfg_type find_kcfg_type(const struct btf *btf, int id,
4010 bool *is_signed)
4011 {
4012 const struct btf_type *t;
4013 const char *name;
4014
4015 t = skip_mods_and_typedefs(btf, id, NULL);
4016 name = btf__name_by_offset(btf, t->name_off);
4017
4018 if (is_signed)
4019 *is_signed = false;
4020 switch (btf_kind(t)) {
4021 case BTF_KIND_INT: {
4022 int enc = btf_int_encoding(t);
4023
4024 if (enc & BTF_INT_BOOL)
4025 return t->size == 1 ? KCFG_BOOL : KCFG_UNKNOWN;
4026 if (is_signed)
4027 *is_signed = enc & BTF_INT_SIGNED;
4028 if (t->size == 1)
4029 return KCFG_CHAR;
4030 if (t->size < 1 || t->size > 8 || (t->size & (t->size - 1)))
4031 return KCFG_UNKNOWN;
4032 return KCFG_INT;
4033 }
4034 case BTF_KIND_ENUM:
4035 if (t->size != 4)
4036 return KCFG_UNKNOWN;
4037 if (strcmp(name, "libbpf_tristate"))
4038 return KCFG_UNKNOWN;
4039 return KCFG_TRISTATE;
4040 case BTF_KIND_ENUM64:
4041 if (strcmp(name, "libbpf_tristate"))
4042 return KCFG_UNKNOWN;
4043 return KCFG_TRISTATE;
4044 case BTF_KIND_ARRAY:
4045 if (btf_array(t)->nelems == 0)
4046 return KCFG_UNKNOWN;
4047 if (find_kcfg_type(btf, btf_array(t)->type, NULL) != KCFG_CHAR)
4048 return KCFG_UNKNOWN;
4049 return KCFG_CHAR_ARR;
4050 default:
4051 return KCFG_UNKNOWN;
4052 }
4053 }
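/* Illustrative .kconfig extern declarations and the kcfg_type each would be
 * classified as by find_kcfg_type() above (a sketch; __kconfig is assumed to
 * be the usual bpf_helpers.h section attribute, and all names other than
 * CONFIG_HZ are hypothetical):
 *
 *   extern int CONFIG_HZ __kconfig;                            // KCFG_INT
 *   extern bool CONFIG_EXAMPLE_BOOL __kconfig;                 // KCFG_BOOL
 *   extern char CONFIG_EXAMPLE_STR[32] __kconfig;              // KCFG_CHAR_ARR
 *   extern enum libbpf_tristate CONFIG_EXAMPLE_TRI __kconfig;  // KCFG_TRISTATE
 */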
4054
4055 static int cmp_externs(const void *_a, const void *_b)
4056 {
4057 const struct extern_desc *a = _a;
4058 const struct extern_desc *b = _b;
4059
4060 if (a->type != b->type)
4061 return a->type < b->type ? -1 : 1;
4062
4063 if (a->type == EXT_KCFG) {
4064 /* descending order by alignment requirements */
4065 if (a->kcfg.align != b->kcfg.align)
4066 return a->kcfg.align > b->kcfg.align ? -1 : 1;
4067 /* ascending order by size, within same alignment class */
4068 if (a->kcfg.sz != b->kcfg.sz)
4069 return a->kcfg.sz < b->kcfg.sz ? -1 : 1;
4070 }
4071
4072 /* resolve ties by name */
4073 return strcmp(a->name, b->name);
4074 }
4075
4076 static int find_int_btf_id(const struct btf *btf)
4077 {
4078 const struct btf_type *t;
4079 int i, n;
4080
4081 n = btf__type_cnt(btf);
4082 for (i = 1; i < n; i++) {
4083 t = btf__type_by_id(btf, i);
4084
4085 if (btf_is_int(t) && btf_int_bits(t) == 32)
4086 return i;
4087 }
4088
4089 return 0;
4090 }
4091
4092 static int add_dummy_ksym_var(struct btf *btf)
4093 {
4094 int i, int_btf_id, sec_btf_id, dummy_var_btf_id;
4095 const struct btf_var_secinfo *vs;
4096 const struct btf_type *sec;
4097
4098 if (!btf)
4099 return 0;
4100
4101 sec_btf_id = btf__find_by_name_kind(btf, KSYMS_SEC,
4102 BTF_KIND_DATASEC);
4103 if (sec_btf_id < 0)
4104 return 0;
4105
4106 sec = btf__type_by_id(btf, sec_btf_id);
4107 vs = btf_var_secinfos(sec);
4108 for (i = 0; i < btf_vlen(sec); i++, vs++) {
4109 const struct btf_type *vt;
4110
4111 vt = btf__type_by_id(btf, vs->type);
4112 if (btf_is_func(vt))
4113 break;
4114 }
4115
4116 /* No func in ksyms sec. No need to add dummy var. */
4117 if (i == btf_vlen(sec))
4118 return 0;
4119
4120 int_btf_id = find_int_btf_id(btf);
4121 dummy_var_btf_id = btf__add_var(btf,
4122 "dummy_ksym",
4123 BTF_VAR_GLOBAL_ALLOCATED,
4124 int_btf_id);
4125 if (dummy_var_btf_id < 0)
4126 pr_warn("cannot create a dummy_ksym var\n");
4127
4128 return dummy_var_btf_id;
4129 }
4130
4131 static int bpf_object__collect_externs(struct bpf_object *obj)
4132 {
4133 struct btf_type *sec, *kcfg_sec = NULL, *ksym_sec = NULL;
4134 const struct btf_type *t;
4135 struct extern_desc *ext;
4136 int i, n, off, dummy_var_btf_id;
4137 const char *ext_name, *sec_name;
4138 size_t ext_essent_len;
4139 Elf_Scn *scn;
4140 Elf64_Shdr *sh;
4141
4142 if (!obj->efile.symbols)
4143 return 0;
4144
4145 scn = elf_sec_by_idx(obj, obj->efile.symbols_shndx);
4146 sh = elf_sec_hdr(obj, scn);
4147 if (!sh || sh->sh_entsize != sizeof(Elf64_Sym))
4148 return -LIBBPF_ERRNO__FORMAT;
4149
4150 dummy_var_btf_id = add_dummy_ksym_var(obj->btf);
4151 if (dummy_var_btf_id < 0)
4152 return dummy_var_btf_id;
4153
4154 n = sh->sh_size / sh->sh_entsize;
4155 pr_debug("looking for externs among %d symbols...\n", n);
4156
4157 for (i = 0; i < n; i++) {
4158 Elf64_Sym *sym = elf_sym_by_idx(obj, i);
4159
4160 if (!sym)
4161 return -LIBBPF_ERRNO__FORMAT;
4162 if (!sym_is_extern(sym))
4163 continue;
4164 ext_name = elf_sym_str(obj, sym->st_name);
4165 if (!ext_name || !ext_name[0])
4166 continue;
4167
4168 ext = obj->externs;
4169 ext = libbpf_reallocarray(ext, obj->nr_extern + 1, sizeof(*ext));
4170 if (!ext)
4171 return -ENOMEM;
4172 obj->externs = ext;
4173 ext = &ext[obj->nr_extern];
4174 memset(ext, 0, sizeof(*ext));
4175 obj->nr_extern++;
4176
4177 ext->btf_id = find_extern_btf_id(obj->btf, ext_name);
4178 if (ext->btf_id <= 0) {
4179 pr_warn("failed to find BTF for extern '%s': %d\n",
4180 ext_name, ext->btf_id);
4181 return ext->btf_id;
4182 }
4183 t = btf__type_by_id(obj->btf, ext->btf_id);
4184 ext->name = btf__name_by_offset(obj->btf, t->name_off);
4185 ext->sym_idx = i;
4186 ext->is_weak = ELF64_ST_BIND(sym->st_info) == STB_WEAK;
4187
4188 ext_essent_len = bpf_core_essential_name_len(ext->name);
4189 ext->essent_name = NULL;
4190 if (ext_essent_len != strlen(ext->name)) {
4191 ext->essent_name = strndup(ext->name, ext_essent_len);
4192 if (!ext->essent_name)
4193 return -ENOMEM;
4194 }
4195
4196 ext->sec_btf_id = find_extern_sec_btf_id(obj->btf, ext->btf_id);
4197 if (ext->sec_btf_id <= 0) {
4198 pr_warn("failed to find BTF for extern '%s' [%d] section: %d\n",
4199 ext_name, ext->btf_id, ext->sec_btf_id);
4200 return ext->sec_btf_id;
4201 }
4202 sec = (void *)btf__type_by_id(obj->btf, ext->sec_btf_id);
4203 sec_name = btf__name_by_offset(obj->btf, sec->name_off);
4204
4205 if (strcmp(sec_name, KCONFIG_SEC) == 0) {
4206 if (btf_is_func(t)) {
4207 pr_warn("extern function %s is unsupported under %s section\n",
4208 ext->name, KCONFIG_SEC);
4209 return -ENOTSUP;
4210 }
4211 kcfg_sec = sec;
4212 ext->type = EXT_KCFG;
4213 ext->kcfg.sz = btf__resolve_size(obj->btf, t->type);
4214 if (ext->kcfg.sz <= 0) {
4215 pr_warn("failed to resolve size of extern (kcfg) '%s': %d\n",
4216 ext_name, ext->kcfg.sz);
4217 return ext->kcfg.sz;
4218 }
4219 ext->kcfg.align = btf__align_of(obj->btf, t->type);
4220 if (ext->kcfg.align <= 0) {
4221 pr_warn("failed to determine alignment of extern (kcfg) '%s': %d\n",
4222 ext_name, ext->kcfg.align);
4223 return -EINVAL;
4224 }
4225 ext->kcfg.type = find_kcfg_type(obj->btf, t->type,
4226 &ext->kcfg.is_signed);
4227 if (ext->kcfg.type == KCFG_UNKNOWN) {
4228 pr_warn("extern (kcfg) '%s': type is unsupported\n", ext_name);
4229 return -ENOTSUP;
4230 }
4231 } else if (strcmp(sec_name, KSYMS_SEC) == 0) {
4232 ksym_sec = sec;
4233 ext->type = EXT_KSYM;
4234 skip_mods_and_typedefs(obj->btf, t->type,
4235 &ext->ksym.type_id);
4236 } else {
4237 pr_warn("unrecognized extern section '%s'\n", sec_name);
4238 return -ENOTSUP;
4239 }
4240 }
4241 pr_debug("collected %d externs total\n", obj->nr_extern);
4242
4243 if (!obj->nr_extern)
4244 return 0;
4245
4246 /* sort externs by type, for kcfg ones also by (align, size, name) */
4247 qsort(obj->externs, obj->nr_extern, sizeof(*ext), cmp_externs);
4248
4249 /* for .ksyms section, we need to turn all externs into allocated
4250 * variables in BTF to pass kernel verification; we do this by
4251 * pretending that each extern is an 8-byte variable
4252 */
4253 if (ksym_sec) {
4254 /* find existing 4-byte integer type in BTF to use for fake
4255 * extern variables in DATASEC
4256 */
4257 int int_btf_id = find_int_btf_id(obj->btf);
4258 /* For extern function, a dummy_var added earlier
4259 * will be used to replace the vs->type and
4260 * its name string will be used to refill
4261 * the missing param's name.
4262 */
4263 const struct btf_type *dummy_var;
4264
4265 dummy_var = btf__type_by_id(obj->btf, dummy_var_btf_id);
4266 for (i = 0; i < obj->nr_extern; i++) {
4267 ext = &obj->externs[i];
4268 if (ext->type != EXT_KSYM)
4269 continue;
4270 pr_debug("extern (ksym) #%d: symbol %d, name %s\n",
4271 i, ext->sym_idx, ext->name);
4272 }
4273
4274 sec = ksym_sec;
4275 n = btf_vlen(sec);
4276 for (i = 0, off = 0; i < n; i++, off += sizeof(int)) {
4277 struct btf_var_secinfo *vs = btf_var_secinfos(sec) + i;
4278 struct btf_type *vt;
4279
4280 vt = (void *)btf__type_by_id(obj->btf, vs->type);
4281 ext_name = btf__name_by_offset(obj->btf, vt->name_off);
4282 ext = find_extern_by_name(obj, ext_name);
4283 if (!ext) {
4284 pr_warn("failed to find extern definition for BTF %s '%s'\n",
4285 btf_kind_str(vt), ext_name);
4286 return -ESRCH;
4287 }
4288 if (btf_is_func(vt)) {
4289 const struct btf_type *func_proto;
4290 struct btf_param *param;
4291 int j;
4292
4293 func_proto = btf__type_by_id(obj->btf,
4294 vt->type);
4295 param = btf_params(func_proto);
4296 /* Reuse the dummy_var string if the
4297 * func proto does not have param name.
4298 */
4299 for (j = 0; j < btf_vlen(func_proto); j++)
4300 if (param[j].type && !param[j].name_off)
4301 param[j].name_off =
4302 dummy_var->name_off;
4303 vs->type = dummy_var_btf_id;
4304 vt->info &= ~0xffff;
4305 vt->info |= BTF_FUNC_GLOBAL;
4306 } else {
4307 btf_var(vt)->linkage = BTF_VAR_GLOBAL_ALLOCATED;
4308 vt->type = int_btf_id;
4309 }
4310 vs->offset = off;
4311 vs->size = sizeof(int);
4312 }
4313 sec->size = off;
4314 }
4315
4316 if (kcfg_sec) {
4317 sec = kcfg_sec;
4318 /* for kcfg externs calculate their offsets within a .kconfig map */
4319 off = 0;
4320 for (i = 0; i < obj->nr_extern; i++) {
4321 ext = &obj->externs[i];
4322 if (ext->type != EXT_KCFG)
4323 continue;
4324
4325 ext->kcfg.data_off = roundup(off, ext->kcfg.align);
4326 off = ext->kcfg.data_off + ext->kcfg.sz;
4327 pr_debug("extern (kcfg) #%d: symbol %d, off %u, name %s\n",
4328 i, ext->sym_idx, ext->kcfg.data_off, ext->name);
4329 }
4330 sec->size = off;
4331 n = btf_vlen(sec);
4332 for (i = 0; i < n; i++) {
4333 struct btf_var_secinfo *vs = btf_var_secinfos(sec) + i;
4334
4335 t = btf__type_by_id(obj->btf, vs->type);
4336 ext_name = btf__name_by_offset(obj->btf, t->name_off);
4337 ext = find_extern_by_name(obj, ext_name);
4338 if (!ext) {
4339 pr_warn("failed to find extern definition for BTF var '%s'\n",
4340 ext_name);
4341 return -ESRCH;
4342 }
4343 btf_var(t)->linkage = BTF_VAR_GLOBAL_ALLOCATED;
4344 vs->offset = ext->kcfg.data_off;
4345 }
4346 }
4347 return 0;
4348 }
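/* BPF-side sketch of the two extern flavors collected above (names are
 * illustrative; __kconfig and __ksym are assumed to be the bpf_helpers.h
 * attributes):
 *
 *   extern unsigned long CONFIG_EXAMPLE __kconfig;   // lands in KCONFIG_SEC
 *   extern const struct rq runqueues __ksym;         // typed ksym variable
 *   extern void bpf_example_kfunc(void) __ksym;      // extern function (kfunc),
 *                                                    // rewritten via dummy var above
 */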
4349
4350 static bool prog_is_subprog(const struct bpf_object *obj, const struct bpf_program *prog)
4351 {
4352 return prog->sec_idx == obj->efile.text_shndx && obj->nr_programs > 1;
4353 }
4354
4355 struct bpf_program *
4356 bpf_object__find_program_by_name(const struct bpf_object *obj,
4357 const char *name)
4358 {
4359 struct bpf_program *prog;
4360
4361 bpf_object__for_each_program(prog, obj) {
4362 if (prog_is_subprog(obj, prog))
4363 continue;
4364 if (!strcmp(prog->name, name))
4365 return prog;
4366 }
4367 return errno = ENOENT, NULL;
4368 }
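/* Caller-side usage sketch (the program name "handle_tp" is hypothetical):
 *
 *   struct bpf_program *prog;
 *
 *   prog = bpf_object__find_program_by_name(obj, "handle_tp");
 *   if (!prog)          // not found: NULL is returned and errno is set to ENOENT
 *           return -errno;
 */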
4369
4370 static bool bpf_object__shndx_is_data(const struct bpf_object *obj,
4371 int shndx)
4372 {
4373 switch (obj->efile.secs[shndx].sec_type) {
4374 case SEC_BSS:
4375 case SEC_DATA:
4376 case SEC_RODATA:
4377 return true;
4378 default:
4379 return false;
4380 }
4381 }
4382
4383 static bool bpf_object__shndx_is_maps(const struct bpf_object *obj,
4384 int shndx)
4385 {
4386 return shndx == obj->efile.btf_maps_shndx;
4387 }
4388
4389 static enum libbpf_map_type
4390 bpf_object__section_to_libbpf_map_type(const struct bpf_object *obj, int shndx)
4391 {
4392 if (shndx == obj->efile.symbols_shndx)
4393 return LIBBPF_MAP_KCONFIG;
4394
4395 switch (obj->efile.secs[shndx].sec_type) {
4396 case SEC_BSS:
4397 return LIBBPF_MAP_BSS;
4398 case SEC_DATA:
4399 return LIBBPF_MAP_DATA;
4400 case SEC_RODATA:
4401 return LIBBPF_MAP_RODATA;
4402 default:
4403 return LIBBPF_MAP_UNSPEC;
4404 }
4405 }
4406
4407 static int bpf_program__record_reloc(struct bpf_program *prog,
4408 struct reloc_desc *reloc_desc,
4409 __u32 insn_idx, const char *sym_name,
4410 const Elf64_Sym *sym, const Elf64_Rel *rel)
4411 {
4412 struct bpf_insn *insn = &prog->insns[insn_idx];
4413 size_t map_idx, nr_maps = prog->obj->nr_maps;
4414 struct bpf_object *obj = prog->obj;
4415 __u32 shdr_idx = sym->st_shndx;
4416 enum libbpf_map_type type;
4417 const char *sym_sec_name;
4418 struct bpf_map *map;
4419
4420 if (!is_call_insn(insn) && !is_ldimm64_insn(insn)) {
4421 pr_warn("prog '%s': invalid relo against '%s' for insns[%d].code 0x%x\n",
4422 prog->name, sym_name, insn_idx, insn->code);
4423 return -LIBBPF_ERRNO__RELOC;
4424 }
4425
4426 if (sym_is_extern(sym)) {
4427 int sym_idx = ELF64_R_SYM(rel->r_info);
4428 int i, n = obj->nr_extern;
4429 struct extern_desc *ext;
4430
4431 for (i = 0; i < n; i++) {
4432 ext = &obj->externs[i];
4433 if (ext->sym_idx == sym_idx)
4434 break;
4435 }
4436 if (i >= n) {
4437 pr_warn("prog '%s': extern relo failed to find extern for '%s' (%d)\n",
4438 prog->name, sym_name, sym_idx);
4439 return -LIBBPF_ERRNO__RELOC;
4440 }
4441 pr_debug("prog '%s': found extern #%d '%s' (sym %d) for insn #%u\n",
4442 prog->name, i, ext->name, ext->sym_idx, insn_idx);
4443 if (insn->code == (BPF_JMP | BPF_CALL))
4444 reloc_desc->type = RELO_EXTERN_CALL;
4445 else
4446 reloc_desc->type = RELO_EXTERN_LD64;
4447 reloc_desc->insn_idx = insn_idx;
4448 reloc_desc->ext_idx = i;
4449 return 0;
4450 }
4451
4452 /* sub-program call relocation */
4453 if (is_call_insn(insn)) {
4454 if (insn->src_reg != BPF_PSEUDO_CALL) {
4455 pr_warn("prog '%s': incorrect bpf_call opcode\n", prog->name);
4456 return -LIBBPF_ERRNO__RELOC;
4457 }
4458 /* text_shndx can be 0, if no default "main" program exists */
4459 if (!shdr_idx || shdr_idx != obj->efile.text_shndx) {
4460 sym_sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, shdr_idx));
4461 pr_warn("prog '%s': bad call relo against '%s' in section '%s'\n",
4462 prog->name, sym_name, sym_sec_name);
4463 return -LIBBPF_ERRNO__RELOC;
4464 }
4465 if (sym->st_value % BPF_INSN_SZ) {
4466 pr_warn("prog '%s': bad call relo against '%s' at offset %zu\n",
4467 prog->name, sym_name, (size_t)sym->st_value);
4468 return -LIBBPF_ERRNO__RELOC;
4469 }
4470 reloc_desc->type = RELO_CALL;
4471 reloc_desc->insn_idx = insn_idx;
4472 reloc_desc->sym_off = sym->st_value;
4473 return 0;
4474 }
4475
4476 if (!shdr_idx || shdr_idx >= SHN_LORESERVE) {
4477 pr_warn("prog '%s': invalid relo against '%s' in special section 0x%x; forgot to initialize global var?..\n",
4478 prog->name, sym_name, shdr_idx);
4479 return -LIBBPF_ERRNO__RELOC;
4480 }
4481
4482 /* loading subprog addresses */
4483 if (sym_is_subprog(sym, obj->efile.text_shndx)) {
4484 /* global_func: sym->st_value = offset in the section, insn->imm = 0.
4485 * local_func: sym->st_value = 0, insn->imm = offset in the section.
4486 */
4487 if ((sym->st_value % BPF_INSN_SZ) || (insn->imm % BPF_INSN_SZ)) {
4488 pr_warn("prog '%s': bad subprog addr relo against '%s' at offset %zu+%d\n",
4489 prog->name, sym_name, (size_t)sym->st_value, insn->imm);
4490 return -LIBBPF_ERRNO__RELOC;
4491 }
4492
4493 reloc_desc->type = RELO_SUBPROG_ADDR;
4494 reloc_desc->insn_idx = insn_idx;
4495 reloc_desc->sym_off = sym->st_value;
4496 return 0;
4497 }
4498
4499 type = bpf_object__section_to_libbpf_map_type(obj, shdr_idx);
4500 sym_sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, shdr_idx));
4501
4502 /* arena data relocation */
4503 if (shdr_idx == obj->efile.arena_data_shndx) {
4504 reloc_desc->type = RELO_DATA;
4505 reloc_desc->insn_idx = insn_idx;
4506 reloc_desc->map_idx = obj->arena_map - obj->maps;
4507 reloc_desc->sym_off = sym->st_value;
4508 return 0;
4509 }
4510
4511 /* generic map reference relocation */
4512 if (type == LIBBPF_MAP_UNSPEC) {
4513 if (!bpf_object__shndx_is_maps(obj, shdr_idx)) {
4514 pr_warn("prog '%s': bad map relo against '%s' in section '%s'\n",
4515 prog->name, sym_name, sym_sec_name);
4516 return -LIBBPF_ERRNO__RELOC;
4517 }
4518 for (map_idx = 0; map_idx < nr_maps; map_idx++) {
4519 map = &obj->maps[map_idx];
4520 if (map->libbpf_type != type ||
4521 map->sec_idx != sym->st_shndx ||
4522 map->sec_offset != sym->st_value)
4523 continue;
4524 pr_debug("prog '%s': found map %zd (%s, sec %d, off %zu) for insn #%u\n",
4525 prog->name, map_idx, map->name, map->sec_idx,
4526 map->sec_offset, insn_idx);
4527 break;
4528 }
4529 if (map_idx >= nr_maps) {
4530 pr_warn("prog '%s': map relo failed to find map for section '%s', off %zu\n",
4531 prog->name, sym_sec_name, (size_t)sym->st_value);
4532 return -LIBBPF_ERRNO__RELOC;
4533 }
4534 reloc_desc->type = RELO_LD64;
4535 reloc_desc->insn_idx = insn_idx;
4536 reloc_desc->map_idx = map_idx;
4537 reloc_desc->sym_off = 0; /* sym->st_value determines map_idx */
4538 return 0;
4539 }
4540
4541 /* global data map relocation */
4542 if (!bpf_object__shndx_is_data(obj, shdr_idx)) {
4543 pr_warn("prog '%s': bad data relo against section '%s'\n",
4544 prog->name, sym_sec_name);
4545 return -LIBBPF_ERRNO__RELOC;
4546 }
4547 for (map_idx = 0; map_idx < nr_maps; map_idx++) {
4548 map = &obj->maps[map_idx];
4549 if (map->libbpf_type != type || map->sec_idx != sym->st_shndx)
4550 continue;
4551 pr_debug("prog '%s': found data map %zd (%s, sec %d, off %zu) for insn %u\n",
4552 prog->name, map_idx, map->name, map->sec_idx,
4553 map->sec_offset, insn_idx);
4554 break;
4555 }
4556 if (map_idx >= nr_maps) {
4557 pr_warn("prog '%s': data relo failed to find map for section '%s'\n",
4558 prog->name, sym_sec_name);
4559 return -LIBBPF_ERRNO__RELOC;
4560 }
4561
4562 reloc_desc->type = RELO_DATA;
4563 reloc_desc->insn_idx = insn_idx;
4564 reloc_desc->map_idx = map_idx;
4565 reloc_desc->sym_off = sym->st_value;
4566 return 0;
4567 }
4568
4569 static bool prog_contains_insn(const struct bpf_program *prog, size_t insn_idx)
4570 {
4571 return insn_idx >= prog->sec_insn_off &&
4572 insn_idx < prog->sec_insn_off + prog->sec_insn_cnt;
4573 }
4574
4575 static struct bpf_program *find_prog_by_sec_insn(const struct bpf_object *obj,
4576 size_t sec_idx, size_t insn_idx)
4577 {
4578 int l = 0, r = obj->nr_programs - 1, m;
4579 struct bpf_program *prog;
4580
4581 if (!obj->nr_programs)
4582 return NULL;
4583
4584 while (l < r) {
4585 m = l + (r - l + 1) / 2;
4586 prog = &obj->programs[m];
4587
4588 if (prog->sec_idx < sec_idx ||
4589 (prog->sec_idx == sec_idx && prog->sec_insn_off <= insn_idx))
4590 l = m;
4591 else
4592 r = m - 1;
4593 }
4594 /* matching program could be at index l, but it still might be the
4595 * wrong one, so we need to double check conditions for the last time
4596 */
4597 prog = &obj->programs[l];
4598 if (prog->sec_idx == sec_idx && prog_contains_insn(prog, insn_idx))
4599 return prog;
4600 return NULL;
4601 }
4602
4603 static int
4604 bpf_object__collect_prog_relos(struct bpf_object *obj, Elf64_Shdr *shdr, Elf_Data *data)
4605 {
4606 const char *relo_sec_name, *sec_name;
4607 size_t sec_idx = shdr->sh_info, sym_idx;
4608 struct bpf_program *prog;
4609 struct reloc_desc *relos;
4610 int err, i, nrels;
4611 const char *sym_name;
4612 __u32 insn_idx;
4613 Elf_Scn *scn;
4614 Elf_Data *scn_data;
4615 Elf64_Sym *sym;
4616 Elf64_Rel *rel;
4617
4618 if (sec_idx >= obj->efile.sec_cnt)
4619 return -EINVAL;
4620
4621 scn = elf_sec_by_idx(obj, sec_idx);
4622 scn_data = elf_sec_data(obj, scn);
4623 if (!scn_data)
4624 return -LIBBPF_ERRNO__FORMAT;
4625
4626 relo_sec_name = elf_sec_str(obj, shdr->sh_name);
4627 sec_name = elf_sec_name(obj, scn);
4628 if (!relo_sec_name || !sec_name)
4629 return -EINVAL;
4630
4631 pr_debug("sec '%s': collecting relocation for section(%zu) '%s'\n",
4632 relo_sec_name, sec_idx, sec_name);
4633 nrels = shdr->sh_size / shdr->sh_entsize;
4634
4635 for (i = 0; i < nrels; i++) {
4636 rel = elf_rel_by_idx(data, i);
4637 if (!rel) {
4638 pr_warn("sec '%s': failed to get relo #%d\n", relo_sec_name, i);
4639 return -LIBBPF_ERRNO__FORMAT;
4640 }
4641
4642 sym_idx = ELF64_R_SYM(rel->r_info);
4643 sym = elf_sym_by_idx(obj, sym_idx);
4644 if (!sym) {
4645 pr_warn("sec '%s': symbol #%zu not found for relo #%d\n",
4646 relo_sec_name, sym_idx, i);
4647 return -LIBBPF_ERRNO__FORMAT;
4648 }
4649
4650 if (sym->st_shndx >= obj->efile.sec_cnt) {
4651 pr_warn("sec '%s': corrupted symbol #%zu pointing to invalid section #%zu for relo #%d\n",
4652 relo_sec_name, sym_idx, (size_t)sym->st_shndx, i);
4653 return -LIBBPF_ERRNO__FORMAT;
4654 }
4655
4656 if (rel->r_offset % BPF_INSN_SZ || rel->r_offset >= scn_data->d_size) {
4657 pr_warn("sec '%s': invalid offset 0x%zx for relo #%d\n",
4658 relo_sec_name, (size_t)rel->r_offset, i);
4659 return -LIBBPF_ERRNO__FORMAT;
4660 }
4661
4662 insn_idx = rel->r_offset / BPF_INSN_SZ;
4663 /* relocations against static functions are recorded as
4664 * relocations against the section that contains a function;
4665 * in such case, symbol will be STT_SECTION and sym.st_name
4666 * will point to empty string (0), so fetch section name
4667 * instead
4668 */
4669 if (ELF64_ST_TYPE(sym->st_info) == STT_SECTION && sym->st_name == 0)
4670 sym_name = elf_sec_name(obj, elf_sec_by_idx(obj, sym->st_shndx));
4671 else
4672 sym_name = elf_sym_str(obj, sym->st_name);
4673 sym_name = sym_name ?: "<?";
4674
4675 pr_debug("sec '%s': relo #%d: insn #%u against '%s'\n",
4676 relo_sec_name, i, insn_idx, sym_name);
4677
4678 prog = find_prog_by_sec_insn(obj, sec_idx, insn_idx);
4679 if (!prog) {
4680 pr_debug("sec '%s': relo #%d: couldn't find program in section '%s' for insn #%u, probably overridden weak function, skipping...\n",
4681 relo_sec_name, i, sec_name, insn_idx);
4682 continue;
4683 }
4684
4685 relos = libbpf_reallocarray(prog->reloc_desc,
4686 prog->nr_reloc + 1, sizeof(*relos));
4687 if (!relos)
4688 return -ENOMEM;
4689 prog->reloc_desc = relos;
4690
4691 /* adjust insn_idx to local BPF program frame of reference */
4692 insn_idx -= prog->sec_insn_off;
4693 err = bpf_program__record_reloc(prog, &relos[prog->nr_reloc],
4694 insn_idx, sym_name, sym, rel);
4695 if (err)
4696 return err;
4697
4698 prog->nr_reloc++;
4699 }
4700 return 0;
4701 }
4702
4703 static int map_fill_btf_type_info(struct bpf_object *obj, struct bpf_map *map)
4704 {
4705 int id;
4706
4707 if (!obj->btf)
4708 return -ENOENT;
4709
4710 /* if it's BTF-defined map, we don't need to search for type IDs.
4711 * For struct_ops map, it does not need btf_key_type_id and
4712 * btf_value_type_id.
4713 */
4714 if (map->sec_idx == obj->efile.btf_maps_shndx || bpf_map__is_struct_ops(map))
4715 return 0;
4716
4717 /*
4718 * LLVM annotates global data differently in BTF, that is,
4719 * only as '.data', '.bss' or '.rodata'.
4720 */
4721 if (!bpf_map__is_internal(map))
4722 return -ENOENT;
4723
4724 id = btf__find_by_name(obj->btf, map->real_name);
4725 if (id < 0)
4726 return id;
4727
4728 map->btf_key_type_id = 0;
4729 map->btf_value_type_id = id;
4730 return 0;
4731 }
4732
4733 static int bpf_get_map_info_from_fdinfo(int fd, struct bpf_map_info *info)
4734 {
4735 char file[PATH_MAX], buff[4096];
4736 FILE *fp;
4737 __u32 val;
4738 int err;
4739
4740 snprintf(file, sizeof(file), "/proc/%d/fdinfo/%d", getpid(), fd);
4741 memset(info, 0, sizeof(*info));
4742
4743 fp = fopen(file, "re");
4744 if (!fp) {
4745 err = -errno;
4746 pr_warn("failed to open %s: %d. No procfs support?\n", file,
4747 err);
4748 return err;
4749 }
4750
4751 while (fgets(buff, sizeof(buff), fp)) {
4752 if (sscanf(buff, "map_type:\t%u", &val) == 1)
4753 info->type = val;
4754 else if (sscanf(buff, "key_size:\t%u", &val) == 1)
4755 info->key_size = val;
4756 else if (sscanf(buff, "value_size:\t%u", &val) == 1)
4757 info->value_size = val;
4758 else if (sscanf(buff, "max_entries:\t%u", &val) == 1)
4759 info->max_entries = val;
4760 else if (sscanf(buff, "map_flags:\t%i", &val) == 1)
4761 info->map_flags = val;
4762 }
4763
4764 fclose(fp);
4765
4766 return 0;
4767 }
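/* The fdinfo fallback above parses "<field>:\t<value>" lines; a BPF map's
 * /proc/<pid>/fdinfo/<fd> typically contains lines roughly like the following
 * (values are illustrative):
 *
 *   map_type:       1
 *   key_size:       4
 *   value_size:     8
 *   max_entries:    1024
 *   map_flags:      0x0
 */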
4768
4769 bool bpf_map__autocreate(const struct bpf_map *map)
4770 {
4771 return map->autocreate;
4772 }
4773
4774 int bpf_map__set_autocreate(struct bpf_map *map, bool autocreate)
4775 {
4776 if (map->obj->loaded)
4777 return libbpf_err(-EBUSY);
4778
4779 map->autocreate = autocreate;
4780 return 0;
4781 }
4782
4783 int bpf_map__reuse_fd(struct bpf_map *map, int fd)
4784 {
4785 struct bpf_map_info info;
4786 __u32 len = sizeof(info), name_len;
4787 int new_fd, err;
4788 char *new_name;
4789
4790 memset(&info, 0, len);
4791 err = bpf_map_get_info_by_fd(fd, &info, &len);
4792 if (err && errno == EINVAL)
4793 err = bpf_get_map_info_from_fdinfo(fd, &info);
4794 if (err)
4795 return libbpf_err(err);
4796
4797 name_len = strlen(info.name);
4798 if (name_len == BPF_OBJ_NAME_LEN - 1 && strncmp(map->name, info.name, name_len) == 0)
4799 new_name = strdup(map->name);
4800 else
4801 new_name = strdup(info.name);
4802
4803 if (!new_name)
4804 return libbpf_err(-errno);
4805
4806 /*
4807 * Like dup(), but make sure new FD is >= 3 and has O_CLOEXEC set.
4808 * This is similar to what we do in ensure_good_fd(), but without
4809 * closing original FD.
4810 */
4811 new_fd = fcntl(fd, F_DUPFD_CLOEXEC, 3);
4812 if (new_fd < 0) {
4813 err = -errno;
4814 goto err_free_new_name;
4815 }
4816
4817 err = reuse_fd(map->fd, new_fd);
4818 if (err)
4819 goto err_free_new_name;
4820
4821 free(map->name);
4822
4823 map->name = new_name;
4824 map->def.type = info.type;
4825 map->def.key_size = info.key_size;
4826 map->def.value_size = info.value_size;
4827 map->def.max_entries = info.max_entries;
4828 map->def.map_flags = info.map_flags;
4829 map->btf_key_type_id = info.btf_key_type_id;
4830 map->btf_value_type_id = info.btf_value_type_id;
4831 map->reused = true;
4832 map->map_extra = info.map_extra;
4833
4834 return 0;
4835
4836 err_free_new_name:
4837 free(new_name);
4838 return libbpf_err(err);
4839 }
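/* Caller-side sketch: adopt an already pinned map before load (the pin path
 * and map name are hypothetical). bpf_map__reuse_fd() dups the FD internally,
 * so the original can be closed right away:
 *
 *   int err, fd = bpf_obj_get("/sys/fs/bpf/my_map");
 *
 *   if (fd >= 0) {
 *           err = bpf_map__reuse_fd(bpf_object__find_map_by_name(obj, "my_map"), fd);
 *           close(fd);
 *   }
 */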
4840
4841 __u32 bpf_map__max_entries(const struct bpf_map *map)
4842 {
4843 return map->def.max_entries;
4844 }
4845
4846 struct bpf_map *bpf_map__inner_map(struct bpf_map *map)
4847 {
4848 if (!bpf_map_type__is_map_in_map(map->def.type))
4849 return errno = EINVAL, NULL;
4850
4851 return map->inner_map;
4852 }
4853
4854 int bpf_map__set_max_entries(struct bpf_map *map, __u32 max_entries)
4855 {
4856 if (map->obj->loaded)
4857 return libbpf_err(-EBUSY);
4858
4859 map->def.max_entries = max_entries;
4860
4861 /* auto-adjust BPF ringbuf map max_entries to be a multiple of page size */
4862 if (map_is_ringbuf(map))
4863 map->def.max_entries = adjust_ringbuf_sz(map->def.max_entries);
4864
4865 return 0;
4866 }
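/* Caller-side sketch: resize a BPF_MAP_TYPE_RINGBUF map before
 * bpf_object__load(); the requested size is rounded up to a multiple of the
 * page size by the auto-adjust above (the skeleton/map names are hypothetical):
 *
 *   err = bpf_map__set_max_entries(skel->maps.events, 512 * 1024);
 */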
4867
4868 static int bpf_object_prepare_token(struct bpf_object *obj)
4869 {
4870 const char *bpffs_path;
4871 int bpffs_fd = -1, token_fd, err;
4872 bool mandatory;
4873 enum libbpf_print_level level;
4874
4875 /* token is explicitly prevented */
4876 if (obj->token_path && obj->token_path[0] == '\0') {
4877 pr_debug("object '%s': token is prevented, skipping...\n", obj->name);
4878 return 0;
4879 }
4880
4881 mandatory = obj->token_path != NULL;
4882 level = mandatory ? LIBBPF_WARN : LIBBPF_DEBUG;
4883
4884 bpffs_path = obj->token_path ?: BPF_FS_DEFAULT_PATH;
4885 bpffs_fd = open(bpffs_path, O_DIRECTORY, O_RDWR);
4886 if (bpffs_fd < 0) {
4887 err = -errno;
4888 __pr(level, "object '%s': failed (%d) to open BPF FS mount at '%s'%s\n",
4889 obj->name, err, bpffs_path,
4890 mandatory ? "" : ", skipping optional step...");
4891 return mandatory ? err : 0;
4892 }
4893
4894 token_fd = bpf_token_create(bpffs_fd, 0);
4895 close(bpffs_fd);
4896 if (token_fd < 0) {
4897 if (!mandatory && token_fd == -ENOENT) {
4898 pr_debug("object '%s': BPF FS at '%s' doesn't have BPF token delegation set up, skipping...\n",
4899 obj->name, bpffs_path);
4900 return 0;
4901 }
4902 __pr(level, "object '%s': failed (%d) to create BPF token from '%s'%s\n",
4903 obj->name, token_fd, bpffs_path,
4904 mandatory ? "" : ", skipping optional step...");
4905 return mandatory ? token_fd : 0;
4906 }
4907
4908 obj->feat_cache = calloc(1, sizeof(*obj->feat_cache));
4909 if (!obj->feat_cache) {
4910 close(token_fd);
4911 return -ENOMEM;
4912 }
4913
4914 obj->token_fd = token_fd;
4915 obj->feat_cache->token_fd = token_fd;
4916
4917 return 0;
4918 }
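/* Caller-side sketch: requesting a mandatory BPF token from a delegated bpffs
 * mount via open options (the path is hypothetical; the option is assumed to
 * be bpf_token_path):
 *
 *   LIBBPF_OPTS(bpf_object_open_opts, opts,
 *           .bpf_token_path = "/sys/fs/bpf/delegated",
 *   );
 *
 *   obj = bpf_object__open_file("prog.bpf.o", &opts);
 */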
4919
4920 static int
4921 bpf_object__probe_loading(struct bpf_object *obj)
4922 {
4923 char *cp, errmsg[STRERR_BUFSIZE];
4924 struct bpf_insn insns[] = {
4925 BPF_MOV64_IMM(BPF_REG_0, 0),
4926 BPF_EXIT_INSN(),
4927 };
4928 int ret, insn_cnt = ARRAY_SIZE(insns);
4929 LIBBPF_OPTS(bpf_prog_load_opts, opts,
4930 .token_fd = obj->token_fd,
4931 .prog_flags = obj->token_fd ? BPF_F_TOKEN_FD : 0,
4932 );
4933
4934 if (obj->gen_loader)
4935 return 0;
4936
4937 ret = bump_rlimit_memlock();
4938 if (ret)
4939 pr_warn("Failed to bump RLIMIT_MEMLOCK (err = %d), you might need to do it explicitly!\n", ret);
4940
4941 /* make sure basic loading works */
4942 ret = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, &opts);
4943 if (ret < 0)
4944 ret = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", insns, insn_cnt, &opts);
4945 if (ret < 0) {
4946 ret = errno;
4947 cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg));
4948 pr_warn("Error in %s():%s(%d). Couldn't load trivial BPF "
4949 "program. Make sure your kernel supports BPF "
4950 "(CONFIG_BPF_SYSCALL=y) and/or that RLIMIT_MEMLOCK is "
4951 "set to big enough value.\n", __func__, cp, ret);
4952 return -ret;
4953 }
4954 close(ret);
4955
4956 return 0;
4957 }
4958
4959 bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id)
4960 {
4961 if (obj->gen_loader)
4962 /* To generate loader program assume the latest kernel
4963 * to avoid doing extra prog_load, map_create syscalls.
4964 */
4965 return true;
4966
4967 if (obj->token_fd)
4968 return feat_supported(obj->feat_cache, feat_id);
4969
4970 return feat_supported(NULL, feat_id);
4971 }
4972
4973 static bool map_is_reuse_compat(const struct bpf_map *map, int map_fd)
4974 {
4975 struct bpf_map_info map_info;
4976 char msg[STRERR_BUFSIZE];
4977 __u32 map_info_len = sizeof(map_info);
4978 int err;
4979
4980 memset(&map_info, 0, map_info_len);
4981 err = bpf_map_get_info_by_fd(map_fd, &map_info, &map_info_len);
4982 if (err && errno == EINVAL)
4983 err = bpf_get_map_info_from_fdinfo(map_fd, &map_info);
4984 if (err) {
4985 pr_warn("failed to get map info for map FD %d: %s\n", map_fd,
4986 libbpf_strerror_r(errno, msg, sizeof(msg)));
4987 return false;
4988 }
4989
4990 return (map_info.type == map->def.type &&
4991 map_info.key_size == map->def.key_size &&
4992 map_info.value_size == map->def.value_size &&
4993 map_info.max_entries == map->def.max_entries &&
4994 map_info.map_flags == map->def.map_flags &&
4995 map_info.map_extra == map->map_extra);
4996 }
4997
4998 static int
4999 bpf_object__reuse_map(struct bpf_map *map)
5000 {
5001 char *cp, errmsg[STRERR_BUFSIZE];
5002 int err, pin_fd;
5003
5004 pin_fd = bpf_obj_get(map->pin_path);
5005 if (pin_fd < 0) {
5006 err = -errno;
5007 if (err == -ENOENT) {
5008 pr_debug("found no pinned map to reuse at '%s'\n",
5009 map->pin_path);
5010 return 0;
5011 }
5012
5013 cp = libbpf_strerror_r(-err, errmsg, sizeof(errmsg));
5014 pr_warn("couldn't retrieve pinned map '%s': %s\n",
5015 map->pin_path, cp);
5016 return err;
5017 }
5018
5019 if (!map_is_reuse_compat(map, pin_fd)) {
5020 pr_warn("couldn't reuse pinned map at '%s': parameter mismatch\n",
5021 map->pin_path);
5022 close(pin_fd);
5023 return -EINVAL;
5024 }
5025
5026 err = bpf_map__reuse_fd(map, pin_fd);
5027 close(pin_fd);
5028 if (err)
5029 return err;
5030
5031 map->pinned = true;
5032 pr_debug("reused pinned map at '%s'\n", map->pin_path);
5033
5034 return 0;
5035 }
5036
5037 static int
5038 bpf_object__populate_internal_map(struct bpf_object *obj, struct bpf_map *map)
5039 {
5040 enum libbpf_map_type map_type = map->libbpf_type;
5041 char *cp, errmsg[STRERR_BUFSIZE];
5042 int err, zero = 0;
5043
5044 if (obj->gen_loader) {
5045 bpf_gen__map_update_elem(obj->gen_loader, map - obj->maps,
5046 map->mmaped, map->def.value_size);
5047 if (map_type == LIBBPF_MAP_RODATA || map_type == LIBBPF_MAP_KCONFIG)
5048 bpf_gen__map_freeze(obj->gen_loader, map - obj->maps);
5049 return 0;
5050 }
5051
5052 err = bpf_map_update_elem(map->fd, &zero, map->mmaped, 0);
5053 if (err) {
5054 err = -errno;
5055 cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
5056 pr_warn("Error setting initial map(%s) contents: %s\n",
5057 map->name, cp);
5058 return err;
5059 }
5060
5061 /* Freeze .rodata and .kconfig map as read-only from syscall side. */
5062 if (map_type == LIBBPF_MAP_RODATA || map_type == LIBBPF_MAP_KCONFIG) {
5063 err = bpf_map_freeze(map->fd);
5064 if (err) {
5065 err = -errno;
5066 cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
5067 pr_warn("Error freezing map(%s) as read-only: %s\n",
5068 map->name, cp);
5069 return err;
5070 }
5071 }
5072 return 0;
5073 }
5074
5075 static void bpf_map__destroy(struct bpf_map *map);
5076
5077 static bool map_is_created(const struct bpf_map *map)
5078 {
5079 return map->obj->loaded || map->reused;
5080 }
5081
5082 static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, bool is_inner)
5083 {
5084 LIBBPF_OPTS(bpf_map_create_opts, create_attr);
5085 struct bpf_map_def *def = &map->def;
5086 const char *map_name = NULL;
5087 int err = 0, map_fd;
5088
5089 if (kernel_supports(obj, FEAT_PROG_NAME))
5090 map_name = map->name;
5091 create_attr.map_ifindex = map->map_ifindex;
5092 create_attr.map_flags = def->map_flags;
5093 create_attr.numa_node = map->numa_node;
5094 create_attr.map_extra = map->map_extra;
5095 create_attr.token_fd = obj->token_fd;
5096 if (obj->token_fd)
5097 create_attr.map_flags |= BPF_F_TOKEN_FD;
5098
5099 if (bpf_map__is_struct_ops(map)) {
5100 create_attr.btf_vmlinux_value_type_id = map->btf_vmlinux_value_type_id;
5101 if (map->mod_btf_fd >= 0) {
5102 create_attr.value_type_btf_obj_fd = map->mod_btf_fd;
5103 create_attr.map_flags |= BPF_F_VTYPE_BTF_OBJ_FD;
5104 }
5105 }
5106
5107 if (obj->btf && btf__fd(obj->btf) >= 0) {
5108 create_attr.btf_fd = btf__fd(obj->btf);
5109 create_attr.btf_key_type_id = map->btf_key_type_id;
5110 create_attr.btf_value_type_id = map->btf_value_type_id;
5111 }
5112
5113 if (bpf_map_type__is_map_in_map(def->type)) {
5114 if (map->inner_map) {
5115 err = map_set_def_max_entries(map->inner_map);
5116 if (err)
5117 return err;
5118 err = bpf_object__create_map(obj, map->inner_map, true);
5119 if (err) {
5120 pr_warn("map '%s': failed to create inner map: %d\n",
5121 map->name, err);
5122 return err;
5123 }
5124 map->inner_map_fd = map->inner_map->fd;
5125 }
5126 if (map->inner_map_fd >= 0)
5127 create_attr.inner_map_fd = map->inner_map_fd;
5128 }
5129
5130 switch (def->type) {
5131 case BPF_MAP_TYPE_PERF_EVENT_ARRAY:
5132 case BPF_MAP_TYPE_CGROUP_ARRAY:
5133 case BPF_MAP_TYPE_STACK_TRACE:
5134 case BPF_MAP_TYPE_ARRAY_OF_MAPS:
5135 case BPF_MAP_TYPE_HASH_OF_MAPS:
5136 case BPF_MAP_TYPE_DEVMAP:
5137 case BPF_MAP_TYPE_DEVMAP_HASH:
5138 case BPF_MAP_TYPE_CPUMAP:
5139 case BPF_MAP_TYPE_XSKMAP:
5140 case BPF_MAP_TYPE_SOCKMAP:
5141 case BPF_MAP_TYPE_SOCKHASH:
5142 case BPF_MAP_TYPE_QUEUE:
5143 case BPF_MAP_TYPE_STACK:
5144 case BPF_MAP_TYPE_ARENA:
5145 create_attr.btf_fd = 0;
5146 create_attr.btf_key_type_id = 0;
5147 create_attr.btf_value_type_id = 0;
5148 map->btf_key_type_id = 0;
5149 map->btf_value_type_id = 0;
5150 break;
5151 case BPF_MAP_TYPE_STRUCT_OPS:
5152 create_attr.btf_value_type_id = 0;
5153 break;
5154 default:
5155 break;
5156 }
5157
5158 if (obj->gen_loader) {
5159 bpf_gen__map_create(obj->gen_loader, def->type, map_name,
5160 def->key_size, def->value_size, def->max_entries,
5161 &create_attr, is_inner ? -1 : map - obj->maps);
5162 /* We keep pretending we have a valid FD to pass various fd >= 0
5163 * checks by just keeping original placeholder FDs in place.
5164 * See bpf_object__add_map() comment.
5165 * This placeholder fd will not be used with any syscall and
5166 * will be reset to -1 eventually.
5167 */
5168 map_fd = map->fd;
5169 } else {
5170 map_fd = bpf_map_create(def->type, map_name,
5171 def->key_size, def->value_size,
5172 def->max_entries, &create_attr);
5173 }
5174 if (map_fd < 0 && (create_attr.btf_key_type_id || create_attr.btf_value_type_id)) {
5175 char *cp, errmsg[STRERR_BUFSIZE];
5176
5177 err = -errno;
5178 cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
5179 pr_warn("Error in bpf_create_map_xattr(%s):%s(%d). Retrying without BTF.\n",
5180 map->name, cp, err);
5181 create_attr.btf_fd = 0;
5182 create_attr.btf_key_type_id = 0;
5183 create_attr.btf_value_type_id = 0;
5184 map->btf_key_type_id = 0;
5185 map->btf_value_type_id = 0;
5186 map_fd = bpf_map_create(def->type, map_name,
5187 def->key_size, def->value_size,
5188 def->max_entries, &create_attr);
5189 }
5190
5191 if (bpf_map_type__is_map_in_map(def->type) && map->inner_map) {
5192 if (obj->gen_loader)
5193 map->inner_map->fd = -1;
5194 bpf_map__destroy(map->inner_map);
5195 zfree(&map->inner_map);
5196 }
5197
5198 if (map_fd < 0)
5199 return map_fd;
5200
5201 /* obj->gen_loader case, prevent reuse_fd() from closing map_fd */
5202 if (map->fd == map_fd)
5203 return 0;
5204
5205 /* Keep placeholder FD value but now point it to the BPF map object.
5206 * This way everything that relied on this map's FD (e.g., relocated
5207 * ldimm64 instructions) will stay valid and won't need adjustments.
5208 * map->fd stays valid but now points to what map_fd points to.
5209 */
5210 return reuse_fd(map->fd, map_fd);
5211 }
5212
5213 static int init_map_in_map_slots(struct bpf_object *obj, struct bpf_map *map)
5214 {
5215 const struct bpf_map *targ_map;
5216 unsigned int i;
5217 int fd, err = 0;
5218
5219 for (i = 0; i < map->init_slots_sz; i++) {
5220 if (!map->init_slots[i])
5221 continue;
5222
5223 targ_map = map->init_slots[i];
5224 fd = targ_map->fd;
5225
5226 if (obj->gen_loader) {
5227 bpf_gen__populate_outer_map(obj->gen_loader,
5228 map - obj->maps, i,
5229 targ_map - obj->maps);
5230 } else {
5231 err = bpf_map_update_elem(map->fd, &i, &fd, 0);
5232 }
5233 if (err) {
5234 err = -errno;
5235 pr_warn("map '%s': failed to initialize slot [%d] to map '%s' fd=%d: %d\n",
5236 map->name, i, targ_map->name, fd, err);
5237 return err;
5238 }
5239 pr_debug("map '%s': slot [%d] set to map '%s' fd=%d\n",
5240 map->name, i, targ_map->name, fd);
5241 }
5242
5243 zfree(&map->init_slots);
5244 map->init_slots_sz = 0;
5245
5246 return 0;
5247 }
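/* BPF-side sketch of how init_slots are typically produced: a BTF-defined
 * outer map whose slots are statically initialized with inner maps (all names
 * are illustrative):
 *
 *   struct inner {
 *           __uint(type, BPF_MAP_TYPE_ARRAY);
 *           __uint(max_entries, 1);
 *           __type(key, int);
 *           __type(value, int);
 *   } inner_a SEC(".maps");
 *
 *   struct {
 *           __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
 *           __uint(max_entries, 4);
 *           __array(values, struct inner);
 *   } outer SEC(".maps") = {
 *           .values = { [0] = &inner_a },
 *   };
 */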
5248
5249 static int init_prog_array_slots(struct bpf_object *obj, struct bpf_map *map)
5250 {
5251 const struct bpf_program *targ_prog;
5252 unsigned int i;
5253 int fd, err;
5254
5255 if (obj->gen_loader)
5256 return -ENOTSUP;
5257
5258 for (i = 0; i < map->init_slots_sz; i++) {
5259 if (!map->init_slots[i])
5260 continue;
5261
5262 targ_prog = map->init_slots[i];
5263 fd = bpf_program__fd(targ_prog);
5264
5265 err = bpf_map_update_elem(map->fd, &i, &fd, 0);
5266 if (err) {
5267 err = -errno;
5268 pr_warn("map '%s': failed to initialize slot [%d] to prog '%s' fd=%d: %d\n",
5269 map->name, i, targ_prog->name, fd, err);
5270 return err;
5271 }
5272 pr_debug("map '%s': slot [%d] set to prog '%s' fd=%d\n",
5273 map->name, i, targ_prog->name, fd);
5274 }
5275
5276 zfree(&map->init_slots);
5277 map->init_slots_sz = 0;
5278
5279 return 0;
5280 }
5281
5282 static int bpf_object_init_prog_arrays(struct bpf_object *obj)
5283 {
5284 struct bpf_map *map;
5285 int i, err;
5286
5287 for (i = 0; i < obj->nr_maps; i++) {
5288 map = &obj->maps[i];
5289
5290 if (!map->init_slots_sz || map->def.type != BPF_MAP_TYPE_PROG_ARRAY)
5291 continue;
5292
5293 err = init_prog_array_slots(obj, map);
5294 if (err < 0)
5295 return err;
5296 }
5297 return 0;
5298 }
5299
5300 static int map_set_def_max_entries(struct bpf_map *map)
5301 {
5302 if (map->def.type == BPF_MAP_TYPE_PERF_EVENT_ARRAY && !map->def.max_entries) {
5303 int nr_cpus;
5304
5305 nr_cpus = libbpf_num_possible_cpus();
5306 if (nr_cpus < 0) {
5307 pr_warn("map '%s': failed to determine number of system CPUs: %d\n",
5308 map->name, nr_cpus);
5309 return nr_cpus;
5310 }
5311 pr_debug("map '%s': setting size to %d\n", map->name, nr_cpus);
5312 map->def.max_entries = nr_cpus;
5313 }
5314
5315 return 0;
5316 }
5317
5318 static int
5319 bpf_object__create_maps(struct bpf_object *obj)
5320 {
5321 struct bpf_map *map;
5322 char *cp, errmsg[STRERR_BUFSIZE];
5323 unsigned int i, j;
5324 int err;
5325 bool retried;
5326
5327 for (i = 0; i < obj->nr_maps; i++) {
5328 map = &obj->maps[i];
5329
5330 /* To support old kernels, we skip creating global data maps
5331 * (.rodata, .data, .kconfig, etc); later on, during program
5332 * loading, if we detect that at least one of the to-be-loaded
5333 * programs is referencing any global data map, we'll error
5334 * out with program name and relocation index logged.
5335 		 * This approach allows us to accommodate Clang emitting
5336 		 * unnecessary .rodata.str1.1 sections for string literals,
5337 		 * but it also allows CO-RE applications that use
5338 		 * global variables in some BPF programs, but not in others.
5339 * If those global variable-using programs are not loaded at
5340 * runtime due to bpf_program__set_autoload(prog, false),
5341 * bpf_object loading will succeed just fine even on old
5342 * kernels.
5343 */
5344 if (bpf_map__is_internal(map) && !kernel_supports(obj, FEAT_GLOBAL_DATA))
5345 map->autocreate = false;
5346
5347 if (!map->autocreate) {
5348 pr_debug("map '%s': skipped auto-creating...\n", map->name);
5349 continue;
5350 }
5351
5352 err = map_set_def_max_entries(map);
5353 if (err)
5354 goto err_out;
5355
5356 retried = false;
5357 retry:
5358 if (map->pin_path) {
5359 err = bpf_object__reuse_map(map);
5360 if (err) {
5361 pr_warn("map '%s': error reusing pinned map\n",
5362 map->name);
5363 goto err_out;
5364 }
5365 if (retried && map->fd < 0) {
5366 pr_warn("map '%s': cannot find pinned map\n",
5367 map->name);
5368 err = -ENOENT;
5369 goto err_out;
5370 }
5371 }
5372
5373 if (map->reused) {
5374 pr_debug("map '%s': skipping creation (preset fd=%d)\n",
5375 map->name, map->fd);
5376 } else {
5377 err = bpf_object__create_map(obj, map, false);
5378 if (err)
5379 goto err_out;
5380
5381 pr_debug("map '%s': created successfully, fd=%d\n",
5382 map->name, map->fd);
5383
5384 if (bpf_map__is_internal(map)) {
5385 err = bpf_object__populate_internal_map(obj, map);
5386 if (err < 0)
5387 goto err_out;
5388 }
5389 if (map->def.type == BPF_MAP_TYPE_ARENA) {
5390 map->mmaped = mmap((void *)(long)map->map_extra,
5391 bpf_map_mmap_sz(map), PROT_READ | PROT_WRITE,
5392 map->map_extra ? MAP_SHARED | MAP_FIXED : MAP_SHARED,
5393 map->fd, 0);
5394 if (map->mmaped == MAP_FAILED) {
5395 err = -errno;
5396 map->mmaped = NULL;
5397 pr_warn("map '%s': failed to mmap arena: %d\n",
5398 map->name, err);
5399 return err;
5400 }
5401 if (obj->arena_data) {
5402 memcpy(map->mmaped, obj->arena_data, obj->arena_data_sz);
5403 zfree(&obj->arena_data);
5404 }
5405 }
5406 if (map->init_slots_sz && map->def.type != BPF_MAP_TYPE_PROG_ARRAY) {
5407 err = init_map_in_map_slots(obj, map);
5408 if (err < 0)
5409 goto err_out;
5410 }
5411 }
5412
5413 if (map->pin_path && !map->pinned) {
5414 err = bpf_map__pin(map, NULL);
5415 if (err) {
5416 if (!retried && err == -EEXIST) {
5417 retried = true;
5418 goto retry;
5419 }
5420 pr_warn("map '%s': failed to auto-pin at '%s': %d\n",
5421 map->name, map->pin_path, err);
5422 goto err_out;
5423 }
5424 }
5425 }
5426
5427 return 0;
5428
5429 err_out:
5430 cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
5431 pr_warn("map '%s': failed to create: %s(%d)\n", map->name, cp, err);
5432 pr_perm_msg(err);
5433 for (j = 0; j < i; j++)
5434 zclose(obj->maps[j].fd);
5435 return err;
5436 }
5437
5438 static bool bpf_core_is_flavor_sep(const char *s)
5439 {
5440 /* check X___Y name pattern, where X and Y are not underscores */
5441 return s[0] != '_' && /* X */
5442 s[1] == '_' && s[2] == '_' && s[3] == '_' && /* ___ */
5443 s[4] != '_'; /* Y */
5444 }
5445
5446 /* Given 'some_struct_name___with_flavor' return the length of a name prefix
5447 * before last triple underscore. Struct name part after last triple
5448 * underscore is ignored by BPF CO-RE relocation during relocation matching.
5449 */
5450 size_t bpf_core_essential_name_len(const char *name)
5451 {
5452 size_t n = strlen(name);
5453 int i;
5454
5455 for (i = n - 5; i >= 0; i--) {
5456 if (bpf_core_is_flavor_sep(name + i))
5457 return i + 1;
5458 }
5459 return n;
5460 }
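/* Illustrative sketch (editorial addition, not upstream libbpf code): how the
 * ___flavor naming convention interacts with bpf_core_essential_name_len().
 * The type names below are hypothetical.
 *
 *	size_t a = bpf_core_essential_name_len("task_struct___old_kernel");
 *	size_t b = bpf_core_essential_name_len("task_struct");
 *	// a == 11 and b == 11: the "___old_kernel" flavor suffix is ignored,
 *	// so both names match over their essential prefix "task_struct".
 */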
5461
5462 void bpf_core_free_cands(struct bpf_core_cand_list *cands)
5463 {
5464 if (!cands)
5465 return;
5466
5467 free(cands->cands);
5468 free(cands);
5469 }
5470
5471 int bpf_core_add_cands(struct bpf_core_cand *local_cand,
5472 size_t local_essent_len,
5473 const struct btf *targ_btf,
5474 const char *targ_btf_name,
5475 int targ_start_id,
5476 struct bpf_core_cand_list *cands)
5477 {
5478 struct bpf_core_cand *new_cands, *cand;
5479 const struct btf_type *t, *local_t;
5480 const char *targ_name, *local_name;
5481 size_t targ_essent_len;
5482 int n, i;
5483
5484 local_t = btf__type_by_id(local_cand->btf, local_cand->id);
5485 local_name = btf__str_by_offset(local_cand->btf, local_t->name_off);
5486
5487 n = btf__type_cnt(targ_btf);
5488 for (i = targ_start_id; i < n; i++) {
5489 t = btf__type_by_id(targ_btf, i);
5490 if (!btf_kind_core_compat(t, local_t))
5491 continue;
5492
5493 targ_name = btf__name_by_offset(targ_btf, t->name_off);
5494 if (str_is_empty(targ_name))
5495 continue;
5496
5497 targ_essent_len = bpf_core_essential_name_len(targ_name);
5498 if (targ_essent_len != local_essent_len)
5499 continue;
5500
5501 if (strncmp(local_name, targ_name, local_essent_len) != 0)
5502 continue;
5503
5504 pr_debug("CO-RE relocating [%d] %s %s: found target candidate [%d] %s %s in [%s]\n",
5505 local_cand->id, btf_kind_str(local_t),
5506 local_name, i, btf_kind_str(t), targ_name,
5507 targ_btf_name);
5508 new_cands = libbpf_reallocarray(cands->cands, cands->len + 1,
5509 sizeof(*cands->cands));
5510 if (!new_cands)
5511 return -ENOMEM;
5512
5513 cand = &new_cands[cands->len];
5514 cand->btf = targ_btf;
5515 cand->id = i;
5516
5517 cands->cands = new_cands;
5518 cands->len++;
5519 }
5520 return 0;
5521 }
5522
5523 static int load_module_btfs(struct bpf_object *obj)
5524 {
5525 struct bpf_btf_info info;
5526 struct module_btf *mod_btf;
5527 struct btf *btf;
5528 char name[64];
5529 __u32 id = 0, len;
5530 int err, fd;
5531
5532 if (obj->btf_modules_loaded)
5533 return 0;
5534
5535 if (obj->gen_loader)
5536 return 0;
5537
5538 /* don't do this again, even if we find no module BTFs */
5539 obj->btf_modules_loaded = true;
5540
5541 /* kernel too old to support module BTFs */
5542 if (!kernel_supports(obj, FEAT_MODULE_BTF))
5543 return 0;
5544
5545 while (true) {
5546 err = bpf_btf_get_next_id(id, &id);
5547 if (err && errno == ENOENT)
5548 return 0;
5549 if (err && errno == EPERM) {
5550 pr_debug("skipping module BTFs loading, missing privileges\n");
5551 return 0;
5552 }
5553 if (err) {
5554 err = -errno;
5555 pr_warn("failed to iterate BTF objects: %d\n", err);
5556 return err;
5557 }
5558
5559 fd = bpf_btf_get_fd_by_id(id);
5560 if (fd < 0) {
5561 if (errno == ENOENT)
5562 continue; /* expected race: BTF was unloaded */
5563 err = -errno;
5564 pr_warn("failed to get BTF object #%d FD: %d\n", id, err);
5565 return err;
5566 }
5567
5568 len = sizeof(info);
5569 memset(&info, 0, sizeof(info));
5570 info.name = ptr_to_u64(name);
5571 info.name_len = sizeof(name);
5572
5573 err = bpf_btf_get_info_by_fd(fd, &info, &len);
5574 if (err) {
5575 err = -errno;
5576 pr_warn("failed to get BTF object #%d info: %d\n", id, err);
5577 goto err_out;
5578 }
5579
5580 /* ignore non-module BTFs */
5581 if (!info.kernel_btf || strcmp(name, "vmlinux") == 0) {
5582 close(fd);
5583 continue;
5584 }
5585
5586 btf = btf_get_from_fd(fd, obj->btf_vmlinux);
5587 err = libbpf_get_error(btf);
5588 if (err) {
5589 pr_warn("failed to load module [%s]'s BTF object #%d: %d\n",
5590 name, id, err);
5591 goto err_out;
5592 }
5593
5594 err = libbpf_ensure_mem((void **)&obj->btf_modules, &obj->btf_module_cap,
5595 sizeof(*obj->btf_modules), obj->btf_module_cnt + 1);
5596 if (err)
5597 goto err_out;
5598
5599 mod_btf = &obj->btf_modules[obj->btf_module_cnt++];
5600
5601 mod_btf->btf = btf;
5602 mod_btf->id = id;
5603 mod_btf->fd = fd;
5604 mod_btf->name = strdup(name);
5605 if (!mod_btf->name) {
5606 err = -ENOMEM;
5607 goto err_out;
5608 }
5609 continue;
5610
5611 err_out:
5612 close(fd);
5613 return err;
5614 }
5615
5616 return 0;
5617 }
5618
5619 static struct bpf_core_cand_list *
5620 bpf_core_find_cands(struct bpf_object *obj, const struct btf *local_btf, __u32 local_type_id)
5621 {
5622 struct bpf_core_cand local_cand = {};
5623 struct bpf_core_cand_list *cands;
5624 const struct btf *main_btf;
5625 const struct btf_type *local_t;
5626 const char *local_name;
5627 size_t local_essent_len;
5628 int err, i;
5629
5630 local_cand.btf = local_btf;
5631 local_cand.id = local_type_id;
5632 local_t = btf__type_by_id(local_btf, local_type_id);
5633 if (!local_t)
5634 return ERR_PTR(-EINVAL);
5635
5636 local_name = btf__name_by_offset(local_btf, local_t->name_off);
5637 if (str_is_empty(local_name))
5638 return ERR_PTR(-EINVAL);
5639 local_essent_len = bpf_core_essential_name_len(local_name);
5640
5641 cands = calloc(1, sizeof(*cands));
5642 if (!cands)
5643 return ERR_PTR(-ENOMEM);
5644
5645 /* Attempt to find target candidates in vmlinux BTF first */
5646 main_btf = obj->btf_vmlinux_override ?: obj->btf_vmlinux;
5647 err = bpf_core_add_cands(&local_cand, local_essent_len, main_btf, "vmlinux", 1, cands);
5648 if (err)
5649 goto err_out;
5650
5651 	/* if vmlinux BTF has any candidate, don't go for module BTFs */
5652 if (cands->len)
5653 return cands;
5654
5655 /* if vmlinux BTF was overridden, don't attempt to load module BTFs */
5656 if (obj->btf_vmlinux_override)
5657 return cands;
5658
5659 /* now look through module BTFs, trying to still find candidates */
5660 err = load_module_btfs(obj);
5661 if (err)
5662 goto err_out;
5663
5664 for (i = 0; i < obj->btf_module_cnt; i++) {
5665 err = bpf_core_add_cands(&local_cand, local_essent_len,
5666 obj->btf_modules[i].btf,
5667 obj->btf_modules[i].name,
5668 btf__type_cnt(obj->btf_vmlinux),
5669 cands);
5670 if (err)
5671 goto err_out;
5672 }
5673
5674 return cands;
5675 err_out:
5676 bpf_core_free_cands(cands);
5677 return ERR_PTR(err);
5678 }
5679
5680 /* Check local and target types for compatibility. This check is used for
5681  * type-based CO-RE relocations and follows slightly different rules than
5682 * field-based relocations. This function assumes that root types were already
5683 * checked for name match. Beyond that initial root-level name check, names
5684 * are completely ignored. Compatibility rules are as follows:
5685 * - any two STRUCTs/UNIONs/FWDs/ENUMs/INTs are considered compatible, but
5686 * kind should match for local and target types (i.e., STRUCT is not
5687 * compatible with UNION);
5688 * - for ENUMs, the size is ignored;
5689 * - for INT, size and signedness are ignored;
5690 * - for ARRAY, dimensionality is ignored, element types are checked for
5691 * compatibility recursively;
5692 * - CONST/VOLATILE/RESTRICT modifiers are ignored;
5693  * - TYPEDEFs/PTRs are compatible if the types they point to are compatible;
5694 * - FUNC_PROTOs are compatible if they have compatible signature: same
5695 * number of input args and compatible return and argument types.
5696 * These rules are not set in stone and probably will be adjusted as we get
5697 * more experience with using BPF CO-RE relocations.
5698 */
5699 int bpf_core_types_are_compat(const struct btf *local_btf, __u32 local_id,
5700 const struct btf *targ_btf, __u32 targ_id)
5701 {
5702 return __bpf_core_types_are_compat(local_btf, local_id, targ_btf, targ_id, 32);
5703 }
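/* Hedged example (editorial addition): consequences of the rules above for a
 * few hypothetical local vs. target type pairs.
 *
 *	struct s { int x; }   vs  struct s { long x, y; }  -> compatible
 *	                          (both are STRUCTs; members are not examined)
 *	struct s { int x; }   vs  union s { int x; }       -> NOT compatible
 *	                          (STRUCT vs UNION kind mismatch)
 *	const int *           vs  int *                    -> compatible
 *	                          (CONST is skipped, pointee types match)
 */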
5704
5705 int bpf_core_types_match(const struct btf *local_btf, __u32 local_id,
5706 const struct btf *targ_btf, __u32 targ_id)
5707 {
5708 return __bpf_core_types_match(local_btf, local_id, targ_btf, targ_id, false, 32);
5709 }
5710
5711 static size_t bpf_core_hash_fn(const long key, void *ctx)
5712 {
5713 return key;
5714 }
5715
5716 static bool bpf_core_equal_fn(const long k1, const long k2, void *ctx)
5717 {
5718 return k1 == k2;
5719 }
5720
5721 static int record_relo_core(struct bpf_program *prog,
5722 const struct bpf_core_relo *core_relo, int insn_idx)
5723 {
5724 struct reloc_desc *relos, *relo;
5725
5726 relos = libbpf_reallocarray(prog->reloc_desc,
5727 prog->nr_reloc + 1, sizeof(*relos));
5728 if (!relos)
5729 return -ENOMEM;
5730 relo = &relos[prog->nr_reloc];
5731 relo->type = RELO_CORE;
5732 relo->insn_idx = insn_idx;
5733 relo->core_relo = core_relo;
5734 prog->reloc_desc = relos;
5735 prog->nr_reloc++;
5736 return 0;
5737 }
5738
5739 static const struct bpf_core_relo *find_relo_core(struct bpf_program *prog, int insn_idx)
5740 {
5741 struct reloc_desc *relo;
5742 int i;
5743
5744 for (i = 0; i < prog->nr_reloc; i++) {
5745 relo = &prog->reloc_desc[i];
5746 if (relo->type != RELO_CORE || relo->insn_idx != insn_idx)
5747 continue;
5748
5749 return relo->core_relo;
5750 }
5751
5752 return NULL;
5753 }
5754
5755 static int bpf_core_resolve_relo(struct bpf_program *prog,
5756 const struct bpf_core_relo *relo,
5757 int relo_idx,
5758 const struct btf *local_btf,
5759 struct hashmap *cand_cache,
5760 struct bpf_core_relo_res *targ_res)
5761 {
5762 struct bpf_core_spec specs_scratch[3] = {};
5763 struct bpf_core_cand_list *cands = NULL;
5764 const char *prog_name = prog->name;
5765 const struct btf_type *local_type;
5766 const char *local_name;
5767 __u32 local_id = relo->type_id;
5768 int err;
5769
5770 local_type = btf__type_by_id(local_btf, local_id);
5771 if (!local_type)
5772 return -EINVAL;
5773
5774 local_name = btf__name_by_offset(local_btf, local_type->name_off);
5775 if (!local_name)
5776 return -EINVAL;
5777
5778 if (relo->kind != BPF_CORE_TYPE_ID_LOCAL &&
5779 !hashmap__find(cand_cache, local_id, &cands)) {
5780 cands = bpf_core_find_cands(prog->obj, local_btf, local_id);
5781 if (IS_ERR(cands)) {
5782 pr_warn("prog '%s': relo #%d: target candidate search failed for [%d] %s %s: %ld\n",
5783 prog_name, relo_idx, local_id, btf_kind_str(local_type),
5784 local_name, PTR_ERR(cands));
5785 return PTR_ERR(cands);
5786 }
5787 err = hashmap__set(cand_cache, local_id, cands, NULL, NULL);
5788 if (err) {
5789 bpf_core_free_cands(cands);
5790 return err;
5791 }
5792 }
5793
5794 return bpf_core_calc_relo_insn(prog_name, relo, relo_idx, local_btf, cands, specs_scratch,
5795 targ_res);
5796 }
5797
5798 static int
5799 bpf_object__relocate_core(struct bpf_object *obj, const char *targ_btf_path)
5800 {
5801 const struct btf_ext_info_sec *sec;
5802 struct bpf_core_relo_res targ_res;
5803 const struct bpf_core_relo *rec;
5804 const struct btf_ext_info *seg;
5805 struct hashmap_entry *entry;
5806 struct hashmap *cand_cache = NULL;
5807 struct bpf_program *prog;
5808 struct bpf_insn *insn;
5809 const char *sec_name;
5810 int i, err = 0, insn_idx, sec_idx, sec_num;
5811
5812 if (obj->btf_ext->core_relo_info.len == 0)
5813 return 0;
5814
5815 if (targ_btf_path) {
5816 obj->btf_vmlinux_override = btf__parse(targ_btf_path, NULL);
5817 err = libbpf_get_error(obj->btf_vmlinux_override);
5818 if (err) {
5819 pr_warn("failed to parse target BTF: %d\n", err);
5820 return err;
5821 }
5822 }
5823
5824 cand_cache = hashmap__new(bpf_core_hash_fn, bpf_core_equal_fn, NULL);
5825 if (IS_ERR(cand_cache)) {
5826 err = PTR_ERR(cand_cache);
5827 goto out;
5828 }
5829
5830 seg = &obj->btf_ext->core_relo_info;
5831 sec_num = 0;
5832 for_each_btf_ext_sec(seg, sec) {
5833 sec_idx = seg->sec_idxs[sec_num];
5834 sec_num++;
5835
5836 sec_name = btf__name_by_offset(obj->btf, sec->sec_name_off);
5837 if (str_is_empty(sec_name)) {
5838 err = -EINVAL;
5839 goto out;
5840 }
5841
5842 pr_debug("sec '%s': found %d CO-RE relocations\n", sec_name, sec->num_info);
5843
5844 for_each_btf_ext_rec(seg, sec, i, rec) {
5845 if (rec->insn_off % BPF_INSN_SZ)
5846 return -EINVAL;
5847 insn_idx = rec->insn_off / BPF_INSN_SZ;
5848 prog = find_prog_by_sec_insn(obj, sec_idx, insn_idx);
5849 if (!prog) {
5850 			/* When a __weak subprog is "overridden" by another instance
5851 			 * of the subprog from a different object file, the linker still
5852 			 * appends all the .BTF.ext info that used to belong to that
5853 			 * eliminated subprogram.
5854 			 * This is similar to what the x86-64 linker does for relocations.
5855 			 * So ignore such relocations, just like we ignore
5856 			 * subprog instructions when discovering subprograms.
5857 */
5858 pr_debug("sec '%s': skipping CO-RE relocation #%d for insn #%d belonging to eliminated weak subprogram\n",
5859 sec_name, i, insn_idx);
5860 continue;
5861 }
5862 /* no need to apply CO-RE relocation if the program is
5863 * not going to be loaded
5864 */
5865 if (!prog->autoload)
5866 continue;
5867
5868 /* adjust insn_idx from section frame of reference to the local
5869 * program's frame of reference; (sub-)program code is not yet
5870 * relocated, so it's enough to just subtract in-section offset
5871 */
5872 insn_idx = insn_idx - prog->sec_insn_off;
5873 if (insn_idx >= prog->insns_cnt)
5874 return -EINVAL;
5875 insn = &prog->insns[insn_idx];
5876
5877 err = record_relo_core(prog, rec, insn_idx);
5878 if (err) {
5879 pr_warn("prog '%s': relo #%d: failed to record relocation: %d\n",
5880 prog->name, i, err);
5881 goto out;
5882 }
5883
5884 if (prog->obj->gen_loader)
5885 continue;
5886
5887 err = bpf_core_resolve_relo(prog, rec, i, obj->btf, cand_cache, &targ_res);
5888 if (err) {
5889 pr_warn("prog '%s': relo #%d: failed to relocate: %d\n",
5890 prog->name, i, err);
5891 goto out;
5892 }
5893
5894 err = bpf_core_patch_insn(prog->name, insn, insn_idx, rec, i, &targ_res);
5895 if (err) {
5896 pr_warn("prog '%s': relo #%d: failed to patch insn #%u: %d\n",
5897 prog->name, i, insn_idx, err);
5898 goto out;
5899 }
5900 }
5901 }
5902
5903 out:
5904 /* obj->btf_vmlinux and module BTFs are freed after object load */
5905 btf__free(obj->btf_vmlinux_override);
5906 obj->btf_vmlinux_override = NULL;
5907
5908 if (!IS_ERR_OR_NULL(cand_cache)) {
5909 hashmap__for_each_entry(cand_cache, entry, i) {
5910 bpf_core_free_cands(entry->pvalue);
5911 }
5912 hashmap__free(cand_cache);
5913 }
5914 return err;
5915 }
5916
5917 /* base map load ldimm64 special constant, used also for log fixup logic */
5918 #define POISON_LDIMM64_MAP_BASE 2001000000
5919 #define POISON_LDIMM64_MAP_PFX "200100"
5920
5921 static void poison_map_ldimm64(struct bpf_program *prog, int relo_idx,
5922 int insn_idx, struct bpf_insn *insn,
5923 int map_idx, const struct bpf_map *map)
5924 {
5925 int i;
5926
5927 pr_debug("prog '%s': relo #%d: poisoning insn #%d that loads map #%d '%s'\n",
5928 prog->name, relo_idx, insn_idx, map_idx, map->name);
5929
5930 /* we turn single ldimm64 into two identical invalid calls */
5931 for (i = 0; i < 2; i++) {
5932 insn->code = BPF_JMP | BPF_CALL;
5933 insn->dst_reg = 0;
5934 insn->src_reg = 0;
5935 insn->off = 0;
5936 		/* if this instruction is reachable (not dead code),
5937 		 * the verifier will complain with something like:
5938 		 * invalid func unknown#2001000123
5939 * where lower 123 is map index into obj->maps[] array
5940 */
5941 insn->imm = POISON_LDIMM64_MAP_BASE + map_idx;
5942
5943 insn++;
5944 }
5945 }
5946
5947 /* unresolved kfunc call special constant, used also for log fixup logic */
5948 #define POISON_CALL_KFUNC_BASE 2002000000
5949 #define POISON_CALL_KFUNC_PFX "2002"
5950
5951 static void poison_kfunc_call(struct bpf_program *prog, int relo_idx,
5952 int insn_idx, struct bpf_insn *insn,
5953 int ext_idx, const struct extern_desc *ext)
5954 {
5955 pr_debug("prog '%s': relo #%d: poisoning insn #%d that calls kfunc '%s'\n",
5956 prog->name, relo_idx, insn_idx, ext->name);
5957
5958 /* we turn kfunc call into invalid helper call with identifiable constant */
5959 insn->code = BPF_JMP | BPF_CALL;
5960 insn->dst_reg = 0;
5961 insn->src_reg = 0;
5962 insn->off = 0;
5963 	/* if this instruction is reachable (not dead code),
5964 	 * the verifier will complain with something like:
5965 	 * invalid func unknown#2002000123
5966 * where lower 123 is extern index into obj->externs[] array
5967 */
5968 insn->imm = POISON_CALL_KFUNC_BASE + ext_idx;
5969 }
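/* Hedged example (editorial addition): what a poisoned instruction looks like
 * in the verifier log and how to decode it. The indices are hypothetical.
 *
 *	invalid func unknown#2001000007   -> POISON_LDIMM64_MAP_BASE + 7:
 *	                                     an ldimm64 against obj->maps[7],
 *	                                     which was not auto-created
 *	invalid func unknown#2002000003   -> POISON_CALL_KFUNC_BASE + 3:
 *	                                     an unresolved kfunc call against
 *	                                     obj->externs[3]
 *
 * libbpf's log fixup logic keys off the "200100" and "2002" prefixes defined
 * above to rewrite such messages into human-readable errors.
 */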
5970
5971 /* Relocate data references within program code:
5972 * - map references;
5973 * - global variable references;
5974 * - extern references.
5975 */
5976 static int
5977 bpf_object__relocate_data(struct bpf_object *obj, struct bpf_program *prog)
5978 {
5979 int i;
5980
5981 for (i = 0; i < prog->nr_reloc; i++) {
5982 struct reloc_desc *relo = &prog->reloc_desc[i];
5983 struct bpf_insn *insn = &prog->insns[relo->insn_idx];
5984 const struct bpf_map *map;
5985 struct extern_desc *ext;
5986
5987 switch (relo->type) {
5988 case RELO_LD64:
5989 map = &obj->maps[relo->map_idx];
5990 if (obj->gen_loader) {
5991 insn[0].src_reg = BPF_PSEUDO_MAP_IDX;
5992 insn[0].imm = relo->map_idx;
5993 } else if (map->autocreate) {
5994 insn[0].src_reg = BPF_PSEUDO_MAP_FD;
5995 insn[0].imm = map->fd;
5996 } else {
5997 poison_map_ldimm64(prog, i, relo->insn_idx, insn,
5998 relo->map_idx, map);
5999 }
6000 break;
6001 case RELO_DATA:
6002 map = &obj->maps[relo->map_idx];
6003 insn[1].imm = insn[0].imm + relo->sym_off;
6004 if (obj->gen_loader) {
6005 insn[0].src_reg = BPF_PSEUDO_MAP_IDX_VALUE;
6006 insn[0].imm = relo->map_idx;
6007 } else if (map->autocreate) {
6008 insn[0].src_reg = BPF_PSEUDO_MAP_VALUE;
6009 insn[0].imm = map->fd;
6010 } else {
6011 poison_map_ldimm64(prog, i, relo->insn_idx, insn,
6012 relo->map_idx, map);
6013 }
6014 break;
6015 case RELO_EXTERN_LD64:
6016 ext = &obj->externs[relo->ext_idx];
6017 if (ext->type == EXT_KCFG) {
6018 if (obj->gen_loader) {
6019 insn[0].src_reg = BPF_PSEUDO_MAP_IDX_VALUE;
6020 insn[0].imm = obj->kconfig_map_idx;
6021 } else {
6022 insn[0].src_reg = BPF_PSEUDO_MAP_VALUE;
6023 insn[0].imm = obj->maps[obj->kconfig_map_idx].fd;
6024 }
6025 insn[1].imm = ext->kcfg.data_off;
6026 } else /* EXT_KSYM */ {
6027 if (ext->ksym.type_id && ext->is_set) { /* typed ksyms */
6028 insn[0].src_reg = BPF_PSEUDO_BTF_ID;
6029 insn[0].imm = ext->ksym.kernel_btf_id;
6030 insn[1].imm = ext->ksym.kernel_btf_obj_fd;
6031 } else { /* typeless ksyms or unresolved typed ksyms */
6032 insn[0].imm = (__u32)ext->ksym.addr;
6033 insn[1].imm = ext->ksym.addr >> 32;
6034 }
6035 }
6036 break;
6037 case RELO_EXTERN_CALL:
6038 ext = &obj->externs[relo->ext_idx];
6039 insn[0].src_reg = BPF_PSEUDO_KFUNC_CALL;
6040 if (ext->is_set) {
6041 insn[0].imm = ext->ksym.kernel_btf_id;
6042 insn[0].off = ext->ksym.btf_fd_idx;
6043 } else { /* unresolved weak kfunc call */
6044 poison_kfunc_call(prog, i, relo->insn_idx, insn,
6045 relo->ext_idx, ext);
6046 }
6047 break;
6048 case RELO_SUBPROG_ADDR:
6049 if (insn[0].src_reg != BPF_PSEUDO_FUNC) {
6050 pr_warn("prog '%s': relo #%d: bad insn\n",
6051 prog->name, i);
6052 return -EINVAL;
6053 }
6054 /* handled already */
6055 break;
6056 case RELO_CALL:
6057 /* handled already */
6058 break;
6059 case RELO_CORE:
6060 /* will be handled by bpf_program_record_relos() */
6061 break;
6062 default:
6063 pr_warn("prog '%s': relo #%d: bad relo type %d\n",
6064 prog->name, i, relo->type);
6065 return -EINVAL;
6066 }
6067 }
6068
6069 return 0;
6070 }
6071
6072 static int adjust_prog_btf_ext_info(const struct bpf_object *obj,
6073 const struct bpf_program *prog,
6074 const struct btf_ext_info *ext_info,
6075 void **prog_info, __u32 *prog_rec_cnt,
6076 __u32 *prog_rec_sz)
6077 {
6078 void *copy_start = NULL, *copy_end = NULL;
6079 void *rec, *rec_end, *new_prog_info;
6080 const struct btf_ext_info_sec *sec;
6081 size_t old_sz, new_sz;
6082 int i, sec_num, sec_idx, off_adj;
6083
6084 sec_num = 0;
6085 for_each_btf_ext_sec(ext_info, sec) {
6086 sec_idx = ext_info->sec_idxs[sec_num];
6087 sec_num++;
6088 if (prog->sec_idx != sec_idx)
6089 continue;
6090
6091 for_each_btf_ext_rec(ext_info, sec, i, rec) {
6092 __u32 insn_off = *(__u32 *)rec / BPF_INSN_SZ;
6093
6094 if (insn_off < prog->sec_insn_off)
6095 continue;
6096 if (insn_off >= prog->sec_insn_off + prog->sec_insn_cnt)
6097 break;
6098
6099 if (!copy_start)
6100 copy_start = rec;
6101 copy_end = rec + ext_info->rec_size;
6102 }
6103
6104 if (!copy_start)
6105 return -ENOENT;
6106
6107 /* append func/line info of a given (sub-)program to the main
6108 * program func/line info
6109 */
6110 old_sz = (size_t)(*prog_rec_cnt) * ext_info->rec_size;
6111 new_sz = old_sz + (copy_end - copy_start);
6112 new_prog_info = realloc(*prog_info, new_sz);
6113 if (!new_prog_info)
6114 return -ENOMEM;
6115 *prog_info = new_prog_info;
6116 *prog_rec_cnt = new_sz / ext_info->rec_size;
6117 memcpy(new_prog_info + old_sz, copy_start, copy_end - copy_start);
6118
6119 /* Kernel instruction offsets are in units of 8-byte
6120 * instructions, while .BTF.ext instruction offsets generated
6121 * by Clang are in units of bytes. So convert Clang offsets
6122 * into kernel offsets and adjust offset according to program
6123 * relocated position.
6124 */
6125 off_adj = prog->sub_insn_off - prog->sec_insn_off;
6126 rec = new_prog_info + old_sz;
6127 rec_end = new_prog_info + new_sz;
6128 for (; rec < rec_end; rec += ext_info->rec_size) {
6129 __u32 *insn_off = rec;
6130
6131 *insn_off = *insn_off / BPF_INSN_SZ + off_adj;
6132 }
6133 *prog_rec_sz = ext_info->rec_size;
6134 return 0;
6135 }
6136
6137 return -ENOENT;
6138 }
6139
6140 static int
6141 reloc_prog_func_and_line_info(const struct bpf_object *obj,
6142 struct bpf_program *main_prog,
6143 const struct bpf_program *prog)
6144 {
6145 int err;
6146
6147 /* no .BTF.ext relocation if .BTF.ext is missing or kernel doesn't
6148 * support func/line info
6149 */
6150 if (!obj->btf_ext || !kernel_supports(obj, FEAT_BTF_FUNC))
6151 return 0;
6152
6153 /* only attempt func info relocation if main program's func_info
6154 * relocation was successful
6155 */
6156 if (main_prog != prog && !main_prog->func_info)
6157 goto line_info;
6158
6159 err = adjust_prog_btf_ext_info(obj, prog, &obj->btf_ext->func_info,
6160 &main_prog->func_info,
6161 &main_prog->func_info_cnt,
6162 &main_prog->func_info_rec_size);
6163 if (err) {
6164 if (err != -ENOENT) {
6165 pr_warn("prog '%s': error relocating .BTF.ext function info: %d\n",
6166 prog->name, err);
6167 return err;
6168 }
6169 if (main_prog->func_info) {
6170 /*
6171 			 * Some info has already been found, but there was a problem
6172 			 * with the last btf_ext reloc. We must error out.
6173 */
6174 pr_warn("prog '%s': missing .BTF.ext function info.\n", prog->name);
6175 return err;
6176 }
6177 		/* We had a problem loading the very first info. Ignore the rest. */
6178 pr_warn("prog '%s': missing .BTF.ext function info for the main program, skipping all of .BTF.ext func info.\n",
6179 prog->name);
6180 }
6181
6182 line_info:
6183 /* don't relocate line info if main program's relocation failed */
6184 if (main_prog != prog && !main_prog->line_info)
6185 return 0;
6186
6187 err = adjust_prog_btf_ext_info(obj, prog, &obj->btf_ext->line_info,
6188 &main_prog->line_info,
6189 &main_prog->line_info_cnt,
6190 &main_prog->line_info_rec_size);
6191 if (err) {
6192 if (err != -ENOENT) {
6193 pr_warn("prog '%s': error relocating .BTF.ext line info: %d\n",
6194 prog->name, err);
6195 return err;
6196 }
6197 if (main_prog->line_info) {
6198 /*
6199 			 * Some info has already been found, but there was a problem
6200 			 * with the last btf_ext reloc. We must error out.
6201 */
6202 pr_warn("prog '%s': missing .BTF.ext line info.\n", prog->name);
6203 return err;
6204 }
6205 		/* We had a problem loading the very first info. Ignore the rest. */
6206 pr_warn("prog '%s': missing .BTF.ext line info for the main program, skipping all of .BTF.ext line info.\n",
6207 prog->name);
6208 }
6209 return 0;
6210 }
6211
6212 static int cmp_relo_by_insn_idx(const void *key, const void *elem)
6213 {
6214 size_t insn_idx = *(const size_t *)key;
6215 const struct reloc_desc *relo = elem;
6216
6217 if (insn_idx == relo->insn_idx)
6218 return 0;
6219 return insn_idx < relo->insn_idx ? -1 : 1;
6220 }
6221
6222 static struct reloc_desc *find_prog_insn_relo(const struct bpf_program *prog, size_t insn_idx)
6223 {
6224 if (!prog->nr_reloc)
6225 return NULL;
6226 return bsearch(&insn_idx, prog->reloc_desc, prog->nr_reloc,
6227 sizeof(*prog->reloc_desc), cmp_relo_by_insn_idx);
6228 }
6229
6230 static int append_subprog_relos(struct bpf_program *main_prog, struct bpf_program *subprog)
6231 {
6232 int new_cnt = main_prog->nr_reloc + subprog->nr_reloc;
6233 struct reloc_desc *relos;
6234 int i;
6235
6236 if (main_prog == subprog)
6237 return 0;
6238 relos = libbpf_reallocarray(main_prog->reloc_desc, new_cnt, sizeof(*relos));
6239 /* if new count is zero, reallocarray can return a valid NULL result;
6240 * in this case the previous pointer will be freed, so we *have to*
6241 * reassign old pointer to the new value (even if it's NULL)
6242 */
6243 if (!relos && new_cnt)
6244 return -ENOMEM;
6245 if (subprog->nr_reloc)
6246 memcpy(relos + main_prog->nr_reloc, subprog->reloc_desc,
6247 sizeof(*relos) * subprog->nr_reloc);
6248
6249 for (i = main_prog->nr_reloc; i < new_cnt; i++)
6250 relos[i].insn_idx += subprog->sub_insn_off;
6251 /* After insn_idx adjustment the 'relos' array is still sorted
6252 * by insn_idx and doesn't break bsearch.
6253 */
6254 main_prog->reloc_desc = relos;
6255 main_prog->nr_reloc = new_cnt;
6256 return 0;
6257 }
6258
6259 static int
6260 bpf_object__append_subprog_code(struct bpf_object *obj, struct bpf_program *main_prog,
6261 struct bpf_program *subprog)
6262 {
6263 struct bpf_insn *insns;
6264 size_t new_cnt;
6265 int err;
6266
6267 subprog->sub_insn_off = main_prog->insns_cnt;
6268
6269 new_cnt = main_prog->insns_cnt + subprog->insns_cnt;
6270 insns = libbpf_reallocarray(main_prog->insns, new_cnt, sizeof(*insns));
6271 if (!insns) {
6272 pr_warn("prog '%s': failed to realloc prog code\n", main_prog->name);
6273 return -ENOMEM;
6274 }
6275 main_prog->insns = insns;
6276 main_prog->insns_cnt = new_cnt;
6277
6278 memcpy(main_prog->insns + subprog->sub_insn_off, subprog->insns,
6279 subprog->insns_cnt * sizeof(*insns));
6280
6281 pr_debug("prog '%s': added %zu insns from sub-prog '%s'\n",
6282 main_prog->name, subprog->insns_cnt, subprog->name);
6283
6284 /* The subprog insns are now appended. Append its relos too. */
6285 err = append_subprog_relos(main_prog, subprog);
6286 if (err)
6287 return err;
6288 return 0;
6289 }
6290
6291 static int
6292 bpf_object__reloc_code(struct bpf_object *obj, struct bpf_program *main_prog,
6293 struct bpf_program *prog)
6294 {
6295 size_t sub_insn_idx, insn_idx;
6296 struct bpf_program *subprog;
6297 struct reloc_desc *relo;
6298 struct bpf_insn *insn;
6299 int err;
6300
6301 err = reloc_prog_func_and_line_info(obj, main_prog, prog);
6302 if (err)
6303 return err;
6304
6305 for (insn_idx = 0; insn_idx < prog->sec_insn_cnt; insn_idx++) {
6306 insn = &main_prog->insns[prog->sub_insn_off + insn_idx];
6307 if (!insn_is_subprog_call(insn) && !insn_is_pseudo_func(insn))
6308 continue;
6309
6310 relo = find_prog_insn_relo(prog, insn_idx);
6311 if (relo && relo->type == RELO_EXTERN_CALL)
6312 /* kfunc relocations will be handled later
6313 * in bpf_object__relocate_data()
6314 */
6315 continue;
6316 if (relo && relo->type != RELO_CALL && relo->type != RELO_SUBPROG_ADDR) {
6317 pr_warn("prog '%s': unexpected relo for insn #%zu, type %d\n",
6318 prog->name, insn_idx, relo->type);
6319 return -LIBBPF_ERRNO__RELOC;
6320 }
6321 if (relo) {
6322 /* sub-program instruction index is a combination of
6323 * an offset of a symbol pointed to by relocation and
6324 * call instruction's imm field; for global functions,
6325 * call always has imm = -1, but for static functions
6326 * relocation is against STT_SECTION and insn->imm
6327 * points to a start of a static function
6328 *
6329 * for subprog addr relocation, the relo->sym_off + insn->imm is
6330 * the byte offset in the corresponding section.
6331 */
6332 if (relo->type == RELO_CALL)
6333 sub_insn_idx = relo->sym_off / BPF_INSN_SZ + insn->imm + 1;
6334 else
6335 sub_insn_idx = (relo->sym_off + insn->imm) / BPF_INSN_SZ;
6336 } else if (insn_is_pseudo_func(insn)) {
6337 /*
6338 * RELO_SUBPROG_ADDR relo is always emitted even if both
6339 * functions are in the same section, so it shouldn't reach here.
6340 */
6341 pr_warn("prog '%s': missing subprog addr relo for insn #%zu\n",
6342 prog->name, insn_idx);
6343 return -LIBBPF_ERRNO__RELOC;
6344 } else {
6345 /* if subprogram call is to a static function within
6346 * the same ELF section, there won't be any relocation
6347 * emitted, but it also means there is no additional
6348 * offset necessary, insns->imm is relative to
6349 * instruction's original position within the section
6350 */
6351 sub_insn_idx = prog->sec_insn_off + insn_idx + insn->imm + 1;
6352 }
6353
6354 /* we enforce that sub-programs should be in .text section */
6355 subprog = find_prog_by_sec_insn(obj, obj->efile.text_shndx, sub_insn_idx);
6356 if (!subprog) {
6357 pr_warn("prog '%s': no .text section found yet sub-program call exists\n",
6358 prog->name);
6359 return -LIBBPF_ERRNO__RELOC;
6360 }
6361
6362 /* if it's the first call instruction calling into this
6363 * subprogram (meaning this subprog hasn't been processed
6364 * yet) within the context of current main program:
6365 		 *   - append it at the end of main program's instructions block;
6366 		 *   - process it recursively, while current program is put on hold;
6367 		 *   - if that subprogram calls some other not yet processed
6368 		 *   subprogram, same thing will happen recursively until
6369 		 *   there are no more unprocessed subprograms left to append
6370 * and relocate.
6371 */
6372 if (subprog->sub_insn_off == 0) {
6373 err = bpf_object__append_subprog_code(obj, main_prog, subprog);
6374 if (err)
6375 return err;
6376 err = bpf_object__reloc_code(obj, main_prog, subprog);
6377 if (err)
6378 return err;
6379 }
6380
6381 /* main_prog->insns memory could have been re-allocated, so
6382 * calculate pointer again
6383 */
6384 insn = &main_prog->insns[prog->sub_insn_off + insn_idx];
6385 /* calculate correct instruction position within current main
6386 * prog; each main prog can have a different set of
6387 * subprograms appended (potentially in different order as
6388 * well), so position of any subprog can be different for
6389 * different main programs
6390 */
6391 insn->imm = subprog->sub_insn_off - (prog->sub_insn_off + insn_idx) - 1;
6392
6393 pr_debug("prog '%s': insn #%zu relocated, imm %d points to subprog '%s' (now at %zu offset)\n",
6394 prog->name, insn_idx, insn->imm, subprog->name, subprog->sub_insn_off);
6395 }
6396
6397 return 0;
6398 }
6399
6400 /*
6401 * Relocate sub-program calls.
6402 *
6403  * The algorithm operates as follows. Each entry-point BPF program (referred to
6404  * as a main prog) is processed separately. Each subprog (a non-entry function
6405  * that can be called from either entry progs or other subprogs) gets its
6406  * sub_insn_off reset to zero. This serves as an indicator that this subprogram
6407  * hasn't yet been appended and relocated within the current main prog. Once it is
6408  * relocated, sub_insn_off will point at the position within the current main prog
6409  * where the given subprog was appended. This will further be used to relocate all
6410 * the call instructions jumping into this subprog.
6411 *
6412 * We start with main program and process all call instructions. If the call
6413 * is into a subprog that hasn't been processed (i.e., subprog->sub_insn_off
6414 * is zero), subprog instructions are appended at the end of main program's
6415 * instruction array. Then main program is "put on hold" while we recursively
6416 * process newly appended subprogram. If that subprogram calls into another
6417 * subprogram that hasn't been appended, new subprogram is appended again to
6418 * the *main* prog's instructions (subprog's instructions are always left
6419 * untouched, as they need to be in unmodified state for subsequent main progs
6420 * and subprog instructions are always sent only as part of a main prog) and
6421 * the process continues recursively. Once all the subprogs called from a main
6422 * prog or any of its subprogs are appended (and relocated), all their
6423 * positions within finalized instructions array are known, so it's easy to
6424 * rewrite call instructions with correct relative offsets, corresponding to
6425 * desired target subprog.
6426 *
6427  * It's important to realize that some subprogs might not be called from a given
6428  * main prog or any of its called/used subprogs. Those will keep their
6429 * subprog->sub_insn_off as zero at all times and won't be appended to current
6430 * main prog and won't be relocated within the context of current main prog.
6431 * They might still be used from other main progs later.
6432 *
6433 * Visually this process can be shown as below. Suppose we have two main
6434 * programs mainA and mainB and BPF object contains three subprogs: subA,
6435 * subB, and subC. mainA calls only subA, mainB calls only subC, but subA and
6436 * subC both call subB:
6437 *
6438 * +--------+ +-------+
6439 * | v v |
6440 * +--+---+ +--+-+-+ +---+--+
6441 * | subA | | subB | | subC |
6442 * +--+---+ +------+ +---+--+
6443 * ^ ^
6444 * | |
6445 * +---+-------+ +------+----+
6446 * | mainA | | mainB |
6447 * +-----------+ +-----------+
6448 *
6449 * We'll start relocating mainA, will find subA, append it and start
6450 * processing sub A recursively:
6451 *
6452 * +-----------+------+
6453 * | mainA | subA |
6454 * +-----------+------+
6455 *
6456 * At this point we notice that subB is used from subA, so we append it and
6457 * relocate (there are no further subcalls from subB):
6458 *
6459 * +-----------+------+------+
6460 * | mainA | subA | subB |
6461 * +-----------+------+------+
6462 *
6463 * At this point, we relocate subA calls, then go one level up and finish with
6464  * relocating mainA calls. mainA is done.
6465 *
6466  * For mainB, the process is similar but results in a different order. We start with
6467 * mainB and skip subA and subB, as mainB never calls them (at least
6468 * directly), but we see subC is needed, so we append and start processing it:
6469 *
6470 * +-----------+------+
6471 * | mainB | subC |
6472 * +-----------+------+
6473 * Now we see subC needs subB, so we go back to it, append and relocate it:
6474 *
6475 * +-----------+------+------+
6476 * | mainB | subC | subB |
6477 * +-----------+------+------+
6478 *
6479 * At this point we unwind recursion, relocate calls in subC, then in mainB.
6480 */
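/* Hedged worked example (editorial addition) of the relative call fixup that
 * bpf_object__reloc_code() performs above. The instruction counts are hypothetical.
 * Suppose mainA has 20 instructions and subA was appended at sub_insn_off == 20;
 * a call to subA at instruction #5 of mainA (prog->sub_insn_off == 0) gets:
 *
 *	insn->imm = subprog->sub_insn_off - (prog->sub_insn_off + insn_idx) - 1
 *	          = 20 - (0 + 5) - 1 = 14
 *
 * i.e. the call skips 14 instructions past the one following it (5 + 14 + 1 == 20),
 * which is exactly where subA's first instruction now lives within mainA's image.
 */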
6481 static int
6482 bpf_object__relocate_calls(struct bpf_object *obj, struct bpf_program *prog)
6483 {
6484 struct bpf_program *subprog;
6485 int i, err;
6486
6487 /* mark all subprogs as not relocated (yet) within the context of
6488 * current main program
6489 */
6490 for (i = 0; i < obj->nr_programs; i++) {
6491 subprog = &obj->programs[i];
6492 if (!prog_is_subprog(obj, subprog))
6493 continue;
6494
6495 subprog->sub_insn_off = 0;
6496 }
6497
6498 err = bpf_object__reloc_code(obj, prog, prog);
6499 if (err)
6500 return err;
6501
6502 return 0;
6503 }
6504
6505 static void
6506 bpf_object__free_relocs(struct bpf_object *obj)
6507 {
6508 struct bpf_program *prog;
6509 int i;
6510
6511 /* free up relocation descriptors */
6512 for (i = 0; i < obj->nr_programs; i++) {
6513 prog = &obj->programs[i];
6514 zfree(&prog->reloc_desc);
6515 prog->nr_reloc = 0;
6516 }
6517 }
6518
6519 static int cmp_relocs(const void *_a, const void *_b)
6520 {
6521 const struct reloc_desc *a = _a;
6522 const struct reloc_desc *b = _b;
6523
6524 if (a->insn_idx != b->insn_idx)
6525 return a->insn_idx < b->insn_idx ? -1 : 1;
6526
6527 /* no two relocations should have the same insn_idx, but ... */
6528 if (a->type != b->type)
6529 return a->type < b->type ? -1 : 1;
6530
6531 return 0;
6532 }
6533
6534 static void bpf_object__sort_relos(struct bpf_object *obj)
6535 {
6536 int i;
6537
6538 for (i = 0; i < obj->nr_programs; i++) {
6539 struct bpf_program *p = &obj->programs[i];
6540
6541 if (!p->nr_reloc)
6542 continue;
6543
6544 qsort(p->reloc_desc, p->nr_reloc, sizeof(*p->reloc_desc), cmp_relocs);
6545 }
6546 }
6547
6548 static int bpf_prog_assign_exc_cb(struct bpf_object *obj, struct bpf_program *prog)
6549 {
6550 const char *str = "exception_callback:";
6551 size_t pfx_len = strlen(str);
6552 int i, j, n;
6553
6554 if (!obj->btf || !kernel_supports(obj, FEAT_BTF_DECL_TAG))
6555 return 0;
6556
6557 n = btf__type_cnt(obj->btf);
6558 for (i = 1; i < n; i++) {
6559 const char *name;
6560 struct btf_type *t;
6561
6562 t = btf_type_by_id(obj->btf, i);
6563 if (!btf_is_decl_tag(t) || btf_decl_tag(t)->component_idx != -1)
6564 continue;
6565
6566 name = btf__str_by_offset(obj->btf, t->name_off);
6567 if (strncmp(name, str, pfx_len) != 0)
6568 continue;
6569
6570 t = btf_type_by_id(obj->btf, t->type);
6571 if (!btf_is_func(t) || btf_func_linkage(t) != BTF_FUNC_GLOBAL) {
6572 pr_warn("prog '%s': exception_callback:<value> decl tag not applied to the main program\n",
6573 prog->name);
6574 return -EINVAL;
6575 }
6576 if (strcmp(prog->name, btf__str_by_offset(obj->btf, t->name_off)) != 0)
6577 continue;
6578 		/* Multiple callbacks are specified for the same prog;
6579 		 * the verifier will eventually return an error for this
6580 		 * case, hence simply skip appending a subprog.
6581 */
6582 if (prog->exception_cb_idx >= 0) {
6583 prog->exception_cb_idx = -1;
6584 break;
6585 }
6586
6587 name += pfx_len;
6588 if (str_is_empty(name)) {
6589 pr_warn("prog '%s': exception_callback:<value> decl tag contains empty value\n",
6590 prog->name);
6591 return -EINVAL;
6592 }
6593
6594 for (j = 0; j < obj->nr_programs; j++) {
6595 struct bpf_program *subprog = &obj->programs[j];
6596
6597 if (!prog_is_subprog(obj, subprog))
6598 continue;
6599 if (strcmp(name, subprog->name) != 0)
6600 continue;
6601 /* Enforce non-hidden, as from verifier point of
6602 * view it expects global functions, whereas the
6603 * mark_btf_static fixes up linkage as static.
6604 */
6605 if (!subprog->sym_global || subprog->mark_btf_static) {
6606 pr_warn("prog '%s': exception callback %s must be a global non-hidden function\n",
6607 prog->name, subprog->name);
6608 return -EINVAL;
6609 }
6610 /* Let's see if we already saw a static exception callback with the same name */
6611 if (prog->exception_cb_idx >= 0) {
6612 pr_warn("prog '%s': multiple subprogs with same name as exception callback '%s'\n",
6613 prog->name, subprog->name);
6614 return -EINVAL;
6615 }
6616 prog->exception_cb_idx = j;
6617 break;
6618 }
6619
6620 if (prog->exception_cb_idx >= 0)
6621 continue;
6622
6623 pr_warn("prog '%s': cannot find exception callback '%s'\n", prog->name, name);
6624 return -ENOENT;
6625 }
6626
6627 return 0;
6628 }
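/* Hedged example (editorial addition): how a BPF object might designate an
 * exception callback that the loop above resolves. All names and the callback
 * signature are hypothetical; only the "exception_callback:<name>" decl tag
 * format and the global, non-hidden linkage requirement come from the code.
 *
 *	// BPF-side C:
 *	int my_exc_cb(__u64 cookie);	// global (non-static) subprog
 *
 *	SEC("tc")
 *	__attribute__((btf_decl_tag("exception_callback:my_exc_cb")))
 *	int main_prog(struct __sk_buff *skb) { ... }
 */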
6629
6630 static struct {
6631 enum bpf_prog_type prog_type;
6632 const char *ctx_name;
6633 } global_ctx_map[] = {
6634 { BPF_PROG_TYPE_CGROUP_DEVICE, "bpf_cgroup_dev_ctx" },
6635 { BPF_PROG_TYPE_CGROUP_SKB, "__sk_buff" },
6636 { BPF_PROG_TYPE_CGROUP_SOCK, "bpf_sock" },
6637 { BPF_PROG_TYPE_CGROUP_SOCK_ADDR, "bpf_sock_addr" },
6638 { BPF_PROG_TYPE_CGROUP_SOCKOPT, "bpf_sockopt" },
6639 { BPF_PROG_TYPE_CGROUP_SYSCTL, "bpf_sysctl" },
6640 { BPF_PROG_TYPE_FLOW_DISSECTOR, "__sk_buff" },
6641 { BPF_PROG_TYPE_KPROBE, "bpf_user_pt_regs_t" },
6642 { BPF_PROG_TYPE_LWT_IN, "__sk_buff" },
6643 { BPF_PROG_TYPE_LWT_OUT, "__sk_buff" },
6644 { BPF_PROG_TYPE_LWT_SEG6LOCAL, "__sk_buff" },
6645 { BPF_PROG_TYPE_LWT_XMIT, "__sk_buff" },
6646 { BPF_PROG_TYPE_NETFILTER, "bpf_nf_ctx" },
6647 { BPF_PROG_TYPE_PERF_EVENT, "bpf_perf_event_data" },
6648 { BPF_PROG_TYPE_RAW_TRACEPOINT, "bpf_raw_tracepoint_args" },
6649 { BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE, "bpf_raw_tracepoint_args" },
6650 { BPF_PROG_TYPE_SCHED_ACT, "__sk_buff" },
6651 { BPF_PROG_TYPE_SCHED_CLS, "__sk_buff" },
6652 { BPF_PROG_TYPE_SK_LOOKUP, "bpf_sk_lookup" },
6653 { BPF_PROG_TYPE_SK_MSG, "sk_msg_md" },
6654 { BPF_PROG_TYPE_SK_REUSEPORT, "sk_reuseport_md" },
6655 { BPF_PROG_TYPE_SK_SKB, "__sk_buff" },
6656 { BPF_PROG_TYPE_SOCK_OPS, "bpf_sock_ops" },
6657 { BPF_PROG_TYPE_SOCKET_FILTER, "__sk_buff" },
6658 { BPF_PROG_TYPE_XDP, "xdp_md" },
6659 /* all other program types don't have "named" context structs */
6660 };
6661
6662 /* forward declarations for arch-specific underlying types of bpf_user_pt_regs_t typedef,
6663 * for below __builtin_types_compatible_p() checks;
6664 * with this approach we don't need any extra arch-specific #ifdef guards
6665 */
6666 struct pt_regs;
6667 struct user_pt_regs;
6668 struct user_regs_struct;
6669
6670 static bool need_func_arg_type_fixup(const struct btf *btf, const struct bpf_program *prog,
6671 const char *subprog_name, int arg_idx,
6672 int arg_type_id, const char *ctx_name)
6673 {
6674 const struct btf_type *t;
6675 const char *tname;
6676
6677 /* check if existing parameter already matches verifier expectations */
6678 t = skip_mods_and_typedefs(btf, arg_type_id, NULL);
6679 if (!btf_is_ptr(t))
6680 goto out_warn;
6681
6682 /* typedef bpf_user_pt_regs_t is a special PITA case, valid for kprobe
6683 * and perf_event programs, so check this case early on and forget
6684 * about it for subsequent checks
6685 */
6686 while (btf_is_mod(t))
6687 t = btf__type_by_id(btf, t->type);
6688 if (btf_is_typedef(t) &&
6689 (prog->type == BPF_PROG_TYPE_KPROBE || prog->type == BPF_PROG_TYPE_PERF_EVENT)) {
6690 tname = btf__str_by_offset(btf, t->name_off) ?: "<anon>";
6691 if (strcmp(tname, "bpf_user_pt_regs_t") == 0)
6692 return false; /* canonical type for kprobe/perf_event */
6693 }
6694
6695 /* now we can ignore typedefs moving forward */
6696 t = skip_mods_and_typedefs(btf, t->type, NULL);
6697
6698 /* if it's `void *`, definitely fix up BTF info */
6699 if (btf_is_void(t))
6700 return true;
6701
6702 /* if it's already proper canonical type, no need to fix up */
6703 tname = btf__str_by_offset(btf, t->name_off) ?: "<anon>";
6704 if (btf_is_struct(t) && strcmp(tname, ctx_name) == 0)
6705 return false;
6706
6707 /* special cases */
6708 switch (prog->type) {
6709 case BPF_PROG_TYPE_KPROBE:
6710 /* `struct pt_regs *` is expected, but we need to fix up */
6711 if (btf_is_struct(t) && strcmp(tname, "pt_regs") == 0)
6712 return true;
6713 break;
6714 case BPF_PROG_TYPE_PERF_EVENT:
6715 if (__builtin_types_compatible_p(bpf_user_pt_regs_t, struct pt_regs) &&
6716 btf_is_struct(t) && strcmp(tname, "pt_regs") == 0)
6717 return true;
6718 if (__builtin_types_compatible_p(bpf_user_pt_regs_t, struct user_pt_regs) &&
6719 btf_is_struct(t) && strcmp(tname, "user_pt_regs") == 0)
6720 return true;
6721 if (__builtin_types_compatible_p(bpf_user_pt_regs_t, struct user_regs_struct) &&
6722 btf_is_struct(t) && strcmp(tname, "user_regs_struct") == 0)
6723 return true;
6724 break;
6725 case BPF_PROG_TYPE_RAW_TRACEPOINT:
6726 case BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE:
6727 /* allow u64* as ctx */
6728 if (btf_is_int(t) && t->size == 8)
6729 return true;
6730 break;
6731 default:
6732 break;
6733 }
6734
6735 out_warn:
6736 pr_warn("prog '%s': subprog '%s' arg#%d is expected to be of `struct %s *` type\n",
6737 prog->name, subprog_name, arg_idx, ctx_name);
6738 return false;
6739 }
6740
6741 static int clone_func_btf_info(struct btf *btf, int orig_fn_id, struct bpf_program *prog)
6742 {
6743 int fn_id, fn_proto_id, ret_type_id, orig_proto_id;
6744 int i, err, arg_cnt, fn_name_off, linkage;
6745 struct btf_type *fn_t, *fn_proto_t, *t;
6746 struct btf_param *p;
6747
6748 /* caller already validated FUNC -> FUNC_PROTO validity */
6749 fn_t = btf_type_by_id(btf, orig_fn_id);
6750 fn_proto_t = btf_type_by_id(btf, fn_t->type);
6751
6752 /* Note that each btf__add_xxx() operation invalidates
6753 * all btf_type and string pointers, so we need to be
6754 * very careful when cloning BTF types. BTF type
6755 * pointers have to be always refetched. And to avoid
6756 * problems with invalidated string pointers, we
6757 * add empty strings initially, then just fix up
6758 * name_off offsets in place. Offsets are stable for
6759 * existing strings, so that works out.
6760 */
6761 fn_name_off = fn_t->name_off; /* we are about to invalidate fn_t */
6762 linkage = btf_func_linkage(fn_t);
6763 orig_proto_id = fn_t->type; /* original FUNC_PROTO ID */
6764 ret_type_id = fn_proto_t->type; /* fn_proto_t will be invalidated */
6765 arg_cnt = btf_vlen(fn_proto_t);
6766
6767 /* clone FUNC_PROTO and its params */
6768 fn_proto_id = btf__add_func_proto(btf, ret_type_id);
6769 if (fn_proto_id < 0)
6770 return -EINVAL;
6771
6772 for (i = 0; i < arg_cnt; i++) {
6773 int name_off;
6774
6775 /* copy original parameter data */
6776 t = btf_type_by_id(btf, orig_proto_id);
6777 p = &btf_params(t)[i];
6778 name_off = p->name_off;
6779
6780 err = btf__add_func_param(btf, "", p->type);
6781 if (err)
6782 return err;
6783
6784 fn_proto_t = btf_type_by_id(btf, fn_proto_id);
6785 p = &btf_params(fn_proto_t)[i];
6786 p->name_off = name_off; /* use remembered str offset */
6787 }
6788
6789 /* clone FUNC now, btf__add_func() enforces non-empty name, so use
6790 * entry program's name as a placeholder, which we replace immediately
6791 * with original name_off
6792 */
6793 fn_id = btf__add_func(btf, prog->name, linkage, fn_proto_id);
6794 if (fn_id < 0)
6795 return -EINVAL;
6796
6797 fn_t = btf_type_by_id(btf, fn_id);
6798 fn_t->name_off = fn_name_off; /* reuse original string */
6799
6800 return fn_id;
6801 }
6802
6803 /* Check if main program or global subprog's function prototype has `arg:ctx`
6804  * argument tags, and, if necessary, substitute the correct type to match what the
6805  * BPF verifier would expect, taking into account the specific program type. This
6806  * allows supporting the __arg_ctx tag transparently on old kernels that don't yet
6807  * have native support for it in the verifier, making the user's life much
6808 * easier.
6809 */
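/* Hedged example (editorial addition): the BPF-side pattern this fixup targets.
 * Names are hypothetical; __arg_ctx is assumed to be the bpf_helpers.h macro
 * expanding to the "arg:ctx" decl tag handled below.
 *
 *	__noinline int handle_event(void *ctx __arg_ctx)
 *	{
 *		return bpf_get_smp_processor_id();
 *	}
 *
 *	SEC("perf_event")
 *	int main_prog(struct bpf_perf_event_data *ctx)
 *	{
 *		return handle_event(ctx);
 *	}
 *
 * On kernels lacking FEAT_ARG_CTX_TAG, the function below clones handle_event's
 * FUNC/FUNC_PROTO BTF and retypes the tagged argument as
 * `struct bpf_perf_event_data *`, which is what the verifier expects for
 * BPF_PROG_TYPE_PERF_EVENT programs.
 */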
6810 static int bpf_program_fixup_func_info(struct bpf_object *obj, struct bpf_program *prog)
6811 {
6812 const char *ctx_name = NULL, *ctx_tag = "arg:ctx", *fn_name;
6813 struct bpf_func_info_min *func_rec;
6814 struct btf_type *fn_t, *fn_proto_t;
6815 struct btf *btf = obj->btf;
6816 const struct btf_type *t;
6817 struct btf_param *p;
6818 int ptr_id = 0, struct_id, tag_id, orig_fn_id;
6819 int i, n, arg_idx, arg_cnt, err, rec_idx;
6820 int *orig_ids;
6821
6822 /* no .BTF.ext, no problem */
6823 if (!obj->btf_ext || !prog->func_info)
6824 return 0;
6825
6826 /* don't do any fix ups if kernel natively supports __arg_ctx */
6827 if (kernel_supports(obj, FEAT_ARG_CTX_TAG))
6828 return 0;
6829
6830 /* some BPF program types just don't have named context structs, so
6831 * this fallback mechanism doesn't work for them
6832 */
6833 for (i = 0; i < ARRAY_SIZE(global_ctx_map); i++) {
6834 if (global_ctx_map[i].prog_type != prog->type)
6835 continue;
6836 ctx_name = global_ctx_map[i].ctx_name;
6837 break;
6838 }
6839 if (!ctx_name)
6840 return 0;
6841
6842 /* remember original func BTF IDs to detect if we already cloned them */
6843 orig_ids = calloc(prog->func_info_cnt, sizeof(*orig_ids));
6844 if (!orig_ids)
6845 return -ENOMEM;
6846 for (i = 0; i < prog->func_info_cnt; i++) {
6847 func_rec = prog->func_info + prog->func_info_rec_size * i;
6848 orig_ids[i] = func_rec->type_id;
6849 }
6850
6851 /* go through each DECL_TAG with "arg:ctx" and see if it points to one
6852 * of our subprogs; if yes and subprog is global and needs adjustment,
6853 * clone and adjust FUNC -> FUNC_PROTO combo
6854 */
6855 for (i = 1, n = btf__type_cnt(btf); i < n; i++) {
6856 /* only DECL_TAG with "arg:ctx" value are interesting */
6857 t = btf__type_by_id(btf, i);
6858 if (!btf_is_decl_tag(t))
6859 continue;
6860 if (strcmp(btf__str_by_offset(btf, t->name_off), ctx_tag) != 0)
6861 continue;
6862
6863 /* only global funcs need adjustment, if at all */
6864 orig_fn_id = t->type;
6865 fn_t = btf_type_by_id(btf, orig_fn_id);
6866 if (!btf_is_func(fn_t) || btf_func_linkage(fn_t) != BTF_FUNC_GLOBAL)
6867 continue;
6868
6869 /* sanity check FUNC -> FUNC_PROTO chain, just in case */
6870 fn_proto_t = btf_type_by_id(btf, fn_t->type);
6871 if (!fn_proto_t || !btf_is_func_proto(fn_proto_t))
6872 continue;
6873
6874 /* find corresponding func_info record */
6875 func_rec = NULL;
6876 for (rec_idx = 0; rec_idx < prog->func_info_cnt; rec_idx++) {
6877 if (orig_ids[rec_idx] == t->type) {
6878 func_rec = prog->func_info + prog->func_info_rec_size * rec_idx;
6879 break;
6880 }
6881 }
6882 /* current main program doesn't call into this subprog */
6883 if (!func_rec)
6884 continue;
6885
6886 /* some more sanity checking of DECL_TAG */
6887 arg_cnt = btf_vlen(fn_proto_t);
6888 arg_idx = btf_decl_tag(t)->component_idx;
6889 if (arg_idx < 0 || arg_idx >= arg_cnt)
6890 continue;
6891
6892 /* check if we should fix up argument type */
6893 p = &btf_params(fn_proto_t)[arg_idx];
6894 fn_name = btf__str_by_offset(btf, fn_t->name_off) ?: "<anon>";
6895 if (!need_func_arg_type_fixup(btf, prog, fn_name, arg_idx, p->type, ctx_name))
6896 continue;
6897
6898 /* clone fn/fn_proto, unless we already did it for another arg */
6899 if (func_rec->type_id == orig_fn_id) {
6900 int fn_id;
6901
6902 fn_id = clone_func_btf_info(btf, orig_fn_id, prog);
6903 if (fn_id < 0) {
6904 err = fn_id;
6905 goto err_out;
6906 }
6907
6908 /* point func_info record to a cloned FUNC type */
6909 func_rec->type_id = fn_id;
6910 }
6911
6912 /* create PTR -> STRUCT type chain to mark PTR_TO_CTX argument;
6913 * we do it just once per main BPF program, as all global
6914 		 * funcs share the same program type, so we need only one PTR ->
6915 * STRUCT type chain
6916 */
6917 if (ptr_id == 0) {
6918 struct_id = btf__add_struct(btf, ctx_name, 0);
6919 ptr_id = btf__add_ptr(btf, struct_id);
6920 if (ptr_id < 0 || struct_id < 0) {
6921 err = -EINVAL;
6922 goto err_out;
6923 }
6924 }
6925
6926 /* for completeness, clone DECL_TAG and point it to cloned param */
6927 tag_id = btf__add_decl_tag(btf, ctx_tag, func_rec->type_id, arg_idx);
6928 if (tag_id < 0) {
6929 err = -EINVAL;
6930 goto err_out;
6931 }
6932
6933 /* all the BTF manipulations invalidated pointers, refetch them */
6934 fn_t = btf_type_by_id(btf, func_rec->type_id);
6935 fn_proto_t = btf_type_by_id(btf, fn_t->type);
6936
6937 /* fix up type ID pointed to by param */
6938 p = &btf_params(fn_proto_t)[arg_idx];
6939 p->type = ptr_id;
6940 }
6941
6942 free(orig_ids);
6943 return 0;
6944 err_out:
6945 free(orig_ids);
6946 return err;
6947 }
6948
6949 static int bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path)
6950 {
6951 struct bpf_program *prog;
6952 size_t i, j;
6953 int err;
6954
6955 if (obj->btf_ext) {
6956 err = bpf_object__relocate_core(obj, targ_btf_path);
6957 if (err) {
6958 pr_warn("failed to perform CO-RE relocations: %d\n",
6959 err);
6960 return err;
6961 }
6962 bpf_object__sort_relos(obj);
6963 }
6964
6965 	/* Before relocating calls, pre-process relocations and mark
6966 	 * the few ld_imm64 instructions that point to subprogs.
6967 	 * Otherwise bpf_object__reloc_code() would later have to consider
6968 	 * all ld_imm64 insns as relocation candidates. That would
6969 	 * slow down relocation, since the number of find_prog_insn_relo()
6970 	 * calls would increase and most of them would fail to find a relo.
6971 	 */
6972 for (i = 0; i < obj->nr_programs; i++) {
6973 prog = &obj->programs[i];
6974 for (j = 0; j < prog->nr_reloc; j++) {
6975 struct reloc_desc *relo = &prog->reloc_desc[j];
6976 struct bpf_insn *insn = &prog->insns[relo->insn_idx];
6977
6978 /* mark the insn, so it's recognized by insn_is_pseudo_func() */
6979 if (relo->type == RELO_SUBPROG_ADDR)
6980 insn[0].src_reg = BPF_PSEUDO_FUNC;
6981 }
6982 }
6983
6984 /* relocate subprogram calls and append used subprograms to main
6985 * programs; each copy of subprogram code needs to be relocated
6986 * differently for each main program, because its code location might
6987 * have changed.
6988 * Append subprog relos to main programs to allow data relos to be
6989 * processed after text is completely relocated.
6990 */
6991 for (i = 0; i < obj->nr_programs; i++) {
6992 prog = &obj->programs[i];
6993 /* sub-program's sub-calls are relocated within the context of
6994 * its main program only
6995 */
6996 if (prog_is_subprog(obj, prog))
6997 continue;
6998 if (!prog->autoload)
6999 continue;
7000
7001 err = bpf_object__relocate_calls(obj, prog);
7002 if (err) {
7003 pr_warn("prog '%s': failed to relocate calls: %d\n",
7004 prog->name, err);
7005 return err;
7006 }
7007
7008 err = bpf_prog_assign_exc_cb(obj, prog);
7009 if (err)
7010 return err;
7011 /* Now, also append exception callback if it has not been done already. */
7012 if (prog->exception_cb_idx >= 0) {
7013 struct bpf_program *subprog = &obj->programs[prog->exception_cb_idx];
7014
7015 			/* Calling the exception callback directly is disallowed and will
7016 			 * be rejected by the verifier later. If it was already processed,
7017 			 * we can skip this step; otherwise, for all other valid cases, we
7018 			 * have to append the exception callback now.
7019 			 */
7020 if (subprog->sub_insn_off == 0) {
7021 err = bpf_object__append_subprog_code(obj, prog, subprog);
7022 if (err)
7023 return err;
7024 err = bpf_object__reloc_code(obj, prog, subprog);
7025 if (err)
7026 return err;
7027 }
7028 }
7029 }
7030 for (i = 0; i < obj->nr_programs; i++) {
7031 prog = &obj->programs[i];
7032 if (prog_is_subprog(obj, prog))
7033 continue;
7034 if (!prog->autoload)
7035 continue;
7036
7037 /* Process data relos for main programs */
7038 err = bpf_object__relocate_data(obj, prog);
7039 if (err) {
7040 pr_warn("prog '%s': failed to relocate data references: %d\n",
7041 prog->name, err);
7042 return err;
7043 }
7044
7045 /* Fix up .BTF.ext information, if necessary */
7046 err = bpf_program_fixup_func_info(obj, prog);
7047 if (err) {
7048 pr_warn("prog '%s': failed to perform .BTF.ext fix ups: %d\n",
7049 prog->name, err);
7050 return err;
7051 }
7052 }
7053
7054 return 0;
7055 }
7056
7057 static int bpf_object__collect_st_ops_relos(struct bpf_object *obj,
7058 Elf64_Shdr *shdr, Elf_Data *data);
7059
7060 static int bpf_object__collect_map_relos(struct bpf_object *obj,
7061 Elf64_Shdr *shdr, Elf_Data *data)
7062 {
7063 const int bpf_ptr_sz = 8, host_ptr_sz = sizeof(void *);
7064 int i, j, nrels, new_sz;
7065 const struct btf_var_secinfo *vi = NULL;
7066 const struct btf_type *sec, *var, *def;
7067 struct bpf_map *map = NULL, *targ_map = NULL;
7068 struct bpf_program *targ_prog = NULL;
7069 bool is_prog_array, is_map_in_map;
7070 const struct btf_member *member;
7071 const char *name, *mname, *type;
7072 unsigned int moff;
7073 Elf64_Sym *sym;
7074 Elf64_Rel *rel;
7075 void *tmp;
7076
7077 if (!obj->efile.btf_maps_sec_btf_id || !obj->btf)
7078 return -EINVAL;
7079 sec = btf__type_by_id(obj->btf, obj->efile.btf_maps_sec_btf_id);
7080 if (!sec)
7081 return -EINVAL;
7082
7083 nrels = shdr->sh_size / shdr->sh_entsize;
7084 for (i = 0; i < nrels; i++) {
7085 rel = elf_rel_by_idx(data, i);
7086 if (!rel) {
7087 pr_warn(".maps relo #%d: failed to get ELF relo\n", i);
7088 return -LIBBPF_ERRNO__FORMAT;
7089 }
7090
7091 sym = elf_sym_by_idx(obj, ELF64_R_SYM(rel->r_info));
7092 if (!sym) {
7093 pr_warn(".maps relo #%d: symbol %zx not found\n",
7094 i, (size_t)ELF64_R_SYM(rel->r_info));
7095 return -LIBBPF_ERRNO__FORMAT;
7096 }
7097 name = elf_sym_str(obj, sym->st_name) ?: "<?>";
7098
7099 pr_debug(".maps relo #%d: for %zd value %zd rel->r_offset %zu name %d ('%s')\n",
7100 i, (ssize_t)(rel->r_info >> 32), (size_t)sym->st_value,
7101 (size_t)rel->r_offset, sym->st_name, name);
7102
7103 for (j = 0; j < obj->nr_maps; j++) {
7104 map = &obj->maps[j];
7105 if (map->sec_idx != obj->efile.btf_maps_shndx)
7106 continue;
7107
7108 vi = btf_var_secinfos(sec) + map->btf_var_idx;
7109 if (vi->offset <= rel->r_offset &&
7110 rel->r_offset + bpf_ptr_sz <= vi->offset + vi->size)
7111 break;
7112 }
7113 if (j == obj->nr_maps) {
7114 pr_warn(".maps relo #%d: cannot find map '%s' at rel->r_offset %zu\n",
7115 i, name, (size_t)rel->r_offset);
7116 return -EINVAL;
7117 }
7118
7119 is_map_in_map = bpf_map_type__is_map_in_map(map->def.type);
7120 is_prog_array = map->def.type == BPF_MAP_TYPE_PROG_ARRAY;
7121 type = is_map_in_map ? "map" : "prog";
7122 if (is_map_in_map) {
7123 if (sym->st_shndx != obj->efile.btf_maps_shndx) {
7124 pr_warn(".maps relo #%d: '%s' isn't a BTF-defined map\n",
7125 i, name);
7126 return -LIBBPF_ERRNO__RELOC;
7127 }
7128 if (map->def.type == BPF_MAP_TYPE_HASH_OF_MAPS &&
7129 map->def.key_size != sizeof(int)) {
7130 pr_warn(".maps relo #%d: hash-of-maps '%s' should have key size %zu.\n",
7131 i, map->name, sizeof(int));
7132 return -EINVAL;
7133 }
7134 targ_map = bpf_object__find_map_by_name(obj, name);
7135 if (!targ_map) {
7136 pr_warn(".maps relo #%d: '%s' isn't a valid map reference\n",
7137 i, name);
7138 return -ESRCH;
7139 }
7140 } else if (is_prog_array) {
7141 targ_prog = bpf_object__find_program_by_name(obj, name);
7142 if (!targ_prog) {
7143 pr_warn(".maps relo #%d: '%s' isn't a valid program reference\n",
7144 i, name);
7145 return -ESRCH;
7146 }
7147 if (targ_prog->sec_idx != sym->st_shndx ||
7148 targ_prog->sec_insn_off * 8 != sym->st_value ||
7149 prog_is_subprog(obj, targ_prog)) {
7150 pr_warn(".maps relo #%d: '%s' isn't an entry-point program\n",
7151 i, name);
7152 return -LIBBPF_ERRNO__RELOC;
7153 }
7154 } else {
7155 return -EINVAL;
7156 }
7157
7158 var = btf__type_by_id(obj->btf, vi->type);
7159 def = skip_mods_and_typedefs(obj->btf, var->type, NULL);
7160 if (btf_vlen(def) == 0)
7161 return -EINVAL;
7162 member = btf_members(def) + btf_vlen(def) - 1;
7163 mname = btf__name_by_offset(obj->btf, member->name_off);
7164 if (strcmp(mname, "values"))
7165 return -EINVAL;
7166
7167 moff = btf_member_bit_offset(def, btf_vlen(def) - 1) / 8;
7168 if (rel->r_offset - vi->offset < moff)
7169 return -EINVAL;
7170
7171 moff = rel->r_offset - vi->offset - moff;
7172 /* here we use BPF pointer size, which is always 64 bit, as we
7173 * are parsing ELF that was built for BPF target
7174 */
7175 if (moff % bpf_ptr_sz)
7176 return -EINVAL;
7177 moff /= bpf_ptr_sz;
7178 if (moff >= map->init_slots_sz) {
7179 new_sz = moff + 1;
7180 tmp = libbpf_reallocarray(map->init_slots, new_sz, host_ptr_sz);
7181 if (!tmp)
7182 return -ENOMEM;
7183 map->init_slots = tmp;
7184 memset(map->init_slots + map->init_slots_sz, 0,
7185 (new_sz - map->init_slots_sz) * host_ptr_sz);
7186 map->init_slots_sz = new_sz;
7187 }
7188 map->init_slots[moff] = is_map_in_map ? (void *)targ_map : (void *)targ_prog;
7189
7190 pr_debug(".maps relo #%d: map '%s' slot [%d] points to %s '%s'\n",
7191 i, map->name, moff, type, name);
7192 }
7193
7194 return 0;
7195 }
7196
7197 static int bpf_object__collect_relos(struct bpf_object *obj)
7198 {
7199 int i, err;
7200
7201 for (i = 0; i < obj->efile.sec_cnt; i++) {
7202 struct elf_sec_desc *sec_desc = &obj->efile.secs[i];
7203 Elf64_Shdr *shdr;
7204 Elf_Data *data;
7205 int idx;
7206
7207 if (sec_desc->sec_type != SEC_RELO)
7208 continue;
7209
7210 shdr = sec_desc->shdr;
7211 data = sec_desc->data;
7212 idx = shdr->sh_info;
7213
7214 if (shdr->sh_type != SHT_REL || idx < 0 || idx >= obj->efile.sec_cnt) {
7215 pr_warn("internal error at %d\n", __LINE__);
7216 return -LIBBPF_ERRNO__INTERNAL;
7217 }
7218
7219 if (obj->efile.secs[idx].sec_type == SEC_ST_OPS)
7220 err = bpf_object__collect_st_ops_relos(obj, shdr, data);
7221 else if (idx == obj->efile.btf_maps_shndx)
7222 err = bpf_object__collect_map_relos(obj, shdr, data);
7223 else
7224 err = bpf_object__collect_prog_relos(obj, shdr, data);
7225 if (err)
7226 return err;
7227 }
7228
7229 bpf_object__sort_relos(obj);
7230 return 0;
7231 }
7232
7233 static bool insn_is_helper_call(struct bpf_insn *insn, enum bpf_func_id *func_id)
7234 {
7235 if (BPF_CLASS(insn->code) == BPF_JMP &&
7236 BPF_OP(insn->code) == BPF_CALL &&
7237 BPF_SRC(insn->code) == BPF_K &&
7238 insn->src_reg == 0 &&
7239 insn->dst_reg == 0) {
7240 *func_id = insn->imm;
7241 return true;
7242 }
7243 return false;
7244 }
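/* Illustrative sketch: a plain helper call such as bpf_map_lookup_elem()
 * is emitted as a BPF_JMP | BPF_CALL instruction with src_reg and dst_reg
 * both zero and the helper ID in the immediate, e.g.:
 *
 *	struct bpf_insn insn = {
 *		.code = BPF_JMP | BPF_CALL,
 *		.dst_reg = 0,
 *		.src_reg = 0,		(BPF_K source, i.e. not a pseudo call)
 *		.off = 0,
 *		.imm = BPF_FUNC_map_lookup_elem,
 *	};
 *
 * which is exactly the shape insn_is_helper_call() matches; subprog calls
 * (BPF_PSEUDO_CALL) and kfunc calls use a non-zero src_reg and are skipped.
 */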
7245
7246 static int bpf_object__sanitize_prog(struct bpf_object *obj, struct bpf_program *prog)
7247 {
7248 struct bpf_insn *insn = prog->insns;
7249 enum bpf_func_id func_id;
7250 int i;
7251
7252 if (obj->gen_loader)
7253 return 0;
7254
7255 for (i = 0; i < prog->insns_cnt; i++, insn++) {
7256 if (!insn_is_helper_call(insn, &func_id))
7257 continue;
7258
7259 /* on kernels that don't yet support
7260 * bpf_probe_read_{kernel,user}[_str] helpers, fall back
7261 * to bpf_probe_read() which works well for old kernels
7262 */
7263 switch (func_id) {
7264 case BPF_FUNC_probe_read_kernel:
7265 case BPF_FUNC_probe_read_user:
7266 if (!kernel_supports(obj, FEAT_PROBE_READ_KERN))
7267 insn->imm = BPF_FUNC_probe_read;
7268 break;
7269 case BPF_FUNC_probe_read_kernel_str:
7270 case BPF_FUNC_probe_read_user_str:
7271 if (!kernel_supports(obj, FEAT_PROBE_READ_KERN))
7272 insn->imm = BPF_FUNC_probe_read_str;
7273 break;
7274 default:
7275 break;
7276 }
7277 }
7278 return 0;
7279 }
7280
7281 static int libbpf_find_attach_btf_id(struct bpf_program *prog, const char *attach_name,
7282 int *btf_obj_fd, int *btf_type_id);
7283
7284 /* this is called as prog->sec_def->prog_prepare_load_fn for libbpf-supported sec_defs */
7285 static int libbpf_prepare_prog_load(struct bpf_program *prog,
7286 struct bpf_prog_load_opts *opts, long cookie)
7287 {
7288 enum sec_def_flags def = cookie;
7289
7290 /* old kernels might not support specifying expected_attach_type */
7291 if ((def & SEC_EXP_ATTACH_OPT) && !kernel_supports(prog->obj, FEAT_EXP_ATTACH_TYPE))
7292 opts->expected_attach_type = 0;
7293
7294 if (def & SEC_SLEEPABLE)
7295 opts->prog_flags |= BPF_F_SLEEPABLE;
7296
7297 if (prog->type == BPF_PROG_TYPE_XDP && (def & SEC_XDP_FRAGS))
7298 opts->prog_flags |= BPF_F_XDP_HAS_FRAGS;
7299
7300 /* special check for usdt to use uprobe_multi link */
7301 if ((def & SEC_USDT) && kernel_supports(prog->obj, FEAT_UPROBE_MULTI_LINK))
7302 prog->expected_attach_type = BPF_TRACE_UPROBE_MULTI;
7303
7304 if ((def & SEC_ATTACH_BTF) && !prog->attach_btf_id) {
7305 int btf_obj_fd = 0, btf_type_id = 0, err;
7306 const char *attach_name;
7307
7308 attach_name = strchr(prog->sec_name, '/');
7309 if (!attach_name) {
7310 /* if BPF program is annotated with just SEC("fentry")
7311 * (or similar) without declaratively specifying
7312 * target, then it is expected that target will be
7313 * specified with bpf_program__set_attach_target() at
7314 			 * runtime before the BPF object load step. If not, then
7315 			 * there is nothing to load into the kernel, as the BPF
7316 			 * verifier won't be able to validate BPF program
7317 			 * correctness anyway.
7318 */
7319 pr_warn("prog '%s': no BTF-based attach target is specified, use bpf_program__set_attach_target()\n",
7320 prog->name);
7321 return -EINVAL;
7322 }
7323 attach_name++; /* skip over / */
7324
7325 err = libbpf_find_attach_btf_id(prog, attach_name, &btf_obj_fd, &btf_type_id);
7326 if (err)
7327 return err;
7328
7329 /* cache resolved BTF FD and BTF type ID in the prog */
7330 prog->attach_btf_obj_fd = btf_obj_fd;
7331 prog->attach_btf_id = btf_type_id;
7332
7333 		/* by now, though, libbpf's common logic no longer uses
7334 		 * prog->attach_btf_obj_fd/prog->attach_btf_id, because this
7335 		 * callback is called after opts were populated by
7336 		 * libbpf, so this callback has to update opts explicitly here
7337 		 */
7338 opts->attach_btf_obj_fd = btf_obj_fd;
7339 opts->attach_btf_id = btf_type_id;
7340 }
7341 return 0;
7342 }
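/* Illustrative sketch: a declaratively-targeted fentry program would be
 * written in BPF C code as
 *
 *	SEC("fentry/vfs_read")
 *	int BPF_PROG(on_vfs_read, struct file *file)
 *	{
 *		return 0;
 *	}
 *
 * so prog->sec_name is "fentry/vfs_read", the attach_name extracted above
 * is "vfs_read", and it is resolved against vmlinux (or module) BTF to fill
 * opts->attach_btf_obj_fd/attach_btf_id.
 */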
7343
7344 static void fixup_verifier_log(struct bpf_program *prog, char *buf, size_t buf_sz);
7345
7346 static int bpf_object_load_prog(struct bpf_object *obj, struct bpf_program *prog,
7347 struct bpf_insn *insns, int insns_cnt,
7348 const char *license, __u32 kern_version, int *prog_fd)
7349 {
7350 LIBBPF_OPTS(bpf_prog_load_opts, load_attr);
7351 const char *prog_name = NULL;
7352 char *cp, errmsg[STRERR_BUFSIZE];
7353 size_t log_buf_size = 0;
7354 char *log_buf = NULL, *tmp;
7355 bool own_log_buf = true;
7356 __u32 log_level = prog->log_level;
7357 int ret, err;
7358
7359 	/* Be more helpful by rejecting early the programs that can't be
7360 	 * validated, with a more meaningful and actionable error message.
7361 	 */
7362 switch (prog->type) {
7363 case BPF_PROG_TYPE_UNSPEC:
7364 /*
7365 * The program type must be set. Most likely we couldn't find a proper
7366 * section definition at load time, and thus we didn't infer the type.
7367 */
7368 pr_warn("prog '%s': missing BPF prog type, check ELF section name '%s'\n",
7369 prog->name, prog->sec_name);
7370 return -EINVAL;
7371 case BPF_PROG_TYPE_STRUCT_OPS:
7372 if (prog->attach_btf_id == 0) {
7373 pr_warn("prog '%s': SEC(\"struct_ops\") program isn't referenced anywhere, did you forget to use it?\n",
7374 prog->name);
7375 return -EINVAL;
7376 }
7377 break;
7378 default:
7379 break;
7380 }
7381
7382 if (!insns || !insns_cnt)
7383 return -EINVAL;
7384
7385 if (kernel_supports(obj, FEAT_PROG_NAME))
7386 prog_name = prog->name;
7387 load_attr.attach_prog_fd = prog->attach_prog_fd;
7388 load_attr.attach_btf_obj_fd = prog->attach_btf_obj_fd;
7389 load_attr.attach_btf_id = prog->attach_btf_id;
7390 load_attr.kern_version = kern_version;
7391 load_attr.prog_ifindex = prog->prog_ifindex;
7392
7393 /* specify func_info/line_info only if kernel supports them */
7394 if (obj->btf && btf__fd(obj->btf) >= 0 && kernel_supports(obj, FEAT_BTF_FUNC)) {
7395 load_attr.prog_btf_fd = btf__fd(obj->btf);
7396 load_attr.func_info = prog->func_info;
7397 load_attr.func_info_rec_size = prog->func_info_rec_size;
7398 load_attr.func_info_cnt = prog->func_info_cnt;
7399 load_attr.line_info = prog->line_info;
7400 load_attr.line_info_rec_size = prog->line_info_rec_size;
7401 load_attr.line_info_cnt = prog->line_info_cnt;
7402 }
7403 load_attr.log_level = log_level;
7404 load_attr.prog_flags = prog->prog_flags;
7405 load_attr.fd_array = obj->fd_array;
7406
7407 load_attr.token_fd = obj->token_fd;
7408 if (obj->token_fd)
7409 load_attr.prog_flags |= BPF_F_TOKEN_FD;
7410
7411 /* adjust load_attr if sec_def provides custom preload callback */
7412 if (prog->sec_def && prog->sec_def->prog_prepare_load_fn) {
7413 err = prog->sec_def->prog_prepare_load_fn(prog, &load_attr, prog->sec_def->cookie);
7414 if (err < 0) {
7415 pr_warn("prog '%s': failed to prepare load attributes: %d\n",
7416 prog->name, err);
7417 return err;
7418 }
7419 insns = prog->insns;
7420 insns_cnt = prog->insns_cnt;
7421 }
7422
7423 /* allow prog_prepare_load_fn to change expected_attach_type */
7424 load_attr.expected_attach_type = prog->expected_attach_type;
7425
7426 if (obj->gen_loader) {
7427 bpf_gen__prog_load(obj->gen_loader, prog->type, prog->name,
7428 license, insns, insns_cnt, &load_attr,
7429 prog - obj->programs);
7430 *prog_fd = -1;
7431 return 0;
7432 }
7433
7434 retry_load:
7435 /* if log_level is zero, we don't request logs initially even if
7436 * custom log_buf is specified; if the program load fails, then we'll
7437 * bump log_level to 1 and use either custom log_buf or we'll allocate
7438 * our own and retry the load to get details on what failed
7439 */
7440 if (log_level) {
7441 if (prog->log_buf) {
7442 log_buf = prog->log_buf;
7443 log_buf_size = prog->log_size;
7444 own_log_buf = false;
7445 } else if (obj->log_buf) {
7446 log_buf = obj->log_buf;
7447 log_buf_size = obj->log_size;
7448 own_log_buf = false;
7449 } else {
7450 log_buf_size = max((size_t)BPF_LOG_BUF_SIZE, log_buf_size * 2);
7451 tmp = realloc(log_buf, log_buf_size);
7452 if (!tmp) {
7453 ret = -ENOMEM;
7454 goto out;
7455 }
7456 log_buf = tmp;
7457 log_buf[0] = '\0';
7458 own_log_buf = true;
7459 }
7460 }
7461
7462 load_attr.log_buf = log_buf;
7463 load_attr.log_size = log_buf_size;
7464 load_attr.log_level = log_level;
7465
7466 ret = bpf_prog_load(prog->type, prog_name, license, insns, insns_cnt, &load_attr);
7467 if (ret >= 0) {
7468 if (log_level && own_log_buf) {
7469 pr_debug("prog '%s': -- BEGIN PROG LOAD LOG --\n%s-- END PROG LOAD LOG --\n",
7470 prog->name, log_buf);
7471 }
7472
7473 if (obj->has_rodata && kernel_supports(obj, FEAT_PROG_BIND_MAP)) {
7474 struct bpf_map *map;
7475 int i;
7476
7477 for (i = 0; i < obj->nr_maps; i++) {
7478 map = &prog->obj->maps[i];
7479 if (map->libbpf_type != LIBBPF_MAP_RODATA)
7480 continue;
7481
7482 if (bpf_prog_bind_map(ret, map->fd, NULL)) {
7483 cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg));
7484 pr_warn("prog '%s': failed to bind map '%s': %s\n",
7485 prog->name, map->real_name, cp);
7486 /* Don't fail hard if can't bind rodata. */
7487 }
7488 }
7489 }
7490
7491 *prog_fd = ret;
7492 ret = 0;
7493 goto out;
7494 }
7495
7496 if (log_level == 0) {
7497 log_level = 1;
7498 goto retry_load;
7499 }
7500 /* On ENOSPC, increase log buffer size and retry, unless custom
7501 * log_buf is specified.
7502 * Be careful to not overflow u32, though. Kernel's log buf size limit
7503 * isn't part of UAPI so it can always be bumped to full 4GB. So don't
7504 * multiply by 2 unless we are sure we'll fit within 32 bits.
7505 * Currently, we'll get -EINVAL when we reach (UINT_MAX >> 2).
7506 */
7507 if (own_log_buf && errno == ENOSPC && log_buf_size <= UINT_MAX / 2)
7508 goto retry_load;
7509
7510 ret = -errno;
7511
7512 /* post-process verifier log to improve error descriptions */
7513 fixup_verifier_log(prog, log_buf, log_buf_size);
7514
7515 cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg));
7516 pr_warn("prog '%s': BPF program load failed: %s\n", prog->name, cp);
7517 pr_perm_msg(ret);
7518
7519 if (own_log_buf && log_buf && log_buf[0] != '\0') {
7520 pr_warn("prog '%s': -- BEGIN PROG LOAD LOG --\n%s-- END PROG LOAD LOG --\n",
7521 prog->name, log_buf);
7522 }
7523
7524 out:
7525 if (own_log_buf)
7526 free(log_buf);
7527 return ret;
7528 }
7529
7530 static char *find_prev_line(char *buf, char *cur)
7531 {
7532 char *p;
7533
7534 if (cur == buf) /* end of a log buf */
7535 return NULL;
7536
7537 p = cur - 1;
7538 while (p - 1 >= buf && *(p - 1) != '\n')
7539 p--;
7540
7541 return p;
7542 }
7543
7544 static void patch_log(char *buf, size_t buf_sz, size_t log_sz,
7545 char *orig, size_t orig_sz, const char *patch)
7546 {
7547 /* size of the remaining log content to the right from the to-be-replaced part */
7548 size_t rem_sz = (buf + log_sz) - (orig + orig_sz);
7549 size_t patch_sz = strlen(patch);
7550
7551 if (patch_sz != orig_sz) {
7552 /* If patch line(s) are longer than original piece of verifier log,
7553 * shift log contents by (patch_sz - orig_sz) bytes to the right
7554 * starting from after to-be-replaced part of the log.
7555 *
7556 * If patch line(s) are shorter than original piece of verifier log,
7557 * shift log contents by (orig_sz - patch_sz) bytes to the left
7558 * starting from after to-be-replaced part of the log
7559 *
7560 * We need to be careful about not overflowing available
7561 * buf_sz capacity. If that's the case, we'll truncate the end
7562 * of the original log, as necessary.
7563 */
7564 if (patch_sz > orig_sz) {
7565 if (orig + patch_sz >= buf + buf_sz) {
7566 /* patch is big enough to cover remaining space completely */
7567 patch_sz -= (orig + patch_sz) - (buf + buf_sz) + 1;
7568 rem_sz = 0;
7569 } else if (patch_sz - orig_sz > buf_sz - log_sz) {
7570 /* patch causes part of remaining log to be truncated */
7571 rem_sz -= (patch_sz - orig_sz) - (buf_sz - log_sz);
7572 }
7573 }
7574 /* shift remaining log to the right by calculated amount */
7575 memmove(orig + patch_sz, orig + orig_sz, rem_sz);
7576 }
7577
7578 memcpy(orig, patch, patch_sz);
7579 }
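/* Illustrative sketch of patch_log() behavior: given buf_sz = 16 and a log
 * "abcXYZdef\0" (log_sz = 10), replacing the 3-byte span "XYZ" with the
 * 5-byte patch "12345" shifts "def\0" right by two bytes and yields
 * "abc12345def\0"; if the shifted tail wouldn't fit within buf_sz, it is
 * truncated instead.
 */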
7580
7581 static void fixup_log_failed_core_relo(struct bpf_program *prog,
7582 char *buf, size_t buf_sz, size_t log_sz,
7583 char *line1, char *line2, char *line3)
7584 {
7585 /* Expected log for failed and not properly guarded CO-RE relocation:
7586 * line1 -> 123: (85) call unknown#195896080
7587 * line2 -> invalid func unknown#195896080
7588 * line3 -> <anything else or end of buffer>
7589 *
7590 * "123" is the index of the instruction that was poisoned. We extract
7591 * instruction index to find corresponding CO-RE relocation and
7592 * replace this part of the log with more relevant information about
7593 * failed CO-RE relocation.
7594 */
7595 const struct bpf_core_relo *relo;
7596 struct bpf_core_spec spec;
7597 char patch[512], spec_buf[256];
7598 int insn_idx, err, spec_len;
7599
7600 if (sscanf(line1, "%d: (%*d) call unknown#195896080\n", &insn_idx) != 1)
7601 return;
7602
7603 relo = find_relo_core(prog, insn_idx);
7604 if (!relo)
7605 return;
7606
7607 err = bpf_core_parse_spec(prog->name, prog->obj->btf, relo, &spec);
7608 if (err)
7609 return;
7610
7611 spec_len = bpf_core_format_spec(spec_buf, sizeof(spec_buf), &spec);
7612 snprintf(patch, sizeof(patch),
7613 "%d: <invalid CO-RE relocation>\n"
7614 "failed to resolve CO-RE relocation %s%s\n",
7615 insn_idx, spec_buf, spec_len >= sizeof(spec_buf) ? "..." : "");
7616
7617 patch_log(buf, buf_sz, log_sz, line1, line3 - line1, patch);
7618 }
7619
7620 static void fixup_log_missing_map_load(struct bpf_program *prog,
7621 char *buf, size_t buf_sz, size_t log_sz,
7622 char *line1, char *line2, char *line3)
7623 {
7624 /* Expected log for failed and not properly guarded map reference:
7625 * line1 -> 123: (85) call unknown#2001000345
7626 * line2 -> invalid func unknown#2001000345
7627 * line3 -> <anything else or end of buffer>
7628 *
7629 * "123" is the index of the instruction that was poisoned.
7630 * "345" in "2001000345" is a map index in obj->maps to fetch map name.
7631 */
7632 struct bpf_object *obj = prog->obj;
7633 const struct bpf_map *map;
7634 int insn_idx, map_idx;
7635 char patch[128];
7636
7637 if (sscanf(line1, "%d: (%*d) call unknown#%d\n", &insn_idx, &map_idx) != 2)
7638 return;
7639
7640 map_idx -= POISON_LDIMM64_MAP_BASE;
7641 if (map_idx < 0 || map_idx >= obj->nr_maps)
7642 return;
7643 map = &obj->maps[map_idx];
7644
7645 snprintf(patch, sizeof(patch),
7646 "%d: <invalid BPF map reference>\n"
7647 "BPF map '%s' is referenced but wasn't created\n",
7648 insn_idx, map->name);
7649
7650 patch_log(buf, buf_sz, log_sz, line1, line3 - line1, patch);
7651 }
7652
7653 static void fixup_log_missing_kfunc_call(struct bpf_program *prog,
7654 char *buf, size_t buf_sz, size_t log_sz,
7655 char *line1, char *line2, char *line3)
7656 {
7657 /* Expected log for failed and not properly guarded kfunc call:
7658 * line1 -> 123: (85) call unknown#2002000345
7659 * line2 -> invalid func unknown#2002000345
7660 * line3 -> <anything else or end of buffer>
7661 *
7662 * "123" is the index of the instruction that was poisoned.
7663 * "345" in "2002000345" is an extern index in obj->externs to fetch kfunc name.
7664 */
7665 struct bpf_object *obj = prog->obj;
7666 const struct extern_desc *ext;
7667 int insn_idx, ext_idx;
7668 char patch[128];
7669
7670 if (sscanf(line1, "%d: (%*d) call unknown#%d\n", &insn_idx, &ext_idx) != 2)
7671 return;
7672
7673 ext_idx -= POISON_CALL_KFUNC_BASE;
7674 if (ext_idx < 0 || ext_idx >= obj->nr_extern)
7675 return;
7676 ext = &obj->externs[ext_idx];
7677
7678 snprintf(patch, sizeof(patch),
7679 "%d: <invalid kfunc call>\n"
7680 "kfunc '%s' is referenced but wasn't resolved\n",
7681 insn_idx, ext->name);
7682
7683 patch_log(buf, buf_sz, log_sz, line1, line3 - line1, patch);
7684 }
7685
7686 static void fixup_verifier_log(struct bpf_program *prog, char *buf, size_t buf_sz)
7687 {
7688 /* look for familiar error patterns in last N lines of the log */
7689 const size_t max_last_line_cnt = 10;
7690 char *prev_line, *cur_line, *next_line;
7691 size_t log_sz;
7692 int i;
7693
7694 if (!buf)
7695 return;
7696
7697 log_sz = strlen(buf) + 1;
7698 next_line = buf + log_sz - 1;
7699
7700 for (i = 0; i < max_last_line_cnt; i++, next_line = cur_line) {
7701 cur_line = find_prev_line(buf, next_line);
7702 if (!cur_line)
7703 return;
7704
7705 if (str_has_pfx(cur_line, "invalid func unknown#195896080\n")) {
7706 prev_line = find_prev_line(buf, cur_line);
7707 if (!prev_line)
7708 continue;
7709
7710 /* failed CO-RE relocation case */
7711 fixup_log_failed_core_relo(prog, buf, buf_sz, log_sz,
7712 prev_line, cur_line, next_line);
7713 return;
7714 } else if (str_has_pfx(cur_line, "invalid func unknown#"POISON_LDIMM64_MAP_PFX)) {
7715 prev_line = find_prev_line(buf, cur_line);
7716 if (!prev_line)
7717 continue;
7718
7719 /* reference to uncreated BPF map */
7720 fixup_log_missing_map_load(prog, buf, buf_sz, log_sz,
7721 prev_line, cur_line, next_line);
7722 return;
7723 } else if (str_has_pfx(cur_line, "invalid func unknown#"POISON_CALL_KFUNC_PFX)) {
7724 prev_line = find_prev_line(buf, cur_line);
7725 if (!prev_line)
7726 continue;
7727
7728 /* reference to unresolved kfunc */
7729 fixup_log_missing_kfunc_call(prog, buf, buf_sz, log_sz,
7730 prev_line, cur_line, next_line);
7731 return;
7732 }
7733 }
7734 }
7735
7736 static int bpf_program_record_relos(struct bpf_program *prog)
7737 {
7738 struct bpf_object *obj = prog->obj;
7739 int i;
7740
7741 for (i = 0; i < prog->nr_reloc; i++) {
7742 struct reloc_desc *relo = &prog->reloc_desc[i];
7743 struct extern_desc *ext = &obj->externs[relo->ext_idx];
7744 int kind;
7745
7746 switch (relo->type) {
7747 case RELO_EXTERN_LD64:
7748 if (ext->type != EXT_KSYM)
7749 continue;
7750 kind = btf_is_var(btf__type_by_id(obj->btf, ext->btf_id)) ?
7751 BTF_KIND_VAR : BTF_KIND_FUNC;
7752 bpf_gen__record_extern(obj->gen_loader, ext->name,
7753 ext->is_weak, !ext->ksym.type_id,
7754 true, kind, relo->insn_idx);
7755 break;
7756 case RELO_EXTERN_CALL:
7757 bpf_gen__record_extern(obj->gen_loader, ext->name,
7758 ext->is_weak, false, false, BTF_KIND_FUNC,
7759 relo->insn_idx);
7760 break;
7761 case RELO_CORE: {
7762 struct bpf_core_relo cr = {
7763 .insn_off = relo->insn_idx * 8,
7764 .type_id = relo->core_relo->type_id,
7765 .access_str_off = relo->core_relo->access_str_off,
7766 .kind = relo->core_relo->kind,
7767 };
7768
7769 bpf_gen__record_relo_core(obj->gen_loader, &cr);
7770 break;
7771 }
7772 default:
7773 continue;
7774 }
7775 }
7776 return 0;
7777 }
7778
7779 static int
7780 bpf_object__load_progs(struct bpf_object *obj, int log_level)
7781 {
7782 struct bpf_program *prog;
7783 size_t i;
7784 int err;
7785
7786 for (i = 0; i < obj->nr_programs; i++) {
7787 prog = &obj->programs[i];
7788 err = bpf_object__sanitize_prog(obj, prog);
7789 if (err)
7790 return err;
7791 }
7792
7793 for (i = 0; i < obj->nr_programs; i++) {
7794 prog = &obj->programs[i];
7795 if (prog_is_subprog(obj, prog))
7796 continue;
7797 if (!prog->autoload) {
7798 pr_debug("prog '%s': skipped loading\n", prog->name);
7799 continue;
7800 }
7801 prog->log_level |= log_level;
7802
7803 if (obj->gen_loader)
7804 bpf_program_record_relos(prog);
7805
7806 err = bpf_object_load_prog(obj, prog, prog->insns, prog->insns_cnt,
7807 obj->license, obj->kern_version, &prog->fd);
7808 if (err) {
7809 pr_warn("prog '%s': failed to load: %d\n", prog->name, err);
7810 return err;
7811 }
7812 }
7813
7814 bpf_object__free_relocs(obj);
7815 return 0;
7816 }
7817
7818 static const struct bpf_sec_def *find_sec_def(const char *sec_name);
7819
7820 static int bpf_object_init_progs(struct bpf_object *obj, const struct bpf_object_open_opts *opts)
7821 {
7822 struct bpf_program *prog;
7823 int err;
7824
7825 bpf_object__for_each_program(prog, obj) {
7826 prog->sec_def = find_sec_def(prog->sec_name);
7827 if (!prog->sec_def) {
7828 /* couldn't guess, but user might manually specify */
7829 pr_debug("prog '%s': unrecognized ELF section name '%s'\n",
7830 prog->name, prog->sec_name);
7831 continue;
7832 }
7833
7834 prog->type = prog->sec_def->prog_type;
7835 prog->expected_attach_type = prog->sec_def->expected_attach_type;
7836
7837 /* sec_def can have custom callback which should be called
7838 * after bpf_program is initialized to adjust its properties
7839 */
7840 if (prog->sec_def->prog_setup_fn) {
7841 err = prog->sec_def->prog_setup_fn(prog, prog->sec_def->cookie);
7842 if (err < 0) {
7843 pr_warn("prog '%s': failed to initialize: %d\n",
7844 prog->name, err);
7845 return err;
7846 }
7847 }
7848 }
7849
7850 return 0;
7851 }
7852
7853 static struct bpf_object *bpf_object_open(const char *path, const void *obj_buf, size_t obj_buf_sz,
7854 const struct bpf_object_open_opts *opts)
7855 {
7856 const char *obj_name, *kconfig, *btf_tmp_path, *token_path;
7857 struct bpf_object *obj;
7858 char tmp_name[64];
7859 int err;
7860 char *log_buf;
7861 size_t log_size;
7862 __u32 log_level;
7863
7864 if (elf_version(EV_CURRENT) == EV_NONE) {
7865 pr_warn("failed to init libelf for %s\n",
7866 path ? : "(mem buf)");
7867 return ERR_PTR(-LIBBPF_ERRNO__LIBELF);
7868 }
7869
7870 if (!OPTS_VALID(opts, bpf_object_open_opts))
7871 return ERR_PTR(-EINVAL);
7872
7873 obj_name = OPTS_GET(opts, object_name, NULL);
7874 if (obj_buf) {
7875 if (!obj_name) {
7876 snprintf(tmp_name, sizeof(tmp_name), "%lx-%lx",
7877 (unsigned long)obj_buf,
7878 (unsigned long)obj_buf_sz);
7879 obj_name = tmp_name;
7880 }
7881 path = obj_name;
7882 pr_debug("loading object '%s' from buffer\n", obj_name);
7883 }
7884
7885 log_buf = OPTS_GET(opts, kernel_log_buf, NULL);
7886 log_size = OPTS_GET(opts, kernel_log_size, 0);
7887 log_level = OPTS_GET(opts, kernel_log_level, 0);
7888 if (log_size > UINT_MAX)
7889 return ERR_PTR(-EINVAL);
7890 if (log_size && !log_buf)
7891 return ERR_PTR(-EINVAL);
7892
7893 token_path = OPTS_GET(opts, bpf_token_path, NULL);
7894 /* if user didn't specify bpf_token_path explicitly, check if
7895 * LIBBPF_BPF_TOKEN_PATH envvar was set and treat it as bpf_token_path
7896 * option
7897 */
7898 if (!token_path)
7899 token_path = getenv("LIBBPF_BPF_TOKEN_PATH");
7900 if (token_path && strlen(token_path) >= PATH_MAX)
7901 return ERR_PTR(-ENAMETOOLONG);
7902
7903 obj = bpf_object__new(path, obj_buf, obj_buf_sz, obj_name);
7904 if (IS_ERR(obj))
7905 return obj;
7906
7907 obj->log_buf = log_buf;
7908 obj->log_size = log_size;
7909 obj->log_level = log_level;
7910
7911 if (token_path) {
7912 obj->token_path = strdup(token_path);
7913 if (!obj->token_path) {
7914 err = -ENOMEM;
7915 goto out;
7916 }
7917 }
7918
7919 btf_tmp_path = OPTS_GET(opts, btf_custom_path, NULL);
7920 if (btf_tmp_path) {
7921 if (strlen(btf_tmp_path) >= PATH_MAX) {
7922 err = -ENAMETOOLONG;
7923 goto out;
7924 }
7925 obj->btf_custom_path = strdup(btf_tmp_path);
7926 if (!obj->btf_custom_path) {
7927 err = -ENOMEM;
7928 goto out;
7929 }
7930 }
7931
7932 kconfig = OPTS_GET(opts, kconfig, NULL);
7933 if (kconfig) {
7934 obj->kconfig = strdup(kconfig);
7935 if (!obj->kconfig) {
7936 err = -ENOMEM;
7937 goto out;
7938 }
7939 }
7940
7941 err = bpf_object__elf_init(obj);
7942 err = err ? : bpf_object__check_endianness(obj);
7943 err = err ? : bpf_object__elf_collect(obj);
7944 err = err ? : bpf_object__collect_externs(obj);
7945 err = err ? : bpf_object_fixup_btf(obj);
7946 err = err ? : bpf_object__init_maps(obj, opts);
7947 err = err ? : bpf_object_init_progs(obj, opts);
7948 err = err ? : bpf_object__collect_relos(obj);
7949 if (err)
7950 goto out;
7951
7952 bpf_object__elf_finish(obj);
7953
7954 return obj;
7955 out:
7956 bpf_object__close(obj);
7957 return ERR_PTR(err);
7958 }
7959
7960 struct bpf_object *
7961 bpf_object__open_file(const char *path, const struct bpf_object_open_opts *opts)
7962 {
7963 if (!path)
7964 return libbpf_err_ptr(-EINVAL);
7965
7966 pr_debug("loading %s\n", path);
7967
7968 return libbpf_ptr(bpf_object_open(path, NULL, 0, opts));
7969 }
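/* Illustrative usage sketch ("my_prog.bpf.o" is a hypothetical file):
 *
 *	LIBBPF_OPTS(bpf_object_open_opts, opts, .kernel_log_level = 1);
 *	struct bpf_object *obj;
 *
 *	obj = bpf_object__open_file("my_prog.bpf.o", &opts);
 *	if (!obj)
 *		return -errno;
 */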
7970
7971 struct bpf_object *bpf_object__open(const char *path)
7972 {
7973 return bpf_object__open_file(path, NULL);
7974 }
7975
7976 struct bpf_object *
7977 bpf_object__open_mem(const void *obj_buf, size_t obj_buf_sz,
7978 const struct bpf_object_open_opts *opts)
7979 {
7980 if (!obj_buf || obj_buf_sz == 0)
7981 return libbpf_err_ptr(-EINVAL);
7982
7983 return libbpf_ptr(bpf_object_open(NULL, obj_buf, obj_buf_sz, opts));
7984 }
7985
7986 static int bpf_object_unload(struct bpf_object *obj)
7987 {
7988 size_t i;
7989
7990 if (!obj)
7991 return libbpf_err(-EINVAL);
7992
7993 for (i = 0; i < obj->nr_maps; i++) {
7994 zclose(obj->maps[i].fd);
7995 if (obj->maps[i].st_ops)
7996 zfree(&obj->maps[i].st_ops->kern_vdata);
7997 }
7998
7999 for (i = 0; i < obj->nr_programs; i++)
8000 bpf_program__unload(&obj->programs[i]);
8001
8002 return 0;
8003 }
8004
8005 static int bpf_object__sanitize_maps(struct bpf_object *obj)
8006 {
8007 struct bpf_map *m;
8008
8009 bpf_object__for_each_map(m, obj) {
8010 if (!bpf_map__is_internal(m))
8011 continue;
8012 if (!kernel_supports(obj, FEAT_ARRAY_MMAP))
8013 m->def.map_flags &= ~BPF_F_MMAPABLE;
8014 }
8015
8016 return 0;
8017 }
8018
8019 int libbpf_kallsyms_parse(kallsyms_cb_t cb, void *ctx)
8020 {
8021 char sym_type, sym_name[500];
8022 unsigned long long sym_addr;
8023 int ret, err = 0;
8024 FILE *f;
8025
8026 f = fopen("/proc/kallsyms", "re");
8027 if (!f) {
8028 err = -errno;
8029 pr_warn("failed to open /proc/kallsyms: %d\n", err);
8030 return err;
8031 }
8032
8033 while (true) {
8034 ret = fscanf(f, "%llx %c %499s%*[^\n]\n",
8035 &sym_addr, &sym_type, sym_name);
8036 if (ret == EOF && feof(f))
8037 break;
8038 if (ret != 3) {
8039 pr_warn("failed to read kallsyms entry: %d\n", ret);
8040 err = -EINVAL;
8041 break;
8042 }
8043
8044 err = cb(sym_addr, sym_type, sym_name, ctx);
8045 if (err)
8046 break;
8047 }
8048
8049 fclose(f);
8050 return err;
8051 }
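/* Illustrative usage sketch: a caller-provided callback matching
 * kallsyms_cb_t is invoked once per /proc/kallsyms entry, e.g.
 * (dump_sym is a hypothetical callback):
 *
 *	static int dump_sym(unsigned long long sym_addr, char sym_type,
 *			    const char *sym_name, void *ctx)
 *	{
 *		printf("%llx %c %s\n", sym_addr, sym_type, sym_name);
 *		return 0;	(a non-zero return stops iteration)
 *	}
 *
 *	... libbpf_kallsyms_parse(dump_sym, NULL); ...
 */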
8052
8053 static int kallsyms_cb(unsigned long long sym_addr, char sym_type,
8054 const char *sym_name, void *ctx)
8055 {
8056 struct bpf_object *obj = ctx;
8057 const struct btf_type *t;
8058 struct extern_desc *ext;
8059
8060 ext = find_extern_by_name(obj, sym_name);
8061 if (!ext || ext->type != EXT_KSYM)
8062 return 0;
8063
8064 t = btf__type_by_id(obj->btf, ext->btf_id);
8065 if (!btf_is_var(t))
8066 return 0;
8067
8068 if (ext->is_set && ext->ksym.addr != sym_addr) {
8069 pr_warn("extern (ksym) '%s': resolution is ambiguous: 0x%llx or 0x%llx\n",
8070 sym_name, ext->ksym.addr, sym_addr);
8071 return -EINVAL;
8072 }
8073 if (!ext->is_set) {
8074 ext->is_set = true;
8075 ext->ksym.addr = sym_addr;
8076 pr_debug("extern (ksym) '%s': set to 0x%llx\n", sym_name, sym_addr);
8077 }
8078 return 0;
8079 }
8080
8081 static int bpf_object__read_kallsyms_file(struct bpf_object *obj)
8082 {
8083 return libbpf_kallsyms_parse(kallsyms_cb, obj);
8084 }
8085
8086 static int find_ksym_btf_id(struct bpf_object *obj, const char *ksym_name,
8087 __u16 kind, struct btf **res_btf,
8088 struct module_btf **res_mod_btf)
8089 {
8090 struct module_btf *mod_btf;
8091 struct btf *btf;
8092 int i, id, err;
8093
8094 btf = obj->btf_vmlinux;
8095 mod_btf = NULL;
8096 id = btf__find_by_name_kind(btf, ksym_name, kind);
8097
8098 if (id == -ENOENT) {
8099 err = load_module_btfs(obj);
8100 if (err)
8101 return err;
8102
8103 for (i = 0; i < obj->btf_module_cnt; i++) {
8104 /* we assume module_btf's BTF FD is always >0 */
8105 mod_btf = &obj->btf_modules[i];
8106 btf = mod_btf->btf;
8107 id = btf__find_by_name_kind_own(btf, ksym_name, kind);
8108 if (id != -ENOENT)
8109 break;
8110 }
8111 }
8112 if (id <= 0)
8113 return -ESRCH;
8114
8115 *res_btf = btf;
8116 *res_mod_btf = mod_btf;
8117 return id;
8118 }
8119
8120 static int bpf_object__resolve_ksym_var_btf_id(struct bpf_object *obj,
8121 struct extern_desc *ext)
8122 {
8123 const struct btf_type *targ_var, *targ_type;
8124 __u32 targ_type_id, local_type_id;
8125 struct module_btf *mod_btf = NULL;
8126 const char *targ_var_name;
8127 struct btf *btf = NULL;
8128 int id, err;
8129
8130 id = find_ksym_btf_id(obj, ext->name, BTF_KIND_VAR, &btf, &mod_btf);
8131 if (id < 0) {
8132 if (id == -ESRCH && ext->is_weak)
8133 return 0;
8134 pr_warn("extern (var ksym) '%s': not found in kernel BTF\n",
8135 ext->name);
8136 return id;
8137 }
8138
8139 /* find local type_id */
8140 local_type_id = ext->ksym.type_id;
8141
8142 /* find target type_id */
8143 targ_var = btf__type_by_id(btf, id);
8144 targ_var_name = btf__name_by_offset(btf, targ_var->name_off);
8145 targ_type = skip_mods_and_typedefs(btf, targ_var->type, &targ_type_id);
8146
8147 err = bpf_core_types_are_compat(obj->btf, local_type_id,
8148 btf, targ_type_id);
8149 if (err <= 0) {
8150 const struct btf_type *local_type;
8151 const char *targ_name, *local_name;
8152
8153 local_type = btf__type_by_id(obj->btf, local_type_id);
8154 local_name = btf__name_by_offset(obj->btf, local_type->name_off);
8155 targ_name = btf__name_by_offset(btf, targ_type->name_off);
8156
8157 pr_warn("extern (var ksym) '%s': incompatible types, expected [%d] %s %s, but kernel has [%d] %s %s\n",
8158 ext->name, local_type_id,
8159 btf_kind_str(local_type), local_name, targ_type_id,
8160 btf_kind_str(targ_type), targ_name);
8161 return -EINVAL;
8162 }
8163
8164 ext->is_set = true;
8165 ext->ksym.kernel_btf_obj_fd = mod_btf ? mod_btf->fd : 0;
8166 ext->ksym.kernel_btf_id = id;
8167 pr_debug("extern (var ksym) '%s': resolved to [%d] %s %s\n",
8168 ext->name, id, btf_kind_str(targ_var), targ_var_name);
8169
8170 return 0;
8171 }
8172
8173 static int bpf_object__resolve_ksym_func_btf_id(struct bpf_object *obj,
8174 struct extern_desc *ext)
8175 {
8176 int local_func_proto_id, kfunc_proto_id, kfunc_id;
8177 struct module_btf *mod_btf = NULL;
8178 const struct btf_type *kern_func;
8179 struct btf *kern_btf = NULL;
8180 int ret;
8181
8182 local_func_proto_id = ext->ksym.type_id;
8183
8184 kfunc_id = find_ksym_btf_id(obj, ext->essent_name ?: ext->name, BTF_KIND_FUNC, &kern_btf,
8185 &mod_btf);
8186 if (kfunc_id < 0) {
8187 if (kfunc_id == -ESRCH && ext->is_weak)
8188 return 0;
8189 pr_warn("extern (func ksym) '%s': not found in kernel or module BTFs\n",
8190 ext->name);
8191 return kfunc_id;
8192 }
8193
8194 kern_func = btf__type_by_id(kern_btf, kfunc_id);
8195 kfunc_proto_id = kern_func->type;
8196
8197 ret = bpf_core_types_are_compat(obj->btf, local_func_proto_id,
8198 kern_btf, kfunc_proto_id);
8199 if (ret <= 0) {
8200 if (ext->is_weak)
8201 return 0;
8202
8203 pr_warn("extern (func ksym) '%s': func_proto [%d] incompatible with %s [%d]\n",
8204 ext->name, local_func_proto_id,
8205 mod_btf ? mod_btf->name : "vmlinux", kfunc_proto_id);
8206 return -EINVAL;
8207 }
8208
8209 /* set index for module BTF fd in fd_array, if unset */
8210 if (mod_btf && !mod_btf->fd_array_idx) {
8211 /* insn->off is s16 */
8212 if (obj->fd_array_cnt == INT16_MAX) {
8213 pr_warn("extern (func ksym) '%s': module BTF fd index %d too big to fit in bpf_insn offset\n",
8214 ext->name, mod_btf->fd_array_idx);
8215 return -E2BIG;
8216 }
8217 /* Cannot use index 0 for module BTF fd */
8218 if (!obj->fd_array_cnt)
8219 obj->fd_array_cnt = 1;
8220
8221 ret = libbpf_ensure_mem((void **)&obj->fd_array, &obj->fd_array_cap, sizeof(int),
8222 obj->fd_array_cnt + 1);
8223 if (ret)
8224 return ret;
8225 mod_btf->fd_array_idx = obj->fd_array_cnt;
8226 /* we assume module BTF FD is always >0 */
8227 obj->fd_array[obj->fd_array_cnt++] = mod_btf->fd;
8228 }
8229
8230 ext->is_set = true;
8231 ext->ksym.kernel_btf_id = kfunc_id;
8232 ext->ksym.btf_fd_idx = mod_btf ? mod_btf->fd_array_idx : 0;
8233 /* Also set kernel_btf_obj_fd to make sure that bpf_object__relocate_data()
8234 * populates FD into ld_imm64 insn when it's used to point to kfunc.
8235 * {kernel_btf_id, btf_fd_idx} -> fixup bpf_call.
8236 * {kernel_btf_id, kernel_btf_obj_fd} -> fixup ld_imm64.
8237 */
8238 ext->ksym.kernel_btf_obj_fd = mod_btf ? mod_btf->fd : 0;
8239 pr_debug("extern (func ksym) '%s': resolved to %s [%d]\n",
8240 ext->name, mod_btf ? mod_btf->name : "vmlinux", kfunc_id);
8241
8242 return 0;
8243 }
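/* Illustrative sketch: the func ksym externs resolved above are declared
 * in BPF C code with the __ksym attribute from bpf_helpers.h, e.g.:
 *
 *	extern void bpf_rcu_read_lock(void) __ksym;
 *	extern void bpf_rcu_read_unlock(void) __ksym __weak;
 *
 * Weak kfuncs that the running kernel doesn't provide are simply left
 * unresolved without failing the load.
 */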
8244
8245 static int bpf_object__resolve_ksyms_btf_id(struct bpf_object *obj)
8246 {
8247 const struct btf_type *t;
8248 struct extern_desc *ext;
8249 int i, err;
8250
8251 for (i = 0; i < obj->nr_extern; i++) {
8252 ext = &obj->externs[i];
8253 if (ext->type != EXT_KSYM || !ext->ksym.type_id)
8254 continue;
8255
8256 if (obj->gen_loader) {
8257 ext->is_set = true;
8258 ext->ksym.kernel_btf_obj_fd = 0;
8259 ext->ksym.kernel_btf_id = 0;
8260 continue;
8261 }
8262 t = btf__type_by_id(obj->btf, ext->btf_id);
8263 if (btf_is_var(t))
8264 err = bpf_object__resolve_ksym_var_btf_id(obj, ext);
8265 else
8266 err = bpf_object__resolve_ksym_func_btf_id(obj, ext);
8267 if (err)
8268 return err;
8269 }
8270 return 0;
8271 }
8272
8273 static int bpf_object__resolve_externs(struct bpf_object *obj,
8274 const char *extra_kconfig)
8275 {
8276 bool need_config = false, need_kallsyms = false;
8277 bool need_vmlinux_btf = false;
8278 struct extern_desc *ext;
8279 void *kcfg_data = NULL;
8280 int err, i;
8281
8282 if (obj->nr_extern == 0)
8283 return 0;
8284
8285 if (obj->kconfig_map_idx >= 0)
8286 kcfg_data = obj->maps[obj->kconfig_map_idx].mmaped;
8287
8288 for (i = 0; i < obj->nr_extern; i++) {
8289 ext = &obj->externs[i];
8290
8291 if (ext->type == EXT_KSYM) {
8292 if (ext->ksym.type_id)
8293 need_vmlinux_btf = true;
8294 else
8295 need_kallsyms = true;
8296 continue;
8297 } else if (ext->type == EXT_KCFG) {
8298 void *ext_ptr = kcfg_data + ext->kcfg.data_off;
8299 __u64 value = 0;
8300
8301 /* Kconfig externs need actual /proc/config.gz */
8302 if (str_has_pfx(ext->name, "CONFIG_")) {
8303 need_config = true;
8304 continue;
8305 }
8306
8307 			/* Virtual kcfg externs are handled specially by libbpf */
8308 if (strcmp(ext->name, "LINUX_KERNEL_VERSION") == 0) {
8309 value = get_kernel_version();
8310 if (!value) {
8311 pr_warn("extern (kcfg) '%s': failed to get kernel version\n", ext->name);
8312 return -EINVAL;
8313 }
8314 } else if (strcmp(ext->name, "LINUX_HAS_BPF_COOKIE") == 0) {
8315 value = kernel_supports(obj, FEAT_BPF_COOKIE);
8316 } else if (strcmp(ext->name, "LINUX_HAS_SYSCALL_WRAPPER") == 0) {
8317 value = kernel_supports(obj, FEAT_SYSCALL_WRAPPER);
8318 } else if (!str_has_pfx(ext->name, "LINUX_") || !ext->is_weak) {
8319 				/* Currently libbpf supports only CONFIG_ and LINUX_ prefixed
8320 				 * __kconfig externs, where LINUX_ ones are virtual and filled in
8321 				 * by libbpf itself (their values don't come from Kconfig).
8322 				 * If a LINUX_xxx variable is not recognized by libbpf but is marked
8323 				 * __weak, it defaults to zero, just like CONFIG_xxx
8324 				 * externs.
8325 				 */
8326 pr_warn("extern (kcfg) '%s': unrecognized virtual extern\n", ext->name);
8327 return -EINVAL;
8328 }
8329
8330 err = set_kcfg_value_num(ext, ext_ptr, value);
8331 if (err)
8332 return err;
8333 pr_debug("extern (kcfg) '%s': set to 0x%llx\n",
8334 ext->name, (long long)value);
8335 } else {
8336 pr_warn("extern '%s': unrecognized extern kind\n", ext->name);
8337 return -EINVAL;
8338 }
8339 }
8340 if (need_config && extra_kconfig) {
8341 err = bpf_object__read_kconfig_mem(obj, extra_kconfig, kcfg_data);
8342 if (err)
8343 return -EINVAL;
8344 need_config = false;
8345 for (i = 0; i < obj->nr_extern; i++) {
8346 ext = &obj->externs[i];
8347 if (ext->type == EXT_KCFG && !ext->is_set) {
8348 need_config = true;
8349 break;
8350 }
8351 }
8352 }
8353 if (need_config) {
8354 err = bpf_object__read_kconfig_file(obj, kcfg_data);
8355 if (err)
8356 return -EINVAL;
8357 }
8358 if (need_kallsyms) {
8359 err = bpf_object__read_kallsyms_file(obj);
8360 if (err)
8361 return -EINVAL;
8362 }
8363 if (need_vmlinux_btf) {
8364 err = bpf_object__resolve_ksyms_btf_id(obj);
8365 if (err)
8366 return -EINVAL;
8367 }
8368 for (i = 0; i < obj->nr_extern; i++) {
8369 ext = &obj->externs[i];
8370
8371 if (!ext->is_set && !ext->is_weak) {
8372 pr_warn("extern '%s' (strong): not resolved\n", ext->name);
8373 return -ESRCH;
8374 } else if (!ext->is_set) {
8375 pr_debug("extern '%s' (weak): not resolved, defaulting to zero\n",
8376 ext->name);
8377 }
8378 }
8379
8380 return 0;
8381 }
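/* Illustrative sketch: the kcfg externs handled above come from BPF C
 * declarations like:
 *
 *	extern unsigned int LINUX_KERNEL_VERSION __kconfig;
 *	extern int LINUX_HAS_BPF_COOKIE __kconfig __weak;
 *	extern unsigned long CONFIG_HZ __kconfig;
 *
 * CONFIG_* values come from the kernel's Kconfig (e.g. /proc/config.gz or
 * the extra kconfig string passed at open time), while LINUX_* values are
 * filled in by libbpf itself.
 */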
8382
8383 static void bpf_map_prepare_vdata(const struct bpf_map *map)
8384 {
8385 struct bpf_struct_ops *st_ops;
8386 __u32 i;
8387
8388 st_ops = map->st_ops;
8389 for (i = 0; i < btf_vlen(st_ops->type); i++) {
8390 struct bpf_program *prog = st_ops->progs[i];
8391 void *kern_data;
8392 int prog_fd;
8393
8394 if (!prog)
8395 continue;
8396
8397 prog_fd = bpf_program__fd(prog);
8398 kern_data = st_ops->kern_vdata + st_ops->kern_func_off[i];
8399 *(unsigned long *)kern_data = prog_fd;
8400 }
8401 }
8402
8403 static int bpf_object_prepare_struct_ops(struct bpf_object *obj)
8404 {
8405 struct bpf_map *map;
8406 int i;
8407
8408 for (i = 0; i < obj->nr_maps; i++) {
8409 map = &obj->maps[i];
8410
8411 if (!bpf_map__is_struct_ops(map))
8412 continue;
8413
8414 if (!map->autocreate)
8415 continue;
8416
8417 bpf_map_prepare_vdata(map);
8418 }
8419
8420 return 0;
8421 }
8422
8423 static int bpf_object_load(struct bpf_object *obj, int extra_log_level, const char *target_btf_path)
8424 {
8425 int err, i;
8426
8427 if (!obj)
8428 return libbpf_err(-EINVAL);
8429
8430 if (obj->loaded) {
8431 pr_warn("object '%s': load can't be attempted twice\n", obj->name);
8432 return libbpf_err(-EINVAL);
8433 }
8434
8435 if (obj->gen_loader)
8436 bpf_gen__init(obj->gen_loader, extra_log_level, obj->nr_programs, obj->nr_maps);
8437
8438 err = bpf_object_prepare_token(obj);
8439 err = err ? : bpf_object__probe_loading(obj);
8440 err = err ? : bpf_object__load_vmlinux_btf(obj, false);
8441 err = err ? : bpf_object__resolve_externs(obj, obj->kconfig);
8442 err = err ? : bpf_object__sanitize_maps(obj);
8443 err = err ? : bpf_object__init_kern_struct_ops_maps(obj);
8444 err = err ? : bpf_object_adjust_struct_ops_autoload(obj);
8445 err = err ? : bpf_object__relocate(obj, obj->btf_custom_path ? : target_btf_path);
8446 err = err ? : bpf_object__sanitize_and_load_btf(obj);
8447 err = err ? : bpf_object__create_maps(obj);
8448 err = err ? : bpf_object__load_progs(obj, extra_log_level);
8449 err = err ? : bpf_object_init_prog_arrays(obj);
8450 err = err ? : bpf_object_prepare_struct_ops(obj);
8451
8452 if (obj->gen_loader) {
8453 /* reset FDs */
8454 if (obj->btf)
8455 btf__set_fd(obj->btf, -1);
8456 if (!err)
8457 err = bpf_gen__finish(obj->gen_loader, obj->nr_programs, obj->nr_maps);
8458 }
8459
8460 /* clean up fd_array */
8461 zfree(&obj->fd_array);
8462
8463 /* clean up module BTFs */
8464 for (i = 0; i < obj->btf_module_cnt; i++) {
8465 close(obj->btf_modules[i].fd);
8466 btf__free(obj->btf_modules[i].btf);
8467 free(obj->btf_modules[i].name);
8468 }
8469 free(obj->btf_modules);
8470
8471 /* clean up vmlinux BTF */
8472 btf__free(obj->btf_vmlinux);
8473 obj->btf_vmlinux = NULL;
8474
8475 obj->loaded = true; /* doesn't matter if successfully or not */
8476
8477 if (err)
8478 goto out;
8479
8480 return 0;
8481 out:
8482 /* unpin any maps that were auto-pinned during load */
8483 for (i = 0; i < obj->nr_maps; i++)
8484 if (obj->maps[i].pinned && !obj->maps[i].reused)
8485 bpf_map__unpin(&obj->maps[i], NULL);
8486
8487 bpf_object_unload(obj);
8488 pr_warn("failed to load object '%s'\n", obj->path);
8489 return libbpf_err(err);
8490 }
8491
8492 int bpf_object__load(struct bpf_object *obj)
8493 {
8494 return bpf_object_load(obj, 0, NULL);
8495 }
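/* Illustrative usage sketch of the typical open/load sequence
 * ("my_prog.bpf.o" is a hypothetical file):
 *
 *	struct bpf_object *obj;
 *	int err;
 *
 *	obj = bpf_object__open_file("my_prog.bpf.o", NULL);
 *	if (!obj)
 *		return -errno;
 *	err = bpf_object__load(obj);
 *	if (err) {
 *		bpf_object__close(obj);
 *		return err;
 *	}
 */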
8496
8497 static int make_parent_dir(const char *path)
8498 {
8499 char *cp, errmsg[STRERR_BUFSIZE];
8500 char *dname, *dir;
8501 int err = 0;
8502
8503 dname = strdup(path);
8504 if (dname == NULL)
8505 return -ENOMEM;
8506
8507 dir = dirname(dname);
8508 if (mkdir(dir, 0700) && errno != EEXIST)
8509 err = -errno;
8510
8511 free(dname);
8512 if (err) {
8513 cp = libbpf_strerror_r(-err, errmsg, sizeof(errmsg));
8514 pr_warn("failed to mkdir %s: %s\n", path, cp);
8515 }
8516 return err;
8517 }
8518
8519 static int check_path(const char *path)
8520 {
8521 char *cp, errmsg[STRERR_BUFSIZE];
8522 struct statfs st_fs;
8523 char *dname, *dir;
8524 int err = 0;
8525
8526 if (path == NULL)
8527 return -EINVAL;
8528
8529 dname = strdup(path);
8530 if (dname == NULL)
8531 return -ENOMEM;
8532
8533 dir = dirname(dname);
8534 if (statfs(dir, &st_fs)) {
8535 cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg));
8536 pr_warn("failed to statfs %s: %s\n", dir, cp);
8537 err = -errno;
8538 }
8539 free(dname);
8540
8541 if (!err && st_fs.f_type != BPF_FS_MAGIC) {
8542 pr_warn("specified path %s is not on BPF FS\n", path);
8543 err = -EINVAL;
8544 }
8545
8546 return err;
8547 }
8548
8549 int bpf_program__pin(struct bpf_program *prog, const char *path)
8550 {
8551 char *cp, errmsg[STRERR_BUFSIZE];
8552 int err;
8553
8554 if (prog->fd < 0) {
8555 pr_warn("prog '%s': can't pin program that wasn't loaded\n", prog->name);
8556 return libbpf_err(-EINVAL);
8557 }
8558
8559 err = make_parent_dir(path);
8560 if (err)
8561 return libbpf_err(err);
8562
8563 err = check_path(path);
8564 if (err)
8565 return libbpf_err(err);
8566
8567 if (bpf_obj_pin(prog->fd, path)) {
8568 err = -errno;
8569 cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
8570 pr_warn("prog '%s': failed to pin at '%s': %s\n", prog->name, path, cp);
8571 return libbpf_err(err);
8572 }
8573
8574 pr_debug("prog '%s': pinned at '%s'\n", prog->name, path);
8575 return 0;
8576 }
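/* Illustrative usage sketch (program name and path are hypothetical):
 * pinning requires the program to already be loaded and the path to be on
 * a bpffs mount:
 *
 *	struct bpf_program *prog;
 *
 *	prog = bpf_object__find_program_by_name(obj, "handle_event");
 *	if (prog)
 *		err = bpf_program__pin(prog, "/sys/fs/bpf/handle_event");
 */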
8577
8578 int bpf_program__unpin(struct bpf_program *prog, const char *path)
8579 {
8580 int err;
8581
8582 if (prog->fd < 0) {
8583 pr_warn("prog '%s': can't unpin program that wasn't loaded\n", prog->name);
8584 return libbpf_err(-EINVAL);
8585 }
8586
8587 err = check_path(path);
8588 if (err)
8589 return libbpf_err(err);
8590
8591 err = unlink(path);
8592 if (err)
8593 return libbpf_err(-errno);
8594
8595 pr_debug("prog '%s': unpinned from '%s'\n", prog->name, path);
8596 return 0;
8597 }
8598
8599 int bpf_map__pin(struct bpf_map *map, const char *path)
8600 {
8601 char *cp, errmsg[STRERR_BUFSIZE];
8602 int err;
8603
8604 if (map == NULL) {
8605 pr_warn("invalid map pointer\n");
8606 return libbpf_err(-EINVAL);
8607 }
8608
8609 if (map->fd < 0) {
8610 pr_warn("map '%s': can't pin BPF map without FD (was it created?)\n", map->name);
8611 return libbpf_err(-EINVAL);
8612 }
8613
8614 if (map->pin_path) {
8615 if (path && strcmp(path, map->pin_path)) {
8616 pr_warn("map '%s' already has pin path '%s' different from '%s'\n",
8617 bpf_map__name(map), map->pin_path, path);
8618 return libbpf_err(-EINVAL);
8619 } else if (map->pinned) {
8620 pr_debug("map '%s' already pinned at '%s'; not re-pinning\n",
8621 bpf_map__name(map), map->pin_path);
8622 return 0;
8623 }
8624 } else {
8625 if (!path) {
8626 pr_warn("missing a path to pin map '%s' at\n",
8627 bpf_map__name(map));
8628 return libbpf_err(-EINVAL);
8629 } else if (map->pinned) {
8630 pr_warn("map '%s' already pinned\n", bpf_map__name(map));
8631 return libbpf_err(-EEXIST);
8632 }
8633
8634 map->pin_path = strdup(path);
8635 if (!map->pin_path) {
8636 err = -errno;
8637 goto out_err;
8638 }
8639 }
8640
8641 err = make_parent_dir(map->pin_path);
8642 if (err)
8643 return libbpf_err(err);
8644
8645 err = check_path(map->pin_path);
8646 if (err)
8647 return libbpf_err(err);
8648
8649 if (bpf_obj_pin(map->fd, map->pin_path)) {
8650 err = -errno;
8651 goto out_err;
8652 }
8653
8654 map->pinned = true;
8655 pr_debug("pinned map '%s'\n", map->pin_path);
8656
8657 return 0;
8658
8659 out_err:
8660 cp = libbpf_strerror_r(-err, errmsg, sizeof(errmsg));
8661 pr_warn("failed to pin map: %s\n", cp);
8662 return libbpf_err(err);
8663 }
8664
8665 int bpf_map__unpin(struct bpf_map *map, const char *path)
8666 {
8667 int err;
8668
8669 if (map == NULL) {
8670 pr_warn("invalid map pointer\n");
8671 return libbpf_err(-EINVAL);
8672 }
8673
8674 if (map->pin_path) {
8675 if (path && strcmp(path, map->pin_path)) {
8676 pr_warn("map '%s' already has pin path '%s' different from '%s'\n",
8677 bpf_map__name(map), map->pin_path, path);
8678 return libbpf_err(-EINVAL);
8679 }
8680 path = map->pin_path;
8681 } else if (!path) {
8682 pr_warn("no path to unpin map '%s' from\n",
8683 bpf_map__name(map));
8684 return libbpf_err(-EINVAL);
8685 }
8686
8687 err = check_path(path);
8688 if (err)
8689 return libbpf_err(err);
8690
8691 err = unlink(path);
8692 if (err != 0)
8693 return libbpf_err(-errno);
8694
8695 map->pinned = false;
8696 pr_debug("unpinned map '%s' from '%s'\n", bpf_map__name(map), path);
8697
8698 return 0;
8699 }
8700
8701 int bpf_map__set_pin_path(struct bpf_map *map, const char *path)
8702 {
8703 char *new = NULL;
8704
8705 if (path) {
8706 new = strdup(path);
8707 if (!new)
8708 return libbpf_err(-errno);
8709 }
8710
8711 free(map->pin_path);
8712 map->pin_path = new;
8713 return 0;
8714 }
8715
8716 __alias(bpf_map__pin_path)
8717 const char *bpf_map__get_pin_path(const struct bpf_map *map);
8718
8719 const char *bpf_map__pin_path(const struct bpf_map *map)
8720 {
8721 return map->pin_path;
8722 }
8723
8724 bool bpf_map__is_pinned(const struct bpf_map *map)
8725 {
8726 return map->pinned;
8727 }
8728
8729 static void sanitize_pin_path(char *s)
8730 {
8731 /* bpffs disallows periods in path names */
8732 while (*s) {
8733 if (*s == '.')
8734 *s = '_';
8735 s++;
8736 }
8737 }
8738
8739 int bpf_object__pin_maps(struct bpf_object *obj, const char *path)
8740 {
8741 struct bpf_map *map;
8742 int err;
8743
8744 if (!obj)
8745 return libbpf_err(-ENOENT);
8746
8747 if (!obj->loaded) {
8748 pr_warn("object not yet loaded; load it first\n");
8749 return libbpf_err(-ENOENT);
8750 }
8751
8752 bpf_object__for_each_map(map, obj) {
8753 char *pin_path = NULL;
8754 char buf[PATH_MAX];
8755
8756 if (!map->autocreate)
8757 continue;
8758
8759 if (path) {
8760 err = pathname_concat(buf, sizeof(buf), path, bpf_map__name(map));
8761 if (err)
8762 goto err_unpin_maps;
8763 sanitize_pin_path(buf);
8764 pin_path = buf;
8765 } else if (!map->pin_path) {
8766 continue;
8767 }
8768
8769 err = bpf_map__pin(map, pin_path);
8770 if (err)
8771 goto err_unpin_maps;
8772 }
8773
8774 return 0;
8775
8776 err_unpin_maps:
8777 while ((map = bpf_object__prev_map(obj, map))) {
8778 if (!map->pin_path)
8779 continue;
8780
8781 bpf_map__unpin(map, NULL);
8782 }
8783
8784 return libbpf_err(err);
8785 }
8786
8787 int bpf_object__unpin_maps(struct bpf_object *obj, const char *path)
8788 {
8789 struct bpf_map *map;
8790 int err;
8791
8792 if (!obj)
8793 return libbpf_err(-ENOENT);
8794
8795 bpf_object__for_each_map(map, obj) {
8796 char *pin_path = NULL;
8797 char buf[PATH_MAX];
8798
8799 if (path) {
8800 err = pathname_concat(buf, sizeof(buf), path, bpf_map__name(map));
8801 if (err)
8802 return libbpf_err(err);
8803 sanitize_pin_path(buf);
8804 pin_path = buf;
8805 } else if (!map->pin_path) {
8806 continue;
8807 }
8808
8809 err = bpf_map__unpin(map, pin_path);
8810 if (err)
8811 return libbpf_err(err);
8812 }
8813
8814 return 0;
8815 }
8816
8817 int bpf_object__pin_programs(struct bpf_object *obj, const char *path)
8818 {
8819 struct bpf_program *prog;
8820 char buf[PATH_MAX];
8821 int err;
8822
8823 if (!obj)
8824 return libbpf_err(-ENOENT);
8825
8826 if (!obj->loaded) {
8827 pr_warn("object not yet loaded; load it first\n");
8828 return libbpf_err(-ENOENT);
8829 }
8830
8831 bpf_object__for_each_program(prog, obj) {
8832 err = pathname_concat(buf, sizeof(buf), path, prog->name);
8833 if (err)
8834 goto err_unpin_programs;
8835
8836 err = bpf_program__pin(prog, buf);
8837 if (err)
8838 goto err_unpin_programs;
8839 }
8840
8841 return 0;
8842
8843 err_unpin_programs:
8844 while ((prog = bpf_object__prev_program(obj, prog))) {
8845 if (pathname_concat(buf, sizeof(buf), path, prog->name))
8846 continue;
8847
8848 bpf_program__unpin(prog, buf);
8849 }
8850
8851 return libbpf_err(err);
8852 }
8853
8854 int bpf_object__unpin_programs(struct bpf_object *obj, const char *path)
8855 {
8856 struct bpf_program *prog;
8857 int err;
8858
8859 if (!obj)
8860 return libbpf_err(-ENOENT);
8861
8862 bpf_object__for_each_program(prog, obj) {
8863 char buf[PATH_MAX];
8864
8865 err = pathname_concat(buf, sizeof(buf), path, prog->name);
8866 if (err)
8867 return libbpf_err(err);
8868
8869 err = bpf_program__unpin(prog, buf);
8870 if (err)
8871 return libbpf_err(err);
8872 }
8873
8874 return 0;
8875 }
8876
8877 int bpf_object__pin(struct bpf_object *obj, const char *path)
8878 {
8879 int err;
8880
8881 err = bpf_object__pin_maps(obj, path);
8882 if (err)
8883 return libbpf_err(err);
8884
8885 err = bpf_object__pin_programs(obj, path);
8886 if (err) {
8887 bpf_object__unpin_maps(obj, path);
8888 return libbpf_err(err);
8889 }
8890
8891 return 0;
8892 }
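
/* Example (illustrative sketch): pinning all maps and programs of an
 * already-loaded object "obj" under one bpffs directory, and undoing it on
 * teardown; the directory name is hypothetical.
 *
 *	err = bpf_object__pin(obj, "/sys/fs/bpf/myobj");
 *	if (err)
 *		return err;
 *	...
 *	bpf_object__unpin(obj, "/sys/fs/bpf/myobj");
 */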
8893
8894 int bpf_object__unpin(struct bpf_object *obj, const char *path)
8895 {
8896 int err;
8897
8898 err = bpf_object__unpin_programs(obj, path);
8899 if (err)
8900 return libbpf_err(err);
8901
8902 err = bpf_object__unpin_maps(obj, path);
8903 if (err)
8904 return libbpf_err(err);
8905
8906 return 0;
8907 }
8908
8909 static void bpf_map__destroy(struct bpf_map *map)
8910 {
8911 if (map->inner_map) {
8912 bpf_map__destroy(map->inner_map);
8913 zfree(&map->inner_map);
8914 }
8915
8916 zfree(&map->init_slots);
8917 map->init_slots_sz = 0;
8918
8919 if (map->mmaped && map->mmaped != map->obj->arena_data)
8920 munmap(map->mmaped, bpf_map_mmap_sz(map));
8921 map->mmaped = NULL;
8922
8923 if (map->st_ops) {
8924 zfree(&map->st_ops->data);
8925 zfree(&map->st_ops->progs);
8926 zfree(&map->st_ops->kern_func_off);
8927 zfree(&map->st_ops);
8928 }
8929
8930 zfree(&map->name);
8931 zfree(&map->real_name);
8932 zfree(&map->pin_path);
8933
8934 if (map->fd >= 0)
8935 zclose(map->fd);
8936 }
8937
8938 void bpf_object__close(struct bpf_object *obj)
8939 {
8940 size_t i;
8941
8942 if (IS_ERR_OR_NULL(obj))
8943 return;
8944
8945 usdt_manager_free(obj->usdt_man);
8946 obj->usdt_man = NULL;
8947
8948 bpf_gen__free(obj->gen_loader);
8949 bpf_object__elf_finish(obj);
8950 bpf_object_unload(obj);
8951 btf__free(obj->btf);
8952 btf__free(obj->btf_vmlinux);
8953 btf_ext__free(obj->btf_ext);
8954
8955 for (i = 0; i < obj->nr_maps; i++)
8956 bpf_map__destroy(&obj->maps[i]);
8957
8958 zfree(&obj->btf_custom_path);
8959 zfree(&obj->kconfig);
8960
8961 for (i = 0; i < obj->nr_extern; i++)
8962 zfree(&obj->externs[i].essent_name);
8963
8964 zfree(&obj->externs);
8965 obj->nr_extern = 0;
8966
8967 zfree(&obj->maps);
8968 obj->nr_maps = 0;
8969
8970 if (obj->programs && obj->nr_programs) {
8971 for (i = 0; i < obj->nr_programs; i++)
8972 bpf_program__exit(&obj->programs[i]);
8973 }
8974 zfree(&obj->programs);
8975
8976 zfree(&obj->feat_cache);
8977 zfree(&obj->token_path);
8978 if (obj->token_fd > 0)
8979 close(obj->token_fd);
8980
8981 zfree(&obj->arena_data);
8982
8983 free(obj);
8984 }
8985
8986 const char *bpf_object__name(const struct bpf_object *obj)
8987 {
8988 return obj ? obj->name : libbpf_err_ptr(-EINVAL);
8989 }
8990
8991 unsigned int bpf_object__kversion(const struct bpf_object *obj)
8992 {
8993 return obj ? obj->kern_version : 0;
8994 }
8995
8996 struct btf *bpf_object__btf(const struct bpf_object *obj)
8997 {
8998 return obj ? obj->btf : NULL;
8999 }
9000
9001 int bpf_object__btf_fd(const struct bpf_object *obj)
9002 {
9003 return obj->btf ? btf__fd(obj->btf) : -1;
9004 }
9005
9006 int bpf_object__set_kversion(struct bpf_object *obj, __u32 kern_version)
9007 {
9008 if (obj->loaded)
9009 return libbpf_err(-EINVAL);
9010
9011 obj->kern_version = kern_version;
9012
9013 return 0;
9014 }
9015
9016 int bpf_object__gen_loader(struct bpf_object *obj, struct gen_loader_opts *opts)
9017 {
9018 struct bpf_gen *gen;
9019
9020 if (!opts)
9021 return -EFAULT;
9022 if (!OPTS_VALID(opts, gen_loader_opts))
9023 return -EINVAL;
9024 gen = calloc(sizeof(*gen), 1);
9025 if (!gen)
9026 return -ENOMEM;
9027 gen->opts = opts;
9028 obj->gen_loader = gen;
9029 return 0;
9030 }
9031
9032 static struct bpf_program *
9033 __bpf_program__iter(const struct bpf_program *p, const struct bpf_object *obj,
9034 bool forward)
9035 {
9036 size_t nr_programs = obj->nr_programs;
9037 ssize_t idx;
9038
9039 if (!nr_programs)
9040 return NULL;
9041
9042 if (!p)
9043 /* Iter from the beginning */
9044 return forward ? &obj->programs[0] :
9045 &obj->programs[nr_programs - 1];
9046
9047 if (p->obj != obj) {
9048 pr_warn("error: program handler doesn't match object\n");
9049 return errno = EINVAL, NULL;
9050 }
9051
9052 idx = (p - obj->programs) + (forward ? 1 : -1);
9053 if (idx >= obj->nr_programs || idx < 0)
9054 return NULL;
9055 return &obj->programs[idx];
9056 }
9057
9058 struct bpf_program *
9059 bpf_object__next_program(const struct bpf_object *obj, struct bpf_program *prev)
9060 {
9061 struct bpf_program *prog = prev;
9062
9063 do {
9064 prog = __bpf_program__iter(prog, obj, true);
9065 } while (prog && prog_is_subprog(obj, prog));
9066
9067 return prog;
9068 }
9069
9070 struct bpf_program *
9071 bpf_object__prev_program(const struct bpf_object *obj, struct bpf_program *next)
9072 {
9073 struct bpf_program *prog = next;
9074
9075 do {
9076 prog = __bpf_program__iter(prog, obj, false);
9077 } while (prog && prog_is_subprog(obj, prog));
9078
9079 return prog;
9080 }
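
/* Example (illustrative sketch): walking all entry-point programs of an
 * object "obj" with the iterator above; subprograms are skipped by
 * bpf_object__next_program() itself. The bpf_object__for_each_program()
 * macro from libbpf.h wraps the same calls.
 *
 *	struct bpf_program *prog = NULL;
 *
 *	while ((prog = bpf_object__next_program(obj, prog)))
 *		printf("%s (section %s)\n", bpf_program__name(prog),
 *		       bpf_program__section_name(prog));
 */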
9081
9082 void bpf_program__set_ifindex(struct bpf_program *prog, __u32 ifindex)
9083 {
9084 prog->prog_ifindex = ifindex;
9085 }
9086
9087 const char *bpf_program__name(const struct bpf_program *prog)
9088 {
9089 return prog->name;
9090 }
9091
9092 const char *bpf_program__section_name(const struct bpf_program *prog)
9093 {
9094 return prog->sec_name;
9095 }
9096
9097 bool bpf_program__autoload(const struct bpf_program *prog)
9098 {
9099 return prog->autoload;
9100 }
9101
9102 int bpf_program__set_autoload(struct bpf_program *prog, bool autoload)
9103 {
9104 if (prog->obj->loaded)
9105 return libbpf_err(-EINVAL);
9106
9107 prog->autoload = autoload;
9108 return 0;
9109 }
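
/* Example (illustrative sketch): preventing one program from being loaded
 * into the kernel; this must happen before bpf_object__load(), otherwise the
 * setter returns -EINVAL. The program name is hypothetical.
 *
 *	struct bpf_program *prog;
 *
 *	prog = bpf_object__find_program_by_name(obj, "unused_probe");
 *	if (prog)
 *		bpf_program__set_autoload(prog, false);
 */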
9110
9111 bool bpf_program__autoattach(const struct bpf_program *prog)
9112 {
9113 return prog->autoattach;
9114 }
9115
9116 void bpf_program__set_autoattach(struct bpf_program *prog, bool autoattach)
9117 {
9118 prog->autoattach = autoattach;
9119 }
9120
9121 const struct bpf_insn *bpf_program__insns(const struct bpf_program *prog)
9122 {
9123 return prog->insns;
9124 }
9125
9126 size_t bpf_program__insn_cnt(const struct bpf_program *prog)
9127 {
9128 return prog->insns_cnt;
9129 }
9130
9131 int bpf_program__set_insns(struct bpf_program *prog,
9132 struct bpf_insn *new_insns, size_t new_insn_cnt)
9133 {
9134 struct bpf_insn *insns;
9135
9136 if (prog->obj->loaded)
9137 return -EBUSY;
9138
9139 insns = libbpf_reallocarray(prog->insns, new_insn_cnt, sizeof(*insns));
9140 /* NULL is a valid return from reallocarray if the new count is zero */
9141 if (!insns && new_insn_cnt) {
9142 pr_warn("prog '%s': failed to realloc prog code\n", prog->name);
9143 return -ENOMEM;
9144 }
9145 memcpy(insns, new_insns, new_insn_cnt * sizeof(*insns));
9146
9147 prog->insns = insns;
9148 prog->insns_cnt = new_insn_cnt;
9149 return 0;
9150 }
9151
9152 int bpf_program__fd(const struct bpf_program *prog)
9153 {
9154 if (!prog)
9155 return libbpf_err(-EINVAL);
9156
9157 if (prog->fd < 0)
9158 return libbpf_err(-ENOENT);
9159
9160 return prog->fd;
9161 }
9162
9163 __alias(bpf_program__type)
9164 enum bpf_prog_type bpf_program__get_type(const struct bpf_program *prog);
9165
9166 enum bpf_prog_type bpf_program__type(const struct bpf_program *prog)
9167 {
9168 return prog->type;
9169 }
9170
9171 static size_t custom_sec_def_cnt;
9172 static struct bpf_sec_def *custom_sec_defs;
9173 static struct bpf_sec_def custom_fallback_def;
9174 static bool has_custom_fallback_def;
9175 static int last_custom_sec_def_handler_id;
9176
9177 int bpf_program__set_type(struct bpf_program *prog, enum bpf_prog_type type)
9178 {
9179 if (prog->obj->loaded)
9180 return libbpf_err(-EBUSY);
9181
9182 /* if type is not changed, do nothing */
9183 if (prog->type == type)
9184 return 0;
9185
9186 prog->type = type;
9187
9188 /* If a program type was changed, we need to reset associated SEC()
9189 * handler, as it will be invalid now. The only exception is a generic
9190 * fallback handler, which by definition is program type-agnostic and
9191 * is a catch-all custom handler, optionally set by the application,
9192 * so should be able to handle any type of BPF program.
9193 */
9194 if (prog->sec_def != &custom_fallback_def)
9195 prog->sec_def = NULL;
9196 return 0;
9197 }
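
/* Example (illustrative sketch): overriding the program type and expected
 * attach type that were guessed from the SEC() name; both setters only
 * succeed before the object is loaded.
 *
 *	bpf_program__set_type(prog, BPF_PROG_TYPE_TRACING);
 *	bpf_program__set_expected_attach_type(prog, BPF_TRACE_FENTRY);
 */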
9198
9199 __alias(bpf_program__expected_attach_type)
9200 enum bpf_attach_type bpf_program__get_expected_attach_type(const struct bpf_program *prog);
9201
9202 enum bpf_attach_type bpf_program__expected_attach_type(const struct bpf_program *prog)
9203 {
9204 return prog->expected_attach_type;
9205 }
9206
9207 int bpf_program__set_expected_attach_type(struct bpf_program *prog,
9208 enum bpf_attach_type type)
9209 {
9210 if (prog->obj->loaded)
9211 return libbpf_err(-EBUSY);
9212
9213 prog->expected_attach_type = type;
9214 return 0;
9215 }
9216
9217 __u32 bpf_program__flags(const struct bpf_program *prog)
9218 {
9219 return prog->prog_flags;
9220 }
9221
9222 int bpf_program__set_flags(struct bpf_program *prog, __u32 flags)
9223 {
9224 if (prog->obj->loaded)
9225 return libbpf_err(-EBUSY);
9226
9227 prog->prog_flags = flags;
9228 return 0;
9229 }
9230
9231 __u32 bpf_program__log_level(const struct bpf_program *prog)
9232 {
9233 return prog->log_level;
9234 }
9235
9236 int bpf_program__set_log_level(struct bpf_program *prog, __u32 log_level)
9237 {
9238 if (prog->obj->loaded)
9239 return libbpf_err(-EBUSY);
9240
9241 prog->log_level = log_level;
9242 return 0;
9243 }
9244
9245 const char *bpf_program__log_buf(const struct bpf_program *prog, size_t *log_size)
9246 {
9247 *log_size = prog->log_size;
9248 return prog->log_buf;
9249 }
9250
9251 int bpf_program__set_log_buf(struct bpf_program *prog, char *log_buf, size_t log_size)
9252 {
9253 if (log_size && !log_buf)
9254 return -EINVAL;
9255 if (prog->log_size > UINT_MAX)
9256 return -EINVAL;
9257 if (prog->obj->loaded)
9258 return -EBUSY;
9259
9260 prog->log_buf = log_buf;
9261 prog->log_size = log_size;
9262 return 0;
9263 }
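
/* Example (illustrative sketch): capturing the verifier log of one program
 * into a caller-owned buffer; the buffer size is an arbitrary choice.
 *
 *	static char vlog[1024 * 1024];
 *
 *	bpf_program__set_log_buf(prog, vlog, sizeof(vlog));
 *	bpf_program__set_log_level(prog, 1);
 *	if (bpf_object__load(obj))
 *		fprintf(stderr, "verifier log:\n%s\n", vlog);
 */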
9264
9265 #define SEC_DEF(sec_pfx, ptype, atype, flags, ...) { \
9266 .sec = (char *)sec_pfx, \
9267 .prog_type = BPF_PROG_TYPE_##ptype, \
9268 .expected_attach_type = atype, \
9269 .cookie = (long)(flags), \
9270 .prog_prepare_load_fn = libbpf_prepare_prog_load, \
9271 __VA_ARGS__ \
9272 }
9273
9274 static int attach_kprobe(const struct bpf_program *prog, long cookie, struct bpf_link **link);
9275 static int attach_uprobe(const struct bpf_program *prog, long cookie, struct bpf_link **link);
9276 static int attach_ksyscall(const struct bpf_program *prog, long cookie, struct bpf_link **link);
9277 static int attach_usdt(const struct bpf_program *prog, long cookie, struct bpf_link **link);
9278 static int attach_tp(const struct bpf_program *prog, long cookie, struct bpf_link **link);
9279 static int attach_raw_tp(const struct bpf_program *prog, long cookie, struct bpf_link **link);
9280 static int attach_trace(const struct bpf_program *prog, long cookie, struct bpf_link **link);
9281 static int attach_kprobe_multi(const struct bpf_program *prog, long cookie, struct bpf_link **link);
9282 static int attach_uprobe_multi(const struct bpf_program *prog, long cookie, struct bpf_link **link);
9283 static int attach_lsm(const struct bpf_program *prog, long cookie, struct bpf_link **link);
9284 static int attach_iter(const struct bpf_program *prog, long cookie, struct bpf_link **link);
9285
9286 static const struct bpf_sec_def section_defs[] = {
9287 SEC_DEF("socket", SOCKET_FILTER, 0, SEC_NONE),
9288 SEC_DEF("sk_reuseport/migrate", SK_REUSEPORT, BPF_SK_REUSEPORT_SELECT_OR_MIGRATE, SEC_ATTACHABLE),
9289 SEC_DEF("sk_reuseport", SK_REUSEPORT, BPF_SK_REUSEPORT_SELECT, SEC_ATTACHABLE),
9290 SEC_DEF("kprobe+", KPROBE, 0, SEC_NONE, attach_kprobe),
9291 SEC_DEF("uprobe+", KPROBE, 0, SEC_NONE, attach_uprobe),
9292 SEC_DEF("uprobe.s+", KPROBE, 0, SEC_SLEEPABLE, attach_uprobe),
9293 SEC_DEF("kretprobe+", KPROBE, 0, SEC_NONE, attach_kprobe),
9294 SEC_DEF("uretprobe+", KPROBE, 0, SEC_NONE, attach_uprobe),
9295 SEC_DEF("uretprobe.s+", KPROBE, 0, SEC_SLEEPABLE, attach_uprobe),
9296 SEC_DEF("kprobe.multi+", KPROBE, BPF_TRACE_KPROBE_MULTI, SEC_NONE, attach_kprobe_multi),
9297 SEC_DEF("kretprobe.multi+", KPROBE, BPF_TRACE_KPROBE_MULTI, SEC_NONE, attach_kprobe_multi),
9298 SEC_DEF("uprobe.multi+", KPROBE, BPF_TRACE_UPROBE_MULTI, SEC_NONE, attach_uprobe_multi),
9299 SEC_DEF("uretprobe.multi+", KPROBE, BPF_TRACE_UPROBE_MULTI, SEC_NONE, attach_uprobe_multi),
9300 SEC_DEF("uprobe.multi.s+", KPROBE, BPF_TRACE_UPROBE_MULTI, SEC_SLEEPABLE, attach_uprobe_multi),
9301 SEC_DEF("uretprobe.multi.s+", KPROBE, BPF_TRACE_UPROBE_MULTI, SEC_SLEEPABLE, attach_uprobe_multi),
9302 SEC_DEF("ksyscall+", KPROBE, 0, SEC_NONE, attach_ksyscall),
9303 SEC_DEF("kretsyscall+", KPROBE, 0, SEC_NONE, attach_ksyscall),
9304 SEC_DEF("usdt+", KPROBE, 0, SEC_USDT, attach_usdt),
9305 SEC_DEF("usdt.s+", KPROBE, 0, SEC_USDT | SEC_SLEEPABLE, attach_usdt),
9306 SEC_DEF("tc/ingress", SCHED_CLS, BPF_TCX_INGRESS, SEC_NONE), /* alias for tcx */
9307 SEC_DEF("tc/egress", SCHED_CLS, BPF_TCX_EGRESS, SEC_NONE), /* alias for tcx */
9308 SEC_DEF("tcx/ingress", SCHED_CLS, BPF_TCX_INGRESS, SEC_NONE),
9309 SEC_DEF("tcx/egress", SCHED_CLS, BPF_TCX_EGRESS, SEC_NONE),
9310 SEC_DEF("tc", SCHED_CLS, 0, SEC_NONE), /* deprecated / legacy, use tcx */
9311 SEC_DEF("classifier", SCHED_CLS, 0, SEC_NONE), /* deprecated / legacy, use tcx */
9312 SEC_DEF("action", SCHED_ACT, 0, SEC_NONE), /* deprecated / legacy, use tcx */
9313 SEC_DEF("netkit/primary", SCHED_CLS, BPF_NETKIT_PRIMARY, SEC_NONE),
9314 SEC_DEF("netkit/peer", SCHED_CLS, BPF_NETKIT_PEER, SEC_NONE),
9315 SEC_DEF("tracepoint+", TRACEPOINT, 0, SEC_NONE, attach_tp),
9316 SEC_DEF("tp+", TRACEPOINT, 0, SEC_NONE, attach_tp),
9317 SEC_DEF("raw_tracepoint+", RAW_TRACEPOINT, 0, SEC_NONE, attach_raw_tp),
9318 SEC_DEF("raw_tp+", RAW_TRACEPOINT, 0, SEC_NONE, attach_raw_tp),
9319 SEC_DEF("raw_tracepoint.w+", RAW_TRACEPOINT_WRITABLE, 0, SEC_NONE, attach_raw_tp),
9320 SEC_DEF("raw_tp.w+", RAW_TRACEPOINT_WRITABLE, 0, SEC_NONE, attach_raw_tp),
9321 SEC_DEF("tp_btf+", TRACING, BPF_TRACE_RAW_TP, SEC_ATTACH_BTF, attach_trace),
9322 SEC_DEF("fentry+", TRACING, BPF_TRACE_FENTRY, SEC_ATTACH_BTF, attach_trace),
9323 SEC_DEF("fmod_ret+", TRACING, BPF_MODIFY_RETURN, SEC_ATTACH_BTF, attach_trace),
9324 SEC_DEF("fexit+", TRACING, BPF_TRACE_FEXIT, SEC_ATTACH_BTF, attach_trace),
9325 SEC_DEF("fentry.s+", TRACING, BPF_TRACE_FENTRY, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_trace),
9326 SEC_DEF("fmod_ret.s+", TRACING, BPF_MODIFY_RETURN, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_trace),
9327 SEC_DEF("fexit.s+", TRACING, BPF_TRACE_FEXIT, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_trace),
9328 SEC_DEF("freplace+", EXT, 0, SEC_ATTACH_BTF, attach_trace),
9329 SEC_DEF("lsm+", LSM, BPF_LSM_MAC, SEC_ATTACH_BTF, attach_lsm),
9330 SEC_DEF("lsm.s+", LSM, BPF_LSM_MAC, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_lsm),
9331 SEC_DEF("lsm_cgroup+", LSM, BPF_LSM_CGROUP, SEC_ATTACH_BTF),
9332 SEC_DEF("iter+", TRACING, BPF_TRACE_ITER, SEC_ATTACH_BTF, attach_iter),
9333 SEC_DEF("iter.s+", TRACING, BPF_TRACE_ITER, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_iter),
9334 SEC_DEF("syscall", SYSCALL, 0, SEC_SLEEPABLE),
9335 SEC_DEF("xdp.frags/devmap", XDP, BPF_XDP_DEVMAP, SEC_XDP_FRAGS),
9336 SEC_DEF("xdp/devmap", XDP, BPF_XDP_DEVMAP, SEC_ATTACHABLE),
9337 SEC_DEF("xdp.frags/cpumap", XDP, BPF_XDP_CPUMAP, SEC_XDP_FRAGS),
9338 SEC_DEF("xdp/cpumap", XDP, BPF_XDP_CPUMAP, SEC_ATTACHABLE),
9339 SEC_DEF("xdp.frags", XDP, BPF_XDP, SEC_XDP_FRAGS),
9340 SEC_DEF("xdp", XDP, BPF_XDP, SEC_ATTACHABLE_OPT),
9341 SEC_DEF("perf_event", PERF_EVENT, 0, SEC_NONE),
9342 SEC_DEF("lwt_in", LWT_IN, 0, SEC_NONE),
9343 SEC_DEF("lwt_out", LWT_OUT, 0, SEC_NONE),
9344 SEC_DEF("lwt_xmit", LWT_XMIT, 0, SEC_NONE),
9345 SEC_DEF("lwt_seg6local", LWT_SEG6LOCAL, 0, SEC_NONE),
9346 SEC_DEF("sockops", SOCK_OPS, BPF_CGROUP_SOCK_OPS, SEC_ATTACHABLE_OPT),
9347 SEC_DEF("sk_skb/stream_parser", SK_SKB, BPF_SK_SKB_STREAM_PARSER, SEC_ATTACHABLE_OPT),
9348 SEC_DEF("sk_skb/stream_verdict",SK_SKB, BPF_SK_SKB_STREAM_VERDICT, SEC_ATTACHABLE_OPT),
9349 SEC_DEF("sk_skb/verdict", SK_SKB, BPF_SK_SKB_VERDICT, SEC_ATTACHABLE_OPT),
9350 SEC_DEF("sk_skb", SK_SKB, 0, SEC_NONE),
9351 SEC_DEF("sk_msg", SK_MSG, BPF_SK_MSG_VERDICT, SEC_ATTACHABLE_OPT),
9352 SEC_DEF("lirc_mode2", LIRC_MODE2, BPF_LIRC_MODE2, SEC_ATTACHABLE_OPT),
9353 SEC_DEF("flow_dissector", FLOW_DISSECTOR, BPF_FLOW_DISSECTOR, SEC_ATTACHABLE_OPT),
9354 SEC_DEF("cgroup_skb/ingress", CGROUP_SKB, BPF_CGROUP_INET_INGRESS, SEC_ATTACHABLE_OPT),
9355 SEC_DEF("cgroup_skb/egress", CGROUP_SKB, BPF_CGROUP_INET_EGRESS, SEC_ATTACHABLE_OPT),
9356 SEC_DEF("cgroup/skb", CGROUP_SKB, 0, SEC_NONE),
9357 SEC_DEF("cgroup/sock_create", CGROUP_SOCK, BPF_CGROUP_INET_SOCK_CREATE, SEC_ATTACHABLE),
9358 SEC_DEF("cgroup/sock_release", CGROUP_SOCK, BPF_CGROUP_INET_SOCK_RELEASE, SEC_ATTACHABLE),
9359 SEC_DEF("cgroup/sock", CGROUP_SOCK, BPF_CGROUP_INET_SOCK_CREATE, SEC_ATTACHABLE_OPT),
9360 SEC_DEF("cgroup/post_bind4", CGROUP_SOCK, BPF_CGROUP_INET4_POST_BIND, SEC_ATTACHABLE),
9361 SEC_DEF("cgroup/post_bind6", CGROUP_SOCK, BPF_CGROUP_INET6_POST_BIND, SEC_ATTACHABLE),
9362 SEC_DEF("cgroup/bind4", CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_BIND, SEC_ATTACHABLE),
9363 SEC_DEF("cgroup/bind6", CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_BIND, SEC_ATTACHABLE),
9364 SEC_DEF("cgroup/connect4", CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_CONNECT, SEC_ATTACHABLE),
9365 SEC_DEF("cgroup/connect6", CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_CONNECT, SEC_ATTACHABLE),
9366 SEC_DEF("cgroup/connect_unix", CGROUP_SOCK_ADDR, BPF_CGROUP_UNIX_CONNECT, SEC_ATTACHABLE),
9367 SEC_DEF("cgroup/sendmsg4", CGROUP_SOCK_ADDR, BPF_CGROUP_UDP4_SENDMSG, SEC_ATTACHABLE),
9368 SEC_DEF("cgroup/sendmsg6", CGROUP_SOCK_ADDR, BPF_CGROUP_UDP6_SENDMSG, SEC_ATTACHABLE),
9369 SEC_DEF("cgroup/sendmsg_unix", CGROUP_SOCK_ADDR, BPF_CGROUP_UNIX_SENDMSG, SEC_ATTACHABLE),
9370 SEC_DEF("cgroup/recvmsg4", CGROUP_SOCK_ADDR, BPF_CGROUP_UDP4_RECVMSG, SEC_ATTACHABLE),
9371 SEC_DEF("cgroup/recvmsg6", CGROUP_SOCK_ADDR, BPF_CGROUP_UDP6_RECVMSG, SEC_ATTACHABLE),
9372 SEC_DEF("cgroup/recvmsg_unix", CGROUP_SOCK_ADDR, BPF_CGROUP_UNIX_RECVMSG, SEC_ATTACHABLE),
9373 SEC_DEF("cgroup/getpeername4", CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_GETPEERNAME, SEC_ATTACHABLE),
9374 SEC_DEF("cgroup/getpeername6", CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_GETPEERNAME, SEC_ATTACHABLE),
9375 SEC_DEF("cgroup/getpeername_unix", CGROUP_SOCK_ADDR, BPF_CGROUP_UNIX_GETPEERNAME, SEC_ATTACHABLE),
9376 SEC_DEF("cgroup/getsockname4", CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_GETSOCKNAME, SEC_ATTACHABLE),
9377 SEC_DEF("cgroup/getsockname6", CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_GETSOCKNAME, SEC_ATTACHABLE),
9378 SEC_DEF("cgroup/getsockname_unix", CGROUP_SOCK_ADDR, BPF_CGROUP_UNIX_GETSOCKNAME, SEC_ATTACHABLE),
9379 SEC_DEF("cgroup/sysctl", CGROUP_SYSCTL, BPF_CGROUP_SYSCTL, SEC_ATTACHABLE),
9380 SEC_DEF("cgroup/getsockopt", CGROUP_SOCKOPT, BPF_CGROUP_GETSOCKOPT, SEC_ATTACHABLE),
9381 SEC_DEF("cgroup/setsockopt", CGROUP_SOCKOPT, BPF_CGROUP_SETSOCKOPT, SEC_ATTACHABLE),
9382 SEC_DEF("cgroup/dev", CGROUP_DEVICE, BPF_CGROUP_DEVICE, SEC_ATTACHABLE_OPT),
9383 SEC_DEF("struct_ops+", STRUCT_OPS, 0, SEC_NONE),
9384 SEC_DEF("struct_ops.s+", STRUCT_OPS, 0, SEC_SLEEPABLE),
9385 SEC_DEF("sk_lookup", SK_LOOKUP, BPF_SK_LOOKUP, SEC_ATTACHABLE),
9386 SEC_DEF("netfilter", NETFILTER, BPF_NETFILTER, SEC_NONE),
9387 };
9388
9389 int libbpf_register_prog_handler(const char *sec,
9390 enum bpf_prog_type prog_type,
9391 enum bpf_attach_type exp_attach_type,
9392 const struct libbpf_prog_handler_opts *opts)
9393 {
9394 struct bpf_sec_def *sec_def;
9395
9396 if (!OPTS_VALID(opts, libbpf_prog_handler_opts))
9397 return libbpf_err(-EINVAL);
9398
9399 if (last_custom_sec_def_handler_id == INT_MAX) /* prevent overflow */
9400 return libbpf_err(-E2BIG);
9401
9402 if (sec) {
9403 sec_def = libbpf_reallocarray(custom_sec_defs, custom_sec_def_cnt + 1,
9404 sizeof(*sec_def));
9405 if (!sec_def)
9406 return libbpf_err(-ENOMEM);
9407
9408 custom_sec_defs = sec_def;
9409 sec_def = &custom_sec_defs[custom_sec_def_cnt];
9410 } else {
9411 if (has_custom_fallback_def)
9412 return libbpf_err(-EBUSY);
9413
9414 sec_def = &custom_fallback_def;
9415 }
9416
9417 sec_def->sec = sec ? strdup(sec) : NULL;
9418 if (sec && !sec_def->sec)
9419 return libbpf_err(-ENOMEM);
9420
9421 sec_def->prog_type = prog_type;
9422 sec_def->expected_attach_type = exp_attach_type;
9423 sec_def->cookie = OPTS_GET(opts, cookie, 0);
9424
9425 sec_def->prog_setup_fn = OPTS_GET(opts, prog_setup_fn, NULL);
9426 sec_def->prog_prepare_load_fn = OPTS_GET(opts, prog_prepare_load_fn, NULL);
9427 sec_def->prog_attach_fn = OPTS_GET(opts, prog_attach_fn, NULL);
9428
9429 sec_def->handler_id = ++last_custom_sec_def_handler_id;
9430
9431 if (sec)
9432 custom_sec_def_cnt++;
9433 else
9434 has_custom_fallback_def = true;
9435
9436 return sec_def->handler_id;
9437 }
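
/* Example (illustrative sketch): registering a custom handler for a
 * hypothetical SEC("myprobe") prefix. my_attach_fn is an application-defined
 * libbpf_prog_attach_fn_t callback and is not shown here.
 *
 *	LIBBPF_OPTS(libbpf_prog_handler_opts, opts,
 *		.prog_attach_fn = my_attach_fn,
 *	);
 *	int handler_id;
 *
 *	handler_id = libbpf_register_prog_handler("myprobe+", BPF_PROG_TYPE_KPROBE,
 *						  0, &opts);
 *	...
 *	libbpf_unregister_prog_handler(handler_id);
 */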
9438
9439 int libbpf_unregister_prog_handler(int handler_id)
9440 {
9441 struct bpf_sec_def *sec_defs;
9442 int i;
9443
9444 if (handler_id <= 0)
9445 return libbpf_err(-EINVAL);
9446
9447 if (has_custom_fallback_def && custom_fallback_def.handler_id == handler_id) {
9448 memset(&custom_fallback_def, 0, sizeof(custom_fallback_def));
9449 has_custom_fallback_def = false;
9450 return 0;
9451 }
9452
9453 for (i = 0; i < custom_sec_def_cnt; i++) {
9454 if (custom_sec_defs[i].handler_id == handler_id)
9455 break;
9456 }
9457
9458 if (i == custom_sec_def_cnt)
9459 return libbpf_err(-ENOENT);
9460
9461 free(custom_sec_defs[i].sec);
9462 for (i = i + 1; i < custom_sec_def_cnt; i++)
9463 custom_sec_defs[i - 1] = custom_sec_defs[i];
9464 custom_sec_def_cnt--;
9465
9466 /* try to shrink the array, but it's ok if we couldn't */
9467 sec_defs = libbpf_reallocarray(custom_sec_defs, custom_sec_def_cnt, sizeof(*sec_defs));
9468 /* if new count is zero, reallocarray can return a valid NULL result;
9469 * in this case the previous pointer will be freed, so we *have to*
9470 * reassign old pointer to the new value (even if it's NULL)
9471 */
9472 if (sec_defs || custom_sec_def_cnt == 0)
9473 custom_sec_defs = sec_defs;
9474
9475 return 0;
9476 }
9477
9478 static bool sec_def_matches(const struct bpf_sec_def *sec_def, const char *sec_name)
9479 {
9480 size_t len = strlen(sec_def->sec);
9481
9482 /* "type/" always has to have proper SEC("type/extras") form */
9483 if (sec_def->sec[len - 1] == '/') {
9484 if (str_has_pfx(sec_name, sec_def->sec))
9485 return true;
9486 return false;
9487 }
9488
9489 /* "type+" means it can be either exact SEC("type") or
9490 * well-formed SEC("type/extras") with proper '/' separator
9491 */
9492 if (sec_def->sec[len - 1] == '+') {
9493 len--;
9494 /* not even a prefix */
9495 if (strncmp(sec_name, sec_def->sec, len) != 0)
9496 return false;
9497 /* exact match or has '/' separator */
9498 if (sec_name[len] == '\0' || sec_name[len] == '/')
9499 return true;
9500 return false;
9501 }
9502
9503 return strcmp(sec_name, sec_def->sec) == 0;
9504 }
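
/* For instance, the "kprobe+" definition matches both SEC("kprobe") and
 * SEC("kprobe/vfs_read"), a definition ending in '/' matches any section
 * name starting with that prefix, and a plain definition like "socket"
 * requires an exact match.
 */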
9505
9506 static const struct bpf_sec_def *find_sec_def(const char *sec_name)
9507 {
9508 const struct bpf_sec_def *sec_def;
9509 int i, n;
9510
9511 n = custom_sec_def_cnt;
9512 for (i = 0; i < n; i++) {
9513 sec_def = &custom_sec_defs[i];
9514 if (sec_def_matches(sec_def, sec_name))
9515 return sec_def;
9516 }
9517
9518 n = ARRAY_SIZE(section_defs);
9519 for (i = 0; i < n; i++) {
9520 sec_def = &section_defs[i];
9521 if (sec_def_matches(sec_def, sec_name))
9522 return sec_def;
9523 }
9524
9525 if (has_custom_fallback_def)
9526 return &custom_fallback_def;
9527
9528 return NULL;
9529 }
9530
9531 #define MAX_TYPE_NAME_SIZE 32
9532
9533 static char *libbpf_get_type_names(bool attach_type)
9534 {
9535 int i, len = ARRAY_SIZE(section_defs) * MAX_TYPE_NAME_SIZE;
9536 char *buf;
9537
9538 buf = malloc(len);
9539 if (!buf)
9540 return NULL;
9541
9542 buf[0] = '\0';
9543 /* Forge string buf with all available names */
9544 for (i = 0; i < ARRAY_SIZE(section_defs); i++) {
9545 const struct bpf_sec_def *sec_def = &section_defs[i];
9546
9547 if (attach_type) {
9548 if (sec_def->prog_prepare_load_fn != libbpf_prepare_prog_load)
9549 continue;
9550
9551 if (!(sec_def->cookie & SEC_ATTACHABLE))
9552 continue;
9553 }
9554
9555 if (strlen(buf) + strlen(section_defs[i].sec) + 2 > len) {
9556 free(buf);
9557 return NULL;
9558 }
9559 strcat(buf, " ");
9560 strcat(buf, section_defs[i].sec);
9561 }
9562
9563 return buf;
9564 }
9565
9566 int libbpf_prog_type_by_name(const char *name, enum bpf_prog_type *prog_type,
9567 enum bpf_attach_type *expected_attach_type)
9568 {
9569 const struct bpf_sec_def *sec_def;
9570 char *type_names;
9571
9572 if (!name)
9573 return libbpf_err(-EINVAL);
9574
9575 sec_def = find_sec_def(name);
9576 if (sec_def) {
9577 *prog_type = sec_def->prog_type;
9578 *expected_attach_type = sec_def->expected_attach_type;
9579 return 0;
9580 }
9581
9582 pr_debug("failed to guess program type from ELF section '%s'\n", name);
9583 type_names = libbpf_get_type_names(false);
9584 if (type_names != NULL) {
9585 pr_debug("supported section(type) names are:%s\n", type_names);
9586 free(type_names);
9587 }
9588
9589 return libbpf_err(-ESRCH);
9590 }
9591
9592 const char *libbpf_bpf_attach_type_str(enum bpf_attach_type t)
9593 {
9594 if (t < 0 || t >= ARRAY_SIZE(attach_type_name))
9595 return NULL;
9596
9597 return attach_type_name[t];
9598 }
9599
9600 const char *libbpf_bpf_link_type_str(enum bpf_link_type t)
9601 {
9602 if (t < 0 || t >= ARRAY_SIZE(link_type_name))
9603 return NULL;
9604
9605 return link_type_name[t];
9606 }
9607
9608 const char *libbpf_bpf_map_type_str(enum bpf_map_type t)
9609 {
9610 if (t < 0 || t >= ARRAY_SIZE(map_type_name))
9611 return NULL;
9612
9613 return map_type_name[t];
9614 }
9615
9616 const char *libbpf_bpf_prog_type_str(enum bpf_prog_type t)
9617 {
9618 if (t < 0 || t >= ARRAY_SIZE(prog_type_name))
9619 return NULL;
9620
9621 return prog_type_name[t];
9622 }
9623
9624 static struct bpf_map *find_struct_ops_map_by_offset(struct bpf_object *obj,
9625 int sec_idx,
9626 size_t offset)
9627 {
9628 struct bpf_map *map;
9629 size_t i;
9630
9631 for (i = 0; i < obj->nr_maps; i++) {
9632 map = &obj->maps[i];
9633 if (!bpf_map__is_struct_ops(map))
9634 continue;
9635 if (map->sec_idx == sec_idx &&
9636 map->sec_offset <= offset &&
9637 offset - map->sec_offset < map->def.value_size)
9638 return map;
9639 }
9640
9641 return NULL;
9642 }
9643
9644 /* Collect the reloc from ELF, populate the st_ops->progs[], and update
9645 * st_ops->data for shadow type.
9646 */
9647 static int bpf_object__collect_st_ops_relos(struct bpf_object *obj,
9648 Elf64_Shdr *shdr, Elf_Data *data)
9649 {
9650 const struct btf_member *member;
9651 struct bpf_struct_ops *st_ops;
9652 struct bpf_program *prog;
9653 unsigned int shdr_idx;
9654 const struct btf *btf;
9655 struct bpf_map *map;
9656 unsigned int moff, insn_idx;
9657 const char *name;
9658 __u32 member_idx;
9659 Elf64_Sym *sym;
9660 Elf64_Rel *rel;
9661 int i, nrels;
9662
9663 btf = obj->btf;
9664 nrels = shdr->sh_size / shdr->sh_entsize;
9665 for (i = 0; i < nrels; i++) {
9666 rel = elf_rel_by_idx(data, i);
9667 if (!rel) {
9668 pr_warn("struct_ops reloc: failed to get %d reloc\n", i);
9669 return -LIBBPF_ERRNO__FORMAT;
9670 }
9671
9672 sym = elf_sym_by_idx(obj, ELF64_R_SYM(rel->r_info));
9673 if (!sym) {
9674 pr_warn("struct_ops reloc: symbol %zx not found\n",
9675 (size_t)ELF64_R_SYM(rel->r_info));
9676 return -LIBBPF_ERRNO__FORMAT;
9677 }
9678
9679 name = elf_sym_str(obj, sym->st_name) ?: "<?>";
9680 map = find_struct_ops_map_by_offset(obj, shdr->sh_info, rel->r_offset);
9681 if (!map) {
9682 pr_warn("struct_ops reloc: cannot find map at rel->r_offset %zu\n",
9683 (size_t)rel->r_offset);
9684 return -EINVAL;
9685 }
9686
9687 moff = rel->r_offset - map->sec_offset;
9688 shdr_idx = sym->st_shndx;
9689 st_ops = map->st_ops;
9690 pr_debug("struct_ops reloc %s: for %lld value %lld shdr_idx %u rel->r_offset %zu map->sec_offset %zu name %d (\'%s\')\n",
9691 map->name,
9692 (long long)(rel->r_info >> 32),
9693 (long long)sym->st_value,
9694 shdr_idx, (size_t)rel->r_offset,
9695 map->sec_offset, sym->st_name, name);
9696
9697 if (shdr_idx >= SHN_LORESERVE) {
9698 pr_warn("struct_ops reloc %s: rel->r_offset %zu shdr_idx %u unsupported non-static function\n",
9699 map->name, (size_t)rel->r_offset, shdr_idx);
9700 return -LIBBPF_ERRNO__RELOC;
9701 }
9702 if (sym->st_value % BPF_INSN_SZ) {
9703 pr_warn("struct_ops reloc %s: invalid target program offset %llu\n",
9704 map->name, (unsigned long long)sym->st_value);
9705 return -LIBBPF_ERRNO__FORMAT;
9706 }
9707 insn_idx = sym->st_value / BPF_INSN_SZ;
9708
9709 member = find_member_by_offset(st_ops->type, moff * 8);
9710 if (!member) {
9711 pr_warn("struct_ops reloc %s: cannot find member at moff %u\n",
9712 map->name, moff);
9713 return -EINVAL;
9714 }
9715 member_idx = member - btf_members(st_ops->type);
9716 name = btf__name_by_offset(btf, member->name_off);
9717
9718 if (!resolve_func_ptr(btf, member->type, NULL)) {
9719 pr_warn("struct_ops reloc %s: cannot relocate non func ptr %s\n",
9720 map->name, name);
9721 return -EINVAL;
9722 }
9723
9724 prog = find_prog_by_sec_insn(obj, shdr_idx, insn_idx);
9725 if (!prog) {
9726 pr_warn("struct_ops reloc %s: cannot find prog at shdr_idx %u to relocate func ptr %s\n",
9727 map->name, shdr_idx, name);
9728 return -EINVAL;
9729 }
9730
9731 /* prevent the use of BPF prog with invalid type */
9732 if (prog->type != BPF_PROG_TYPE_STRUCT_OPS) {
9733 pr_warn("struct_ops reloc %s: prog %s is not struct_ops BPF program\n",
9734 map->name, prog->name);
9735 return -EINVAL;
9736 }
9737
9738 st_ops->progs[member_idx] = prog;
9739
9740 /* st_ops->data will be exposed to users, being returned by
9741 * bpf_map__initial_value() as a pointer to the shadow
9742 * type. All function pointers in the original struct type
9743 * should be converted to a pointer to struct bpf_program
9744 * in the shadow type.
9745 */
9746 *((struct bpf_program **)(st_ops->data + moff)) = prog;
9747 }
9748
9749 return 0;
9750 }
9751
9752 #define BTF_TRACE_PREFIX "btf_trace_"
9753 #define BTF_LSM_PREFIX "bpf_lsm_"
9754 #define BTF_ITER_PREFIX "bpf_iter_"
9755 #define BTF_MAX_NAME_SIZE 128
9756
9757 void btf_get_kernel_prefix_kind(enum bpf_attach_type attach_type,
9758 const char **prefix, int *kind)
9759 {
9760 switch (attach_type) {
9761 case BPF_TRACE_RAW_TP:
9762 *prefix = BTF_TRACE_PREFIX;
9763 *kind = BTF_KIND_TYPEDEF;
9764 break;
9765 case BPF_LSM_MAC:
9766 case BPF_LSM_CGROUP:
9767 *prefix = BTF_LSM_PREFIX;
9768 *kind = BTF_KIND_FUNC;
9769 break;
9770 case BPF_TRACE_ITER:
9771 *prefix = BTF_ITER_PREFIX;
9772 *kind = BTF_KIND_FUNC;
9773 break;
9774 default:
9775 *prefix = "";
9776 *kind = BTF_KIND_FUNC;
9777 }
9778 }
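
/* For example, a SEC("tp_btf/sched_switch") program has attach type
 * BPF_TRACE_RAW_TP, so its target is looked up in vmlinux BTF as the
 * "btf_trace_sched_switch" typedef, while an LSM program attaching to the
 * file_open hook is resolved as the "bpf_lsm_file_open" func.
 */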
9779
9780 static int find_btf_by_prefix_kind(const struct btf *btf, const char *prefix,
9781 const char *name, __u32 kind)
9782 {
9783 char btf_type_name[BTF_MAX_NAME_SIZE];
9784 int ret;
9785
9786 ret = snprintf(btf_type_name, sizeof(btf_type_name),
9787 "%s%s", prefix, name);
9788 /* snprintf returns the number of characters written excluding the
9789 * terminating null. So, if >= BTF_MAX_NAME_SIZE are written, it
9790 * indicates truncation.
9791 */
9792 if (ret < 0 || ret >= sizeof(btf_type_name))
9793 return -ENAMETOOLONG;
9794 return btf__find_by_name_kind(btf, btf_type_name, kind);
9795 }
9796
9797 static inline int find_attach_btf_id(struct btf *btf, const char *name,
9798 enum bpf_attach_type attach_type)
9799 {
9800 const char *prefix;
9801 int kind;
9802
9803 btf_get_kernel_prefix_kind(attach_type, &prefix, &kind);
9804 return find_btf_by_prefix_kind(btf, prefix, name, kind);
9805 }
9806
9807 int libbpf_find_vmlinux_btf_id(const char *name,
9808 enum bpf_attach_type attach_type)
9809 {
9810 struct btf *btf;
9811 int err;
9812
9813 btf = btf__load_vmlinux_btf();
9814 err = libbpf_get_error(btf);
9815 if (err) {
9816 pr_warn("vmlinux BTF is not found\n");
9817 return libbpf_err(err);
9818 }
9819
9820 err = find_attach_btf_id(btf, name, attach_type);
9821 if (err <= 0)
9822 pr_warn("%s is not found in vmlinux BTF\n", name);
9823
9824 btf__free(btf);
9825 return libbpf_err(err);
9826 }
9827
9828 static int libbpf_find_prog_btf_id(const char *name, __u32 attach_prog_fd)
9829 {
9830 struct bpf_prog_info info;
9831 __u32 info_len = sizeof(info);
9832 struct btf *btf;
9833 int err;
9834
9835 memset(&info, 0, info_len);
9836 err = bpf_prog_get_info_by_fd(attach_prog_fd, &info, &info_len);
9837 if (err) {
9838 pr_warn("failed bpf_prog_get_info_by_fd for FD %d: %d\n",
9839 attach_prog_fd, err);
9840 return err;
9841 }
9842
9843 err = -EINVAL;
9844 if (!info.btf_id) {
9845 pr_warn("The target program doesn't have BTF\n");
9846 goto out;
9847 }
9848 btf = btf__load_from_kernel_by_id(info.btf_id);
9849 err = libbpf_get_error(btf);
9850 if (err) {
9851 pr_warn("Failed to get BTF %d of the program: %d\n", info.btf_id, err);
9852 goto out;
9853 }
9854 err = btf__find_by_name_kind(btf, name, BTF_KIND_FUNC);
9855 btf__free(btf);
9856 if (err <= 0) {
9857 pr_warn("%s is not found in prog's BTF\n", name);
9858 goto out;
9859 }
9860 out:
9861 return err;
9862 }
9863
9864 static int find_kernel_btf_id(struct bpf_object *obj, const char *attach_name,
9865 enum bpf_attach_type attach_type,
9866 int *btf_obj_fd, int *btf_type_id)
9867 {
9868 int ret, i;
9869
9870 ret = find_attach_btf_id(obj->btf_vmlinux, attach_name, attach_type);
9871 if (ret > 0) {
9872 *btf_obj_fd = 0; /* vmlinux BTF */
9873 *btf_type_id = ret;
9874 return 0;
9875 }
9876 if (ret != -ENOENT)
9877 return ret;
9878
9879 ret = load_module_btfs(obj);
9880 if (ret)
9881 return ret;
9882
9883 for (i = 0; i < obj->btf_module_cnt; i++) {
9884 const struct module_btf *mod = &obj->btf_modules[i];
9885
9886 ret = find_attach_btf_id(mod->btf, attach_name, attach_type);
9887 if (ret > 0) {
9888 *btf_obj_fd = mod->fd;
9889 *btf_type_id = ret;
9890 return 0;
9891 }
9892 if (ret == -ENOENT)
9893 continue;
9894
9895 return ret;
9896 }
9897
9898 return -ESRCH;
9899 }
9900
9901 static int libbpf_find_attach_btf_id(struct bpf_program *prog, const char *attach_name,
9902 int *btf_obj_fd, int *btf_type_id)
9903 {
9904 enum bpf_attach_type attach_type = prog->expected_attach_type;
9905 __u32 attach_prog_fd = prog->attach_prog_fd;
9906 int err = 0;
9907
9908 /* BPF program's BTF ID */
9909 if (prog->type == BPF_PROG_TYPE_EXT || attach_prog_fd) {
9910 if (!attach_prog_fd) {
9911 pr_warn("prog '%s': attach program FD is not set\n", prog->name);
9912 return -EINVAL;
9913 }
9914 err = libbpf_find_prog_btf_id(attach_name, attach_prog_fd);
9915 if (err < 0) {
9916 pr_warn("prog '%s': failed to find BPF program (FD %d) BTF ID for '%s': %d\n",
9917 prog->name, attach_prog_fd, attach_name, err);
9918 return err;
9919 }
9920 *btf_obj_fd = 0;
9921 *btf_type_id = err;
9922 return 0;
9923 }
9924
9925 /* kernel/module BTF ID */
9926 if (prog->obj->gen_loader) {
9927 bpf_gen__record_attach_target(prog->obj->gen_loader, attach_name, attach_type);
9928 *btf_obj_fd = 0;
9929 *btf_type_id = 1;
9930 } else {
9931 err = find_kernel_btf_id(prog->obj, attach_name,
9932 attach_type, btf_obj_fd,
9933 btf_type_id);
9934 }
9935 if (err) {
9936 pr_warn("prog '%s': failed to find kernel BTF type ID of '%s': %d\n",
9937 prog->name, attach_name, err);
9938 return err;
9939 }
9940 return 0;
9941 }
9942
9943 int libbpf_attach_type_by_name(const char *name,
9944 enum bpf_attach_type *attach_type)
9945 {
9946 char *type_names;
9947 const struct bpf_sec_def *sec_def;
9948
9949 if (!name)
9950 return libbpf_err(-EINVAL);
9951
9952 sec_def = find_sec_def(name);
9953 if (!sec_def) {
9954 pr_debug("failed to guess attach type based on ELF section name '%s'\n", name);
9955 type_names = libbpf_get_type_names(true);
9956 if (type_names != NULL) {
9957 pr_debug("attachable section(type) names are:%s\n", type_names);
9958 free(type_names);
9959 }
9960
9961 return libbpf_err(-EINVAL);
9962 }
9963
9964 if (sec_def->prog_prepare_load_fn != libbpf_prepare_prog_load)
9965 return libbpf_err(-EINVAL);
9966 if (!(sec_def->cookie & SEC_ATTACHABLE))
9967 return libbpf_err(-EINVAL);
9968
9969 *attach_type = sec_def->expected_attach_type;
9970 return 0;
9971 }
9972
9973 int bpf_map__fd(const struct bpf_map *map)
9974 {
9975 if (!map)
9976 return libbpf_err(-EINVAL);
9977 if (!map_is_created(map))
9978 return -1;
9979 return map->fd;
9980 }
9981
9982 static bool map_uses_real_name(const struct bpf_map *map)
9983 {
9984 /* Since libbpf started to support custom .data.* and .rodata.* maps,
9985 * their user-visible name differs from kernel-visible name. Users see
9986 * such map's corresponding ELF section name as a map name.
9987 * This check distinguishes .data/.rodata from .data.* and .rodata.*
9988 * maps to know which name has to be returned to the user.
9989 */
9990 if (map->libbpf_type == LIBBPF_MAP_DATA && strcmp(map->real_name, DATA_SEC) != 0)
9991 return true;
9992 if (map->libbpf_type == LIBBPF_MAP_RODATA && strcmp(map->real_name, RODATA_SEC) != 0)
9993 return true;
9994 return false;
9995 }
9996
9997 const char *bpf_map__name(const struct bpf_map *map)
9998 {
9999 if (!map)
10000 return NULL;
10001
10002 if (map_uses_real_name(map))
10003 return map->real_name;
10004
10005 return map->name;
10006 }
10007
10008 enum bpf_map_type bpf_map__type(const struct bpf_map *map)
10009 {
10010 return map->def.type;
10011 }
10012
10013 int bpf_map__set_type(struct bpf_map *map, enum bpf_map_type type)
10014 {
10015 if (map_is_created(map))
10016 return libbpf_err(-EBUSY);
10017 map->def.type = type;
10018 return 0;
10019 }
10020
10021 __u32 bpf_map__map_flags(const struct bpf_map *map)
10022 {
10023 return map->def.map_flags;
10024 }
10025
10026 int bpf_map__set_map_flags(struct bpf_map *map, __u32 flags)
10027 {
10028 if (map_is_created(map))
10029 return libbpf_err(-EBUSY);
10030 map->def.map_flags = flags;
10031 return 0;
10032 }
10033
10034 __u64 bpf_map__map_extra(const struct bpf_map *map)
10035 {
10036 return map->map_extra;
10037 }
10038
10039 int bpf_map__set_map_extra(struct bpf_map *map, __u64 map_extra)
10040 {
10041 if (map_is_created(map))
10042 return libbpf_err(-EBUSY);
10043 map->map_extra = map_extra;
10044 return 0;
10045 }
10046
10047 __u32 bpf_map__numa_node(const struct bpf_map *map)
10048 {
10049 return map->numa_node;
10050 }
10051
10052 int bpf_map__set_numa_node(struct bpf_map *map, __u32 numa_node)
10053 {
10054 if (map_is_created(map))
10055 return libbpf_err(-EBUSY);
10056 map->numa_node = numa_node;
10057 return 0;
10058 }
10059
10060 __u32 bpf_map__key_size(const struct bpf_map *map)
10061 {
10062 return map->def.key_size;
10063 }
10064
10065 int bpf_map__set_key_size(struct bpf_map *map, __u32 size)
10066 {
10067 if (map_is_created(map))
10068 return libbpf_err(-EBUSY);
10069 map->def.key_size = size;
10070 return 0;
10071 }
10072
10073 __u32 bpf_map__value_size(const struct bpf_map *map)
10074 {
10075 return map->def.value_size;
10076 }
10077
10078 static int map_btf_datasec_resize(struct bpf_map *map, __u32 size)
10079 {
10080 struct btf *btf;
10081 struct btf_type *datasec_type, *var_type;
10082 struct btf_var_secinfo *var;
10083 const struct btf_type *array_type;
10084 const struct btf_array *array;
10085 int vlen, element_sz, new_array_id;
10086 __u32 nr_elements;
10087
10088 /* check btf existence */
10089 btf = bpf_object__btf(map->obj);
10090 if (!btf)
10091 return -ENOENT;
10092
10093 /* verify map is datasec */
10094 datasec_type = btf_type_by_id(btf, bpf_map__btf_value_type_id(map));
10095 if (!btf_is_datasec(datasec_type)) {
10096 pr_warn("map '%s': cannot be resized, map value type is not a datasec\n",
10097 bpf_map__name(map));
10098 return -EINVAL;
10099 }
10100
10101 /* verify datasec has at least one var */
10102 vlen = btf_vlen(datasec_type);
10103 if (vlen == 0) {
10104 pr_warn("map '%s': cannot be resized, map value datasec is empty\n",
10105 bpf_map__name(map));
10106 return -EINVAL;
10107 }
10108
10109 /* verify last var in the datasec is an array */
10110 var = &btf_var_secinfos(datasec_type)[vlen - 1];
10111 var_type = btf_type_by_id(btf, var->type);
10112 array_type = skip_mods_and_typedefs(btf, var_type->type, NULL);
10113 if (!btf_is_array(array_type)) {
10114 pr_warn("map '%s': cannot be resized, last var must be an array\n",
10115 bpf_map__name(map));
10116 return -EINVAL;
10117 }
10118
10119 /* verify request size aligns with array */
10120 array = btf_array(array_type);
10121 element_sz = btf__resolve_size(btf, array->type);
10122 if (element_sz <= 0 || (size - var->offset) % element_sz != 0) {
10123 pr_warn("map '%s': cannot be resized, element size (%d) doesn't align with new total size (%u)\n",
10124 bpf_map__name(map), element_sz, size);
10125 return -EINVAL;
10126 }
10127
10128 /* create a new array based on the existing array, but with new length */
10129 nr_elements = (size - var->offset) / element_sz;
10130 new_array_id = btf__add_array(btf, array->index_type, array->type, nr_elements);
10131 if (new_array_id < 0)
10132 return new_array_id;
10133
10134 /* adding a new btf type invalidates existing pointers to btf objects,
10135 * so refresh pointers before proceeding
10136 */
10137 datasec_type = btf_type_by_id(btf, map->btf_value_type_id);
10138 var = &btf_var_secinfos(datasec_type)[vlen - 1];
10139 var_type = btf_type_by_id(btf, var->type);
10140
10141 /* finally update btf info */
10142 datasec_type->size = size;
10143 var->size = size - var->offset;
10144 var_type->type = new_array_id;
10145
10146 return 0;
10147 }
10148
10149 int bpf_map__set_value_size(struct bpf_map *map, __u32 size)
10150 {
10151 if (map->obj->loaded || map->reused)
10152 return libbpf_err(-EBUSY);
10153
10154 if (map->mmaped) {
10155 size_t mmap_old_sz, mmap_new_sz;
10156 int err;
10157
10158 if (map->def.type != BPF_MAP_TYPE_ARRAY)
10159 return -EOPNOTSUPP;
10160
10161 mmap_old_sz = bpf_map_mmap_sz(map);
10162 mmap_new_sz = array_map_mmap_sz(size, map->def.max_entries);
10163 err = bpf_map_mmap_resize(map, mmap_old_sz, mmap_new_sz);
10164 if (err) {
10165 pr_warn("map '%s': failed to resize memory-mapped region: %d\n",
10166 bpf_map__name(map), err);
10167 return err;
10168 }
10169 err = map_btf_datasec_resize(map, size);
10170 if (err && err != -ENOENT) {
10171 pr_warn("map '%s': failed to adjust resized BTF, clearing BTF key/value info: %d\n",
10172 bpf_map__name(map), err);
10173 map->btf_value_type_id = 0;
10174 map->btf_key_type_id = 0;
10175 }
10176 }
10177
10178 map->def.value_size = size;
10179 return 0;
10180 }
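
/* Example (illustrative sketch): growing a global-data map before load so
 * that the trailing array variable in its datasec can hold more elements.
 * The lookup by the ".bss" section name and the new size are hypothetical;
 * resizing only works for memory-mapped array maps.
 *
 *	struct bpf_map *data_map;
 *
 *	data_map = bpf_object__find_map_by_name(obj, ".bss");
 *	if (data_map)
 *		bpf_map__set_value_size(data_map, 64 * 1024);
 */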
10181
10182 __u32 bpf_map__btf_key_type_id(const struct bpf_map *map)
10183 {
10184 return map ? map->btf_key_type_id : 0;
10185 }
10186
10187 __u32 bpf_map__btf_value_type_id(const struct bpf_map *map)
10188 {
10189 return map ? map->btf_value_type_id : 0;
10190 }
10191
10192 int bpf_map__set_initial_value(struct bpf_map *map,
10193 const void *data, size_t size)
10194 {
10195 size_t actual_sz;
10196
10197 if (map->obj->loaded || map->reused)
10198 return libbpf_err(-EBUSY);
10199
10200 if (!map->mmaped || map->libbpf_type == LIBBPF_MAP_KCONFIG)
10201 return libbpf_err(-EINVAL);
10202
10203 if (map->def.type == BPF_MAP_TYPE_ARENA)
10204 actual_sz = map->obj->arena_data_sz;
10205 else
10206 actual_sz = map->def.value_size;
10207 if (size != actual_sz)
10208 return libbpf_err(-EINVAL);
10209
10210 memcpy(map->mmaped, data, size);
10211 return 0;
10212 }
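
/* Example (illustrative sketch): overriding .rodata contents before load.
 * The size passed in must exactly match the map's value size, so "struct
 * my_rodata" (hypothetical) has to mirror the object's read-only globals.
 *
 *	struct my_rodata vals = { .debug_level = 2 };
 *	struct bpf_map *rodata;
 *
 *	rodata = bpf_object__find_map_by_name(obj, ".rodata");
 *	if (rodata)
 *		bpf_map__set_initial_value(rodata, &vals, sizeof(vals));
 */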
10213
10214 void *bpf_map__initial_value(const struct bpf_map *map, size_t *psize)
10215 {
10216 if (bpf_map__is_struct_ops(map)) {
10217 if (psize)
10218 *psize = map->def.value_size;
10219 return map->st_ops->data;
10220 }
10221
10222 if (!map->mmaped)
10223 return NULL;
10224
10225 if (map->def.type == BPF_MAP_TYPE_ARENA)
10226 *psize = map->obj->arena_data_sz;
10227 else
10228 *psize = map->def.value_size;
10229
10230 return map->mmaped;
10231 }
10232
10233 bool bpf_map__is_internal(const struct bpf_map *map)
10234 {
10235 return map->libbpf_type != LIBBPF_MAP_UNSPEC;
10236 }
10237
10238 __u32 bpf_map__ifindex(const struct bpf_map *map)
10239 {
10240 return map->map_ifindex;
10241 }
10242
10243 int bpf_map__set_ifindex(struct bpf_map *map, __u32 ifindex)
10244 {
10245 if (map_is_created(map))
10246 return libbpf_err(-EBUSY);
10247 map->map_ifindex = ifindex;
10248 return 0;
10249 }
10250
10251 int bpf_map__set_inner_map_fd(struct bpf_map *map, int fd)
10252 {
10253 if (!bpf_map_type__is_map_in_map(map->def.type)) {
10254 pr_warn("error: unsupported map type\n");
10255 return libbpf_err(-EINVAL);
10256 }
10257 if (map->inner_map_fd != -1) {
10258 pr_warn("error: inner_map_fd already specified\n");
10259 return libbpf_err(-EINVAL);
10260 }
10261 if (map->inner_map) {
10262 bpf_map__destroy(map->inner_map);
10263 zfree(&map->inner_map);
10264 }
10265 map->inner_map_fd = fd;
10266 return 0;
10267 }
10268
10269 static struct bpf_map *
10270 __bpf_map__iter(const struct bpf_map *m, const struct bpf_object *obj, int i)
10271 {
10272 ssize_t idx;
10273 struct bpf_map *s, *e;
10274
10275 if (!obj || !obj->maps)
10276 return errno = EINVAL, NULL;
10277
10278 s = obj->maps;
10279 e = obj->maps + obj->nr_maps;
10280
10281 if ((m < s) || (m >= e)) {
10282 pr_warn("error in %s: map handler doesn't belong to object\n",
10283 __func__);
10284 return errno = EINVAL, NULL;
10285 }
10286
10287 idx = (m - obj->maps) + i;
10288 if (idx >= obj->nr_maps || idx < 0)
10289 return NULL;
10290 return &obj->maps[idx];
10291 }
10292
10293 struct bpf_map *
10294 bpf_object__next_map(const struct bpf_object *obj, const struct bpf_map *prev)
10295 {
10296 if (prev == NULL)
10297 return obj->maps;
10298
10299 return __bpf_map__iter(prev, obj, 1);
10300 }
10301
10302 struct bpf_map *
10303 bpf_object__prev_map(const struct bpf_object *obj, const struct bpf_map *next)
10304 {
10305 if (next == NULL) {
10306 if (!obj->nr_maps)
10307 return NULL;
10308 return obj->maps + obj->nr_maps - 1;
10309 }
10310
10311 return __bpf_map__iter(next, obj, -1);
10312 }
10313
10314 struct bpf_map *
10315 bpf_object__find_map_by_name(const struct bpf_object *obj, const char *name)
10316 {
10317 struct bpf_map *pos;
10318
10319 bpf_object__for_each_map(pos, obj) {
10320 /* if it's a special internal map name (which always starts
10321 * with dot) then check if that special name matches the
10322 * real map name (ELF section name)
10323 */
10324 if (name[0] == '.') {
10325 if (pos->real_name && strcmp(pos->real_name, name) == 0)
10326 return pos;
10327 continue;
10328 }
10329 /* otherwise map name has to be an exact match */
10330 if (map_uses_real_name(pos)) {
10331 if (strcmp(pos->real_name, name) == 0)
10332 return pos;
10333 continue;
10334 }
10335 if (strcmp(pos->name, name) == 0)
10336 return pos;
10337 }
10338 return errno = ENOENT, NULL;
10339 }
10340
10341 int
10342 bpf_object__find_map_fd_by_name(const struct bpf_object *obj, const char *name)
10343 {
10344 return bpf_map__fd(bpf_object__find_map_by_name(obj, name));
10345 }
10346
10347 static int validate_map_op(const struct bpf_map *map, size_t key_sz,
10348 size_t value_sz, bool check_value_sz)
10349 {
10350 if (!map_is_created(map)) /* map is not yet created */
10351 return -ENOENT;
10352
10353 if (map->def.key_size != key_sz) {
10354 pr_warn("map '%s': unexpected key size %zu provided, expected %u\n",
10355 map->name, key_sz, map->def.key_size);
10356 return -EINVAL;
10357 }
10358
10359 if (map->fd < 0) {
10360 pr_warn("map '%s': can't use BPF map without FD (was it created?)\n", map->name);
10361 return -EINVAL;
10362 }
10363
10364 if (!check_value_sz)
10365 return 0;
10366
10367 switch (map->def.type) {
10368 case BPF_MAP_TYPE_PERCPU_ARRAY:
10369 case BPF_MAP_TYPE_PERCPU_HASH:
10370 case BPF_MAP_TYPE_LRU_PERCPU_HASH:
10371 case BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE: {
10372 int num_cpu = libbpf_num_possible_cpus();
10373 size_t elem_sz = roundup(map->def.value_size, 8);
10374
10375 if (value_sz != num_cpu * elem_sz) {
10376 pr_warn("map '%s': unexpected value size %zu provided for per-CPU map, expected %d * %zu = %zd\n",
10377 map->name, value_sz, num_cpu, elem_sz, num_cpu * elem_sz);
10378 return -EINVAL;
10379 }
10380 break;
10381 }
10382 default:
10383 if (map->def.value_size != value_sz) {
10384 pr_warn("map '%s': unexpected value size %zu provided, expected %u\n",
10385 map->name, value_sz, map->def.value_size);
10386 return -EINVAL;
10387 }
10388 break;
10389 }
10390 return 0;
10391 }
10392
bpf_map__lookup_elem(const struct bpf_map * map,const void * key,size_t key_sz,void * value,size_t value_sz,__u64 flags)10393 int bpf_map__lookup_elem(const struct bpf_map *map,
10394 const void *key, size_t key_sz,
10395 void *value, size_t value_sz, __u64 flags)
10396 {
10397 int err;
10398
10399 err = validate_map_op(map, key_sz, value_sz, true);
10400 if (err)
10401 return libbpf_err(err);
10402
10403 return bpf_map_lookup_elem_flags(map->fd, key, value, flags);
10404 }
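
/* Usage sketch (not part of the library): reading one value from a per-CPU
 * map. The caller must pass a buffer sized as the number of possible CPUs
 * times the value size rounded up to 8 bytes, matching the validate_map_op()
 * check above:
 *
 *	int num_cpu = libbpf_num_possible_cpus();
 *	size_t elem_sz = (bpf_map__value_size(map) + 7) / 8 * 8;
 *	void *values = calloc(num_cpu, elem_sz);
 *	__u32 key = 0;
 *	int err;
 *
 *	err = bpf_map__lookup_elem(map, &key, sizeof(key), values,
 *				   num_cpu * elem_sz, 0);
 */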

int bpf_map__update_elem(const struct bpf_map *map,
                         const void *key, size_t key_sz,
                         const void *value, size_t value_sz, __u64 flags)
{
        int err;

        err = validate_map_op(map, key_sz, value_sz, true);
        if (err)
                return libbpf_err(err);

        return bpf_map_update_elem(map->fd, key, value, flags);
}

int bpf_map__delete_elem(const struct bpf_map *map,
                         const void *key, size_t key_sz, __u64 flags)
{
        int err;

        err = validate_map_op(map, key_sz, 0, false /* check_value_sz */);
        if (err)
                return libbpf_err(err);

        return bpf_map_delete_elem_flags(map->fd, key, flags);
}

int bpf_map__lookup_and_delete_elem(const struct bpf_map *map,
                                    const void *key, size_t key_sz,
                                    void *value, size_t value_sz, __u64 flags)
{
        int err;

        err = validate_map_op(map, key_sz, value_sz, true);
        if (err)
                return libbpf_err(err);

        return bpf_map_lookup_and_delete_elem_flags(map->fd, key, value, flags);
}

int bpf_map__get_next_key(const struct bpf_map *map,
                          const void *cur_key, void *next_key, size_t key_sz)
{
        int err;

        err = validate_map_op(map, key_sz, 0, false /* check_value_sz */);
        if (err)
                return libbpf_err(err);

        return bpf_map_get_next_key(map->fd, cur_key, next_key);
}

long libbpf_get_error(const void *ptr)
{
        if (!IS_ERR_OR_NULL(ptr))
                return 0;

        if (IS_ERR(ptr))
                errno = -PTR_ERR(ptr);

        /* If ptr == NULL, then errno should be already set by the failing
         * API, because libbpf never returns NULL on success and it now always
         * sets errno on error. So no extra errno handling for ptr == NULL
         * case.
         */
        return -errno;
}

/* Replace link's underlying BPF program with the new one */
int bpf_link__update_program(struct bpf_link *link, struct bpf_program *prog)
{
        int ret;
        int prog_fd = bpf_program__fd(prog);

        if (prog_fd < 0) {
                pr_warn("prog '%s': can't use BPF program without FD (was it loaded?)\n",
                        prog->name);
                return libbpf_err(-EINVAL);
        }

        ret = bpf_link_update(bpf_link__fd(link), prog_fd, NULL);
        return libbpf_err_errno(ret);
}

/* Release "ownership" of the underlying BPF resource (typically, a BPF
 * program attached to some BPF hook, e.g., tracepoint, kprobe, etc). A
 * disconnected link, when destroyed through a bpf_link__destroy() call, won't
 * attempt to detach/unregister that BPF resource. This is useful in
 * situations where, say, the attached BPF program has to outlive the
 * userspace program that attached it. Depending on the type of BPF program,
 * though, there might be additional steps (like pinning the BPF program in
 * BPF FS) necessary to ensure that the exit of the userspace program doesn't
 * trigger automatic detachment and clean up inside the kernel.
 */
void bpf_link__disconnect(struct bpf_link *link)
{
        link->disconnected = true;
}
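
/* Usage sketch (not part of the library): letting an attachment outlive the
 * loading process. Pin the link (so the kernel object survives process exit)
 * and disconnect it, so that bpf_link__destroy() only frees the userspace
 * handle without detaching:
 *
 *	err = bpf_link__pin(link, "/sys/fs/bpf/my_link");  // example pin path
 *	if (!err) {
 *		bpf_link__disconnect(link);
 *		bpf_link__destroy(link);
 *	}
 */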

int bpf_link__destroy(struct bpf_link *link)
{
        int err = 0;

        if (IS_ERR_OR_NULL(link))
                return 0;

        if (!link->disconnected && link->detach)
                err = link->detach(link);
        if (link->pin_path)
                free(link->pin_path);
        if (link->dealloc)
                link->dealloc(link);
        else
                free(link);

        return libbpf_err(err);
}

int bpf_link__fd(const struct bpf_link *link)
{
        return link->fd;
}

const char *bpf_link__pin_path(const struct bpf_link *link)
{
        return link->pin_path;
}

static int bpf_link__detach_fd(struct bpf_link *link)
{
        return libbpf_err_errno(close(link->fd));
}

struct bpf_link *bpf_link__open(const char *path)
{
        struct bpf_link *link;
        int fd;

        fd = bpf_obj_get(path);
        if (fd < 0) {
                fd = -errno;
                pr_warn("failed to open link at %s: %d\n", path, fd);
                return libbpf_err_ptr(fd);
        }

        link = calloc(1, sizeof(*link));
        if (!link) {
                close(fd);
                return libbpf_err_ptr(-ENOMEM);
        }
        link->detach = &bpf_link__detach_fd;
        link->fd = fd;

        link->pin_path = strdup(path);
        if (!link->pin_path) {
                bpf_link__destroy(link);
                return libbpf_err_ptr(-ENOMEM);
        }

        return link;
}

int bpf_link__detach(struct bpf_link *link)
{
        return bpf_link_detach(link->fd) ? -errno : 0;
}

int bpf_link__pin(struct bpf_link *link, const char *path)
{
        int err;

        if (link->pin_path)
                return libbpf_err(-EBUSY);
        err = make_parent_dir(path);
        if (err)
                return libbpf_err(err);
        err = check_path(path);
        if (err)
                return libbpf_err(err);

        link->pin_path = strdup(path);
        if (!link->pin_path)
                return libbpf_err(-ENOMEM);

        if (bpf_obj_pin(link->fd, link->pin_path)) {
                err = -errno;
                zfree(&link->pin_path);
                return libbpf_err(err);
        }

        pr_debug("link fd=%d: pinned at %s\n", link->fd, link->pin_path);
        return 0;
}

int bpf_link__unpin(struct bpf_link *link)
{
        int err;

        if (!link->pin_path)
                return libbpf_err(-EINVAL);

        err = unlink(link->pin_path);
        if (err != 0)
                return -errno;

        pr_debug("link fd=%d: unpinned from %s\n", link->fd, link->pin_path);
        zfree(&link->pin_path);
        return 0;
}

struct bpf_link_perf {
        struct bpf_link link;
        int perf_event_fd;
        /* legacy kprobe support: keep track of probe identifier and type */
        char *legacy_probe_name;
        bool legacy_is_kprobe;
        bool legacy_is_retprobe;
};

static int remove_kprobe_event_legacy(const char *probe_name, bool retprobe);
static int remove_uprobe_event_legacy(const char *probe_name, bool retprobe);

static int bpf_link_perf_detach(struct bpf_link *link)
{
        struct bpf_link_perf *perf_link = container_of(link, struct bpf_link_perf, link);
        int err = 0;

        if (ioctl(perf_link->perf_event_fd, PERF_EVENT_IOC_DISABLE, 0) < 0)
                err = -errno;

        if (perf_link->perf_event_fd != link->fd)
                close(perf_link->perf_event_fd);
        close(link->fd);

        /* legacy uprobe/kprobe needs to be removed after perf event fd closure */
        if (perf_link->legacy_probe_name) {
                if (perf_link->legacy_is_kprobe) {
                        err = remove_kprobe_event_legacy(perf_link->legacy_probe_name,
                                                         perf_link->legacy_is_retprobe);
                } else {
                        err = remove_uprobe_event_legacy(perf_link->legacy_probe_name,
                                                         perf_link->legacy_is_retprobe);
                }
        }

        return err;
}

static void bpf_link_perf_dealloc(struct bpf_link *link)
{
        struct bpf_link_perf *perf_link = container_of(link, struct bpf_link_perf, link);

        free(perf_link->legacy_probe_name);
        free(perf_link);
}

struct bpf_link *bpf_program__attach_perf_event_opts(const struct bpf_program *prog, int pfd,
                                                     const struct bpf_perf_event_opts *opts)
{
        char errmsg[STRERR_BUFSIZE];
        struct bpf_link_perf *link;
        int prog_fd, link_fd = -1, err;
        bool force_ioctl_attach;

        if (!OPTS_VALID(opts, bpf_perf_event_opts))
                return libbpf_err_ptr(-EINVAL);

        if (pfd < 0) {
                pr_warn("prog '%s': invalid perf event FD %d\n",
                        prog->name, pfd);
                return libbpf_err_ptr(-EINVAL);
        }
        prog_fd = bpf_program__fd(prog);
        if (prog_fd < 0) {
                pr_warn("prog '%s': can't attach BPF program without FD (was it loaded?)\n",
                        prog->name);
                return libbpf_err_ptr(-EINVAL);
        }

        link = calloc(1, sizeof(*link));
        if (!link)
                return libbpf_err_ptr(-ENOMEM);
        link->link.detach = &bpf_link_perf_detach;
        link->link.dealloc = &bpf_link_perf_dealloc;
        link->perf_event_fd = pfd;

        force_ioctl_attach = OPTS_GET(opts, force_ioctl_attach, false);
        if (kernel_supports(prog->obj, FEAT_PERF_LINK) && !force_ioctl_attach) {
                DECLARE_LIBBPF_OPTS(bpf_link_create_opts, link_opts,
                        .perf_event.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0));

                link_fd = bpf_link_create(prog_fd, pfd, BPF_PERF_EVENT, &link_opts);
                if (link_fd < 0) {
                        err = -errno;
                        pr_warn("prog '%s': failed to create BPF link for perf_event FD %d: %d (%s)\n",
                                prog->name, pfd,
                                err, libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
                        goto err_out;
                }
                link->link.fd = link_fd;
        } else {
                if (OPTS_GET(opts, bpf_cookie, 0)) {
                        pr_warn("prog '%s': user context value is not supported\n", prog->name);
                        err = -EOPNOTSUPP;
                        goto err_out;
                }

                if (ioctl(pfd, PERF_EVENT_IOC_SET_BPF, prog_fd) < 0) {
                        err = -errno;
                        pr_warn("prog '%s': failed to attach to perf_event FD %d: %s\n",
                                prog->name, pfd, libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
                        if (err == -EPROTO)
                                pr_warn("prog '%s': try add PERF_SAMPLE_CALLCHAIN to or remove exclude_callchain_[kernel|user] from pfd %d\n",
                                        prog->name, pfd);
                        goto err_out;
                }
                link->link.fd = pfd;
        }
        if (ioctl(pfd, PERF_EVENT_IOC_ENABLE, 0) < 0) {
                err = -errno;
                pr_warn("prog '%s': failed to enable perf_event FD %d: %s\n",
                        prog->name, pfd, libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
                goto err_out;
        }

        return &link->link;
err_out:
        if (link_fd >= 0)
                close(link_fd);
        free(link);
        return libbpf_err_ptr(err);
}

struct bpf_link *bpf_program__attach_perf_event(const struct bpf_program *prog, int pfd)
{
        return bpf_program__attach_perf_event_opts(prog, pfd, NULL);
}

/*
 * This function is expected to parse an integer in the range of [0, 2^31-1]
 * from the given file using scanf format string fmt. If the actual parsed
 * value is negative, the result might be indistinguishable from an error.
 */
static int parse_uint_from_file(const char *file, const char *fmt)
{
        char buf[STRERR_BUFSIZE];
        int err, ret;
        FILE *f;

        f = fopen(file, "re");
        if (!f) {
                err = -errno;
                pr_debug("failed to open '%s': %s\n", file,
                         libbpf_strerror_r(err, buf, sizeof(buf)));
                return err;
        }
        err = fscanf(f, fmt, &ret);
        if (err != 1) {
                err = err == EOF ? -EIO : -errno;
                pr_debug("failed to parse '%s': %s\n", file,
                         libbpf_strerror_r(err, buf, sizeof(buf)));
                fclose(f);
                return err;
        }
        fclose(f);
        return ret;
}

static int determine_kprobe_perf_type(void)
{
        const char *file = "/sys/bus/event_source/devices/kprobe/type";

        return parse_uint_from_file(file, "%d\n");
}

static int determine_uprobe_perf_type(void)
{
        const char *file = "/sys/bus/event_source/devices/uprobe/type";

        return parse_uint_from_file(file, "%d\n");
}

static int determine_kprobe_retprobe_bit(void)
{
        const char *file = "/sys/bus/event_source/devices/kprobe/format/retprobe";

        return parse_uint_from_file(file, "config:%d\n");
}

static int determine_uprobe_retprobe_bit(void)
{
        const char *file = "/sys/bus/event_source/devices/uprobe/format/retprobe";

        return parse_uint_from_file(file, "config:%d\n");
}

#define PERF_UPROBE_REF_CTR_OFFSET_BITS 32
#define PERF_UPROBE_REF_CTR_OFFSET_SHIFT 32

static int perf_event_open_probe(bool uprobe, bool retprobe, const char *name,
                                 uint64_t offset, int pid, size_t ref_ctr_off)
{
        const size_t attr_sz = sizeof(struct perf_event_attr);
        struct perf_event_attr attr;
        char errmsg[STRERR_BUFSIZE];
        int type, pfd;

        if ((__u64)ref_ctr_off >= (1ULL << PERF_UPROBE_REF_CTR_OFFSET_BITS))
                return -EINVAL;

        memset(&attr, 0, attr_sz);

        type = uprobe ? determine_uprobe_perf_type()
                      : determine_kprobe_perf_type();
        if (type < 0) {
                pr_warn("failed to determine %s perf type: %s\n",
                        uprobe ? "uprobe" : "kprobe",
                        libbpf_strerror_r(type, errmsg, sizeof(errmsg)));
                return type;
        }
        if (retprobe) {
                int bit = uprobe ? determine_uprobe_retprobe_bit()
                                 : determine_kprobe_retprobe_bit();

                if (bit < 0) {
                        pr_warn("failed to determine %s retprobe bit: %s\n",
                                uprobe ? "uprobe" : "kprobe",
                                libbpf_strerror_r(bit, errmsg, sizeof(errmsg)));
                        return bit;
                }
                attr.config |= 1 << bit;
        }
        attr.size = attr_sz;
        attr.type = type;
        attr.config |= (__u64)ref_ctr_off << PERF_UPROBE_REF_CTR_OFFSET_SHIFT;
        attr.config1 = ptr_to_u64(name); /* kprobe_func or uprobe_path */
        attr.config2 = offset;           /* kprobe_addr or probe_offset */

        /* pid filter is meaningful only for uprobes */
        pfd = syscall(__NR_perf_event_open, &attr,
                      pid < 0 ? -1 : pid /* pid */,
                      pid == -1 ? 0 : -1 /* cpu */,
                      -1 /* group_fd */, PERF_FLAG_FD_CLOEXEC);
        return pfd >= 0 ? pfd : -errno;
}

static int append_to_file(const char *file, const char *fmt, ...)
{
        int fd, n, err = 0;
        va_list ap;
        char buf[1024];

        va_start(ap, fmt);
        n = vsnprintf(buf, sizeof(buf), fmt, ap);
        va_end(ap);

        if (n < 0 || n >= sizeof(buf))
                return -EINVAL;

        fd = open(file, O_WRONLY | O_APPEND | O_CLOEXEC, 0);
        if (fd < 0)
                return -errno;

        if (write(fd, buf, n) < 0)
                err = -errno;

        close(fd);
        return err;
}

#define DEBUGFS "/sys/kernel/debug/tracing"
#define TRACEFS "/sys/kernel/tracing"

static bool use_debugfs(void)
{
        static int has_debugfs = -1;

        if (has_debugfs < 0)
                has_debugfs = faccessat(AT_FDCWD, DEBUGFS, F_OK, AT_EACCESS) == 0;

        return has_debugfs == 1;
}

static const char *tracefs_path(void)
{
        return use_debugfs() ? DEBUGFS : TRACEFS;
}

static const char *tracefs_kprobe_events(void)
{
        return use_debugfs() ? DEBUGFS"/kprobe_events" : TRACEFS"/kprobe_events";
}

static const char *tracefs_uprobe_events(void)
{
        return use_debugfs() ? DEBUGFS"/uprobe_events" : TRACEFS"/uprobe_events";
}

static const char *tracefs_available_filter_functions(void)
{
        return use_debugfs() ? DEBUGFS"/available_filter_functions"
                             : TRACEFS"/available_filter_functions";
}

static const char *tracefs_available_filter_functions_addrs(void)
{
        return use_debugfs() ? DEBUGFS"/available_filter_functions_addrs"
                             : TRACEFS"/available_filter_functions_addrs";
}

static void gen_kprobe_legacy_event_name(char *buf, size_t buf_sz,
                                         const char *kfunc_name, size_t offset)
{
        static int index = 0;
        int i;

        snprintf(buf, buf_sz, "libbpf_%u_%s_0x%zx_%d", getpid(), kfunc_name, offset,
                 __sync_fetch_and_add(&index, 1));

        /* sanitize the generated probe name: kfunc_name may contain non-alphanumeric characters */
        for (i = 0; buf[i]; i++) {
                if (!isalnum(buf[i]))
                        buf[i] = '_';
        }
}

static int add_kprobe_event_legacy(const char *probe_name, bool retprobe,
                                   const char *kfunc_name, size_t offset)
{
        return append_to_file(tracefs_kprobe_events(), "%c:%s/%s %s+0x%zx",
                              retprobe ? 'r' : 'p',
                              retprobe ? "kretprobes" : "kprobes",
                              probe_name, kfunc_name, offset);
}

static int remove_kprobe_event_legacy(const char *probe_name, bool retprobe)
{
        return append_to_file(tracefs_kprobe_events(), "-:%s/%s",
                              retprobe ? "kretprobes" : "kprobes", probe_name);
}
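
/* For reference (derived from the format strings above), the text appended to
 * <tracefs>/kprobe_events looks like, e.g.:
 *
 *	p:kprobes/libbpf_1234_do_sys_open_0x0_0 do_sys_open+0x0
 *	r:kretprobes/libbpf_1234_do_sys_open_0x0_1 do_sys_open+0x0
 *
 * and removal appends "-:kprobes/<probe_name>" (or "-:kretprobes/...").
 * The probe names shown are hypothetical examples of what
 * gen_kprobe_legacy_event_name() generates.
 */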

static int determine_kprobe_perf_type_legacy(const char *probe_name, bool retprobe)
{
        char file[256];

        snprintf(file, sizeof(file), "%s/events/%s/%s/id",
                 tracefs_path(), retprobe ? "kretprobes" : "kprobes", probe_name);

        return parse_uint_from_file(file, "%d\n");
}

static int perf_event_kprobe_open_legacy(const char *probe_name, bool retprobe,
                                         const char *kfunc_name, size_t offset, int pid)
{
        const size_t attr_sz = sizeof(struct perf_event_attr);
        struct perf_event_attr attr;
        char errmsg[STRERR_BUFSIZE];
        int type, pfd, err;

        err = add_kprobe_event_legacy(probe_name, retprobe, kfunc_name, offset);
        if (err < 0) {
                pr_warn("failed to add legacy kprobe event for '%s+0x%zx': %s\n",
                        kfunc_name, offset,
                        libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
                return err;
        }
        type = determine_kprobe_perf_type_legacy(probe_name, retprobe);
        if (type < 0) {
                err = type;
                pr_warn("failed to determine legacy kprobe event id for '%s+0x%zx': %s\n",
                        kfunc_name, offset,
                        libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
                goto err_clean_legacy;
        }

        memset(&attr, 0, attr_sz);
        attr.size = attr_sz;
        attr.config = type;
        attr.type = PERF_TYPE_TRACEPOINT;

        pfd = syscall(__NR_perf_event_open, &attr,
                      pid < 0 ? -1 : pid, /* pid */
                      pid == -1 ? 0 : -1, /* cpu */
                      -1 /* group_fd */, PERF_FLAG_FD_CLOEXEC);
        if (pfd < 0) {
                err = -errno;
                pr_warn("legacy kprobe perf_event_open() failed: %s\n",
                        libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
                goto err_clean_legacy;
        }
        return pfd;

err_clean_legacy:
        /* Clear the newly added legacy kprobe_event */
        remove_kprobe_event_legacy(probe_name, retprobe);
        return err;
}

static const char *arch_specific_syscall_pfx(void)
{
#if defined(__x86_64__)
        return "x64";
#elif defined(__i386__)
        return "ia32";
#elif defined(__s390x__)
        return "s390x";
#elif defined(__s390__)
        return "s390";
#elif defined(__arm__)
        return "arm";
#elif defined(__aarch64__)
        return "arm64";
#elif defined(__mips__)
        return "mips";
#elif defined(__riscv)
        return "riscv";
#elif defined(__powerpc64__) /* must come before __powerpc__, which is also defined on ppc64 */
        return "powerpc64";
#elif defined(__powerpc__)
        return "powerpc";
#else
        return NULL;
#endif
}

int probe_kern_syscall_wrapper(int token_fd)
{
        char syscall_name[64];
        const char *ksys_pfx;

        ksys_pfx = arch_specific_syscall_pfx();
        if (!ksys_pfx)
                return 0;

        snprintf(syscall_name, sizeof(syscall_name), "__%s_sys_bpf", ksys_pfx);

        if (determine_kprobe_perf_type() >= 0) {
                int pfd;

                pfd = perf_event_open_probe(false, false, syscall_name, 0, getpid(), 0);
                if (pfd >= 0)
                        close(pfd);

                return pfd >= 0 ? 1 : 0;
        } else { /* legacy mode */
                char probe_name[128];

                gen_kprobe_legacy_event_name(probe_name, sizeof(probe_name), syscall_name, 0);
                if (add_kprobe_event_legacy(probe_name, false, syscall_name, 0) < 0)
                        return 0;

                (void)remove_kprobe_event_legacy(probe_name, false);
                return 1;
        }
}

struct bpf_link *
bpf_program__attach_kprobe_opts(const struct bpf_program *prog,
                                const char *func_name,
                                const struct bpf_kprobe_opts *opts)
{
        DECLARE_LIBBPF_OPTS(bpf_perf_event_opts, pe_opts);
        enum probe_attach_mode attach_mode;
        char errmsg[STRERR_BUFSIZE];
        char *legacy_probe = NULL;
        struct bpf_link *link;
        size_t offset;
        bool retprobe, legacy;
        int pfd, err;

        if (!OPTS_VALID(opts, bpf_kprobe_opts))
                return libbpf_err_ptr(-EINVAL);

        attach_mode = OPTS_GET(opts, attach_mode, PROBE_ATTACH_MODE_DEFAULT);
        retprobe = OPTS_GET(opts, retprobe, false);
        offset = OPTS_GET(opts, offset, 0);
        pe_opts.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0);

        legacy = determine_kprobe_perf_type() < 0;
        switch (attach_mode) {
        case PROBE_ATTACH_MODE_LEGACY:
                legacy = true;
                pe_opts.force_ioctl_attach = true;
                break;
        case PROBE_ATTACH_MODE_PERF:
                if (legacy)
                        return libbpf_err_ptr(-ENOTSUP);
                pe_opts.force_ioctl_attach = true;
                break;
        case PROBE_ATTACH_MODE_LINK:
                if (legacy || !kernel_supports(prog->obj, FEAT_PERF_LINK))
                        return libbpf_err_ptr(-ENOTSUP);
                break;
        case PROBE_ATTACH_MODE_DEFAULT:
                break;
        default:
                return libbpf_err_ptr(-EINVAL);
        }

        if (!legacy) {
                pfd = perf_event_open_probe(false /* uprobe */, retprobe,
                                            func_name, offset,
                                            -1 /* pid */, 0 /* ref_ctr_off */);
        } else {
                char probe_name[256];

                gen_kprobe_legacy_event_name(probe_name, sizeof(probe_name),
                                             func_name, offset);

                legacy_probe = strdup(probe_name);
                if (!legacy_probe)
                        return libbpf_err_ptr(-ENOMEM);

                pfd = perf_event_kprobe_open_legacy(legacy_probe, retprobe, func_name,
                                                    offset, -1 /* pid */);
        }
        if (pfd < 0) {
                err = -errno;
                pr_warn("prog '%s': failed to create %s '%s+0x%zx' perf event: %s\n",
                        prog->name, retprobe ? "kretprobe" : "kprobe",
                        func_name, offset,
                        libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
                goto err_out;
        }
        link = bpf_program__attach_perf_event_opts(prog, pfd, &pe_opts);
        err = libbpf_get_error(link);
        if (err) {
                close(pfd);
                pr_warn("prog '%s': failed to attach to %s '%s+0x%zx': %s\n",
                        prog->name, retprobe ? "kretprobe" : "kprobe",
                        func_name, offset,
                        libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
                goto err_clean_legacy;
        }
        if (legacy) {
                struct bpf_link_perf *perf_link = container_of(link, struct bpf_link_perf, link);

                perf_link->legacy_probe_name = legacy_probe;
                perf_link->legacy_is_kprobe = true;
                perf_link->legacy_is_retprobe = retprobe;
        }

        return link;

err_clean_legacy:
        if (legacy)
                remove_kprobe_event_legacy(legacy_probe, retprobe);
err_out:
        free(legacy_probe);
        return libbpf_err_ptr(err);
}
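
/* Usage sketch (not part of the library): attaching a kretprobe with a BPF
 * cookie via the opts-based API; "do_unlinkat" is only an example target:
 *
 *	LIBBPF_OPTS(bpf_kprobe_opts, opts,
 *		.retprobe = true,
 *		.bpf_cookie = 0x1234,
 *	);
 *	struct bpf_link *link;
 *
 *	link = bpf_program__attach_kprobe_opts(prog, "do_unlinkat", &opts);
 *	if (!link)
 *		return -errno;  // errno is set by libbpf on failure
 */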

struct bpf_link *bpf_program__attach_kprobe(const struct bpf_program *prog,
                                            bool retprobe,
                                            const char *func_name)
{
        DECLARE_LIBBPF_OPTS(bpf_kprobe_opts, opts,
                .retprobe = retprobe,
        );

        return bpf_program__attach_kprobe_opts(prog, func_name, &opts);
}

struct bpf_link *bpf_program__attach_ksyscall(const struct bpf_program *prog,
                                              const char *syscall_name,
                                              const struct bpf_ksyscall_opts *opts)
{
        LIBBPF_OPTS(bpf_kprobe_opts, kprobe_opts);
        char func_name[128];

        if (!OPTS_VALID(opts, bpf_ksyscall_opts))
                return libbpf_err_ptr(-EINVAL);

        if (kernel_supports(prog->obj, FEAT_SYSCALL_WRAPPER)) {
                /* arch_specific_syscall_pfx() should never return NULL here
                 * because it is guarded by kernel_supports(). However, the
                 * compiler does not know that, so we keep an explicit
                 * fallback conditional as well.
                 */
                snprintf(func_name, sizeof(func_name), "__%s_sys_%s",
                         arch_specific_syscall_pfx() ? : "", syscall_name);
        } else {
                snprintf(func_name, sizeof(func_name), "__se_sys_%s", syscall_name);
        }

        kprobe_opts.retprobe = OPTS_GET(opts, retprobe, false);
        kprobe_opts.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0);

        return bpf_program__attach_kprobe_opts(prog, func_name, &kprobe_opts);
}
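
/* For reference: for syscall_name = "unlinkat" on x86-64 the kprobe target
 * resolves to "__x64_sys_unlinkat" when the kernel uses syscall wrappers, and
 * to "__se_sys_unlinkat" otherwise; other architectures substitute their own
 * prefix from arch_specific_syscall_pfx(). Usage sketch (not part of the
 * library):
 *
 *	LIBBPF_OPTS(bpf_ksyscall_opts, opts, .retprobe = false);
 *
 *	link = bpf_program__attach_ksyscall(prog, "unlinkat", &opts);
 */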

/* Adapted from perf/util/string.c */
bool glob_match(const char *str, const char *pat)
{
        while (*str && *pat && *pat != '*') {
                if (*pat == '?') { /* Matches any single character */
                        str++;
                        pat++;
                        continue;
                }
                if (*str != *pat)
                        return false;
                str++;
                pat++;
        }
        /* Check wild card */
        if (*pat == '*') {
                while (*pat == '*')
                        pat++;
                if (!*pat) /* Tail wild card matches all */
                        return true;
                while (*str)
                        if (glob_match(str++, pat))
                                return true;
        }
        return !*str && !*pat;
}
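
/* For reference, a few examples of the matching semantics implemented above
 * (only '*' and '?' are special; there are no character classes):
 *
 *	glob_match("tcp_v4_connect", "tcp_*_connect")  -> true
 *	glob_match("tcp_v4_connect", "tcp_v?_connect") -> true
 *	glob_match("tcp_v4_connect", "udp_*")          -> false
 */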

struct kprobe_multi_resolve {
        const char *pattern;
        unsigned long *addrs;
        size_t cap;
        size_t cnt;
};

struct avail_kallsyms_data {
        char **syms;
        size_t cnt;
        struct kprobe_multi_resolve *res;
};

static int avail_func_cmp(const void *a, const void *b)
{
        return strcmp(*(const char **)a, *(const char **)b);
}

static int avail_kallsyms_cb(unsigned long long sym_addr, char sym_type,
                             const char *sym_name, void *ctx)
{
        struct avail_kallsyms_data *data = ctx;
        struct kprobe_multi_resolve *res = data->res;
        int err;

        if (!bsearch(&sym_name, data->syms, data->cnt, sizeof(*data->syms), avail_func_cmp))
                return 0;

        err = libbpf_ensure_mem((void **)&res->addrs, &res->cap, sizeof(*res->addrs), res->cnt + 1);
        if (err)
                return err;

        res->addrs[res->cnt++] = (unsigned long)sym_addr;
        return 0;
}

static int libbpf_available_kallsyms_parse(struct kprobe_multi_resolve *res)
{
        const char *available_functions_file = tracefs_available_filter_functions();
        struct avail_kallsyms_data data;
        char sym_name[500];
        FILE *f;
        int err = 0, ret, i;
        char **syms = NULL;
        size_t cap = 0, cnt = 0;

        f = fopen(available_functions_file, "re");
        if (!f) {
                err = -errno;
                pr_warn("failed to open %s: %d\n", available_functions_file, err);
                return err;
        }

        while (true) {
                char *name;

                ret = fscanf(f, "%499s%*[^\n]\n", sym_name);
                if (ret == EOF && feof(f))
                        break;

                if (ret != 1) {
                        pr_warn("failed to parse available_filter_functions entry: %d\n", ret);
                        err = -EINVAL;
                        goto cleanup;
                }

                if (!glob_match(sym_name, res->pattern))
                        continue;

                err = libbpf_ensure_mem((void **)&syms, &cap, sizeof(*syms), cnt + 1);
                if (err)
                        goto cleanup;

                name = strdup(sym_name);
                if (!name) {
                        err = -errno;
                        goto cleanup;
                }

                syms[cnt++] = name;
        }

        /* no entries found, bail out */
        if (cnt == 0) {
                err = -ENOENT;
                goto cleanup;
        }

        /* sort available functions */
        qsort(syms, cnt, sizeof(*syms), avail_func_cmp);

        data.syms = syms;
        data.res = res;
        data.cnt = cnt;
        libbpf_kallsyms_parse(avail_kallsyms_cb, &data);

        if (res->cnt == 0)
                err = -ENOENT;

cleanup:
        for (i = 0; i < cnt; i++)
                free((char *)syms[i]);
        free(syms);

        fclose(f);
        return err;
}

static bool has_available_filter_functions_addrs(void)
{
        return access(tracefs_available_filter_functions_addrs(), R_OK) != -1;
}

static int libbpf_available_kprobes_parse(struct kprobe_multi_resolve *res)
{
        const char *available_path = tracefs_available_filter_functions_addrs();
        char sym_name[500];
        FILE *f;
        int ret, err = 0;
        unsigned long long sym_addr;

        f = fopen(available_path, "re");
        if (!f) {
                err = -errno;
                pr_warn("failed to open %s: %d\n", available_path, err);
                return err;
        }

        while (true) {
                ret = fscanf(f, "%llx %499s%*[^\n]\n", &sym_addr, sym_name);
                if (ret == EOF && feof(f))
                        break;

                if (ret != 2) {
                        pr_warn("failed to parse available_filter_functions_addrs entry: %d\n",
                                ret);
                        err = -EINVAL;
                        goto cleanup;
                }

                if (!glob_match(sym_name, res->pattern))
                        continue;

                err = libbpf_ensure_mem((void **)&res->addrs, &res->cap,
                                        sizeof(*res->addrs), res->cnt + 1);
                if (err)
                        goto cleanup;

                res->addrs[res->cnt++] = (unsigned long)sym_addr;
        }

        if (res->cnt == 0)
                err = -ENOENT;

cleanup:
        fclose(f);
        return err;
}

struct bpf_link *
bpf_program__attach_kprobe_multi_opts(const struct bpf_program *prog,
                                      const char *pattern,
                                      const struct bpf_kprobe_multi_opts *opts)
{
        LIBBPF_OPTS(bpf_link_create_opts, lopts);
        struct kprobe_multi_resolve res = {
                .pattern = pattern,
        };
        struct bpf_link *link = NULL;
        char errmsg[STRERR_BUFSIZE];
        const unsigned long *addrs;
        int err, link_fd, prog_fd;
        const __u64 *cookies;
        const char **syms;
        bool retprobe;
        size_t cnt;

        if (!OPTS_VALID(opts, bpf_kprobe_multi_opts))
                return libbpf_err_ptr(-EINVAL);

        prog_fd = bpf_program__fd(prog);
        if (prog_fd < 0) {
                pr_warn("prog '%s': can't attach BPF program without FD (was it loaded?)\n",
                        prog->name);
                return libbpf_err_ptr(-EINVAL);
        }

        syms    = OPTS_GET(opts, syms, false);
        addrs   = OPTS_GET(opts, addrs, false);
        cnt     = OPTS_GET(opts, cnt, false);
        cookies = OPTS_GET(opts, cookies, false);

        if (!pattern && !addrs && !syms)
                return libbpf_err_ptr(-EINVAL);
        if (pattern && (addrs || syms || cookies || cnt))
                return libbpf_err_ptr(-EINVAL);
        if (!pattern && !cnt)
                return libbpf_err_ptr(-EINVAL);
        if (addrs && syms)
                return libbpf_err_ptr(-EINVAL);

        if (pattern) {
                if (has_available_filter_functions_addrs())
                        err = libbpf_available_kprobes_parse(&res);
                else
                        err = libbpf_available_kallsyms_parse(&res);
                if (err)
                        goto error;
                addrs = res.addrs;
                cnt = res.cnt;
        }

        retprobe = OPTS_GET(opts, retprobe, false);

        lopts.kprobe_multi.syms = syms;
        lopts.kprobe_multi.addrs = addrs;
        lopts.kprobe_multi.cookies = cookies;
        lopts.kprobe_multi.cnt = cnt;
        lopts.kprobe_multi.flags = retprobe ? BPF_F_KPROBE_MULTI_RETURN : 0;

        link = calloc(1, sizeof(*link));
        if (!link) {
                err = -ENOMEM;
                goto error;
        }
        link->detach = &bpf_link__detach_fd;

        link_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_KPROBE_MULTI, &lopts);
        if (link_fd < 0) {
                err = -errno;
                pr_warn("prog '%s': failed to attach: %s\n",
                        prog->name, libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
                goto error;
        }
        link->fd = link_fd;
        free(res.addrs);
        return link;

error:
        free(link);
        free(res.addrs);
        return libbpf_err_ptr(err);
}
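
/* Usage sketch (not part of the library): attaching one program to all kernel
 * functions matching a glob pattern; the pattern below is only an example:
 *
 *	LIBBPF_OPTS(bpf_kprobe_multi_opts, opts, .retprobe = false);
 *	struct bpf_link *link;
 *
 *	link = bpf_program__attach_kprobe_multi_opts(prog, "tcp_v4_*", &opts);
 *
 * Alternatively, pass explicit syms or addrs (plus cnt) instead of a pattern;
 * the two modes are mutually exclusive, as validated above.
 */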

static int attach_kprobe(const struct bpf_program *prog, long cookie, struct bpf_link **link)
{
        DECLARE_LIBBPF_OPTS(bpf_kprobe_opts, opts);
        unsigned long offset = 0;
        const char *func_name;
        char *func;
        int n;

        *link = NULL;

        /* no auto-attach for SEC("kprobe") and SEC("kretprobe") */
        if (strcmp(prog->sec_name, "kprobe") == 0 || strcmp(prog->sec_name, "kretprobe") == 0)
                return 0;

        opts.retprobe = str_has_pfx(prog->sec_name, "kretprobe/");
        if (opts.retprobe)
                func_name = prog->sec_name + sizeof("kretprobe/") - 1;
        else
                func_name = prog->sec_name + sizeof("kprobe/") - 1;

        n = sscanf(func_name, "%m[a-zA-Z0-9_.]+%li", &func, &offset);
        if (n < 1) {
                pr_warn("kprobe name is invalid: %s\n", func_name);
                return -EINVAL;
        }
        if (opts.retprobe && offset != 0) {
                free(func);
                pr_warn("kretprobes do not support offset specification\n");
                return -EINVAL;
        }

        opts.offset = offset;
        *link = bpf_program__attach_kprobe_opts(prog, func, &opts);
        free(func);
        return libbpf_get_error(*link);
}

static int attach_ksyscall(const struct bpf_program *prog, long cookie, struct bpf_link **link)
{
        LIBBPF_OPTS(bpf_ksyscall_opts, opts);
        const char *syscall_name;

        *link = NULL;

        /* no auto-attach for SEC("ksyscall") and SEC("kretsyscall") */
        if (strcmp(prog->sec_name, "ksyscall") == 0 || strcmp(prog->sec_name, "kretsyscall") == 0)
                return 0;

        opts.retprobe = str_has_pfx(prog->sec_name, "kretsyscall/");
        if (opts.retprobe)
                syscall_name = prog->sec_name + sizeof("kretsyscall/") - 1;
        else
                syscall_name = prog->sec_name + sizeof("ksyscall/") - 1;

        *link = bpf_program__attach_ksyscall(prog, syscall_name, &opts);
        return *link ? 0 : -errno;
}

static int attach_kprobe_multi(const struct bpf_program *prog, long cookie, struct bpf_link **link)
{
        LIBBPF_OPTS(bpf_kprobe_multi_opts, opts);
        const char *spec;
        char *pattern;
        int n;

        *link = NULL;

        /* no auto-attach for SEC("kprobe.multi") and SEC("kretprobe.multi") */
        if (strcmp(prog->sec_name, "kprobe.multi") == 0 ||
            strcmp(prog->sec_name, "kretprobe.multi") == 0)
                return 0;

        opts.retprobe = str_has_pfx(prog->sec_name, "kretprobe.multi/");
        if (opts.retprobe)
                spec = prog->sec_name + sizeof("kretprobe.multi/") - 1;
        else
                spec = prog->sec_name + sizeof("kprobe.multi/") - 1;

        n = sscanf(spec, "%m[a-zA-Z0-9_.*?]", &pattern);
        if (n < 1) {
                pr_warn("kprobe multi pattern is invalid: %s\n", spec);
                return -EINVAL;
        }

        *link = bpf_program__attach_kprobe_multi_opts(prog, pattern, &opts);
        free(pattern);
        return libbpf_get_error(*link);
}

static int attach_uprobe_multi(const struct bpf_program *prog, long cookie, struct bpf_link **link)
{
        char *probe_type = NULL, *binary_path = NULL, *func_name = NULL;
        LIBBPF_OPTS(bpf_uprobe_multi_opts, opts);
        int n, ret = -EINVAL;

        *link = NULL;

        n = sscanf(prog->sec_name, "%m[^/]/%m[^:]:%m[^\n]",
                   &probe_type, &binary_path, &func_name);
        switch (n) {
        case 1:
                /* handle SEC("u[ret]probe") - format is valid, but auto-attach is impossible. */
                ret = 0;
                break;
        case 3:
                opts.retprobe = strcmp(probe_type, "uretprobe.multi") == 0;
                *link = bpf_program__attach_uprobe_multi(prog, -1, binary_path, func_name, &opts);
                ret = libbpf_get_error(*link);
                break;
        default:
                pr_warn("prog '%s': invalid format of section definition '%s'\n", prog->name,
                        prog->sec_name);
                break;
        }
        free(probe_type);
        free(binary_path);
        free(func_name);
        return ret;
}

static void gen_uprobe_legacy_event_name(char *buf, size_t buf_sz,
                                         const char *binary_path, uint64_t offset)
{
        int i;

        snprintf(buf, buf_sz, "libbpf_%u_%s_0x%zx", getpid(), binary_path, (size_t)offset);

        /* sanitize binary_path in the probe name */
        for (i = 0; buf[i]; i++) {
                if (!isalnum(buf[i]))
                        buf[i] = '_';
        }
}

static inline int add_uprobe_event_legacy(const char *probe_name, bool retprobe,
                                          const char *binary_path, size_t offset)
{
        return append_to_file(tracefs_uprobe_events(), "%c:%s/%s %s:0x%zx",
                              retprobe ? 'r' : 'p',
                              retprobe ? "uretprobes" : "uprobes",
                              probe_name, binary_path, offset);
}

static inline int remove_uprobe_event_legacy(const char *probe_name, bool retprobe)
{
        return append_to_file(tracefs_uprobe_events(), "-:%s/%s",
                              retprobe ? "uretprobes" : "uprobes", probe_name);
}

static int determine_uprobe_perf_type_legacy(const char *probe_name, bool retprobe)
{
        char file[512];

        snprintf(file, sizeof(file), "%s/events/%s/%s/id",
                 tracefs_path(), retprobe ? "uretprobes" : "uprobes", probe_name);

        return parse_uint_from_file(file, "%d\n");
}

static int perf_event_uprobe_open_legacy(const char *probe_name, bool retprobe,
                                         const char *binary_path, size_t offset, int pid)
{
        const size_t attr_sz = sizeof(struct perf_event_attr);
        struct perf_event_attr attr;
        int type, pfd, err;

        err = add_uprobe_event_legacy(probe_name, retprobe, binary_path, offset);
        if (err < 0) {
                pr_warn("failed to add legacy uprobe event for %s:0x%zx: %d\n",
                        binary_path, (size_t)offset, err);
                return err;
        }
        type = determine_uprobe_perf_type_legacy(probe_name, retprobe);
        if (type < 0) {
                err = type;
                pr_warn("failed to determine legacy uprobe event id for %s:0x%zx: %d\n",
                        binary_path, offset, err);
                goto err_clean_legacy;
        }

        memset(&attr, 0, attr_sz);
        attr.size = attr_sz;
        attr.config = type;
        attr.type = PERF_TYPE_TRACEPOINT;

        pfd = syscall(__NR_perf_event_open, &attr,
                      pid < 0 ? -1 : pid, /* pid */
                      pid == -1 ? 0 : -1, /* cpu */
                      -1 /* group_fd */, PERF_FLAG_FD_CLOEXEC);
        if (pfd < 0) {
                err = -errno;
                pr_warn("legacy uprobe perf_event_open() failed: %d\n", err);
                goto err_clean_legacy;
        }
        return pfd;

err_clean_legacy:
        /* Clear the newly added legacy uprobe_event */
        remove_uprobe_event_legacy(probe_name, retprobe);
        return err;
}

/* Find the offset of a function name in an archive specified by path.
 * Currently supported are .zip files that do not compress their contents, as
 * used on Android in the form of APKs, for example. "file_name" is the name
 * of the ELF file inside the archive. "func_name" matches a symbol name or
 * name@@LIB for library functions.
 *
 * An overview of the APK format specifically is provided here:
 * https://en.wikipedia.org/w/index.php?title=Apk_(file_format)&oldid=1139099120#Package_contents
 */
static long elf_find_func_offset_from_archive(const char *archive_path, const char *file_name,
                                              const char *func_name)
{
        struct zip_archive *archive;
        struct zip_entry entry;
        long ret;
        Elf *elf;

        archive = zip_archive_open(archive_path);
        if (IS_ERR(archive)) {
                ret = PTR_ERR(archive);
                pr_warn("zip: failed to open %s: %ld\n", archive_path, ret);
                return ret;
        }

        ret = zip_archive_find_entry(archive, file_name, &entry);
        if (ret) {
                pr_warn("zip: could not find archive member %s in %s: %ld\n", file_name,
                        archive_path, ret);
                goto out;
        }
        pr_debug("zip: found entry for %s in %s at 0x%lx\n", file_name, archive_path,
                 (unsigned long)entry.data_offset);

        if (entry.compression) {
                pr_warn("zip: entry %s of %s is compressed and cannot be handled\n", file_name,
                        archive_path);
                ret = -LIBBPF_ERRNO__FORMAT;
                goto out;
        }

        elf = elf_memory((void *)entry.data, entry.data_length);
        if (!elf) {
                pr_warn("elf: could not read elf file %s from %s: %s\n", file_name, archive_path,
                        elf_errmsg(-1));
                ret = -LIBBPF_ERRNO__LIBELF;
                goto out;
        }

        ret = elf_find_func_offset(elf, file_name, func_name);
        if (ret > 0) {
                pr_debug("elf: symbol address match for %s of %s in %s: 0x%x + 0x%lx = 0x%lx\n",
                         func_name, file_name, archive_path, entry.data_offset, ret,
                         ret + entry.data_offset);
                ret += entry.data_offset;
        }
        elf_end(elf);

out:
        zip_archive_close(archive);
        return ret;
}
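
/* Usage sketch (not part of the library): the "binary!/member" notation
 * handled further below in bpf_program__attach_uprobe_opts() routes through
 * this helper; the APK path and symbol below are hypothetical examples:
 *
 *	LIBBPF_OPTS(bpf_uprobe_opts, opts, .func_name = "Java_com_example_work");
 *
 *	link = bpf_program__attach_uprobe_opts(prog, -1,
 *		"/data/app/example.apk!/lib/arm64-v8a/libexample.so", 0, &opts);
 */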

static const char *arch_specific_lib_paths(void)
{
        /*
         * Based on https://packages.debian.org/sid/libc6.
         *
         * Assume that the traced program is built for the same architecture
         * as libbpf, which should cover the vast majority of cases.
         */
#if defined(__x86_64__)
        return "/lib/x86_64-linux-gnu";
#elif defined(__i386__)
        return "/lib/i386-linux-gnu";
#elif defined(__s390x__)
        return "/lib/s390x-linux-gnu";
#elif defined(__s390__)
        return "/lib/s390-linux-gnu";
#elif defined(__arm__) && defined(__SOFTFP__)
        return "/lib/arm-linux-gnueabi";
#elif defined(__arm__) && !defined(__SOFTFP__)
        return "/lib/arm-linux-gnueabihf";
#elif defined(__aarch64__)
        return "/lib/aarch64-linux-gnu";
#elif defined(__mips__) && defined(__MIPSEL__) && _MIPS_SZLONG == 64
        return "/lib/mips64el-linux-gnuabi64";
#elif defined(__mips__) && defined(__MIPSEL__) && _MIPS_SZLONG == 32
        return "/lib/mipsel-linux-gnu";
#elif defined(__powerpc64__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
        return "/lib/powerpc64le-linux-gnu";
#elif defined(__sparc__) && defined(__arch64__)
        return "/lib/sparc64-linux-gnu";
#elif defined(__riscv) && __riscv_xlen == 64
        return "/lib/riscv64-linux-gnu";
#else
        return NULL;
#endif
}

/* Get full path to program/shared library. */
static int resolve_full_path(const char *file, char *result, size_t result_sz)
{
        const char *search_paths[3] = {};
        int i, perm;

        if (str_has_sfx(file, ".so") || strstr(file, ".so.")) {
                search_paths[0] = getenv("LD_LIBRARY_PATH");
                search_paths[1] = "/usr/lib64:/usr/lib";
                search_paths[2] = arch_specific_lib_paths();
                perm = R_OK;
        } else {
                search_paths[0] = getenv("PATH");
                search_paths[1] = "/usr/bin:/usr/sbin";
                perm = R_OK | X_OK;
        }

        for (i = 0; i < ARRAY_SIZE(search_paths); i++) {
                const char *s;

                if (!search_paths[i])
                        continue;
                for (s = search_paths[i]; s != NULL; s = strchr(s, ':')) {
                        char *next_path;
                        int seg_len;

                        if (s[0] == ':')
                                s++;
                        next_path = strchr(s, ':');
                        seg_len = next_path ? next_path - s : strlen(s);
                        if (!seg_len)
                                continue;
                        snprintf(result, result_sz, "%.*s/%s", seg_len, s, file);
                        /* ensure it has required permissions */
                        if (faccessat(AT_FDCWD, result, perm, AT_EACCESS) < 0)
                                continue;
                        pr_debug("resolved '%s' to '%s'\n", file, result);
                        return 0;
                }
        }
        return -ENOENT;
}

struct bpf_link *
bpf_program__attach_uprobe_multi(const struct bpf_program *prog,
                                 pid_t pid,
                                 const char *path,
                                 const char *func_pattern,
                                 const struct bpf_uprobe_multi_opts *opts)
{
        const unsigned long *ref_ctr_offsets = NULL, *offsets = NULL;
        LIBBPF_OPTS(bpf_link_create_opts, lopts);
        unsigned long *resolved_offsets = NULL;
        int err = 0, link_fd, prog_fd;
        struct bpf_link *link = NULL;
        char errmsg[STRERR_BUFSIZE];
        char full_path[PATH_MAX];
        const __u64 *cookies;
        const char **syms;
        size_t cnt;

        if (!OPTS_VALID(opts, bpf_uprobe_multi_opts))
                return libbpf_err_ptr(-EINVAL);

        prog_fd = bpf_program__fd(prog);
        if (prog_fd < 0) {
                pr_warn("prog '%s': can't attach BPF program without FD (was it loaded?)\n",
                        prog->name);
                return libbpf_err_ptr(-EINVAL);
        }

        syms = OPTS_GET(opts, syms, NULL);
        offsets = OPTS_GET(opts, offsets, NULL);
        ref_ctr_offsets = OPTS_GET(opts, ref_ctr_offsets, NULL);
        cookies = OPTS_GET(opts, cookies, NULL);
        cnt = OPTS_GET(opts, cnt, 0);

        /*
         * The user can specify one of two mutually exclusive sets of inputs:
         *
         * 1) use only path/func_pattern/pid arguments
         *
         * 2) use path/pid with allowed combinations of:
         *    syms/offsets/ref_ctr_offsets/cookies/cnt
         *
         *    - syms and offsets are mutually exclusive
         *    - ref_ctr_offsets and cookies are optional
         *
         * Any other usage results in an error.
         */

        if (!path)
                return libbpf_err_ptr(-EINVAL);
        if (!func_pattern && cnt == 0)
                return libbpf_err_ptr(-EINVAL);

        if (func_pattern) {
                if (syms || offsets || ref_ctr_offsets || cookies || cnt)
                        return libbpf_err_ptr(-EINVAL);
        } else {
                if (!!syms == !!offsets)
                        return libbpf_err_ptr(-EINVAL);
        }

        if (func_pattern) {
                if (!strchr(path, '/')) {
                        err = resolve_full_path(path, full_path, sizeof(full_path));
                        if (err) {
                                pr_warn("prog '%s': failed to resolve full path for '%s': %d\n",
                                        prog->name, path, err);
                                return libbpf_err_ptr(err);
                        }
                        path = full_path;
                }

                err = elf_resolve_pattern_offsets(path, func_pattern,
                                                  &resolved_offsets, &cnt);
                if (err < 0)
                        return libbpf_err_ptr(err);
                offsets = resolved_offsets;
        } else if (syms) {
                err = elf_resolve_syms_offsets(path, cnt, syms, &resolved_offsets, STT_FUNC);
                if (err < 0)
                        return libbpf_err_ptr(err);
                offsets = resolved_offsets;
        }

        lopts.uprobe_multi.path = path;
        lopts.uprobe_multi.offsets = offsets;
        lopts.uprobe_multi.ref_ctr_offsets = ref_ctr_offsets;
        lopts.uprobe_multi.cookies = cookies;
        lopts.uprobe_multi.cnt = cnt;
        lopts.uprobe_multi.flags = OPTS_GET(opts, retprobe, false) ? BPF_F_UPROBE_MULTI_RETURN : 0;

        if (pid == 0)
                pid = getpid();
        if (pid > 0)
                lopts.uprobe_multi.pid = pid;

        link = calloc(1, sizeof(*link));
        if (!link) {
                err = -ENOMEM;
                goto error;
        }
        link->detach = &bpf_link__detach_fd;

        link_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_UPROBE_MULTI, &lopts);
        if (link_fd < 0) {
                err = -errno;
                pr_warn("prog '%s': failed to attach multi-uprobe: %s\n",
                        prog->name, libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
                goto error;
        }
        link->fd = link_fd;
        free(resolved_offsets);
        return link;

error:
        free(resolved_offsets);
        free(link);
        return libbpf_err_ptr(err);
}
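
/* Usage sketch (not part of the library): attaching to all matching functions
 * of a binary by glob pattern (mode 1 above); the binary and pattern are only
 * examples:
 *
 *	LIBBPF_OPTS(bpf_uprobe_multi_opts, opts);
 *	struct bpf_link *link;
 *	pid_t pid = -1;  // negative pid means no pid filter (all processes)
 *
 *	link = bpf_program__attach_uprobe_multi(prog, pid,
 *						"/usr/lib64/libc.so.6", "malloc*", &opts);
 */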

LIBBPF_API struct bpf_link *
bpf_program__attach_uprobe_opts(const struct bpf_program *prog, pid_t pid,
                                const char *binary_path, size_t func_offset,
                                const struct bpf_uprobe_opts *opts)
{
        const char *archive_path = NULL, *archive_sep = NULL;
        char errmsg[STRERR_BUFSIZE], *legacy_probe = NULL;
        DECLARE_LIBBPF_OPTS(bpf_perf_event_opts, pe_opts);
        enum probe_attach_mode attach_mode;
        char full_path[PATH_MAX];
        struct bpf_link *link;
        size_t ref_ctr_off;
        int pfd, err;
        bool retprobe, legacy;
        const char *func_name;

        if (!OPTS_VALID(opts, bpf_uprobe_opts))
                return libbpf_err_ptr(-EINVAL);

        attach_mode = OPTS_GET(opts, attach_mode, PROBE_ATTACH_MODE_DEFAULT);
        retprobe = OPTS_GET(opts, retprobe, false);
        ref_ctr_off = OPTS_GET(opts, ref_ctr_offset, 0);
        pe_opts.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0);

        if (!binary_path)
                return libbpf_err_ptr(-EINVAL);

        /* Check if "binary_path" refers to an archive. */
        archive_sep = strstr(binary_path, "!/");
        if (archive_sep) {
                full_path[0] = '\0';
                libbpf_strlcpy(full_path, binary_path,
                               min(sizeof(full_path), (size_t)(archive_sep - binary_path + 1)));
                archive_path = full_path;
                binary_path = archive_sep + 2;
        } else if (!strchr(binary_path, '/')) {
                err = resolve_full_path(binary_path, full_path, sizeof(full_path));
                if (err) {
                        pr_warn("prog '%s': failed to resolve full path for '%s': %d\n",
                                prog->name, binary_path, err);
                        return libbpf_err_ptr(err);
                }
                binary_path = full_path;
        }
        func_name = OPTS_GET(opts, func_name, NULL);
        if (func_name) {
                long sym_off;

                if (archive_path) {
                        sym_off = elf_find_func_offset_from_archive(archive_path, binary_path,
                                                                    func_name);
                        binary_path = archive_path;
                } else {
                        sym_off = elf_find_func_offset_from_file(binary_path, func_name);
                }
                if (sym_off < 0)
                        return libbpf_err_ptr(sym_off);
                func_offset += sym_off;
        }

        legacy = determine_uprobe_perf_type() < 0;
        switch (attach_mode) {
        case PROBE_ATTACH_MODE_LEGACY:
                legacy = true;
                pe_opts.force_ioctl_attach = true;
                break;
        case PROBE_ATTACH_MODE_PERF:
                if (legacy)
                        return libbpf_err_ptr(-ENOTSUP);
                pe_opts.force_ioctl_attach = true;
                break;
        case PROBE_ATTACH_MODE_LINK:
                if (legacy || !kernel_supports(prog->obj, FEAT_PERF_LINK))
                        return libbpf_err_ptr(-ENOTSUP);
                break;
        case PROBE_ATTACH_MODE_DEFAULT:
                break;
        default:
                return libbpf_err_ptr(-EINVAL);
        }

        if (!legacy) {
                pfd = perf_event_open_probe(true /* uprobe */, retprobe, binary_path,
                                            func_offset, pid, ref_ctr_off);
        } else {
                char probe_name[PATH_MAX + 64];

                if (ref_ctr_off)
                        return libbpf_err_ptr(-EINVAL);

                gen_uprobe_legacy_event_name(probe_name, sizeof(probe_name),
                                             binary_path, func_offset);

                legacy_probe = strdup(probe_name);
                if (!legacy_probe)
                        return libbpf_err_ptr(-ENOMEM);

                pfd = perf_event_uprobe_open_legacy(legacy_probe, retprobe,
                                                    binary_path, func_offset, pid);
        }
        if (pfd < 0) {
                err = -errno;
                pr_warn("prog '%s': failed to create %s '%s:0x%zx' perf event: %s\n",
                        prog->name, retprobe ? "uretprobe" : "uprobe",
                        binary_path, func_offset,
                        libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
                goto err_out;
        }

        link = bpf_program__attach_perf_event_opts(prog, pfd, &pe_opts);
        err = libbpf_get_error(link);
        if (err) {
                close(pfd);
                pr_warn("prog '%s': failed to attach to %s '%s:0x%zx': %s\n",
                        prog->name, retprobe ? "uretprobe" : "uprobe",
                        binary_path, func_offset,
                        libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
                goto err_clean_legacy;
        }
        if (legacy) {
                struct bpf_link_perf *perf_link = container_of(link, struct bpf_link_perf, link);

                perf_link->legacy_probe_name = legacy_probe;
                perf_link->legacy_is_kprobe = false;
                perf_link->legacy_is_retprobe = retprobe;
        }
        return link;

err_clean_legacy:
        if (legacy)
                remove_uprobe_event_legacy(legacy_probe, retprobe);
err_out:
        free(legacy_probe);
        return libbpf_err_ptr(err);
}
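
/* Usage sketch (not part of the library): attaching by symbol name instead of
 * a raw offset; the library and function names are only examples:
 *
 *	LIBBPF_OPTS(bpf_uprobe_opts, opts, .func_name = "malloc", .retprobe = false);
 *	struct bpf_link *link;
 *
 *	link = bpf_program__attach_uprobe_opts(prog, -1, "libc.so.6", 0, &opts);
 *
 * Since "libc.so.6" contains no '/', it is resolved via resolve_full_path()
 * using LD_LIBRARY_PATH and the standard library directories above.
 */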
12065
12066 /* Format of u[ret]probe section definition supporting auto-attach:
12067 * u[ret]probe/binary:function[+offset]
12068 *
12069 * binary can be an absolute/relative path or a filename; the latter is resolved to a
12070 * full binary path via bpf_program__attach_uprobe_opts.
12071 *
12072 * Specifying uprobe+ ensures we carry out strict matching; either "uprobe" must be
12073 * specified (and auto-attach is not possible) or the above format is specified for
12074 * auto-attach.
12075 */
attach_uprobe(const struct bpf_program * prog,long cookie,struct bpf_link ** link)12076 static int attach_uprobe(const struct bpf_program *prog, long cookie, struct bpf_link **link)
12077 {
12078 DECLARE_LIBBPF_OPTS(bpf_uprobe_opts, opts);
12079 char *probe_type = NULL, *binary_path = NULL, *func_name = NULL, *func_off;
12080 int n, c, ret = -EINVAL;
12081 long offset = 0;
12082
12083 *link = NULL;
12084
12085 n = sscanf(prog->sec_name, "%m[^/]/%m[^:]:%m[^\n]",
12086 &probe_type, &binary_path, &func_name);
12087 switch (n) {
12088 case 1:
12089 /* handle SEC("u[ret]probe") - format is valid, but auto-attach is impossible. */
12090 ret = 0;
12091 break;
12092 case 2:
12093 pr_warn("prog '%s': section '%s' missing ':function[+offset]' specification\n",
12094 prog->name, prog->sec_name);
12095 break;
12096 case 3:
12097 /* check if user specified `+offset`; if so, it must be the last
12098 * part of the string - make sure sscanf read all the way to EOL
12099 */
12100 func_off = strrchr(func_name, '+');
12101 if (func_off) {
12102 n = sscanf(func_off, "+%li%n", &offset, &c);
12103 if (n == 1 && *(func_off + c) == '\0')
12104 func_off[0] = '\0';
12105 else
12106 offset = 0;
12107 }
12108 opts.retprobe = strcmp(probe_type, "uretprobe") == 0 ||
12109 strcmp(probe_type, "uretprobe.s") == 0;
12110 if (opts.retprobe && offset != 0) {
12111 pr_warn("prog '%s': uretprobes do not support offset specification\n",
12112 prog->name);
12113 break;
12114 }
12115 opts.func_name = func_name;
12116 *link = bpf_program__attach_uprobe_opts(prog, -1, binary_path, offset, &opts);
12117 ret = libbpf_get_error(*link);
12118 break;
12119 default:
12120 pr_warn("prog '%s': invalid format of section definition '%s'\n", prog->name,
12121 prog->sec_name);
12122 break;
12123 }
12124 free(probe_type);
12125 free(binary_path);
12126 free(func_name);
12127
12128 return ret;
12129 }
12130
12131 struct bpf_link *bpf_program__attach_uprobe(const struct bpf_program *prog,
12132 bool retprobe, pid_t pid,
12133 const char *binary_path,
12134 size_t func_offset)
12135 {
12136 DECLARE_LIBBPF_OPTS(bpf_uprobe_opts, opts, .retprobe = retprobe);
12137
12138 return bpf_program__attach_uprobe_opts(prog, pid, binary_path, func_offset, &opts);
12139 }
12140
12141 struct bpf_link *bpf_program__attach_usdt(const struct bpf_program *prog,
12142 pid_t pid, const char *binary_path,
12143 const char *usdt_provider, const char *usdt_name,
12144 const struct bpf_usdt_opts *opts)
12145 {
12146 char resolved_path[512];
12147 struct bpf_object *obj = prog->obj;
12148 struct bpf_link *link;
12149 __u64 usdt_cookie;
12150 int err;
12151
12152 if (!OPTS_VALID(opts, bpf_uprobe_opts))
12153 return libbpf_err_ptr(-EINVAL);
12154
12155 if (bpf_program__fd(prog) < 0) {
12156 pr_warn("prog '%s': can't attach BPF program without FD (was it loaded?)\n",
12157 prog->name);
12158 return libbpf_err_ptr(-EINVAL);
12159 }
12160
12161 if (!binary_path)
12162 return libbpf_err_ptr(-EINVAL);
12163
12164 if (!strchr(binary_path, '/')) {
12165 err = resolve_full_path(binary_path, resolved_path, sizeof(resolved_path));
12166 if (err) {
12167 pr_warn("prog '%s': failed to resolve full path for '%s': %d\n",
12168 prog->name, binary_path, err);
12169 return libbpf_err_ptr(err);
12170 }
12171 binary_path = resolved_path;
12172 }
12173
12174 /* USDT manager is instantiated lazily on first USDT attach. It will
12175 * be destroyed together with BPF object in bpf_object__close().
12176 */
12177 if (IS_ERR(obj->usdt_man))
12178 return libbpf_ptr(obj->usdt_man);
12179 if (!obj->usdt_man) {
12180 obj->usdt_man = usdt_manager_new(obj);
12181 if (IS_ERR(obj->usdt_man))
12182 return libbpf_ptr(obj->usdt_man);
12183 }
12184
12185 usdt_cookie = OPTS_GET(opts, usdt_cookie, 0);
12186 link = usdt_manager_attach_usdt(obj->usdt_man, prog, pid, binary_path,
12187 usdt_provider, usdt_name, usdt_cookie);
12188 err = libbpf_get_error(link);
12189 if (err)
12190 return libbpf_err_ptr(err);
12191 return link;
12192 }
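/* Illustrative caller-side sketch (binary, provider, and probe names are
 * assumptions):
 *
 *	LIBBPF_OPTS(bpf_usdt_opts, uopts, .usdt_cookie = 0xcafe);
 *	struct bpf_link *l;
 *
 *	l = bpf_program__attach_usdt(prog, -1, "libc.so.6", "libc", "setjmp", &uopts);
 */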
12193
12194 static int attach_usdt(const struct bpf_program *prog, long cookie, struct bpf_link **link)
12195 {
12196 char *path = NULL, *provider = NULL, *name = NULL;
12197 const char *sec_name;
12198 int n, err;
12199
12200 sec_name = bpf_program__section_name(prog);
12201 if (strcmp(sec_name, "usdt") == 0) {
12202 /* no auto-attach for just SEC("usdt") */
12203 *link = NULL;
12204 return 0;
12205 }
12206
12207 n = sscanf(sec_name, "usdt/%m[^:]:%m[^:]:%m[^:]", &path, &provider, &name);
12208 if (n != 3) {
12209 pr_warn("invalid section '%s', expected SEC(\"usdt/<path>:<provider>:<name>\")\n",
12210 sec_name);
12211 err = -EINVAL;
12212 } else {
12213 *link = bpf_program__attach_usdt(prog, -1 /* any process */, path,
12214 provider, name, NULL);
12215 err = libbpf_get_error(*link);
12216 }
12217 free(path);
12218 free(provider);
12219 free(name);
12220 return err;
12221 }
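/* Illustrative auto-attach section names accepted by attach_usdt() above
 * (path, provider, and probe name are assumptions):
 *
 *	SEC("usdt/libc.so.6:libc:setjmp")
 *	SEC("usdt")	// valid, but no auto-attach
 */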
12222
12223 static int determine_tracepoint_id(const char *tp_category,
12224 const char *tp_name)
12225 {
12226 char file[PATH_MAX];
12227 int ret;
12228
12229 ret = snprintf(file, sizeof(file), "%s/events/%s/%s/id",
12230 tracefs_path(), tp_category, tp_name);
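/* e.g., "/sys/kernel/tracing/events/sched/sched_switch/id" for the
 * "sched/sched_switch" tracepoint (illustrative; the prefix depends on
 * where tracefs is mounted)
 */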
12231 if (ret < 0)
12232 return -errno;
12233 if (ret >= sizeof(file)) {
12234 pr_debug("tracepoint %s/%s path is too long\n",
12235 tp_category, tp_name);
12236 return -E2BIG;
12237 }
12238 return parse_uint_from_file(file, "%d\n");
12239 }
12240
12241 static int perf_event_open_tracepoint(const char *tp_category,
12242 const char *tp_name)
12243 {
12244 const size_t attr_sz = sizeof(struct perf_event_attr);
12245 struct perf_event_attr attr;
12246 char errmsg[STRERR_BUFSIZE];
12247 int tp_id, pfd, err;
12248
12249 tp_id = determine_tracepoint_id(tp_category, tp_name);
12250 if (tp_id < 0) {
12251 pr_warn("failed to determine tracepoint '%s/%s' perf event ID: %s\n",
12252 tp_category, tp_name,
12253 libbpf_strerror_r(tp_id, errmsg, sizeof(errmsg)));
12254 return tp_id;
12255 }
12256
12257 memset(&attr, 0, attr_sz);
12258 attr.type = PERF_TYPE_TRACEPOINT;
12259 attr.size = attr_sz;
12260 attr.config = tp_id;
12261
12262 pfd = syscall(__NR_perf_event_open, &attr, -1 /* pid */, 0 /* cpu */,
12263 -1 /* group_fd */, PERF_FLAG_FD_CLOEXEC);
12264 if (pfd < 0) {
12265 err = -errno;
12266 pr_warn("tracepoint '%s/%s' perf_event_open() failed: %s\n",
12267 tp_category, tp_name,
12268 libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
12269 return err;
12270 }
12271 return pfd;
12272 }
12273
12274 struct bpf_link *bpf_program__attach_tracepoint_opts(const struct bpf_program *prog,
12275 const char *tp_category,
12276 const char *tp_name,
12277 const struct bpf_tracepoint_opts *opts)
12278 {
12279 DECLARE_LIBBPF_OPTS(bpf_perf_event_opts, pe_opts);
12280 char errmsg[STRERR_BUFSIZE];
12281 struct bpf_link *link;
12282 int pfd, err;
12283
12284 if (!OPTS_VALID(opts, bpf_tracepoint_opts))
12285 return libbpf_err_ptr(-EINVAL);
12286
12287 pe_opts.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0);
12288
12289 pfd = perf_event_open_tracepoint(tp_category, tp_name);
12290 if (pfd < 0) {
12291 pr_warn("prog '%s': failed to create tracepoint '%s/%s' perf event: %s\n",
12292 prog->name, tp_category, tp_name,
12293 libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
12294 return libbpf_err_ptr(pfd);
12295 }
12296 link = bpf_program__attach_perf_event_opts(prog, pfd, &pe_opts);
12297 err = libbpf_get_error(link);
12298 if (err) {
12299 close(pfd);
12300 pr_warn("prog '%s': failed to attach to tracepoint '%s/%s': %s\n",
12301 prog->name, tp_category, tp_name,
12302 libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
12303 return libbpf_err_ptr(err);
12304 }
12305 return link;
12306 }
12307
12308 struct bpf_link *bpf_program__attach_tracepoint(const struct bpf_program *prog,
12309 const char *tp_category,
12310 const char *tp_name)
12311 {
12312 return bpf_program__attach_tracepoint_opts(prog, tp_category, tp_name, NULL);
12313 }
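/* Illustrative caller-side sketch (tracepoint category and name are
 * assumptions):
 *
 *	struct bpf_link *l;
 *
 *	l = bpf_program__attach_tracepoint(prog, "syscalls", "sys_enter_openat");
 */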
12314
12315 static int attach_tp(const struct bpf_program *prog, long cookie, struct bpf_link **link)
12316 {
12317 char *sec_name, *tp_cat, *tp_name;
12318
12319 *link = NULL;
12320
12321 /* no auto-attach for SEC("tp") or SEC("tracepoint") */
12322 if (strcmp(prog->sec_name, "tp") == 0 || strcmp(prog->sec_name, "tracepoint") == 0)
12323 return 0;
12324
12325 sec_name = strdup(prog->sec_name);
12326 if (!sec_name)
12327 return -ENOMEM;
12328
12329 /* extract "tp/<category>/<name>" or "tracepoint/<category>/<name>" */
12330 if (str_has_pfx(prog->sec_name, "tp/"))
12331 tp_cat = sec_name + sizeof("tp/") - 1;
12332 else
12333 tp_cat = sec_name + sizeof("tracepoint/") - 1;
12334 tp_name = strchr(tp_cat, '/');
12335 if (!tp_name) {
12336 free(sec_name);
12337 return -EINVAL;
12338 }
12339 *tp_name = '\0';
12340 tp_name++;
12341
12342 *link = bpf_program__attach_tracepoint(prog, tp_cat, tp_name);
12343 free(sec_name);
12344 return libbpf_get_error(*link);
12345 }
12346
12347 struct bpf_link *
12348 bpf_program__attach_raw_tracepoint_opts(const struct bpf_program *prog,
12349 const char *tp_name,
12350 struct bpf_raw_tracepoint_opts *opts)
12351 {
12352 LIBBPF_OPTS(bpf_raw_tp_opts, raw_opts);
12353 char errmsg[STRERR_BUFSIZE];
12354 struct bpf_link *link;
12355 int prog_fd, pfd;
12356
12357 if (!OPTS_VALID(opts, bpf_raw_tracepoint_opts))
12358 return libbpf_err_ptr(-EINVAL);
12359
12360 prog_fd = bpf_program__fd(prog);
12361 if (prog_fd < 0) {
12362 pr_warn("prog '%s': can't attach before loaded\n", prog->name);
12363 return libbpf_err_ptr(-EINVAL);
12364 }
12365
12366 link = calloc(1, sizeof(*link));
12367 if (!link)
12368 return libbpf_err_ptr(-ENOMEM);
12369 link->detach = &bpf_link__detach_fd;
12370
12371 raw_opts.tp_name = tp_name;
12372 raw_opts.cookie = OPTS_GET(opts, cookie, 0);
12373 pfd = bpf_raw_tracepoint_open_opts(prog_fd, &raw_opts);
12374 if (pfd < 0) {
12375 pfd = -errno;
12376 free(link);
12377 pr_warn("prog '%s': failed to attach to raw tracepoint '%s': %s\n",
12378 prog->name, tp_name, libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
12379 return libbpf_err_ptr(pfd);
12380 }
12381 link->fd = pfd;
12382 return link;
12383 }
12384
12385 struct bpf_link *bpf_program__attach_raw_tracepoint(const struct bpf_program *prog,
12386 const char *tp_name)
12387 {
12388 return bpf_program__attach_raw_tracepoint_opts(prog, tp_name, NULL);
12389 }
12390
12391 static int attach_raw_tp(const struct bpf_program *prog, long cookie, struct bpf_link **link)
12392 {
12393 static const char *const prefixes[] = {
12394 "raw_tp",
12395 "raw_tracepoint",
12396 "raw_tp.w",
12397 "raw_tracepoint.w",
12398 };
12399 size_t i;
12400 const char *tp_name = NULL;
12401
12402 *link = NULL;
12403
12404 for (i = 0; i < ARRAY_SIZE(prefixes); i++) {
12405 size_t pfx_len;
12406
12407 if (!str_has_pfx(prog->sec_name, prefixes[i]))
12408 continue;
12409
12410 pfx_len = strlen(prefixes[i]);
12411 /* no auto-attach case, e.g., plain SEC("raw_tp") */
12412 if (prog->sec_name[pfx_len] == '\0')
12413 return 0;
12414
12415 if (prog->sec_name[pfx_len] != '/')
12416 continue;
12417
12418 tp_name = prog->sec_name + pfx_len + 1;
12419 break;
12420 }
12421
12422 if (!tp_name) {
12423 pr_warn("prog '%s': invalid section name '%s'\n",
12424 prog->name, prog->sec_name);
12425 return -EINVAL;
12426 }
12427
12428 *link = bpf_program__attach_raw_tracepoint(prog, tp_name);
12429 return libbpf_get_error(*link);
12430 }
12431
12432 /* Common logic for all BPF program types that attach to a btf_id */
12433 static struct bpf_link *bpf_program__attach_btf_id(const struct bpf_program *prog,
12434 const struct bpf_trace_opts *opts)
12435 {
12436 LIBBPF_OPTS(bpf_link_create_opts, link_opts);
12437 char errmsg[STRERR_BUFSIZE];
12438 struct bpf_link *link;
12439 int prog_fd, pfd;
12440
12441 if (!OPTS_VALID(opts, bpf_trace_opts))
12442 return libbpf_err_ptr(-EINVAL);
12443
12444 prog_fd = bpf_program__fd(prog);
12445 if (prog_fd < 0) {
12446 pr_warn("prog '%s': can't attach before loaded\n", prog->name);
12447 return libbpf_err_ptr(-EINVAL);
12448 }
12449
12450 link = calloc(1, sizeof(*link));
12451 if (!link)
12452 return libbpf_err_ptr(-ENOMEM);
12453 link->detach = &bpf_link__detach_fd;
12454
12455 /* libbpf is smart enough to redirect to BPF_RAW_TRACEPOINT_OPEN on old kernels */
12456 link_opts.tracing.cookie = OPTS_GET(opts, cookie, 0);
12457 pfd = bpf_link_create(prog_fd, 0, bpf_program__expected_attach_type(prog), &link_opts);
12458 if (pfd < 0) {
12459 pfd = -errno;
12460 free(link);
12461 pr_warn("prog '%s': failed to attach: %s\n",
12462 prog->name, libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
12463 return libbpf_err_ptr(pfd);
12464 }
12465 link->fd = pfd;
12466 return link;
12467 }
12468
12469 struct bpf_link *bpf_program__attach_trace(const struct bpf_program *prog)
12470 {
12471 return bpf_program__attach_btf_id(prog, NULL);
12472 }
12473
12474 struct bpf_link *bpf_program__attach_trace_opts(const struct bpf_program *prog,
12475 const struct bpf_trace_opts *opts)
12476 {
12477 return bpf_program__attach_btf_id(prog, opts);
12478 }
12479
12480 struct bpf_link *bpf_program__attach_lsm(const struct bpf_program *prog)
12481 {
12482 return bpf_program__attach_btf_id(prog, NULL);
12483 }
12484
12485 static int attach_trace(const struct bpf_program *prog, long cookie, struct bpf_link **link)
12486 {
12487 *link = bpf_program__attach_trace(prog);
12488 return libbpf_get_error(*link);
12489 }
12490
12491 static int attach_lsm(const struct bpf_program *prog, long cookie, struct bpf_link **link)
12492 {
12493 *link = bpf_program__attach_lsm(prog);
12494 return libbpf_get_error(*link);
12495 }
12496
12497 static struct bpf_link *
12498 bpf_program_attach_fd(const struct bpf_program *prog,
12499 int target_fd, const char *target_name,
12500 const struct bpf_link_create_opts *opts)
12501 {
12502 enum bpf_attach_type attach_type;
12503 char errmsg[STRERR_BUFSIZE];
12504 struct bpf_link *link;
12505 int prog_fd, link_fd;
12506
12507 prog_fd = bpf_program__fd(prog);
12508 if (prog_fd < 0) {
12509 pr_warn("prog '%s': can't attach before loaded\n", prog->name);
12510 return libbpf_err_ptr(-EINVAL);
12511 }
12512
12513 link = calloc(1, sizeof(*link));
12514 if (!link)
12515 return libbpf_err_ptr(-ENOMEM);
12516 link->detach = &bpf_link__detach_fd;
12517
12518 attach_type = bpf_program__expected_attach_type(prog);
12519 link_fd = bpf_link_create(prog_fd, target_fd, attach_type, opts);
12520 if (link_fd < 0) {
12521 link_fd = -errno;
12522 free(link);
12523 pr_warn("prog '%s': failed to attach to %s: %s\n",
12524 prog->name, target_name,
12525 libbpf_strerror_r(link_fd, errmsg, sizeof(errmsg)));
12526 return libbpf_err_ptr(link_fd);
12527 }
12528 link->fd = link_fd;
12529 return link;
12530 }
12531
12532 struct bpf_link *
12533 bpf_program__attach_cgroup(const struct bpf_program *prog, int cgroup_fd)
12534 {
12535 return bpf_program_attach_fd(prog, cgroup_fd, "cgroup", NULL);
12536 }
12537
12538 struct bpf_link *
12539 bpf_program__attach_netns(const struct bpf_program *prog, int netns_fd)
12540 {
12541 return bpf_program_attach_fd(prog, netns_fd, "netns", NULL);
12542 }
12543
12544 struct bpf_link *bpf_program__attach_xdp(const struct bpf_program *prog, int ifindex)
12545 {
12546 /* target_fd/target_ifindex use the same field in LINK_CREATE */
12547 return bpf_program_attach_fd(prog, ifindex, "xdp", NULL);
12548 }
12549
12550 struct bpf_link *
12551 bpf_program__attach_tcx(const struct bpf_program *prog, int ifindex,
12552 const struct bpf_tcx_opts *opts)
12553 {
12554 LIBBPF_OPTS(bpf_link_create_opts, link_create_opts);
12555 __u32 relative_id;
12556 int relative_fd;
12557
12558 if (!OPTS_VALID(opts, bpf_tcx_opts))
12559 return libbpf_err_ptr(-EINVAL);
12560
12561 relative_id = OPTS_GET(opts, relative_id, 0);
12562 relative_fd = OPTS_GET(opts, relative_fd, 0);
12563
12564 /* validate we don't have unexpected combinations of non-zero fields */
12565 if (!ifindex) {
12566 pr_warn("prog '%s': target netdevice ifindex cannot be zero\n",
12567 prog->name);
12568 return libbpf_err_ptr(-EINVAL);
12569 }
12570 if (relative_fd && relative_id) {
12571 pr_warn("prog '%s': relative_fd and relative_id cannot be set at the same time\n",
12572 prog->name);
12573 return libbpf_err_ptr(-EINVAL);
12574 }
12575
12576 link_create_opts.tcx.expected_revision = OPTS_GET(opts, expected_revision, 0);
12577 link_create_opts.tcx.relative_fd = relative_fd;
12578 link_create_opts.tcx.relative_id = relative_id;
12579 link_create_opts.flags = OPTS_GET(opts, flags, 0);
12580
12581 /* target_fd/target_ifindex use the same field in LINK_CREATE */
12582 return bpf_program_attach_fd(prog, ifindex, "tcx", &link_create_opts);
12583 }
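/* Illustrative caller-side sketch (interface name is an assumption); the
 * ingress/egress direction comes from the program's expected attach type:
 *
 *	struct bpf_link *l;
 *
 *	l = bpf_program__attach_tcx(prog, if_nametoindex("eth0"), NULL);
 */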
12584
12585 struct bpf_link *
12586 bpf_program__attach_netkit(const struct bpf_program *prog, int ifindex,
12587 const struct bpf_netkit_opts *opts)
12588 {
12589 LIBBPF_OPTS(bpf_link_create_opts, link_create_opts);
12590 __u32 relative_id;
12591 int relative_fd;
12592
12593 if (!OPTS_VALID(opts, bpf_netkit_opts))
12594 return libbpf_err_ptr(-EINVAL);
12595
12596 relative_id = OPTS_GET(opts, relative_id, 0);
12597 relative_fd = OPTS_GET(opts, relative_fd, 0);
12598
12599 /* validate we don't have unexpected combinations of non-zero fields */
12600 if (!ifindex) {
12601 pr_warn("prog '%s': target netdevice ifindex cannot be zero\n",
12602 prog->name);
12603 return libbpf_err_ptr(-EINVAL);
12604 }
12605 if (relative_fd && relative_id) {
12606 pr_warn("prog '%s': relative_fd and relative_id cannot be set at the same time\n",
12607 prog->name);
12608 return libbpf_err_ptr(-EINVAL);
12609 }
12610
12611 link_create_opts.netkit.expected_revision = OPTS_GET(opts, expected_revision, 0);
12612 link_create_opts.netkit.relative_fd = relative_fd;
12613 link_create_opts.netkit.relative_id = relative_id;
12614 link_create_opts.flags = OPTS_GET(opts, flags, 0);
12615
12616 return bpf_program_attach_fd(prog, ifindex, "netkit", &link_create_opts);
12617 }
12618
12619 struct bpf_link *bpf_program__attach_freplace(const struct bpf_program *prog,
12620 int target_fd,
12621 const char *attach_func_name)
12622 {
12623 int btf_id;
12624
12625 if (!!target_fd != !!attach_func_name) {
12626 pr_warn("prog '%s': supply none or both of target_fd and attach_func_name\n",
12627 prog->name);
12628 return libbpf_err_ptr(-EINVAL);
12629 }
12630
12631 if (prog->type != BPF_PROG_TYPE_EXT) {
12632 pr_warn("prog '%s': only BPF_PROG_TYPE_EXT can attach as freplace\n",
12633 prog->name);
12634 return libbpf_err_ptr(-EINVAL);
12635 }
12636
12637 if (target_fd) {
12638 LIBBPF_OPTS(bpf_link_create_opts, target_opts);
12639
12640 btf_id = libbpf_find_prog_btf_id(attach_func_name, target_fd);
12641 if (btf_id < 0)
12642 return libbpf_err_ptr(btf_id);
12643
12644 target_opts.target_btf_id = btf_id;
12645
12646 return bpf_program_attach_fd(prog, target_fd, "freplace",
12647 &target_opts);
12648 } else {
12649 /* no target, so use raw_tracepoint_open for compatibility
12650 * with old kernels
12651 */
12652 return bpf_program__attach_trace(prog);
12653 }
12654 }
12655
12656 struct bpf_link *
12657 bpf_program__attach_iter(const struct bpf_program *prog,
12658 const struct bpf_iter_attach_opts *opts)
12659 {
12660 DECLARE_LIBBPF_OPTS(bpf_link_create_opts, link_create_opts);
12661 char errmsg[STRERR_BUFSIZE];
12662 struct bpf_link *link;
12663 int prog_fd, link_fd;
12664 __u32 target_fd = 0;
12665
12666 if (!OPTS_VALID(opts, bpf_iter_attach_opts))
12667 return libbpf_err_ptr(-EINVAL);
12668
12669 link_create_opts.iter_info = OPTS_GET(opts, link_info, (void *)0);
12670 link_create_opts.iter_info_len = OPTS_GET(opts, link_info_len, 0);
12671
12672 prog_fd = bpf_program__fd(prog);
12673 if (prog_fd < 0) {
12674 pr_warn("prog '%s': can't attach before loaded\n", prog->name);
12675 return libbpf_err_ptr(-EINVAL);
12676 }
12677
12678 link = calloc(1, sizeof(*link));
12679 if (!link)
12680 return libbpf_err_ptr(-ENOMEM);
12681 link->detach = &bpf_link__detach_fd;
12682
12683 link_fd = bpf_link_create(prog_fd, target_fd, BPF_TRACE_ITER,
12684 &link_create_opts);
12685 if (link_fd < 0) {
12686 link_fd = -errno;
12687 free(link);
12688 pr_warn("prog '%s': failed to attach to iterator: %s\n",
12689 prog->name, libbpf_strerror_r(link_fd, errmsg, sizeof(errmsg)));
12690 return libbpf_err_ptr(link_fd);
12691 }
12692 link->fd = link_fd;
12693 return link;
12694 }
12695
12696 static int attach_iter(const struct bpf_program *prog, long cookie, struct bpf_link **link)
12697 {
12698 *link = bpf_program__attach_iter(prog, NULL);
12699 return libbpf_get_error(*link);
12700 }
12701
12702 struct bpf_link *bpf_program__attach_netfilter(const struct bpf_program *prog,
12703 const struct bpf_netfilter_opts *opts)
12704 {
12705 LIBBPF_OPTS(bpf_link_create_opts, lopts);
12706 struct bpf_link *link;
12707 int prog_fd, link_fd;
12708
12709 if (!OPTS_VALID(opts, bpf_netfilter_opts))
12710 return libbpf_err_ptr(-EINVAL);
12711
12712 prog_fd = bpf_program__fd(prog);
12713 if (prog_fd < 0) {
12714 pr_warn("prog '%s': can't attach before loaded\n", prog->name);
12715 return libbpf_err_ptr(-EINVAL);
12716 }
12717
12718 link = calloc(1, sizeof(*link));
12719 if (!link)
12720 return libbpf_err_ptr(-ENOMEM);
12721
12722 link->detach = &bpf_link__detach_fd;
12723
12724 lopts.netfilter.pf = OPTS_GET(opts, pf, 0);
12725 lopts.netfilter.hooknum = OPTS_GET(opts, hooknum, 0);
12726 lopts.netfilter.priority = OPTS_GET(opts, priority, 0);
12727 lopts.netfilter.flags = OPTS_GET(opts, flags, 0);
12728
12729 link_fd = bpf_link_create(prog_fd, 0, BPF_NETFILTER, &lopts);
12730 if (link_fd < 0) {
12731 char errmsg[STRERR_BUFSIZE];
12732
12733 link_fd = -errno;
12734 free(link);
12735 pr_warn("prog '%s': failed to attach to netfilter: %s\n",
12736 prog->name, libbpf_strerror_r(link_fd, errmsg, sizeof(errmsg)));
12737 return libbpf_err_ptr(link_fd);
12738 }
12739 link->fd = link_fd;
12740
12741 return link;
12742 }
12743
12744 struct bpf_link *bpf_program__attach(const struct bpf_program *prog)
12745 {
12746 struct bpf_link *link = NULL;
12747 int err;
12748
12749 if (!prog->sec_def || !prog->sec_def->prog_attach_fn)
12750 return libbpf_err_ptr(-EOPNOTSUPP);
12751
12752 if (bpf_program__fd(prog) < 0) {
12753 pr_warn("prog '%s': can't attach BPF program without FD (was it loaded?)\n",
12754 prog->name);
12755 return libbpf_err_ptr(-EINVAL);
12756 }
12757
12758 err = prog->sec_def->prog_attach_fn(prog, prog->sec_def->cookie, &link);
12759 if (err)
12760 return libbpf_err_ptr(err);
12761
12762 /* When calling bpf_program__attach() explicitly, auto-attach support
12763 * is expected to work, so NULL returned link is considered an error.
12764 * This is different for skeleton's attach, see comment in
12765 * bpf_object__attach_skeleton().
12766 */
12767 if (!link)
12768 return libbpf_err_ptr(-EOPNOTSUPP);
12769
12770 return link;
12771 }
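/* Illustrative caller-side sketch:
 *
 *	struct bpf_link *l = bpf_program__attach(prog);
 *	int err = libbpf_get_error(l);
 *
 * err is, e.g., -EOPNOTSUPP if the program's SEC() definition doesn't
 * support auto-attach.
 */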
12772
12773 struct bpf_link_struct_ops {
12774 struct bpf_link link;
12775 int map_fd;
12776 };
12777
12778 static int bpf_link__detach_struct_ops(struct bpf_link *link)
12779 {
12780 struct bpf_link_struct_ops *st_link;
12781 __u32 zero = 0;
12782
12783 st_link = container_of(link, struct bpf_link_struct_ops, link);
12784
12785 if (st_link->map_fd < 0)
12786 /* w/o a real link */
12787 return bpf_map_delete_elem(link->fd, &zero);
12788
12789 return close(link->fd);
12790 }
12791
12792 struct bpf_link *bpf_map__attach_struct_ops(const struct bpf_map *map)
12793 {
12794 struct bpf_link_struct_ops *link;
12795 __u32 zero = 0;
12796 int err, fd;
12797
12798 if (!bpf_map__is_struct_ops(map))
12799 return libbpf_err_ptr(-EINVAL);
12800
12801 if (map->fd < 0) {
12802 pr_warn("map '%s': can't attach BPF map without FD (was it created?)\n", map->name);
12803 return libbpf_err_ptr(-EINVAL);
12804 }
12805
12806 link = calloc(1, sizeof(*link));
12807 if (!link)
12808 return libbpf_err_ptr(-ENOMEM);
12809
12810 /* kern_vdata should be prepared during the loading phase. */
12811 err = bpf_map_update_elem(map->fd, &zero, map->st_ops->kern_vdata, 0);
12812 /* It can be -EBUSY if the map has already been used to create or
12813 * update a link. We don't allow updating the value of a struct_ops
12814 * map once it is set, which guarantees that the value never changes,
12815 * so it is safe to ignore -EBUSY.
12816 */
12817 if (err && (!(map->def.map_flags & BPF_F_LINK) || err != -EBUSY)) {
12818 free(link);
12819 return libbpf_err_ptr(err);
12820 }
12821
12822 link->link.detach = bpf_link__detach_struct_ops;
12823
12824 if (!(map->def.map_flags & BPF_F_LINK)) {
12825 /* w/o a real link */
12826 link->link.fd = map->fd;
12827 link->map_fd = -1;
12828 return &link->link;
12829 }
12830
12831 fd = bpf_link_create(map->fd, 0, BPF_STRUCT_OPS, NULL);
12832 if (fd < 0) {
12833 free(link);
12834 return libbpf_err_ptr(fd);
12835 }
12836
12837 link->link.fd = fd;
12838 link->map_fd = map->fd;
12839
12840 return &link->link;
12841 }
12842
12843 /*
12844 * Swap the struct_ops map backing a link with a new struct_ops map.
12845 */
12846 int bpf_link__update_map(struct bpf_link *link, const struct bpf_map *map)
12847 {
12848 struct bpf_link_struct_ops *st_ops_link;
12849 __u32 zero = 0;
12850 int err;
12851
12852 if (!bpf_map__is_struct_ops(map))
12853 return -EINVAL;
12854
12855 if (map->fd < 0) {
12856 pr_warn("map '%s': can't use BPF map without FD (was it created?)\n", map->name);
12857 return -EINVAL;
12858 }
12859
12860 st_ops_link = container_of(link, struct bpf_link_struct_ops, link);
12861 /* Ensure the type of a link is correct */
12862 if (st_ops_link->map_fd < 0)
12863 return -EINVAL;
12864
12865 err = bpf_map_update_elem(map->fd, &zero, map->st_ops->kern_vdata, 0);
12866 /* It can be -EBUSY if the map has already been used to create or
12867 * update a link. We don't allow updating the value of a struct_ops
12868 * map once it is set, which guarantees that the value never changes,
12869 * so it is safe to ignore -EBUSY.
12870 */
12871 if (err && err != -EBUSY)
12872 return err;
12873
12874 err = bpf_link_update(link->fd, map->fd, NULL);
12875 if (err < 0)
12876 return err;
12877
12878 st_ops_link->map_fd = map->fd;
12879
12880 return 0;
12881 }
12882
12883 typedef enum bpf_perf_event_ret (*bpf_perf_event_print_t)(struct perf_event_header *hdr,
12884 void *private_data);
12885
12886 static enum bpf_perf_event_ret
12887 perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size,
12888 void **copy_mem, size_t *copy_size,
12889 bpf_perf_event_print_t fn, void *private_data)
12890 {
12891 struct perf_event_mmap_page *header = mmap_mem;
12892 __u64 data_head = ring_buffer_read_head(header);
12893 __u64 data_tail = header->data_tail;
12894 void *base = ((__u8 *)header) + page_size;
12895 int ret = LIBBPF_PERF_EVENT_CONT;
12896 struct perf_event_header *ehdr;
12897 size_t ehdr_size;
12898
12899 while (data_head != data_tail) {
12900 ehdr = base + (data_tail & (mmap_size - 1));
12901 ehdr_size = ehdr->size;
12902
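/* this record wraps past the end of the mmap'ed ring; reassemble it
 * into the contiguous copy_mem buffer before handing it to fn()
 */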
12903 if (((void *)ehdr) + ehdr_size > base + mmap_size) {
12904 void *copy_start = ehdr;
12905 size_t len_first = base + mmap_size - copy_start;
12906 size_t len_secnd = ehdr_size - len_first;
12907
12908 if (*copy_size < ehdr_size) {
12909 free(*copy_mem);
12910 *copy_mem = malloc(ehdr_size);
12911 if (!*copy_mem) {
12912 *copy_size = 0;
12913 ret = LIBBPF_PERF_EVENT_ERROR;
12914 break;
12915 }
12916 *copy_size = ehdr_size;
12917 }
12918
12919 memcpy(*copy_mem, copy_start, len_first);
12920 memcpy(*copy_mem + len_first, base, len_secnd);
12921 ehdr = *copy_mem;
12922 }
12923
12924 ret = fn(ehdr, private_data);
12925 data_tail += ehdr_size;
12926 if (ret != LIBBPF_PERF_EVENT_CONT)
12927 break;
12928 }
12929
12930 ring_buffer_write_tail(header, data_tail);
12931 return libbpf_err(ret);
12932 }
12933
12934 struct perf_buffer;
12935
12936 struct perf_buffer_params {
12937 struct perf_event_attr *attr;
12938 /* if event_cb is specified, it takes precedence */
12939 perf_buffer_event_fn event_cb;
12940 /* sample_cb and lost_cb are higher-level common-case callbacks */
12941 perf_buffer_sample_fn sample_cb;
12942 perf_buffer_lost_fn lost_cb;
12943 void *ctx;
12944 int cpu_cnt;
12945 int *cpus;
12946 int *map_keys;
12947 };
12948
12949 struct perf_cpu_buf {
12950 struct perf_buffer *pb;
12951 void *base; /* mmap()'ed memory */
12952 void *buf; /* for reconstructing segmented data */
12953 size_t buf_size;
12954 int fd;
12955 int cpu;
12956 int map_key;
12957 };
12958
12959 struct perf_buffer {
12960 perf_buffer_event_fn event_cb;
12961 perf_buffer_sample_fn sample_cb;
12962 perf_buffer_lost_fn lost_cb;
12963 void *ctx; /* passed into callbacks */
12964
12965 size_t page_size;
12966 size_t mmap_size;
12967 struct perf_cpu_buf **cpu_bufs;
12968 struct epoll_event *events;
12969 int cpu_cnt; /* number of allocated CPU buffers */
12970 int epoll_fd; /* perf event FD */
12971 int map_fd; /* BPF_MAP_TYPE_PERF_EVENT_ARRAY BPF map FD */
12972 };
12973
12974 static void perf_buffer__free_cpu_buf(struct perf_buffer *pb,
12975 struct perf_cpu_buf *cpu_buf)
12976 {
12977 if (!cpu_buf)
12978 return;
12979 if (cpu_buf->base &&
12980 munmap(cpu_buf->base, pb->mmap_size + pb->page_size))
12981 pr_warn("failed to munmap cpu_buf #%d\n", cpu_buf->cpu);
12982 if (cpu_buf->fd >= 0) {
12983 ioctl(cpu_buf->fd, PERF_EVENT_IOC_DISABLE, 0);
12984 close(cpu_buf->fd);
12985 }
12986 free(cpu_buf->buf);
12987 free(cpu_buf);
12988 }
12989
12990 void perf_buffer__free(struct perf_buffer *pb)
12991 {
12992 int i;
12993
12994 if (IS_ERR_OR_NULL(pb))
12995 return;
12996 if (pb->cpu_bufs) {
12997 for (i = 0; i < pb->cpu_cnt; i++) {
12998 struct perf_cpu_buf *cpu_buf = pb->cpu_bufs[i];
12999
13000 if (!cpu_buf)
13001 continue;
13002
13003 bpf_map_delete_elem(pb->map_fd, &cpu_buf->map_key);
13004 perf_buffer__free_cpu_buf(pb, cpu_buf);
13005 }
13006 free(pb->cpu_bufs);
13007 }
13008 if (pb->epoll_fd >= 0)
13009 close(pb->epoll_fd);
13010 free(pb->events);
13011 free(pb);
13012 }
13013
13014 static struct perf_cpu_buf *
13015 perf_buffer__open_cpu_buf(struct perf_buffer *pb, struct perf_event_attr *attr,
13016 int cpu, int map_key)
13017 {
13018 struct perf_cpu_buf *cpu_buf;
13019 char msg[STRERR_BUFSIZE];
13020 int err;
13021
13022 cpu_buf = calloc(1, sizeof(*cpu_buf));
13023 if (!cpu_buf)
13024 return ERR_PTR(-ENOMEM);
13025
13026 cpu_buf->pb = pb;
13027 cpu_buf->cpu = cpu;
13028 cpu_buf->map_key = map_key;
13029
13030 cpu_buf->fd = syscall(__NR_perf_event_open, attr, -1 /* pid */, cpu,
13031 -1, PERF_FLAG_FD_CLOEXEC);
13032 if (cpu_buf->fd < 0) {
13033 err = -errno;
13034 pr_warn("failed to open perf buffer event on cpu #%d: %s\n",
13035 cpu, libbpf_strerror_r(err, msg, sizeof(msg)));
13036 goto error;
13037 }
13038
13039 cpu_buf->base = mmap(NULL, pb->mmap_size + pb->page_size,
13040 PROT_READ | PROT_WRITE, MAP_SHARED,
13041 cpu_buf->fd, 0);
13042 if (cpu_buf->base == MAP_FAILED) {
13043 cpu_buf->base = NULL;
13044 err = -errno;
13045 pr_warn("failed to mmap perf buffer on cpu #%d: %s\n",
13046 cpu, libbpf_strerror_r(err, msg, sizeof(msg)));
13047 goto error;
13048 }
13049
13050 if (ioctl(cpu_buf->fd, PERF_EVENT_IOC_ENABLE, 0) < 0) {
13051 err = -errno;
13052 pr_warn("failed to enable perf buffer event on cpu #%d: %s\n",
13053 cpu, libbpf_strerror_r(err, msg, sizeof(msg)));
13054 goto error;
13055 }
13056
13057 return cpu_buf;
13058
13059 error:
13060 perf_buffer__free_cpu_buf(pb, cpu_buf);
13061 return (struct perf_cpu_buf *)ERR_PTR(err);
13062 }
13063
13064 static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt,
13065 struct perf_buffer_params *p);
13066
13067 struct perf_buffer *perf_buffer__new(int map_fd, size_t page_cnt,
13068 perf_buffer_sample_fn sample_cb,
13069 perf_buffer_lost_fn lost_cb,
13070 void *ctx,
13071 const struct perf_buffer_opts *opts)
13072 {
13073 const size_t attr_sz = sizeof(struct perf_event_attr);
13074 struct perf_buffer_params p = {};
13075 struct perf_event_attr attr;
13076 __u32 sample_period;
13077
13078 if (!OPTS_VALID(opts, perf_buffer_opts))
13079 return libbpf_err_ptr(-EINVAL);
13080
13081 sample_period = OPTS_GET(opts, sample_period, 1);
13082 if (!sample_period)
13083 sample_period = 1;
13084
13085 memset(&attr, 0, attr_sz);
13086 attr.size = attr_sz;
13087 attr.config = PERF_COUNT_SW_BPF_OUTPUT;
13088 attr.type = PERF_TYPE_SOFTWARE;
13089 attr.sample_type = PERF_SAMPLE_RAW;
13090 attr.sample_period = sample_period;
13091 attr.wakeup_events = sample_period;
13092
13093 p.attr = &attr;
13094 p.sample_cb = sample_cb;
13095 p.lost_cb = lost_cb;
13096 p.ctx = ctx;
13097
13098 return libbpf_ptr(__perf_buffer__new(map_fd, page_cnt, &p));
13099 }
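/* Illustrative caller-side sketch (map FD and sample callback are
 * assumptions): 8 ring pages per CPU, 100 ms poll timeout:
 *
 *	struct perf_buffer *pb;
 *
 *	pb = perf_buffer__new(map_fd, 8, handle_sample, NULL, NULL, NULL);
 *	while (!stop)
 *		perf_buffer__poll(pb, 100);
 *	perf_buffer__free(pb);
 */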
13100
13101 struct perf_buffer *perf_buffer__new_raw(int map_fd, size_t page_cnt,
13102 struct perf_event_attr *attr,
13103 perf_buffer_event_fn event_cb, void *ctx,
13104 const struct perf_buffer_raw_opts *opts)
13105 {
13106 struct perf_buffer_params p = {};
13107
13108 if (!attr)
13109 return libbpf_err_ptr(-EINVAL);
13110
13111 if (!OPTS_VALID(opts, perf_buffer_raw_opts))
13112 return libbpf_err_ptr(-EINVAL);
13113
13114 p.attr = attr;
13115 p.event_cb = event_cb;
13116 p.ctx = ctx;
13117 p.cpu_cnt = OPTS_GET(opts, cpu_cnt, 0);
13118 p.cpus = OPTS_GET(opts, cpus, NULL);
13119 p.map_keys = OPTS_GET(opts, map_keys, NULL);
13120
13121 return libbpf_ptr(__perf_buffer__new(map_fd, page_cnt, &p));
13122 }
13123
13124 static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt,
13125 struct perf_buffer_params *p)
13126 {
13127 const char *online_cpus_file = "/sys/devices/system/cpu/online";
13128 struct bpf_map_info map;
13129 char msg[STRERR_BUFSIZE];
13130 struct perf_buffer *pb;
13131 bool *online = NULL;
13132 __u32 map_info_len;
13133 int err, i, j, n;
13134
13135 if (page_cnt == 0 || (page_cnt & (page_cnt - 1))) {
13136 pr_warn("page count should be a power of two, but is %zu\n",
13137 page_cnt);
13138 return ERR_PTR(-EINVAL);
13139 }
13140
13141 /* best-effort sanity checks */
13142 memset(&map, 0, sizeof(map));
13143 map_info_len = sizeof(map);
13144 err = bpf_map_get_info_by_fd(map_fd, &map, &map_info_len);
13145 if (err) {
13146 err = -errno;
13147 /* if BPF_OBJ_GET_INFO_BY_FD is supported, will return
13148 * -EBADFD, -EFAULT, or -E2BIG on real error
13149 */
13150 if (err != -EINVAL) {
13151 pr_warn("failed to get map info for map FD %d: %s\n",
13152 map_fd, libbpf_strerror_r(err, msg, sizeof(msg)));
13153 return ERR_PTR(err);
13154 }
13155 pr_debug("failed to get map info for FD %d; API not supported? Ignoring...\n",
13156 map_fd);
13157 } else {
13158 if (map.type != BPF_MAP_TYPE_PERF_EVENT_ARRAY) {
13159 pr_warn("map '%s' should be BPF_MAP_TYPE_PERF_EVENT_ARRAY\n",
13160 map.name);
13161 return ERR_PTR(-EINVAL);
13162 }
13163 }
13164
13165 pb = calloc(1, sizeof(*pb));
13166 if (!pb)
13167 return ERR_PTR(-ENOMEM);
13168
13169 pb->event_cb = p->event_cb;
13170 pb->sample_cb = p->sample_cb;
13171 pb->lost_cb = p->lost_cb;
13172 pb->ctx = p->ctx;
13173
13174 pb->page_size = getpagesize();
13175 pb->mmap_size = pb->page_size * page_cnt;
13176 pb->map_fd = map_fd;
13177
13178 pb->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
13179 if (pb->epoll_fd < 0) {
13180 err = -errno;
13181 pr_warn("failed to create epoll instance: %s\n",
13182 libbpf_strerror_r(err, msg, sizeof(msg)));
13183 goto error;
13184 }
13185
13186 if (p->cpu_cnt > 0) {
13187 pb->cpu_cnt = p->cpu_cnt;
13188 } else {
13189 pb->cpu_cnt = libbpf_num_possible_cpus();
13190 if (pb->cpu_cnt < 0) {
13191 err = pb->cpu_cnt;
13192 goto error;
13193 }
13194 if (map.max_entries && map.max_entries < pb->cpu_cnt)
13195 pb->cpu_cnt = map.max_entries;
13196 }
13197
13198 pb->events = calloc(pb->cpu_cnt, sizeof(*pb->events));
13199 if (!pb->events) {
13200 err = -ENOMEM;
13201 pr_warn("failed to allocate events: out of memory\n");
13202 goto error;
13203 }
13204 pb->cpu_bufs = calloc(pb->cpu_cnt, sizeof(*pb->cpu_bufs));
13205 if (!pb->cpu_bufs) {
13206 err = -ENOMEM;
13207 pr_warn("failed to allocate buffers: out of memory\n");
13208 goto error;
13209 }
13210
13211 err = parse_cpu_mask_file(online_cpus_file, &online, &n);
13212 if (err) {
13213 pr_warn("failed to get online CPU mask: %d\n", err);
13214 goto error;
13215 }
13216
13217 for (i = 0, j = 0; i < pb->cpu_cnt; i++) {
13218 struct perf_cpu_buf *cpu_buf;
13219 int cpu, map_key;
13220
13221 cpu = p->cpu_cnt > 0 ? p->cpus[i] : i;
13222 map_key = p->cpu_cnt > 0 ? p->map_keys[i] : i;
13223
13224 /* in case the user didn't explicitly request particular CPUs to
13225 * attach to, skip offline/not-present CPUs
13226 */
13227 if (p->cpu_cnt <= 0 && (cpu >= n || !online[cpu]))
13228 continue;
13229
13230 cpu_buf = perf_buffer__open_cpu_buf(pb, p->attr, cpu, map_key);
13231 if (IS_ERR(cpu_buf)) {
13232 err = PTR_ERR(cpu_buf);
13233 goto error;
13234 }
13235
13236 pb->cpu_bufs[j] = cpu_buf;
13237
13238 err = bpf_map_update_elem(pb->map_fd, &map_key,
13239 &cpu_buf->fd, 0);
13240 if (err) {
13241 err = -errno;
13242 pr_warn("failed to set cpu #%d, key %d -> perf FD %d: %s\n",
13243 cpu, map_key, cpu_buf->fd,
13244 libbpf_strerror_r(err, msg, sizeof(msg)));
13245 goto error;
13246 }
13247
13248 pb->events[j].events = EPOLLIN;
13249 pb->events[j].data.ptr = cpu_buf;
13250 if (epoll_ctl(pb->epoll_fd, EPOLL_CTL_ADD, cpu_buf->fd,
13251 &pb->events[j]) < 0) {
13252 err = -errno;
13253 pr_warn("failed to epoll_ctl cpu #%d perf FD %d: %s\n",
13254 cpu, cpu_buf->fd,
13255 libbpf_strerror_r(err, msg, sizeof(msg)));
13256 goto error;
13257 }
13258 j++;
13259 }
13260 pb->cpu_cnt = j;
13261 free(online);
13262
13263 return pb;
13264
13265 error:
13266 free(online);
13267 if (pb)
13268 perf_buffer__free(pb);
13269 return ERR_PTR(err);
13270 }
13271
13272 struct perf_sample_raw {
13273 struct perf_event_header header;
13274 uint32_t size;
13275 char data[];
13276 };
13277
13278 struct perf_sample_lost {
13279 struct perf_event_header header;
13280 uint64_t id;
13281 uint64_t lost;
13282 uint64_t sample_id;
13283 };
13284
13285 static enum bpf_perf_event_ret
13286 perf_buffer__process_record(struct perf_event_header *e, void *ctx)
13287 {
13288 struct perf_cpu_buf *cpu_buf = ctx;
13289 struct perf_buffer *pb = cpu_buf->pb;
13290 void *data = e;
13291
13292 /* user wants full control over parsing perf event */
13293 if (pb->event_cb)
13294 return pb->event_cb(pb->ctx, cpu_buf->cpu, e);
13295
13296 switch (e->type) {
13297 case PERF_RECORD_SAMPLE: {
13298 struct perf_sample_raw *s = data;
13299
13300 if (pb->sample_cb)
13301 pb->sample_cb(pb->ctx, cpu_buf->cpu, s->data, s->size);
13302 break;
13303 }
13304 case PERF_RECORD_LOST: {
13305 struct perf_sample_lost *s = data;
13306
13307 if (pb->lost_cb)
13308 pb->lost_cb(pb->ctx, cpu_buf->cpu, s->lost);
13309 break;
13310 }
13311 default:
13312 pr_warn("unknown perf sample type %d\n", e->type);
13313 return LIBBPF_PERF_EVENT_ERROR;
13314 }
13315 return LIBBPF_PERF_EVENT_CONT;
13316 }
13317
13318 static int perf_buffer__process_records(struct perf_buffer *pb,
13319 struct perf_cpu_buf *cpu_buf)
13320 {
13321 enum bpf_perf_event_ret ret;
13322
13323 ret = perf_event_read_simple(cpu_buf->base, pb->mmap_size,
13324 pb->page_size, &cpu_buf->buf,
13325 &cpu_buf->buf_size,
13326 perf_buffer__process_record, cpu_buf);
13327 if (ret != LIBBPF_PERF_EVENT_CONT)
13328 return ret;
13329 return 0;
13330 }
13331
13332 int perf_buffer__epoll_fd(const struct perf_buffer *pb)
13333 {
13334 return pb->epoll_fd;
13335 }
13336
13337 int perf_buffer__poll(struct perf_buffer *pb, int timeout_ms)
13338 {
13339 int i, cnt, err;
13340
13341 cnt = epoll_wait(pb->epoll_fd, pb->events, pb->cpu_cnt, timeout_ms);
13342 if (cnt < 0)
13343 return -errno;
13344
13345 for (i = 0; i < cnt; i++) {
13346 struct perf_cpu_buf *cpu_buf = pb->events[i].data.ptr;
13347
13348 err = perf_buffer__process_records(pb, cpu_buf);
13349 if (err) {
13350 pr_warn("error while processing records: %d\n", err);
13351 return libbpf_err(err);
13352 }
13353 }
13354 return cnt;
13355 }
13356
13357 /* Return number of PERF_EVENT_ARRAY map slots set up by this perf_buffer
13358 * manager.
13359 */
13360 size_t perf_buffer__buffer_cnt(const struct perf_buffer *pb)
13361 {
13362 return pb->cpu_cnt;
13363 }
13364
13365 /*
13366 * Return perf_event FD of a ring buffer in *buf_idx* slot of
13367 * PERF_EVENT_ARRAY BPF map. This FD can be polled for new data using
13368 * select()/poll()/epoll() Linux syscalls.
13369 */
13370 int perf_buffer__buffer_fd(const struct perf_buffer *pb, size_t buf_idx)
13371 {
13372 struct perf_cpu_buf *cpu_buf;
13373
13374 if (buf_idx >= pb->cpu_cnt)
13375 return libbpf_err(-EINVAL);
13376
13377 cpu_buf = pb->cpu_bufs[buf_idx];
13378 if (!cpu_buf)
13379 return libbpf_err(-ENOENT);
13380
13381 return cpu_buf->fd;
13382 }
13383
13384 int perf_buffer__buffer(struct perf_buffer *pb, int buf_idx, void **buf, size_t *buf_size)
13385 {
13386 struct perf_cpu_buf *cpu_buf;
13387
13388 if (buf_idx >= pb->cpu_cnt)
13389 return libbpf_err(-EINVAL);
13390
13391 cpu_buf = pb->cpu_bufs[buf_idx];
13392 if (!cpu_buf)
13393 return libbpf_err(-ENOENT);
13394
13395 *buf = cpu_buf->base;
13396 *buf_size = pb->mmap_size;
13397 return 0;
13398 }
13399
13400 /*
13401 * Consume data from perf ring buffer corresponding to slot *buf_idx* in
13402 * PERF_EVENT_ARRAY BPF map without waiting/polling. If there is no data to
13403 * consume, do nothing and return success.
13404 * Returns:
13405 * - 0 on success;
13406 * - <0 on failure.
13407 */
13408 int perf_buffer__consume_buffer(struct perf_buffer *pb, size_t buf_idx)
13409 {
13410 struct perf_cpu_buf *cpu_buf;
13411
13412 if (buf_idx >= pb->cpu_cnt)
13413 return libbpf_err(-EINVAL);
13414
13415 cpu_buf = pb->cpu_bufs[buf_idx];
13416 if (!cpu_buf)
13417 return libbpf_err(-ENOENT);
13418
13419 return perf_buffer__process_records(pb, cpu_buf);
13420 }
13421
13422 int perf_buffer__consume(struct perf_buffer *pb)
13423 {
13424 int i, err;
13425
13426 for (i = 0; i < pb->cpu_cnt; i++) {
13427 struct perf_cpu_buf *cpu_buf = pb->cpu_bufs[i];
13428
13429 if (!cpu_buf)
13430 continue;
13431
13432 err = perf_buffer__process_records(pb, cpu_buf);
13433 if (err) {
13434 pr_warn("perf_buffer: failed to process records in buffer #%d: %d\n", i, err);
13435 return libbpf_err(err);
13436 }
13437 }
13438 return 0;
13439 }
13440
13441 int bpf_program__set_attach_target(struct bpf_program *prog,
13442 int attach_prog_fd,
13443 const char *attach_func_name)
13444 {
13445 int btf_obj_fd = 0, btf_id = 0, err;
13446
13447 if (!prog || attach_prog_fd < 0)
13448 return libbpf_err(-EINVAL);
13449
13450 if (prog->obj->loaded)
13451 return libbpf_err(-EINVAL);
13452
13453 if (attach_prog_fd && !attach_func_name) {
13454 /* remember attach_prog_fd and let bpf_program__load() find
13455 * BTF ID during the program load
13456 */
13457 prog->attach_prog_fd = attach_prog_fd;
13458 return 0;
13459 }
13460
13461 if (attach_prog_fd) {
13462 btf_id = libbpf_find_prog_btf_id(attach_func_name,
13463 attach_prog_fd);
13464 if (btf_id < 0)
13465 return libbpf_err(btf_id);
13466 } else {
13467 if (!attach_func_name)
13468 return libbpf_err(-EINVAL);
13469
13470 /* load btf_vmlinux, if not yet */
13471 err = bpf_object__load_vmlinux_btf(prog->obj, true);
13472 if (err)
13473 return libbpf_err(err);
13474 err = find_kernel_btf_id(prog->obj, attach_func_name,
13475 prog->expected_attach_type,
13476 &btf_obj_fd, &btf_id);
13477 if (err)
13478 return libbpf_err(err);
13479 }
13480
13481 prog->attach_btf_id = btf_id;
13482 prog->attach_btf_obj_fd = btf_obj_fd;
13483 prog->attach_prog_fd = attach_prog_fd;
13484 return 0;
13485 }
13486
13487 int parse_cpu_mask_str(const char *s, bool **mask, int *mask_sz)
13488 {
13489 int err = 0, n, len, start, end = -1;
13490 bool *tmp;
13491
13492 *mask = NULL;
13493 *mask_sz = 0;
13494
13495 /* Each substring separated by ',' has format \d+-\d+ or \d+ */
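/* e.g., "0-3,5" yields a 6-element mask with CPUs 0, 1, 2, 3 and 5 set */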
13496 while (*s) {
13497 if (*s == ',' || *s == '\n') {
13498 s++;
13499 continue;
13500 }
13501 n = sscanf(s, "%d%n-%d%n", &start, &len, &end, &len);
13502 if (n <= 0 || n > 2) {
13503 pr_warn("Failed to get CPU range %s: %d\n", s, n);
13504 err = -EINVAL;
13505 goto cleanup;
13506 } else if (n == 1) {
13507 end = start;
13508 }
13509 if (start < 0 || start > end) {
13510 pr_warn("Invalid CPU range [%d,%d] in %s\n",
13511 start, end, s);
13512 err = -EINVAL;
13513 goto cleanup;
13514 }
13515 tmp = realloc(*mask, end + 1);
13516 if (!tmp) {
13517 err = -ENOMEM;
13518 goto cleanup;
13519 }
13520 *mask = tmp;
13521 memset(tmp + *mask_sz, 0, start - *mask_sz);
13522 memset(tmp + start, 1, end - start + 1);
13523 *mask_sz = end + 1;
13524 s += len;
13525 }
13526 if (!*mask_sz) {
13527 pr_warn("Empty CPU range\n");
13528 return -EINVAL;
13529 }
13530 return 0;
13531 cleanup:
13532 free(*mask);
13533 *mask = NULL;
13534 return err;
13535 }
13536
13537 int parse_cpu_mask_file(const char *fcpu, bool **mask, int *mask_sz)
13538 {
13539 int fd, err = 0, len;
13540 char buf[128];
13541
13542 fd = open(fcpu, O_RDONLY | O_CLOEXEC);
13543 if (fd < 0) {
13544 err = -errno;
13545 pr_warn("Failed to open cpu mask file %s: %d\n", fcpu, err);
13546 return err;
13547 }
13548 len = read(fd, buf, sizeof(buf));
13549 close(fd);
13550 if (len <= 0) {
13551 err = len ? -errno : -EINVAL;
13552 pr_warn("Failed to read cpu mask from %s: %d\n", fcpu, err);
13553 return err;
13554 }
13555 if (len >= sizeof(buf)) {
13556 pr_warn("CPU mask is too big in file %s\n", fcpu);
13557 return -E2BIG;
13558 }
13559 buf[len] = '\0';
13560
13561 return parse_cpu_mask_str(buf, mask, mask_sz);
13562 }
13563
13564 int libbpf_num_possible_cpus(void)
13565 {
13566 static const char *fcpu = "/sys/devices/system/cpu/possible";
13567 static int cpus;
13568 int err, n, i, tmp_cpus;
13569 bool *mask;
13570
13571 tmp_cpus = READ_ONCE(cpus);
13572 if (tmp_cpus > 0)
13573 return tmp_cpus;
13574
13575 err = parse_cpu_mask_file(fcpu, &mask, &n);
13576 if (err)
13577 return libbpf_err(err);
13578
13579 tmp_cpus = 0;
13580 for (i = 0; i < n; i++) {
13581 if (mask[i])
13582 tmp_cpus++;
13583 }
13584 free(mask);
13585
13586 WRITE_ONCE(cpus, tmp_cpus);
13587 return tmp_cpus;
13588 }
13589
13590 static int populate_skeleton_maps(const struct bpf_object *obj,
13591 struct bpf_map_skeleton *maps,
13592 size_t map_cnt, size_t map_skel_sz)
13593 {
13594 int i;
13595
13596 for (i = 0; i < map_cnt; i++) {
13597 struct bpf_map_skeleton *map_skel = (void *)maps + i * map_skel_sz;
13598 struct bpf_map **map = map_skel->map;
13599 const char *name = map_skel->name;
13600 void **mmaped = map_skel->mmaped;
13601
13602 *map = bpf_object__find_map_by_name(obj, name);
13603 if (!*map) {
13604 pr_warn("failed to find skeleton map '%s'\n", name);
13605 return -ESRCH;
13606 }
13607
13608 /* externs shouldn't be pre-setup from user code */
13609 if (mmaped && (*map)->libbpf_type != LIBBPF_MAP_KCONFIG)
13610 *mmaped = (*map)->mmaped;
13611 }
13612 return 0;
13613 }
13614
13615 static int populate_skeleton_progs(const struct bpf_object *obj,
13616 struct bpf_prog_skeleton *progs,
13617 size_t prog_cnt, size_t prog_skel_sz)
13618 {
13619 int i;
13620
13621 for (i = 0; i < prog_cnt; i++) {
13622 struct bpf_prog_skeleton *prog_skel = (void *)progs + i * prog_skel_sz;
13623 struct bpf_program **prog = prog_skel->prog;
13624 const char *name = prog_skel->name;
13625
13626 *prog = bpf_object__find_program_by_name(obj, name);
13627 if (!*prog) {
13628 pr_warn("failed to find skeleton program '%s'\n", name);
13629 return -ESRCH;
13630 }
13631 }
13632 return 0;
13633 }
13634
13635 int bpf_object__open_skeleton(struct bpf_object_skeleton *s,
13636 const struct bpf_object_open_opts *opts)
13637 {
13638 DECLARE_LIBBPF_OPTS(bpf_object_open_opts, skel_opts,
13639 .object_name = s->name,
13640 );
13641 struct bpf_object *obj;
13642 int err;
13643
13644 /* Attempt to preserve opts->object_name, unless overridden by the user
13645 * explicitly. Overwriting the object name for skeletons is discouraged,
13646 * as it breaks global data maps, whose map names are prefixed with the
13647 * object name. When the skeleton is generated, bpftool assumes that this
13648 * name will stay the same.
13649 */
13650 if (opts) {
13651 memcpy(&skel_opts, opts, sizeof(*opts));
13652 if (!opts->object_name)
13653 skel_opts.object_name = s->name;
13654 }
13655
13656 obj = bpf_object__open_mem(s->data, s->data_sz, &skel_opts);
13657 err = libbpf_get_error(obj);
13658 if (err) {
13659 pr_warn("failed to initialize skeleton BPF object '%s': %d\n",
13660 s->name, err);
13661 return libbpf_err(err);
13662 }
13663
13664 *s->obj = obj;
13665 err = populate_skeleton_maps(obj, s->maps, s->map_cnt, s->map_skel_sz);
13666 if (err) {
13667 pr_warn("failed to populate skeleton maps for '%s': %d\n", s->name, err);
13668 return libbpf_err(err);
13669 }
13670
13671 err = populate_skeleton_progs(obj, s->progs, s->prog_cnt, s->prog_skel_sz);
13672 if (err) {
13673 pr_warn("failed to populate skeleton progs for '%s': %d\n", s->name, err);
13674 return libbpf_err(err);
13675 }
13676
13677 return 0;
13678 }
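/* Illustrative user-side flow with a bpftool-generated skeleton (the
 * "my_prog" name is hypothetical); the generated helpers wrap the
 * bpf_object__*_skeleton() functions in this file:
 *
 *	struct my_prog_bpf *skel = my_prog_bpf__open();
 *
 *	my_prog_bpf__load(skel);
 *	my_prog_bpf__attach(skel);
 *	...
 *	my_prog_bpf__destroy(skel);
 */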
13679
13680 int bpf_object__open_subskeleton(struct bpf_object_subskeleton *s)
13681 {
13682 int err, len, var_idx, i;
13683 const char *var_name;
13684 const struct bpf_map *map;
13685 struct btf *btf;
13686 __u32 map_type_id;
13687 const struct btf_type *map_type, *var_type;
13688 const struct bpf_var_skeleton *var_skel;
13689 struct btf_var_secinfo *var;
13690
13691 if (!s->obj)
13692 return libbpf_err(-EINVAL);
13693
13694 btf = bpf_object__btf(s->obj);
13695 if (!btf) {
13696 pr_warn("subskeletons require BTF at runtime (object %s)\n",
13697 bpf_object__name(s->obj));
13698 return libbpf_err(-errno);
13699 }
13700
13701 err = populate_skeleton_maps(s->obj, s->maps, s->map_cnt, s->map_skel_sz);
13702 if (err) {
13703 pr_warn("failed to populate subskeleton maps: %d\n", err);
13704 return libbpf_err(err);
13705 }
13706
13707 err = populate_skeleton_progs(s->obj, s->progs, s->prog_cnt, s->prog_skel_sz);
13708 if (err) {
13709 pr_warn("failed to populate subskeleton progs: %d\n", err);
13710 return libbpf_err(err);
13711 }
13712
13713 for (var_idx = 0; var_idx < s->var_cnt; var_idx++) {
13714 var_skel = (void *)s->vars + var_idx * s->var_skel_sz;
13715 map = *var_skel->map;
13716 map_type_id = bpf_map__btf_value_type_id(map);
13717 map_type = btf__type_by_id(btf, map_type_id);
13718
13719 if (!btf_is_datasec(map_type)) {
13720 pr_warn("type for map '%1$s' is not a datasec: %2$s\n",
13721 bpf_map__name(map),
13722 __btf_kind_str(btf_kind(map_type)));
13723 return libbpf_err(-EINVAL);
13724 }
13725
13726 len = btf_vlen(map_type);
13727 var = btf_var_secinfos(map_type);
13728 for (i = 0; i < len; i++, var++) {
13729 var_type = btf__type_by_id(btf, var->type);
13730 var_name = btf__name_by_offset(btf, var_type->name_off);
13731 if (strcmp(var_name, var_skel->name) == 0) {
13732 *var_skel->addr = map->mmaped + var->offset;
13733 break;
13734 }
13735 }
13736 }
13737 return 0;
13738 }
13739
13740 void bpf_object__destroy_subskeleton(struct bpf_object_subskeleton *s)
13741 {
13742 if (!s)
13743 return;
13744 free(s->maps);
13745 free(s->progs);
13746 free(s->vars);
13747 free(s);
13748 }
13749
13750 int bpf_object__load_skeleton(struct bpf_object_skeleton *s)
13751 {
13752 int i, err;
13753
13754 err = bpf_object__load(*s->obj);
13755 if (err) {
13756 pr_warn("failed to load BPF skeleton '%s': %d\n", s->name, err);
13757 return libbpf_err(err);
13758 }
13759
13760 for (i = 0; i < s->map_cnt; i++) {
13761 struct bpf_map_skeleton *map_skel = (void *)s->maps + i * s->map_skel_sz;
13762 struct bpf_map *map = *map_skel->map;
13763 size_t mmap_sz = bpf_map_mmap_sz(map);
13764 int prot, map_fd = map->fd;
13765 void **mmaped = map_skel->mmaped;
13766
13767 if (!mmaped)
13768 continue;
13769
13770 if (!(map->def.map_flags & BPF_F_MMAPABLE)) {
13771 *mmaped = NULL;
13772 continue;
13773 }
13774
13775 if (map->def.type == BPF_MAP_TYPE_ARENA) {
13776 *mmaped = map->mmaped;
13777 continue;
13778 }
13779
13780 if (map->def.map_flags & BPF_F_RDONLY_PROG)
13781 prot = PROT_READ;
13782 else
13783 prot = PROT_READ | PROT_WRITE;
13784
13785 /* Remap the anonymous mmap()-ed "map initialization image" as
13786 * BPF map-backed mmap()-ed memory, preserving the same memory
13787 * address. This causes the kernel to change the process's page
13788 * table to point to a different piece of kernel memory, but from
13789 * the userspace point of view the memory address (and its
13790 * contents, identical at this point) stays the same. This mapping
13791 * is released by bpf_object__close() as part of the normal clean-up
13792 * procedure, so we don't need to worry about it from the skeleton's
13793 * clean-up perspective.
13794 */
13795 *mmaped = mmap(map->mmaped, mmap_sz, prot, MAP_SHARED | MAP_FIXED, map_fd, 0);
13796 if (*mmaped == MAP_FAILED) {
13797 err = -errno;
13798 *mmaped = NULL;
13799 pr_warn("failed to re-mmap() map '%s': %d\n",
13800 bpf_map__name(map), err);
13801 return libbpf_err(err);
13802 }
13803 }
13804
13805 return 0;
13806 }
13807
13808 int bpf_object__attach_skeleton(struct bpf_object_skeleton *s)
13809 {
13810 int i, err;
13811
13812 for (i = 0; i < s->prog_cnt; i++) {
13813 struct bpf_prog_skeleton *prog_skel = (void *)s->progs + i * s->prog_skel_sz;
13814 struct bpf_program *prog = *prog_skel->prog;
13815 struct bpf_link **link = prog_skel->link;
13816
13817 if (!prog->autoload || !prog->autoattach)
13818 continue;
13819
13820 /* auto-attaching not supported for this program */
13821 if (!prog->sec_def || !prog->sec_def->prog_attach_fn)
13822 continue;
13823
13824 /* if user already set the link manually, don't attempt auto-attach */
13825 if (*link)
13826 continue;
13827
13828 err = prog->sec_def->prog_attach_fn(prog, prog->sec_def->cookie, link);
13829 if (err) {
13830 pr_warn("prog '%s': failed to auto-attach: %d\n",
13831 bpf_program__name(prog), err);
13832 return libbpf_err(err);
13833 }
13834
13835 /* It's possible that for some SEC() definitions auto-attach
13836 * is supported in some cases (e.g., if definition completely
13837 * specifies target information), but is not in other cases.
13838 * SEC("uprobe") is one such case. If the user specified the target
13839 * binary and function name, such a BPF program can be auto-attached.
13840 * But if not, it shouldn't cause the skeleton's attach to fail; the
13841 * program should simply be skipped. attach_fn signals such a case
13842 * by returning 0 (no error) and setting link to NULL.
13844 */
13845 }
13846
13847 return 0;
13848 }
13849
13850 void bpf_object__detach_skeleton(struct bpf_object_skeleton *s)
13851 {
13852 int i;
13853
13854 for (i = 0; i < s->prog_cnt; i++) {
13855 struct bpf_prog_skeleton *prog_skel = (void *)s->progs + i * s->prog_skel_sz;
13856 struct bpf_link **link = prog_skel->link;
13857
13858 bpf_link__destroy(*link);
13859 *link = NULL;
13860 }
13861 }
13862
13863 void bpf_object__destroy_skeleton(struct bpf_object_skeleton *s)
13864 {
13865 if (!s)
13866 return;
13867
13868 if (s->progs)
13869 bpf_object__detach_skeleton(s);
13870 if (s->obj)
13871 bpf_object__close(*s->obj);
13872 free(s->maps);
13873 free(s->progs);
13874 free(s);
13875 }
13876