// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2021 Facebook */

#define _GNU_SOURCE         /* See feature_test_macros(7) */
#include <unistd.h>
#include <sched.h>
#include <pthread.h>
#include <sys/syscall.h>   /* For SYS_xxx definitions */
#include <sys/types.h>
#include <sys/eventfd.h>
#include <sys/mman.h>
#include <test_progs.h>
#include <bpf/btf.h>
#include "task_local_storage_helpers.h"
#include "task_local_storage.skel.h"
#include "task_local_storage_exit_creds.skel.h"
#include "task_ls_recursion.skel.h"
#include "task_storage_nodeadlock.skel.h"
#include "uptr_test_common.h"
#include "task_ls_uptr.skel.h"
#include "uptr_update_failure.skel.h"
#include "uptr_failure.skel.h"
#include "uptr_map_failure.skel.h"

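/* Check that the skeleton's sys_enter/sys_exit programs count the syscalls
 * made by target_pid and that no enter/exit storage mismatch is observed.
 */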
static void test_sys_enter_exit(void)
{
	struct task_local_storage *skel;
	int err;

	skel = task_local_storage__open_and_load();
	if (!ASSERT_OK_PTR(skel, "skel_open_and_load"))
		return;

	skel->bss->target_pid = sys_gettid();

	err = task_local_storage__attach(skel);
	if (!ASSERT_OK(err, "skel_attach"))
		goto out;

	sys_gettid();
	sys_gettid();

	/* 3x syscalls: 1x attach and 2x gettid */
	ASSERT_EQ(skel->bss->enter_cnt, 3, "enter_cnt");
	ASSERT_EQ(skel->bss->exit_cnt, 3, "exit_cnt");
	ASSERT_EQ(skel->bss->mismatch_cnt, 0, "mismatch_cnt");
out:
	task_local_storage__destroy(skel);
}

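/* Run a short-lived process so exit_creds() fires at least once, then loop
 * on kern_sync_rcu() until the attached program has run and updated its
 * null/valid pointer counters.
 */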
static void test_exit_creds(void)
{
	struct task_local_storage_exit_creds *skel;
	int err, run_count, sync_rcu_calls = 0;
	const int MAX_SYNC_RCU_CALLS = 1000;

	skel = task_local_storage_exit_creds__open_and_load();
	if (!ASSERT_OK_PTR(skel, "skel_open_and_load"))
		return;

	err = task_local_storage_exit_creds__attach(skel);
	if (!ASSERT_OK(err, "skel_attach"))
		goto out;

	/* trigger at least one exit_creds() */
	if (CHECK_FAIL(system("ls > /dev/null")))
		goto out;

	/* kern_sync_rcu is not enough on its own as the read section we want
	 * to wait for may start after we enter synchronize_rcu, so our call
	 * won't wait for the section to finish. Loop on the run counter
	 * as well to ensure the program has run.
	 */
	do {
		kern_sync_rcu();
		run_count = __atomic_load_n(&skel->bss->run_count, __ATOMIC_SEQ_CST);
	} while (run_count == 0 && ++sync_rcu_calls < MAX_SYNC_RCU_CALLS);

	ASSERT_NEQ(sync_rcu_calls, MAX_SYNC_RCU_CALLS,
		   "sync_rcu count too high");
	ASSERT_NEQ(run_count, 0, "run_count");
	ASSERT_EQ(skel->bss->valid_ptr_count, 0, "valid_ptr_count");
	ASSERT_NEQ(skel->bss->null_ptr_count, 0, "null_ptr_count");
out:
	task_local_storage_exit_creds__destroy(skel);
}

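/* Trigger nested task storage operations from the skeleton's programs and
 * verify the resulting map values, that the recursive delete was rejected
 * as busy, and that no prog recursion misses were recorded.
 */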
static void test_recursion(void)
{
	int err, map_fd, prog_fd, task_fd;
	struct task_ls_recursion *skel;
	struct bpf_prog_info info;
	__u32 info_len = sizeof(info);
	long value;

	task_fd = sys_pidfd_open(getpid(), 0);
	if (!ASSERT_NEQ(task_fd, -1, "sys_pidfd_open"))
		return;

	skel = task_ls_recursion__open_and_load();
	if (!ASSERT_OK_PTR(skel, "skel_open_and_load"))
		goto out;

	err = task_ls_recursion__attach(skel);
	if (!ASSERT_OK(err, "skel_attach"))
		goto out;

	/* trigger sys_enter, make sure it does not cause deadlock */
	skel->bss->test_pid = getpid();
	sys_gettid();
	skel->bss->test_pid = 0;
	task_ls_recursion__detach(skel);

	/* Refer to the comment in BPF_PROG(on_update) for an explanation
	 * of the values 201 and 100.
	 */
	map_fd = bpf_map__fd(skel->maps.map_a);
	err = bpf_map_lookup_elem(map_fd, &task_fd, &value);
	ASSERT_OK(err, "lookup map_a");
	ASSERT_EQ(value, 201, "map_a value");
	ASSERT_EQ(skel->bss->nr_del_errs, 1, "bpf_task_storage_delete busy");

	map_fd = bpf_map__fd(skel->maps.map_b);
	err = bpf_map_lookup_elem(map_fd, &task_fd, &value);
	ASSERT_OK(err, "lookup map_b");
	ASSERT_EQ(value, 100, "map_b value");

	prog_fd = bpf_program__fd(skel->progs.on_update);
	memset(&info, 0, sizeof(info));
	err = bpf_prog_get_info_by_fd(prog_fd, &info, &info_len);
	ASSERT_OK(err, "get prog info");
	ASSERT_EQ(info.recursion_misses, 0, "on_update prog recursion");

	prog_fd = bpf_program__fd(skel->progs.on_enter);
	memset(&info, 0, sizeof(info));
	err = bpf_prog_get_info_by_fd(prog_fd, &info, &info_len);
	ASSERT_OK(err, "get prog info");
	ASSERT_EQ(info.recursion_misses, 0, "on_enter prog recursion");

out:
	close(task_fd);
	task_ls_recursion__destroy(skel);
}

static bool stop;

static void waitall(const pthread_t *tids, int nr)
{
	int i;

	stop = true;
	for (i = 0; i < nr; i++)
		pthread_join(tids[i], NULL);
}

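/* Worker thread: create and close sockets in a loop to keep triggering the
 * socket hooks; stop early if the BPF side starts reporting errors.
 */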
static void *sock_create_loop(void *arg)
{
	struct task_storage_nodeadlock *skel = arg;
	int fd;

	while (!stop) {
		fd = socket(AF_INET, SOCK_STREAM, 0);
		close(fd);
		if (skel->bss->nr_get_errs || skel->bss->nr_del_errs)
			stop = true;
	}

	return NULL;
}

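/* Hammer the sleepable BPF program from many threads pinned to one CPU and
 * verify that the task storage helpers never return a spurious "busy" error
 * and that no recursion misses are counted.
 */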
static void test_nodeadlock(void)
{
	struct task_storage_nodeadlock *skel;
	struct bpf_prog_info info = {};
	__u32 info_len = sizeof(info);
	const int nr_threads = 32;
	pthread_t tids[nr_threads];
	int i, prog_fd, err;
	cpu_set_t old, new;

	/* Pin all threads to one cpu to increase the chance of preemption
	 * in a sleepable bpf prog.
	 */
	CPU_ZERO(&new);
	CPU_SET(0, &new);
	err = sched_getaffinity(getpid(), sizeof(old), &old);
	if (!ASSERT_OK(err, "getaffinity"))
		return;
	err = sched_setaffinity(getpid(), sizeof(new), &new);
	if (!ASSERT_OK(err, "setaffinity"))
		return;

	skel = task_storage_nodeadlock__open_and_load();
	if (!ASSERT_OK_PTR(skel, "open_and_load"))
		goto done;

	/* Unnecessary recursion and deadlock detection are reproducible
	 * in the preemptible kernel.
	 */
	if (!skel->kconfig->CONFIG_PREEMPTION) {
		test__skip();
		goto done;
	}

	err = task_storage_nodeadlock__attach(skel);
	ASSERT_OK(err, "attach prog");

	for (i = 0; i < nr_threads; i++) {
		err = pthread_create(&tids[i], NULL, sock_create_loop, skel);
		if (err) {
			/* Only assert once here to avoid excessive
			 * PASS printing during test failure.
			 */
			ASSERT_OK(err, "pthread_create");
			waitall(tids, i);
			goto done;
		}
	}

	/* With 32 threads, 1s is enough to reproduce the issue */
	sleep(1);
	waitall(tids, nr_threads);

	info_len = sizeof(info);
	prog_fd = bpf_program__fd(skel->progs.socket_post_create);
	err = bpf_prog_get_info_by_fd(prog_fd, &info, &info_len);
	ASSERT_OK(err, "get prog info");
	ASSERT_EQ(info.recursion_misses, 0, "prog recursion");

	ASSERT_EQ(skel->bss->nr_get_errs, 0, "bpf_task_storage_get busy");
	ASSERT_EQ(skel->bss->nr_del_errs, 0, "bpf_task_storage_delete busy");

done:
	task_storage_nodeadlock__destroy(skel);
	sched_setaffinity(getpid(), sizeof(old), &old);
}

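/* User-space data referenced as uptrs from the task storage map value.
 * The BPF side reads a/b and writes result/nested_result.
 */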
static struct user_data udata __attribute__((aligned(16))) = {
	.a = 1,
	.b = 2,
};

static struct user_data udata2 __attribute__((aligned(16))) = {
	.a = 3,
	.b = 4,
};

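/* Reset the result fields, make a syscall so the attached program runs
 * again, then check whether the expected value was written back through
 * the current uptr (0 means nothing should have been written).
 */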
static void check_udata2(int expected)
{
	udata2.result = udata2.nested_result = 0;
	usleep(1);
	ASSERT_EQ(udata2.result, expected, "udata2.result");
	ASSERT_EQ(udata2.nested_result, expected, "udata2.nested_result");
}

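/* Pin user memory into a task storage map value via uptr fields, check that
 * a forked child's syscalls make the BPF program operate on the parent's
 * data, exercise uptr updates (swap, clear, re-set, delete), and make sure
 * userspace lookups never return kernel pointers.
 */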
static void test_uptr_basic(void)
{
	int map_fd, parent_task_fd, ev_fd;
	struct value_type value = {};
	struct task_ls_uptr *skel;
	pid_t child_pid, my_tid;
	__u64 ev_dummy_data = 1;
	int err;

	my_tid = sys_gettid();
	parent_task_fd = sys_pidfd_open(my_tid, 0);
	if (!ASSERT_OK_FD(parent_task_fd, "parent_task_fd"))
		return;

	ev_fd = eventfd(0, 0);
	if (!ASSERT_OK_FD(ev_fd, "ev_fd")) {
		close(parent_task_fd);
		return;
	}

	skel = task_ls_uptr__open_and_load();
	if (!ASSERT_OK_PTR(skel, "skel_open_and_load"))
		goto out;

	map_fd = bpf_map__fd(skel->maps.datamap);
	value.udata = &udata;
	value.nested.udata = &udata;
	err = bpf_map_update_elem(map_fd, &parent_task_fd, &value, BPF_NOEXIST);
	if (!ASSERT_OK(err, "update_elem(udata)"))
		goto out;

	err = task_ls_uptr__attach(skel);
	if (!ASSERT_OK(err, "skel_attach"))
		goto out;

	child_pid = fork();
	if (!ASSERT_NEQ(child_pid, -1, "fork"))
		goto out;

	/* Call syscall in the child process, but access the map value of
	 * the parent process in the BPF program to check if the user kptr
	 * is translated/mapped correctly.
	 */
	if (child_pid == 0) {
		/* child */

		/* Overwrite the user_data in the child process to check if
		 * the BPF program accesses the user_data of the parent.
		 */
		udata.a = 0;
		udata.b = 0;

		/* Wait for the parent to set child_pid */
		read(ev_fd, &ev_dummy_data, sizeof(ev_dummy_data));
		exit(0);
	}

	skel->bss->parent_pid = my_tid;
	skel->bss->target_pid = child_pid;

	write(ev_fd, &ev_dummy_data, sizeof(ev_dummy_data));

	err = waitpid(child_pid, NULL, 0);
	ASSERT_EQ(err, child_pid, "waitpid");
	ASSERT_EQ(udata.result, MAGIC_VALUE + udata.a + udata.b, "udata.result");
	ASSERT_EQ(udata.nested_result, MAGIC_VALUE + udata.a + udata.b, "udata.nested_result");

	skel->bss->target_pid = my_tid;

	/* update_elem: uptr changes from udata to udata2 */
	value.udata = &udata2;
	value.nested.udata = &udata2;
	err = bpf_map_update_elem(map_fd, &parent_task_fd, &value, BPF_EXIST);
	if (!ASSERT_OK(err, "update_elem(udata2)"))
		goto out;
	check_udata2(MAGIC_VALUE + udata2.a + udata2.b);

	/* update_elem: uptr changes from udata2 to NULL */
	memset(&value, 0, sizeof(value));
	err = bpf_map_update_elem(map_fd, &parent_task_fd, &value, BPF_EXIST);
	if (!ASSERT_OK(err, "update_elem(udata2)"))
		goto out;
	check_udata2(0);

	/* update_elem: uptr changes from NULL to udata2 */
	value.udata = &udata2;
	value.nested.udata = &udata2;
	err = bpf_map_update_elem(map_fd, &parent_task_fd, &value, BPF_EXIST);
	if (!ASSERT_OK(err, "update_elem(udata2)"))
		goto out;
	check_udata2(MAGIC_VALUE + udata2.a + udata2.b);

	/* Check if user programs can access the value of user kptrs
	 * through bpf_map_lookup_elem(). Make sure the kernel value is not
	 * leaked.
	 */
	err = bpf_map_lookup_elem(map_fd, &parent_task_fd, &value);
	if (!ASSERT_OK(err, "bpf_map_lookup_elem"))
		goto out;
	ASSERT_EQ(value.udata, NULL, "value.udata");
	ASSERT_EQ(value.nested.udata, NULL, "value.nested.udata");

	/* delete_elem */
	err = bpf_map_delete_elem(map_fd, &parent_task_fd);
	ASSERT_OK(err, "delete_elem(udata2)");
	check_udata2(0);

	/* update_elem: add uptr back to test map_free */
	value.udata = &udata2;
	value.nested.udata = &udata2;
	err = bpf_map_update_elem(map_fd, &parent_task_fd, &value, BPF_NOEXIST);
	ASSERT_OK(err, "update_elem(udata2)");

out:
	task_ls_uptr__destroy(skel);
	close(ev_fd);
	close(parent_task_fd);
}

static void test_uptr_across_pages(void)
{
	int page_size = getpagesize();
	struct value_type value = {};
	struct task_ls_uptr *skel;
	int err, task_fd, map_fd;
	void *mem;

	task_fd = sys_pidfd_open(getpid(), 0);
	if (!ASSERT_OK_FD(task_fd, "task_fd"))
		return;

	mem = mmap(NULL, page_size * 2, PROT_READ | PROT_WRITE,
		   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (!ASSERT_OK_PTR(mem, "mmap(page_size * 2)")) {
		close(task_fd);
		return;
	}

	skel = task_ls_uptr__open_and_load();
	if (!ASSERT_OK_PTR(skel, "skel_open_and_load"))
		goto out;

	map_fd = bpf_map__fd(skel->maps.datamap);
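	/* Place udata so that it straddles the boundary between the two
	 * mmapped pages; a uptr spanning pages is expected to be rejected.
	 */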
	value.udata = mem + page_size - offsetof(struct user_data, b);
	err = bpf_map_update_elem(map_fd, &task_fd, &value, 0);
	if (!ASSERT_ERR(err, "update_elem(udata)"))
		goto out;
	ASSERT_EQ(errno, EOPNOTSUPP, "errno");

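	/* With udata fully contained in the first page, the update succeeds. */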
	value.udata = mem + page_size - sizeof(struct user_data);
	err = bpf_map_update_elem(map_fd, &task_fd, &value, 0);
	ASSERT_OK(err, "update_elem(udata)");

out:
	task_ls_uptr__destroy(skel);
	close(task_fd);
	munmap(mem, page_size * 2);
}

static void test_uptr_update_failure(void)
{
	struct value_lock_type value = {};
	struct uptr_update_failure *skel;
	int err, task_fd, map_fd;

	task_fd = sys_pidfd_open(getpid(), 0);
	if (!ASSERT_OK_FD(task_fd, "task_fd"))
		return;

	skel = uptr_update_failure__open_and_load();
	if (!ASSERT_OK_PTR(skel, "skel_open_and_load"))
		goto out;

	map_fd = bpf_map__fd(skel->maps.datamap);

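	/* BPF_F_LOCK updates are expected to be refused on a map value that
	 * carries uptr fields.
	 */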
	value.udata = &udata;
	err = bpf_map_update_elem(map_fd, &task_fd, &value, BPF_F_LOCK);
	if (!ASSERT_ERR(err, "update_elem(udata, BPF_F_LOCK)"))
		goto out;
	ASSERT_EQ(errno, EOPNOTSUPP, "errno");

	err = bpf_map_update_elem(map_fd, &task_fd, &value, BPF_EXIST);
	if (!ASSERT_ERR(err, "update_elem(udata, BPF_EXIST)"))
		goto out;
	ASSERT_EQ(errno, ENOENT, "errno");

	err = bpf_map_update_elem(map_fd, &task_fd, &value, BPF_NOEXIST);
	if (!ASSERT_OK(err, "update_elem(udata, BPF_NOEXIST)"))
		goto out;

	value.udata = &udata2;
	err = bpf_map_update_elem(map_fd, &task_fd, &value, BPF_NOEXIST);
	if (!ASSERT_ERR(err, "update_elem(udata2, BPF_NOEXIST)"))
		goto out;
	ASSERT_EQ(errno, EEXIST, "errno");

out:
	uptr_update_failure__destroy(skel);
	close(task_fd);
}

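/* Re-create the skeleton's map by hand, reusing its BTF, key/value types and
 * flags, and verify the kernel rejects the bad uptr layout with the errno
 * the caller expects.
 */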
static void test_uptr_map_failure(const char *map_name, int expected_errno)
{
	LIBBPF_OPTS(bpf_map_create_opts, create_attr);
	struct uptr_map_failure *skel;
	struct bpf_map *map;
	struct btf *btf;
	int map_fd, err;

	skel = uptr_map_failure__open();
	if (!ASSERT_OK_PTR(skel, "uptr_map_failure__open"))
		return;

	map = bpf_object__find_map_by_name(skel->obj, map_name);
	btf = bpf_object__btf(skel->obj);
	err = btf__load_into_kernel(btf);
	if (!ASSERT_OK(err, "btf__load_into_kernel"))
		goto done;

	create_attr.map_flags = bpf_map__map_flags(map);
	create_attr.btf_fd = btf__fd(btf);
	create_attr.btf_key_type_id = bpf_map__btf_key_type_id(map);
	create_attr.btf_value_type_id = bpf_map__btf_value_type_id(map);
	map_fd = bpf_map_create(bpf_map__type(map), map_name,
				bpf_map__key_size(map), bpf_map__value_size(map),
				0, &create_attr);
	if (ASSERT_ERR_FD(map_fd, "map_create"))
		ASSERT_EQ(errno, expected_errno, "errno");
	else
		close(map_fd);

done:
	uptr_map_failure__destroy(skel);
}

void test_task_local_storage(void)
{
	if (test__start_subtest("sys_enter_exit"))
		test_sys_enter_exit();
	if (test__start_subtest("exit_creds"))
		test_exit_creds();
	if (test__start_subtest("recursion"))
		test_recursion();
	if (test__start_subtest("nodeadlock"))
		test_nodeadlock();
	if (test__start_subtest("uptr_basic"))
		test_uptr_basic();
	if (test__start_subtest("uptr_across_pages"))
		test_uptr_across_pages();
	if (test__start_subtest("uptr_update_failure"))
		test_uptr_update_failure();
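	/* The E2BIG case depends on the page size large_uptr_map was sized
	 * for, so it is only run when the runtime page size matches PAGE_SIZE.
	 */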
	if (test__start_subtest("uptr_map_failure_e2big")) {
		if (getpagesize() == PAGE_SIZE)
			test_uptr_map_failure("large_uptr_map", E2BIG);
		else
			test__skip();
	}
	if (test__start_subtest("uptr_map_failure_size0"))
		test_uptr_map_failure("empty_uptr_map", EINVAL);
	if (test__start_subtest("uptr_map_failure_kstruct"))
		test_uptr_map_failure("kstruct_uptr_map", EINVAL);
	RUN_TESTS(uptr_failure);
}