// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */

#include <argp.h>

#include <sys/prctl.h>
#include "local_storage_rcu_tasks_trace_bench.skel.h"
#include "bench.h"

#include <signal.h>

static struct {
	__u32 nr_procs;
	__u32 kthread_pid;
	bool quiet;
} args = {
	.nr_procs = 1000,
	.kthread_pid = 0,
	.quiet = false,
};

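/* argp option keys; values above the printable ASCII range make these
 * long-only options with unique ids
 */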
enum {
	ARG_NR_PROCS = 7000,
	ARG_KTHREAD_PID = 7001,
	ARG_QUIET = 7002,
};

static const struct argp_option opts[] = {
	{ "nr_procs", ARG_NR_PROCS, "NR_PROCS", 0,
		"Set number of user processes to spin up"},
	{ "kthread_pid", ARG_KTHREAD_PID, "PID", 0,
		"Pid of rcu_tasks_trace kthread for ticks tracking"},
	{ "quiet", ARG_QUIET, "{0,1}", 0,
		"If true, don't report progress"},
	{},
};

static error_t parse_arg(int key, char *arg, struct argp_state *state)
{
	long ret;

	switch (key) {
	case ARG_NR_PROCS:
		ret = strtol(arg, NULL, 10);
		if (ret < 1 || ret > UINT_MAX) {
			fprintf(stderr, "invalid nr_procs\n");
			argp_usage(state);
		}
		args.nr_procs = ret;
		break;
	case ARG_KTHREAD_PID:
		ret = strtol(arg, NULL, 10);
		if (ret < 1) {
			fprintf(stderr, "invalid kthread_pid\n");
			argp_usage(state);
		}
		args.kthread_pid = ret;
		break;
	case ARG_QUIET:
		ret = strtol(arg, NULL, 10);
		if (ret < 0 || ret > 1) {
			fprintf(stderr, "invalid quiet %ld\n", ret);
			argp_usage(state);
		}
		args.quiet = ret;
		break;
	default:
		return ARGP_ERR_UNKNOWN;
	}

	return 0;
}

const struct argp bench_local_storage_rcu_tasks_trace_argp = {
	.options = opts,
	.parser = parse_arg,
};

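/* Sanity cap on --nr_procs, enforced by validate() below */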
#define MAX_SLEEP_PROCS 150000

static void validate(void)
{
	if (env.producer_cnt != 1) {
		fprintf(stderr, "benchmark doesn't support multi-producer!\n");
		exit(1);
	}
	if (env.consumer_cnt != 1) {
		fprintf(stderr, "benchmark doesn't support multi-consumer!\n");
		exit(1);
	}

	if (args.nr_procs > MAX_SLEEP_PROCS) {
		fprintf(stderr, "benchmark supports up to %u sleeper procs!\n",
			MAX_SLEEP_PROCS);
		exit(1);
	}
}

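/* Read the kthread's cumulative system-mode CPU time, in clock ticks: field
 * 15 (stime) of /proc/PID/stat. Returns -1 when no --kthread_pid was given.
 */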
static long kthread_pid_ticks(void)
{
	char procfs_path[100];
	long stime;
	FILE *f;

	if (!args.kthread_pid)
		return -1;

	sprintf(procfs_path, "/proc/%u/stat", args.kthread_pid);
	f = fopen(procfs_path, "r");
	if (!f) {
		fprintf(stderr, "couldn't open %s, exiting\n", procfs_path);
		goto err_out;
	}
	if (fscanf(f, "%*s %*s %*s %*s %*s %*s %*s %*s %*s %*s %*s %*s %*s %*s %ld", &stime) != 1) {
		fprintf(stderr, "fscanf of %s failed, exiting\n", procfs_path);
		goto err_out;
	}
	fclose(f);
	return stime;

err_out:
	if (f)
		fclose(f);
	exit(1);
	return 0;
}

static struct {
	struct local_storage_rcu_tasks_trace_bench *skel;
	long prev_kthread_stime;
} ctx;

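/* Sleeper body: do no work besides sleeping, plus a getpgid() call on each
 * wakeup so the attached progs can exercise task local storage create/delete
 * (see the comment at the bottom of this file).
 */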
static void sleep_and_loop(void)
{
	while (true) {
		sleep(rand() % 4);
		syscall(__NR_getpgid);
	}
}

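/* Fork args.nr_procs sleeper children, then open, load, and attach the
 * benchmark skeleton in the runner.
 */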
static void local_storage_tasks_trace_setup(void)
{
	int i, err, forkret, runner_pid;

	runner_pid = getpid();

	for (i = 0; i < args.nr_procs; i++) {
		forkret = fork();
		if (forkret < 0) {
			fprintf(stderr, "Error forking sleeper proc %u of %u, exiting\n", i,
				args.nr_procs);
			goto err_out;
		}

		if (!forkret) {
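			/* Child: die with the runner, and re-check the
			 * parent to close the race where it exited before
			 * the prctl took effect.
			 */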
			err = prctl(PR_SET_PDEATHSIG, SIGKILL);
			if (err < 0) {
				fprintf(stderr, "prctl failed with err %d, exiting\n", errno);
				goto err_out;
			}

			if (getppid() != runner_pid) {
				fprintf(stderr, "Runner died while spinning up procs, exiting\n");
				goto err_out;
			}
			sleep_and_loop();
		}
	}
	printf("Spun up %u procs (our pid %d)\n", args.nr_procs, runner_pid);

	setup_libbpf();

	ctx.skel = local_storage_rcu_tasks_trace_bench__open_and_load();
	if (!ctx.skel) {
		fprintf(stderr, "Error doing open_and_load, exiting\n");
		goto err_out;
	}

	ctx.prev_kthread_stime = kthread_pid_ticks();

	if (!bpf_program__attach(ctx.skel->progs.get_local)) {
		fprintf(stderr, "Error attaching bpf program\n");
		goto err_out;
	}

	if (!bpf_program__attach(ctx.skel->progs.pregp_step)) {
		fprintf(stderr, "Error attaching bpf program\n");
		goto err_out;
	}

	if (!bpf_program__attach(ctx.skel->progs.postgp)) {
		fprintf(stderr, "Error attaching bpf program\n");
		goto err_out;
	}

	return;
err_out:
	exit(1);
}

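/* Drain the counters the BPF progs accumulate: atomic_swap() resets them so
 * each iteration reports only its own grace periods. Kthread stime is
 * cumulative, so report it as a delta from the previous reading.
 */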
static void measure(struct bench_res *res)
{
	long ticks;

	res->gp_ct = atomic_swap(&ctx.skel->bss->gp_hits, 0);
	res->gp_ns = atomic_swap(&ctx.skel->bss->gp_times, 0);
	ticks = kthread_pid_ticks();
	res->stime = ticks - ctx.prev_kthread_stime;
	ctx.prev_kthread_stime = ticks;
}

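/* All measurement happens in measure(); there is no consumer work to do */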
static void *consumer(void *input)
{
	return NULL;
}

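/* Hammer getpgid() from the producer so task local storage is created and
 * destroyed as fast as possible, keeping call_rcu_tasks_trace busy while the
 * sleepers inflate the task count.
 */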
static void *producer(void *input)
{
	while (true)
		syscall(__NR_getpgid);
	return NULL;
}

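/* The skeleton sets unexpected when pregp_step and postgp fire out of order,
 * in which case the latency samples are meaningless.
 */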
static void report_progress(int iter, struct bench_res *res, long delta_ns)
{
	if (ctx.skel->bss->unexpected) {
		fprintf(stderr, "Error: Unexpected order of bpf prog calls (postgp after pregp).\n");
		fprintf(stderr, "Data can't be trusted, exiting\n");
		exit(1);
	}

	if (args.quiet)
		return;

	printf("Iter %d\t avg tasks_trace grace period latency\t%lf ns\n",
	       iter, res->gp_ns / (double)res->gp_ct);
	printf("Iter %d\t avg ticks per tasks_trace grace period\t%lf\n",
	       iter, res->stime / (double)res->gp_ct);
}

static void report_final(struct bench_res res[], int res_cnt)
{
	struct basic_stats gp_stat;

	grace_period_latency_basic_stats(res, res_cnt, &gp_stat);
	printf("SUMMARY tasks_trace grace period latency");
	printf("\tavg %.3lf us\tstddev %.3lf us\n", gp_stat.mean, gp_stat.stddev);
	grace_period_ticks_basic_stats(res, res_cnt, &gp_stat);
	printf("SUMMARY ticks per tasks_trace grace period");
	printf("\tavg %.3lf\tstddev %.3lf\n", gp_stat.mean, gp_stat.stddev);
}

/* local-storage-tasks-trace: Benchmark performance of BPF local_storage's use
 * of RCU Tasks-Trace.
 *
 * Stress RCU Tasks-Trace by forking many tasks, all of which do no work aside
 * from a sleep() loop, and creating/destroying BPF task-local storage on
 * wakeup. The number of forked tasks is configurable.
 *
 * Exercising code paths which call call_rcu_tasks_trace while there are many
 * thousands of tasks on the system should result in RCU Tasks-Trace having to
 * do a noticeable amount of work.
 *
 * This should be observable by measuring rcu_tasks_trace_kthread CPU usage
 * after the grace period has ended, or by measuring grace period latency.
 *
 * This benchmark uses both approaches, attaching to the
 * rcu_tasks_trace_pregp_step and rcu_tasks_trace_postgp functions to measure
 * grace period latency, and using /proc/PID/stat to measure
 * rcu_tasks_trace_kthread kernel ticks.
 */
const struct bench bench_local_storage_tasks_trace = {
	.name = "local-storage-tasks-trace",
	.validate = validate,
	.setup = local_storage_tasks_trace_setup,
	.producer_thread = producer,
	.consumer_thread = consumer,
	.measure = measure,
	.report_progress = report_progress,
	.report_final = report_final,
};