// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (c) 2009 Paul Mackerras <[email protected]>
 * Copyright (c) 2014-2022 Linux Test Project
 */
/*
 * This test program checks whether software counters (specifically, the
 * task clock counter) work correctly when they are in a group with
 * hardware counters.
 *
 * It creates several groups, each with one hardware counter counting
 * instructions plus a task clock counter.  It determines the number N of
 * hardware counters that can count instructions (see
 * count_hardware_counters(), or a fixed value on s390) and creates N+4
 * groups to force them to be multiplexed.  It also creates an overall
 * task clock counter.
 *
 * It then spins for a while, stops all the counters and reads them.  It
 * takes the total of the task clock counters in the groups and computes
 * the ratio of that total to the overall execution time reported by the
 * overall task clock counter.
 *
 * That ratio should be equal to the number of actual hardware counters
 * that can count instructions.  If the task clock counters in the groups
 * don't stop when their group gets taken off the PMU, the ratio will
 * instead be close to N+4.  The test is declared failed if the ratio is
 * greater than N (actually, N + 0.005 to allow for FP rounding errors
 * and RT throttling overhead).
 */
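/*
 * Worked example (illustrative numbers only): suppose 4 hardware counters
 * can count instructions, so 4 + 4 = 8 groups are created.  At any moment
 * at most 4 groups fit on the PMU, so if the per-group task clocks stop
 * whenever their group is scheduled out, their sum stays around 4 times
 * the overall task clock and the ratio stays at or below 4.  If they keep
 * running regardless, all 8 groups accumulate the full run time and the
 * ratio comes out near 8, which is reported as a failure.
 */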

#define _GNU_SOURCE
#include <errno.h>
#include <sched.h>
#include <signal.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/prctl.h>
#include <sys/time.h>
#include <sys/types.h>

#include "config.h"
#include "tst_test.h"
#include "lapi/cpuset.h"
#include "lapi/syscalls.h"

#include "perf_event_open.h"

#define MAX_CTRS	1000

struct read_format {
	unsigned long long value;
	/* if PERF_FORMAT_TOTAL_TIME_ENABLED */
	unsigned long long time_enabled;
	/* if PERF_FORMAT_TOTAL_TIME_RUNNING */
	unsigned long long time_running;
};

static char *verbose;

/* number of groups created and number of usable hardware counters */
static int ntotal, nhw;
/* overall task clock counter fd and per-group hw/task clock counter fds */
static int tsk0 = -1, hwfd[MAX_CTRS], tskfd[MAX_CTRS];
static int volatile work_done;
/* busy loop iterations calibrated by bench_work() */
static unsigned int est_loops;

/* enable or disable all perf event counters attached to the current task */
static void all_counters_set(int state)
{
	if (prctl(state) == -1)
		tst_brk(TBROK | TERRNO, "prctl(%d) failed", state);
}

static void alarm_handler(int sig LTP_ATTRIBUTE_UNUSED)
{
	work_done = 1;
}

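/*
 * Calibrate the CPU-burning loop: run it until SIGALRM fires after time_ms
 * milliseconds and record in est_loops how many outer iterations fit into
 * that window.  do_work(mult) below then replays roughly mult times that
 * amount of work without needing a timer.
 */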
static void bench_work(int time_ms)
{
	unsigned int i;
	struct itimerval val;
	struct sigaction sa;

	memset(&sa, 0, sizeof(sa));
	sa.sa_handler = alarm_handler;
	sa.sa_flags = SA_RESETHAND;
	SAFE_SIGACTION(SIGALRM, &sa, NULL);

	work_done = 0;
	memset(&val, 0, sizeof(val));
	val.it_value.tv_sec = time_ms / 1000;
	val.it_value.tv_usec = (time_ms % 1000) * 1000;

	if (setitimer(ITIMER_REAL, &val, NULL))
		tst_brk(TBROK | TERRNO, "setitimer");

	while (!work_done) {
		for (i = 0; i < 100000; ++i)
			asm volatile (""::"g" (i));
		est_loops++;
	}

	tst_res(TINFO, "bench_work estimated loops = %u in %d ms", est_loops, time_ms);
}

static void do_work(int mult)
{
	unsigned long i, j, loops = mult * est_loops;

	for (j = 0; j < loops; j++)
		for (i = 0; i < 100000; i++)
			asm volatile (""::"g" (i));
}

#ifndef __s390__
static int count_hardware_counters(void)
{
	struct perf_event_attr hw_event;
	int i, hwctrs = 0;
	int fdarry[MAX_CTRS];
	struct read_format buf, buf2, diff;

	memset(&hw_event, 0, sizeof(struct perf_event_attr));

	hw_event.type = PERF_TYPE_HARDWARE;
	hw_event.size = sizeof(struct perf_event_attr);
	hw_event.disabled = 1;
	hw_event.config = PERF_COUNT_HW_INSTRUCTIONS;
	hw_event.read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
		PERF_FORMAT_TOTAL_TIME_RUNNING;

	for (i = 0; i < MAX_CTRS; i++) {
		fdarry[i] = perf_event_open(&hw_event, 0, -1, -1, 0);

		all_counters_set(PR_TASK_PERF_EVENTS_ENABLE);
		do_work(1);
		if (read(fdarry[i], &buf, sizeof(buf)) != sizeof(buf))
			tst_brk(TBROK | TERRNO, "error reading counter(s) #1");
		do_work(1);
		all_counters_set(PR_TASK_PERF_EVENTS_DISABLE);
		if (read(fdarry[i], &buf2, sizeof(buf2)) != sizeof(buf2))
			tst_brk(TBROK | TERRNO, "error reading counter(s) #2");

		diff.value = buf2.value - buf.value;
		diff.time_enabled = buf2.time_enabled - buf.time_enabled;
		diff.time_running = buf2.time_running - buf.time_running;

		tst_res(TINFO, "[%d] value:%lld time_enabled:%lld "
		       "time_running:%lld", i, diff.value,
		       diff.time_enabled, diff.time_running);

		/*
		 * Normally time_enabled and time_running have the same value.
		 * But if more events are started than there are counter slots
		 * on the PMU, multiplexing happens and each event runs only
		 * part of the time, so time_enabled and time_running differ.
		 * In that case their values can be used to scale an estimate
		 * of the full count.  So once diff.time_enabled and
		 * diff.time_running are no longer equal, we know that the PMU
		 * hardware counters are being multiplexed, and the number of
		 * events that still ran at full speed (i) is taken as the
		 * number of available hardware counters.
		 */
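		/*
		 * (For reference, not used by this test: with the read_format
		 * flags set above, a multiplexed count can be scaled to an
		 * estimate of the full count as roughly
		 * value * time_enabled / time_running.)
		 */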
		if (diff.time_enabled != diff.time_running) {
			hwctrs = i;
			break;
		}
	}

	for (i = 0; i <= hwctrs; i++)
		SAFE_CLOSE(fdarry[i]);

	return hwctrs;
}
#endif /* __s390__ */

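/*
 * Pin the process to the CPU it currently runs on (when sched_getcpu() is
 * available), presumably so that all counter groups contend for a single
 * CPU's PMU instead of being spread across CPUs by the scheduler.
 */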
static void bind_to_current_cpu(void)
{
#ifdef HAVE_SCHED_GETCPU
	int cpu = sched_getcpu();
	size_t mask_size;
	cpu_set_t *mask;

	if (cpu == -1)
		tst_brk(TBROK | TERRNO, "sched_getcpu() failed");

	mask = CPU_ALLOC(cpu + 1);
	mask_size = CPU_ALLOC_SIZE(cpu + 1);
	CPU_ZERO_S(mask_size, mask);
	CPU_SET(cpu, mask);
	if (sched_setaffinity(0, mask_size, mask) == -1)
		tst_brk(TBROK | TERRNO, "sched_setaffinity() failed");
	CPU_FREE(mask);
#endif
}

static void setup(void)
{
	int i;
	struct perf_event_attr tsk_event, hw_event;

	for (i = 0; i < MAX_CTRS; i++) {
		hwfd[i] = -1;
		tskfd[i] = -1;
	}

	bench_work(500);

	/*
	 * According to perf_event_open's manpage, the official way of
	 * knowing if perf_event_open() support is enabled is checking for
	 * the existence of the file /proc/sys/kernel/perf_event_paranoid.
	 */
	if (access("/proc/sys/kernel/perf_event_paranoid", F_OK) == -1)
		tst_brk(TCONF, "Kernel doesn't have perf_event support");

	bind_to_current_cpu();
#ifdef __s390__
	/*
	 * On s390 the "time_enabled" and "time_running" values are always the
	 * same, therefore count_hardware_counters() does not work.
	 *
	 * There are distinct/dedicated counters that can be used independently.
	 * Use the dedicated counter for instructions here.
	 */
	ntotal = nhw = 1;
#else
	nhw = count_hardware_counters();
	ntotal = nhw + 4;
#endif

	memset(&hw_event, 0, sizeof(struct perf_event_attr));
	memset(&tsk_event, 0, sizeof(struct perf_event_attr));

	tsk_event.type = PERF_TYPE_SOFTWARE;
	tsk_event.size = sizeof(struct perf_event_attr);
	tsk_event.disabled = 1;
	tsk_event.config = PERF_COUNT_SW_TASK_CLOCK;

	hw_event.type = PERF_TYPE_HARDWARE;
	hw_event.size = sizeof(struct perf_event_attr);
	hw_event.disabled = 1;
	hw_event.config = PERF_COUNT_HW_INSTRUCTIONS;

	tsk0 = perf_event_open(&tsk_event, 0, -1, -1, 0);
	tsk_event.disabled = 0;
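	/*
	 * Create the groups: each has a (still disabled) hardware instruction
	 * counter as group leader and a task clock counter as member.  The
	 * members are opened enabled, but they should count only while their
	 * group is actually scheduled on the PMU; verify() checks exactly
	 * that.
	 */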
	for (i = 0; i < ntotal; ++i) {
		hwfd[i] = perf_event_open(&hw_event, 0, -1, -1, 0);
		tskfd[i] = perf_event_open(&tsk_event, 0, -1, hwfd[i], 0);
	}
}

static void cleanup(void)
{
	int i;

	for (i = 0; i < ntotal; i++) {
		if (hwfd[i] != -1)
			SAFE_CLOSE(hwfd[i]);
		if (tskfd[i] != -1)
			SAFE_CLOSE(tskfd[i]);
	}

	if (tsk0 != -1)
		SAFE_CLOSE(tsk0);
}

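/*
 * The actual test: run the calibrated workload with all counters enabled,
 * then compare the summed per-group task clock against the overall task
 * clock (see the comment at the top of this file for the pass criterion).
 */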
static void verify(void)
{
	unsigned long long vt0, vt[MAX_CTRS], vh[MAX_CTRS];
	unsigned long long vtsum = 0, vhsum = 0;
	int i;
	double ratio;
	struct sched_param sparam = {.sched_priority = 1};

	if (sched_setscheduler(0, SCHED_FIFO, &sparam)) {
		tst_brk(TBROK | TERRNO,
			"sched_setscheduler(0, SCHED_FIFO, ...) failed");
	}

	all_counters_set(PR_TASK_PERF_EVENTS_ENABLE);
	do_work(8);
	/* stop the groups with hw counters before stopping tsk0 */
	for (i = 0; i < ntotal; i++) {
		ioctl(hwfd[i], PERF_EVENT_IOC_DISABLE);
		ioctl(tskfd[i], PERF_EVENT_IOC_DISABLE);
	}
	all_counters_set(PR_TASK_PERF_EVENTS_DISABLE);

	sparam.sched_priority = 0;
	if (sched_setscheduler(0, SCHED_OTHER, &sparam)) {
		tst_brk(TBROK | TERRNO,
			"sched_setscheduler(0, SCHED_OTHER, ...) failed");
	}

	if (read(tsk0, &vt0, sizeof(vt0)) != sizeof(vt0))
		tst_brk(TBROK | TERRNO, "error reading task clock counter");

	for (i = 0; i < ntotal; ++i) {
		if (read(tskfd[i], &vt[i], sizeof(vt[i])) != sizeof(vt[i]) ||
		    read(hwfd[i], &vh[i], sizeof(vh[i])) != sizeof(vh[i]))
			tst_brk(TBROK | TERRNO, "error reading counter(s)");
		vtsum += vt[i];
		vhsum += vh[i];
	}

	tst_res(TINFO, "nhw: %d, overall task clock: %llu", nhw, vt0);
	tst_res(TINFO, "hw sum: %llu, task clock sum: %llu", vhsum, vtsum);

	if (verbose) {
		tst_res(TINFO, "hw counters:");
		for (i = 0; i < ntotal; ++i)
			tst_res(TINFO, " %llu", vh[i]);
		tst_res(TINFO, "task clock counters:");
		for (i = 0; i < ntotal; ++i)
			tst_res(TINFO, " %llu", vt[i]);
	}

	ratio = (double)vtsum / vt0;
	tst_res(TINFO, "ratio: %lf", ratio);
	if (ratio > nhw + 0.005) {
		tst_res(TFAIL, "test failed (ratio was greater than %d)", nhw);
	} else {
		tst_res(TPASS, "test passed");
	}
}

static struct tst_test test = {
	.setup = setup,
	.cleanup = cleanup,
	.options = (struct tst_option[]) {
		{"v", &verbose, "Verbose output"},
		{},
	},
	.test_all = verify,
	.needs_root = 1,
	.max_runtime = 72
};
339