xref: /aosp_15_r20/external/igt-gpu-tools/tests/perf.c (revision d83cc019efdc2edc6c4b16e9034a3ceb8d35d77c)
1 /*
2  * Copyright © 2016 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  *
23  */
24 
25 #include <stdlib.h>
26 #include <stdio.h>
27 #include <string.h>
28 #include <fcntl.h>
29 #include <inttypes.h>
30 #include <errno.h>
31 #include <signal.h>
32 #include <sys/stat.h>
33 #include <sys/time.h>
34 #include <sys/times.h>
35 #include <sys/types.h>
36 #include <dirent.h>
37 #include <time.h>
38 #include <poll.h>
39 #include <math.h>
40 
41 #include "igt.h"
42 #include "igt_sysfs.h"
43 #include "drm.h"
44 
45 IGT_TEST_DESCRIPTION("Test the i915 perf metrics streaming interface");
46 
/* MI_REPORT_PERF_COUNT opcodes; the low bits encode the command length,
 * which grows by one dword on Gen8+ for the 64 bit destination address.
 */
#define GEN6_MI_REPORT_PERF_COUNT ((0x28 << 23) | (3 - 2))
#define GEN8_MI_REPORT_PERF_COUNT ((0x28 << 23) | (4 - 2))

/* Report-reason field in dword 0 of Gen8+ OA reports
 * (see gen8_read_report_reason()). */
#define OAREPORT_REASON_MASK           0x3f
#define OAREPORT_REASON_SHIFT          19
#define OAREPORT_REASON_TIMER          (1<<0)
#define OAREPORT_REASON_INTERNAL       (3<<1)
#define OAREPORT_REASON_CTX_SWITCH     (1<<3)
#define OAREPORT_REASON_GO             (1<<4)
#define OAREPORT_REASON_CLK_RATIO      (1<<5)

/* PIPE_CONTROL command and flag bits. */
#define GFX_OP_PIPE_CONTROL     ((3 << 29) | (3 << 27) | (2 << 24))
#define PIPE_CONTROL_CS_STALL	   (1 << 20)
#define PIPE_CONTROL_GLOBAL_SNAPSHOT_COUNT_RESET	(1 << 19)
#define PIPE_CONTROL_TLB_INVALIDATE     (1 << 18)
#define PIPE_CONTROL_SYNC_GFDT	  (1 << 17)
#define PIPE_CONTROL_MEDIA_STATE_CLEAR  (1 << 16)
#define PIPE_CONTROL_NO_WRITE	   (0 << 14)
#define PIPE_CONTROL_WRITE_IMMEDIATE    (1 << 14)
#define PIPE_CONTROL_WRITE_DEPTH_COUNT  (2 << 14)
#define PIPE_CONTROL_WRITE_TIMESTAMP    (3 << 14)
#define PIPE_CONTROL_DEPTH_STALL	(1 << 13)
#define PIPE_CONTROL_RENDER_TARGET_FLUSH (1 << 12)
#define PIPE_CONTROL_INSTRUCTION_INVALIDATE (1 << 11)
#define PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE   (1 << 10) /* GM45+ only */
#define PIPE_CONTROL_ISP_DIS	    (1 << 9)
#define PIPE_CONTROL_INTERRUPT_ENABLE   (1 << 8)
#define PIPE_CONTROL_FLUSH_ENABLE       (1 << 7) /* Gen7+ only */
/* GT */
#define PIPE_CONTROL_DATA_CACHE_INVALIDATE      (1 << 5)
#define PIPE_CONTROL_VF_CACHE_INVALIDATE	(1 << 4)
#define PIPE_CONTROL_CONST_CACHE_INVALIDATE     (1 << 3)
#define PIPE_CONTROL_STATE_CACHE_INVALIDATE     (1 << 2)
#define PIPE_CONTROL_STALL_AT_SCOREBOARD	(1 << 1)
#define PIPE_CONTROL_DEPTH_CACHE_FLUSH	  (1 << 0)
#define PIPE_CONTROL_PPGTT_WRITE	(0 << 2)
#define PIPE_CONTROL_GLOBAL_GTT_WRITE   (1 << 2)

/* Upper bound used when sizing buffers that hold raw OA stream data. */
#define MAX_OA_BUF_SIZE (16 * 1024 * 1024)
86 
/* Holds wrap-corrected counter deltas accumulated between OA reports. */
struct accumulator {
#define MAX_RAW_OA_COUNTERS 62
	/* OA report layout the deltas were read with. */
	enum drm_i915_oa_format format;

	/* Filled by accumulate_reports(): time delta first (plus a clock
	 * delta on Gen8+), then A40, A, B and C counter deltas in order. */
	uint64_t deltas[MAX_RAW_OA_COUNTERS];
};
93 
/* Describes the in-memory layout of one OA report format: byte offsets
 * and counts for each family of raw counters. */
struct oa_format {
	const char *name;
	size_t size;      /* total report size in bytes */
	int a40_high_off; /* bytes; high 8 bits of the 40bit A counters */
	int a40_low_off;  /* bytes; low 32 bits of the 40bit A counters */
	int n_a40;        /* number of 40bit A counters */
	int a_off;        /* bytes; 32bit A counters */
	int n_a;          /* number of 32bit A counters */
	int first_a;      /* A-counter index of the first 32bit A counter */
	int b_off;        /* bytes; B counters */
	int n_b;          /* number of B counters */
	int c_off;        /* bytes; C counters */
	int n_c;          /* number of C counters */
};
108 
/* Report layouts for the Haswell-only OA formats, indexed by
 * enum drm_i915_oa_format (see get_oa_format()). */
static struct oa_format hsw_oa_formats[I915_OA_FORMAT_MAX] = {
	[I915_OA_FORMAT_A13] = { /* HSW only */
		"A13", .size = 64,
		.a_off = 12, .n_a = 13, },
	[I915_OA_FORMAT_A29] = { /* HSW only */
		"A29", .size = 128,
		.a_off = 12, .n_a = 29, },
	[I915_OA_FORMAT_A13_B8_C8] = { /* HSW only */
		"A13_B8_C8", .size = 128,
		.a_off = 12, .n_a = 13,
		.b_off = 64, .n_b = 8,
		.c_off = 96, .n_c = 8, },
	[I915_OA_FORMAT_A45_B8_C8] = { /* HSW only */
		"A45_B8_C8", .size = 256,
		.a_off = 12,  .n_a = 45,
		.b_off = 192, .n_b = 8,
		.c_off = 224, .n_c = 8, },
	[I915_OA_FORMAT_B4_C8] = { /* HSW only */
		"B4_C8", .size = 64,
		.b_off = 16, .n_b = 4,
		.c_off = 32, .n_c = 8, },
	[I915_OA_FORMAT_B4_C8_A16] = { /* HSW only */
		"B4_C8_A16", .size = 128,
		.b_off = 16, .n_b = 4,
		.c_off = 32, .n_c = 8,
		.a_off = 60, .n_a = 16, .first_a = 29, },
	[I915_OA_FORMAT_C4_B8] = { /* HSW+ (header differs from HSW-Gen8+) */
		"C4_B8", .size = 64,
		.c_off = 16, .n_c = 4,
		.b_off = 28, .n_b = 8 },
};
140 
/* Report layouts for the Gen8+ OA formats, indexed by
 * enum drm_i915_oa_format (see get_oa_format()). */
static struct oa_format gen8_oa_formats[I915_OA_FORMAT_MAX] = {
	[I915_OA_FORMAT_A12] = {
		"A12", .size = 64,
		.a_off = 12, .n_a = 12, .first_a = 7, },
	[I915_OA_FORMAT_A12_B8_C8] = {
		"A12_B8_C8", .size = 128,
		.a_off = 12, .n_a = 12,
		.b_off = 64, .n_b = 8,
		.c_off = 96, .n_c = 8, .first_a = 7, },
	[I915_OA_FORMAT_A32u40_A4u32_B8_C8] = {
		"A32u40_A4u32_B8_C8", .size = 256,
		.a40_high_off = 160, .a40_low_off = 16, .n_a40 = 32,
		.a_off = 144, .n_a = 4, .first_a = 32,
		.b_off = 192, .n_b = 8,
		.c_off = 224, .n_c = 8, },
	[I915_OA_FORMAT_C4_B8] = {
		"C4_B8", .size = 64,
		.c_off = 16, .n_c = 4,
		.b_off = 32, .n_b = 8, },
};
161 
/* A-counter indices that are reserved/undefined on Haswell; the sanity
 * check helpers skip these entries. */
static bool hsw_undefined_a_counters[45] = {
	[4] = true,
	[6] = true,
	[9] = true,
	[11] = true,
	[14] = true,
	[16] = true,
	[19] = true,
	[21] = true,
	[24] = true,
	[26] = true,
	[29] = true,
	[31] = true,
	[34] = true,
	[43] = true,
	[44] = true,
};

/* No A counters currently reserved/undefined for gen8+ so far */
static bool gen8_undefined_a_counters[45];
182 
/* File-scope test state shared by all subtests. */
static int drm_fd = -1;    /* DRM device fd */
static int sysfs = -1;     /* device sysfs dir fd (used by igt_sysfs_*) */
static int pm_fd = -1;     /* /dev/cpu_dma_latency fd held by __perf_open() */
static int stream_fd = -1; /* currently open i915 perf stream fd */
static uint32_t devid;     /* PCI device id */
static int n_eus;          /* EU count, used to bound counter deltas */

/* Metric set id read from sysfs in init_sys_info(). */
static uint64_t test_metric_set_id = UINT64_MAX;

static uint64_t timestamp_frequency = 12500000; /* CS timestamp Hz */
static uint64_t gt_max_freq_mhz = 0;
static enum drm_i915_oa_format test_oa_format;
static bool *undefined_a_counters; /* points at one of the tables above */
static uint64_t oa_exp_1_millisec; /* OA exponent giving a <= 1ms period */

/* Per-gen hooks; read_report_ticks/sanity_check_reports are selected in
 * init_sys_info(). */
static igt_render_copyfunc_t render_copy = NULL;
static uint32_t (*read_report_ticks)(uint32_t *report,
				     enum drm_i915_oa_format format);
static void (*sanity_check_reports)(uint32_t *oa_report0, uint32_t *oa_report1,
				    enum drm_i915_oa_format format);
203 
204 static struct oa_format
get_oa_format(enum drm_i915_oa_format format)205 get_oa_format(enum drm_i915_oa_format format)
206 {
207 	if (IS_HASWELL(devid))
208 		return hsw_oa_formats[format];
209 	return gen8_oa_formats[format];
210 }
211 
212 static void
__perf_close(int fd)213 __perf_close(int fd)
214 {
215 	close(fd);
216 	stream_fd = -1;
217 
218 	if (pm_fd >= 0) {
219 		close(pm_fd);
220 		pm_fd = -1;
221 	}
222 }
223 
224 static int
__perf_open(int fd,struct drm_i915_perf_open_param * param,bool prevent_pm)225 __perf_open(int fd, struct drm_i915_perf_open_param *param, bool prevent_pm)
226 {
227 	int ret;
228 	int32_t pm_value = 0;
229 
230 	if (stream_fd >= 0)
231 		__perf_close(stream_fd);
232 	if (pm_fd >= 0) {
233 		close(pm_fd);
234 		pm_fd = -1;
235 	}
236 
237 	ret = igt_ioctl(fd, DRM_IOCTL_I915_PERF_OPEN, param);
238 
239 	igt_assert(ret >= 0);
240 	errno = 0;
241 
242 	if (prevent_pm) {
243 		pm_fd = open("/dev/cpu_dma_latency", O_RDWR);
244 		igt_assert(pm_fd >= 0);
245 
246 		igt_assert_eq(write(pm_fd, &pm_value, sizeof(pm_value)), sizeof(pm_value));
247 	}
248 
249 	return ret;
250 }
251 
252 static int
lookup_format(int i915_perf_fmt_id)253 lookup_format(int i915_perf_fmt_id)
254 {
255 	igt_assert(i915_perf_fmt_id < I915_OA_FORMAT_MAX);
256 	igt_assert(get_oa_format(i915_perf_fmt_id).name);
257 
258 	return i915_perf_fmt_id;
259 }
260 
/* Read a single decimal uint64 from the file at @path; asserts on any
 * open or parse failure. */
static uint64_t
read_u64_file(const char *path)
{
	uint64_t val;
	FILE *f = fopen(path, "r");

	igt_assert(f);
	igt_assert_eq(fscanf(f, "%"PRIu64, &val), 1);
	fclose(f);

	return val;
}
276 
/* Write @val as a decimal string to the file at @path; asserts on any
 * open or write failure. */
static void
write_u64_file(const char *path, uint64_t val)
{
	FILE *f = fopen(path, "w");

	igt_assert(f);
	igt_assert(fprintf(f, "%"PRIu64, val) > 0);
	fclose(f);
}
289 
290 static bool
try_sysfs_read_u64(const char * path,uint64_t * val)291 try_sysfs_read_u64(const char *path, uint64_t *val)
292 {
293 	return igt_sysfs_scanf(sysfs, path, "%"PRIu64, val) == 1;
294 }
295 
296 static unsigned long
sysfs_read(const char * path)297 sysfs_read(const char *path)
298 {
299 	unsigned long value;
300 
301 	igt_assert(igt_sysfs_scanf(sysfs, path, "%lu", &value) == 1);
302 
303 	return value;
304 }
305 
306 /* XXX: For Haswell this utility is only applicable to the render basic
307  * metric set.
308  *
309  * C2 corresponds to a clock counter for the Haswell render basic metric set
310  * but it's not included in all of the formats.
311  */
312 static uint32_t
hsw_read_report_ticks(uint32_t * report,enum drm_i915_oa_format format)313 hsw_read_report_ticks(uint32_t *report, enum drm_i915_oa_format format)
314 {
315 	uint32_t *c = (uint32_t *)(((uint8_t *)report) + get_oa_format(format).c_off);
316 
317 	igt_assert_neq(get_oa_format(format).n_c, 0);
318 
319 	return c[2];
320 }
321 
322 static uint32_t
gen8_read_report_ticks(uint32_t * report,enum drm_i915_oa_format format)323 gen8_read_report_ticks(uint32_t *report, enum drm_i915_oa_format format)
324 {
325 	return report[3];
326 }
327 
/* Decode the slice/unslice clock ratio fields packed into dword 0 of a
 * Gen8+ OA report and convert them to MHz (units of 16.666 MHz). */
static void
gen8_read_report_clock_ratios(uint32_t *report,
			      uint32_t *slice_freq_mhz,
			      uint32_t *unslice_freq_mhz)
{
	uint32_t dword0 = report[0];
	uint32_t unslice = dword0 & 0x1ff;
	/* The 9 bit slice ratio is split: bits 25-31 hold the low 7 bits
	 * and bits 9-10 hold the top 2 bits. */
	uint32_t slice = ((dword0 >> 25) & 0x7f) |
			 (((dword0 >> 9) & 0x3) << 7);

	*slice_freq_mhz = (slice * 16666) / 1000;
	*unslice_freq_mhz = (unslice * 16666) / 1000;
}
341 
342 static const char *
gen8_read_report_reason(const uint32_t * report)343 gen8_read_report_reason(const uint32_t *report)
344 {
345 	uint32_t reason = ((report[0] >> OAREPORT_REASON_SHIFT) &
346 			   OAREPORT_REASON_MASK);
347 
348 	if (reason & (1<<0))
349 		return "timer";
350 	else if (reason & (1<<1))
351 	      return "internal trigger 1";
352 	else if (reason & (1<<2))
353 	      return "internal trigger 2";
354 	else if (reason & (1<<3))
355 	      return "context switch";
356 	else if (reason & (1<<4))
357 	      return "GO 1->0 transition (enter RC6)";
358 	else if (reason & (1<<5))
359 		return "[un]slice clock ratio change";
360 	else
361 		return "unknown";
362 }
363 
364 static uint64_t
timebase_scale(uint32_t u32_delta)365 timebase_scale(uint32_t u32_delta)
366 {
367 	return ((uint64_t)u32_delta * NSEC_PER_SEC) / timestamp_frequency;
368 }
369 
/* Returns: the largest OA exponent that will still result in a sampling period
 * less than or equal to the given @period.
 */
static int
max_oa_exponent_for_period_lte(uint64_t period)
{
	/* NB: timebase_scale() takes a uint32_t and an exponent of 30
	 * would already represent a period of ~3 minutes so there's
	 * really no need to consider higher exponents.
	 */
	for (int exponent = 0; exponent < 30; exponent++) {
		if (timebase_scale(2 << exponent) > period)
			return max(0, exponent - 1);
	}

	igt_assert(!"reached");
	return -1;
}
390 
391 /* Return: the largest OA exponent that will still result in a sampling
392  * frequency greater than the given @frequency.
393  */
394 static int
max_oa_exponent_for_freq_gt(uint64_t frequency)395 max_oa_exponent_for_freq_gt(uint64_t frequency)
396 {
397 	uint64_t period = NSEC_PER_SEC / frequency;
398 
399 	igt_assert_neq(period, 0);
400 
401 	return max_oa_exponent_for_period_lte(period - 1);
402 }
403 
404 static uint64_t
oa_exponent_to_ns(int exponent)405 oa_exponent_to_ns(int exponent)
406 {
407        return 1000000000ULL * (2ULL << exponent) / timestamp_frequency;
408 }
409 
410 static bool
oa_report_is_periodic(uint32_t oa_exponent,const uint32_t * report)411 oa_report_is_periodic(uint32_t oa_exponent, const uint32_t *report)
412 {
413 	if (IS_HASWELL(devid)) {
414 		/* For Haswell we don't have a documented report reason field
415 		 * (though empirically report[0] bit 10 does seem to correlate
416 		 * with a timer trigger reason) so we instead infer which
417 		 * reports are timer triggered by checking if the least
418 		 * significant bits are zero and the exponent bit is set.
419 		 */
420 		uint32_t oa_exponent_mask = (1 << (oa_exponent + 1)) - 1;
421 
422 		if ((report[1] & oa_exponent_mask) == (1 << oa_exponent))
423 			return true;
424 	} else {
425 		if ((report[0] >> OAREPORT_REASON_SHIFT) &
426 		    OAREPORT_REASON_TIMER)
427 			return true;
428 	}
429 
430 	return false;
431 }
432 
433 static bool
oa_report_ctx_is_valid(uint32_t * report)434 oa_report_ctx_is_valid(uint32_t *report)
435 {
436 	if (IS_HASWELL(devid)) {
437 		return false; /* TODO */
438 	} else if (IS_GEN8(devid)) {
439 		return report[0] & (1ul << 25);
440 	} else if (AT_LEAST_GEN(devid, 9)) {
441 		return report[0] & (1ul << 16);
442 	}
443 
444 	igt_assert(!"Please update this function for newer Gen");
445 }
446 
/* Return the context id from @report, or 0xffffffff when the report's
 * ctx-valid bit is not set. */
static uint32_t
oa_report_get_ctx_id(uint32_t *report)
{
	return oa_report_ctx_is_valid(report) ? report[2] : 0xffffffff;
}
454 
455 static void
scratch_buf_memset(drm_intel_bo * bo,int width,int height,uint32_t color)456 scratch_buf_memset(drm_intel_bo *bo, int width, int height, uint32_t color)
457 {
458 	int ret;
459 
460 	ret = drm_intel_bo_map(bo, true /* writable */);
461 	igt_assert_eq(ret, 0);
462 
463 	for (int i = 0; i < width * height; i++)
464 		((uint32_t *)bo->virtual)[i] = color;
465 
466 	drm_intel_bo_unmap(bo);
467 }
468 
469 static void
scratch_buf_init(drm_intel_bufmgr * bufmgr,struct igt_buf * buf,int width,int height,uint32_t color)470 scratch_buf_init(drm_intel_bufmgr *bufmgr,
471 		 struct igt_buf *buf,
472 		 int width, int height,
473 		 uint32_t color)
474 {
475 	size_t stride = width * 4;
476 	size_t size = stride * height;
477 	drm_intel_bo *bo = drm_intel_bo_alloc(bufmgr, "", size, 4096);
478 
479 	scratch_buf_memset(bo, width, height, color);
480 
481 	memset(buf, 0, sizeof(*buf));
482 
483 	buf->bo = bo;
484 	buf->stride = stride;
485 	buf->tiling = I915_TILING_NONE;
486 	buf->size = size;
487 	buf->bpp = 32;
488 }
489 
490 static void
emit_report_perf_count(struct intel_batchbuffer * batch,drm_intel_bo * dst_bo,int dst_offset,uint32_t report_id)491 emit_report_perf_count(struct intel_batchbuffer *batch,
492 		       drm_intel_bo *dst_bo,
493 		       int dst_offset,
494 		       uint32_t report_id)
495 {
496 	if (IS_HASWELL(devid)) {
497 		BEGIN_BATCH(3, 1);
498 		OUT_BATCH(GEN6_MI_REPORT_PERF_COUNT);
499 		OUT_RELOC(dst_bo, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
500 			  dst_offset);
501 		OUT_BATCH(report_id);
502 		ADVANCE_BATCH();
503 	} else {
504 		/* XXX: NB: n dwords arg is actually magic since it internally
505 		 * automatically accounts for larger addresses on gen >= 8...
506 		 */
507 		BEGIN_BATCH(3, 1);
508 		OUT_BATCH(GEN8_MI_REPORT_PERF_COUNT);
509 		OUT_RELOC(dst_bo, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
510 			  dst_offset);
511 		OUT_BATCH(report_id);
512 		ADVANCE_BATCH();
513 	}
514 }
515 
516 static void
hsw_sanity_check_render_basic_reports(uint32_t * oa_report0,uint32_t * oa_report1,enum drm_i915_oa_format fmt)517 hsw_sanity_check_render_basic_reports(uint32_t *oa_report0, uint32_t *oa_report1,
518 				      enum drm_i915_oa_format fmt)
519 {
520 	uint32_t time_delta = timebase_scale(oa_report1[1] - oa_report0[1]);
521 	uint32_t clock_delta;
522 	uint32_t max_delta;
523 	struct oa_format format = get_oa_format(fmt);
524 
525 	igt_assert_neq(time_delta, 0);
526 
527 	/* As a special case we have to consider that on Haswell we
528 	 * can't explicitly derive a clock delta for all OA report
529 	 * formats...
530 	 */
531 	if (format.n_c == 0) {
532 		/* Assume running at max freq for sake of
533 		 * below sanity check on counters... */
534 		clock_delta = (gt_max_freq_mhz *
535 			       (uint64_t)time_delta) / 1000;
536 	} else {
537 		uint32_t ticks0 = read_report_ticks(oa_report0, fmt);
538 		uint32_t ticks1 = read_report_ticks(oa_report1, fmt);
539 		uint64_t freq;
540 
541 		clock_delta = ticks1 - ticks0;
542 
543 		igt_assert_neq(clock_delta, 0);
544 
545 		freq = ((uint64_t)clock_delta * 1000) / time_delta;
546 		igt_debug("freq = %"PRIu64"\n", freq);
547 
548 		igt_assert(freq <= gt_max_freq_mhz);
549 	}
550 
551 	igt_debug("clock delta = %"PRIu32"\n", clock_delta);
552 
553 	/* The maximum rate for any HSW counter =
554 	 *   clock_delta * N EUs
555 	 *
556 	 * Sanity check that no counters exceed this delta.
557 	 */
558 	max_delta = clock_delta * n_eus;
559 
560 	/* 40bit A counters were only introduced for Gen8+ */
561 	igt_assert_eq(format.n_a40, 0);
562 
563 	for (int j = 0; j < format.n_a; j++) {
564 		uint32_t *a0 = (uint32_t *)(((uint8_t *)oa_report0) +
565 					    format.a_off);
566 		uint32_t *a1 = (uint32_t *)(((uint8_t *)oa_report1) +
567 					    format.a_off);
568 		int a_id = format.first_a + j;
569 		uint32_t delta = a1[j] - a0[j];
570 
571 		if (undefined_a_counters[a_id])
572 			continue;
573 
574 		igt_debug("A%d: delta = %"PRIu32"\n", a_id, delta);
575 		igt_assert(delta <= max_delta);
576 	}
577 
578 	for (int j = 0; j < format.n_b; j++) {
579 		uint32_t *b0 = (uint32_t *)(((uint8_t *)oa_report0) +
580 					    format.b_off);
581 		uint32_t *b1 = (uint32_t *)(((uint8_t *)oa_report1) +
582 					    format.b_off);
583 		uint32_t delta = b1[j] - b0[j];
584 
585 		igt_debug("B%d: delta = %"PRIu32"\n", j, delta);
586 		igt_assert(delta <= max_delta);
587 	}
588 
589 	for (int j = 0; j < format.n_c; j++) {
590 		uint32_t *c0 = (uint32_t *)(((uint8_t *)oa_report0) +
591 					    format.c_off);
592 		uint32_t *c1 = (uint32_t *)(((uint8_t *)oa_report1) +
593 					    format.c_off);
594 		uint32_t delta = c1[j] - c0[j];
595 
596 		igt_debug("C%d: delta = %"PRIu32"\n", j, delta);
597 		igt_assert(delta <= max_delta);
598 	}
599 }
600 
601 static uint64_t
gen8_read_40bit_a_counter(uint32_t * report,enum drm_i915_oa_format fmt,int a_id)602 gen8_read_40bit_a_counter(uint32_t *report, enum drm_i915_oa_format fmt, int a_id)
603 {
604 	struct oa_format format = get_oa_format(fmt);
605 	uint8_t *a40_high = (((uint8_t *)report) + format.a40_high_off);
606 	uint32_t *a40_low = (uint32_t *)(((uint8_t *)report) +
607 					 format.a40_low_off);
608 	uint64_t high = (uint64_t)(a40_high[a_id]) << 32;
609 
610 	return a40_low[a_id] | high;
611 }
612 
/* Delta between two 40bit A counter values, accounting for a single
 * wrap of the 40bit counter. */
static uint64_t
gen8_40bit_a_delta(uint64_t value0, uint64_t value1)
{
	if (value1 < value0)
		return (1ULL << 40) + value1 - value0;

	return value1 - value0;
}
621 
/* Add the (wrap-safe, modulo 2^32) delta of the 32bit counter at byte
 * @offset between @report0 and @report1 into *@delta. */
static void
accumulate_uint32(size_t offset,
		  uint32_t *report0,
		  uint32_t *report1,
		  uint64_t *delta)
{
	const uint8_t *p0 = (const uint8_t *)report0;
	const uint8_t *p1 = (const uint8_t *)report1;
	uint32_t value0, value1;

	memcpy(&value0, p0 + offset, sizeof(value0));
	memcpy(&value1, p1 + offset, sizeof(value1));

	*delta += (uint32_t)(value1 - value0);
}
633 
634 static void
accumulate_uint40(int a_index,uint32_t * report0,uint32_t * report1,enum drm_i915_oa_format format,uint64_t * delta)635 accumulate_uint40(int a_index,
636                   uint32_t *report0,
637                   uint32_t *report1,
638 		  enum drm_i915_oa_format format,
639                   uint64_t *delta)
640 {
641 	uint64_t value0 = gen8_read_40bit_a_counter(report0, format, a_index),
642 		 value1 = gen8_read_40bit_a_counter(report1, format, a_index);
643 
644 	*delta += gen8_40bit_a_delta(value0, value1);
645 }
646 
647 static void
accumulate_reports(struct accumulator * accumulator,uint32_t * start,uint32_t * end)648 accumulate_reports(struct accumulator *accumulator,
649 		   uint32_t *start,
650 		   uint32_t *end)
651 {
652 	struct oa_format format = get_oa_format(accumulator->format);
653 	uint64_t *deltas = accumulator->deltas;
654 	int idx = 0;
655 
656 	if (intel_gen(devid) >= 8) {
657 		/* timestamp */
658 		accumulate_uint32(4, start, end, deltas + idx++);
659 
660 		/* clock cycles */
661 		accumulate_uint32(12, start, end, deltas + idx++);
662 	} else {
663 		/* timestamp */
664 		accumulate_uint32(4, start, end, deltas + idx++);
665 	}
666 
667 	for (int i = 0; i < format.n_a40; i++) {
668 		accumulate_uint40(i, start, end, accumulator->format,
669 				  deltas + idx++);
670 	}
671 
672 	for (int i = 0; i < format.n_a; i++) {
673 		accumulate_uint32(format.a_off + 4 * i,
674 				  start, end, deltas + idx++);
675 	}
676 
677 	for (int i = 0; i < format.n_b; i++) {
678 		accumulate_uint32(format.b_off + 4 * i,
679 				  start, end, deltas + idx++);
680 	}
681 
682 	for (int i = 0; i < format.n_c; i++) {
683 		accumulate_uint32(format.c_off + 4 * i,
684 				  start, end, deltas + idx++);
685 	}
686 }
687 
688 static void
accumulator_print(struct accumulator * accumulator,const char * title)689 accumulator_print(struct accumulator *accumulator, const char *title)
690 {
691 	struct oa_format format = get_oa_format(accumulator->format);
692 	uint64_t *deltas = accumulator->deltas;
693 	int idx = 0;
694 
695 	igt_debug("%s:\n", title);
696 	if (intel_gen(devid) >= 8) {
697 		igt_debug("\ttime delta = %"PRIu64"\n", deltas[idx++]);
698 		igt_debug("\tclock cycle delta = %"PRIu64"\n", deltas[idx++]);
699 
700 		for (int i = 0; i < format.n_a40; i++)
701 			igt_debug("\tA%u = %"PRIu64"\n", i, deltas[idx++]);
702 	} else {
703 		igt_debug("\ttime delta = %"PRIu64"\n", deltas[idx++]);
704 	}
705 
706 	for (int i = 0; i < format.n_a; i++) {
707 		int a_id = format.first_a + i;
708 		igt_debug("\tA%u = %"PRIu64"\n", a_id, deltas[idx++]);
709 	}
710 
711 	for (int i = 0; i < format.n_a; i++)
712 		igt_debug("\tB%u = %"PRIu64"\n", i, deltas[idx++]);
713 
714 	for (int i = 0; i < format.n_c; i++)
715 		igt_debug("\tC%u = %"PRIu64"\n", i, deltas[idx++]);
716 }
717 
718 /* The TestOa metric set is designed so */
719 static void
gen8_sanity_check_test_oa_reports(uint32_t * oa_report0,uint32_t * oa_report1,enum drm_i915_oa_format fmt)720 gen8_sanity_check_test_oa_reports(uint32_t *oa_report0, uint32_t *oa_report1,
721 				  enum drm_i915_oa_format fmt)
722 {
723 	struct oa_format format = get_oa_format(fmt);
724 	uint32_t time_delta = timebase_scale(oa_report1[1] - oa_report0[1]);
725 	uint32_t ticks0 = read_report_ticks(oa_report0, fmt);
726 	uint32_t ticks1 = read_report_ticks(oa_report1, fmt);
727 	uint32_t clock_delta = ticks1 - ticks0;
728 	uint32_t max_delta;
729 	uint64_t freq;
730 	uint32_t *rpt0_b = (uint32_t *)(((uint8_t *)oa_report0) +
731 					format.b_off);
732 	uint32_t *rpt1_b = (uint32_t *)(((uint8_t *)oa_report1) +
733 					format.b_off);
734 	uint32_t b;
735 	uint32_t ref;
736 
737 
738 	igt_assert_neq(time_delta, 0);
739 	igt_assert_neq(clock_delta, 0);
740 
741 	freq = ((uint64_t)clock_delta * 1000) / time_delta;
742 	igt_debug("freq = %"PRIu64"\n", freq);
743 
744 	igt_assert(freq <= gt_max_freq_mhz);
745 
746 	igt_debug("clock delta = %"PRIu32"\n", clock_delta);
747 
748 	max_delta = clock_delta * n_eus;
749 
750 	/* Gen8+ has some 40bit A counters... */
751 	for (int j = 0; j < format.n_a40; j++) {
752 		uint64_t value0 = gen8_read_40bit_a_counter(oa_report0, fmt, j);
753 		uint64_t value1 = gen8_read_40bit_a_counter(oa_report1, fmt, j);
754 		uint64_t delta = gen8_40bit_a_delta(value0, value1);
755 
756 		if (undefined_a_counters[j])
757 			continue;
758 
759 		igt_debug("A%d: delta = %"PRIu64"\n", j, delta);
760 		igt_assert(delta <= max_delta);
761 	}
762 
763 	for (int j = 0; j < format.n_a; j++) {
764 		uint32_t *a0 = (uint32_t *)(((uint8_t *)oa_report0) +
765 					    format.a_off);
766 		uint32_t *a1 = (uint32_t *)(((uint8_t *)oa_report1) +
767 					    format.a_off);
768 		int a_id = format.first_a + j;
769 		uint32_t delta = a1[j] - a0[j];
770 
771 		if (undefined_a_counters[a_id])
772 			continue;
773 
774 		igt_debug("A%d: delta = %"PRIu32"\n", a_id, delta);
775 		igt_assert(delta <= max_delta);
776 	}
777 
778 	/* The TestOa metric set defines all B counters to be a
779 	 * multiple of the gpu clock
780 	 */
781 	if (format.n_b) {
782 		b = rpt1_b[0] - rpt0_b[0];
783 		igt_debug("B0: delta = %"PRIu32"\n", b);
784 		igt_assert_eq(b, 0);
785 
786 		b = rpt1_b[1] - rpt0_b[1];
787 		igt_debug("B1: delta = %"PRIu32"\n", b);
788 		igt_assert_eq(b, clock_delta);
789 
790 		b = rpt1_b[2] - rpt0_b[2];
791 		igt_debug("B2: delta = %"PRIu32"\n", b);
792 		igt_assert_eq(b, clock_delta);
793 
794 		b = rpt1_b[3] - rpt0_b[3];
795 		ref = clock_delta / 2;
796 		igt_debug("B3: delta = %"PRIu32"\n", b);
797 		igt_assert(b >= ref - 1 && b <= ref + 1);
798 
799 		b = rpt1_b[4] - rpt0_b[4];
800 		ref = clock_delta / 3;
801 		igt_debug("B4: delta = %"PRIu32"\n", b);
802 		igt_assert(b >= ref - 1 && b <= ref + 1);
803 
804 		b = rpt1_b[5] - rpt0_b[5];
805 		ref = clock_delta / 3;
806 		igt_debug("B5: delta = %"PRIu32"\n", b);
807 		igt_assert(b >= ref - 1 && b <= ref + 1);
808 
809 		b = rpt1_b[6] - rpt0_b[6];
810 		ref = clock_delta / 6;
811 		igt_debug("B6: delta = %"PRIu32"\n", b);
812 		igt_assert(b >= ref - 1 && b <= ref + 1);
813 
814 		b = rpt1_b[7] - rpt0_b[7];
815 		ref = clock_delta * 2 / 3;
816 		igt_debug("B7: delta = %"PRIu32"\n", b);
817 		igt_assert(b >= ref - 1 && b <= ref + 1);
818 	}
819 
820 	for (int j = 0; j < format.n_c; j++) {
821 		uint32_t *c0 = (uint32_t *)(((uint8_t *)oa_report0) +
822 					    format.c_off);
823 		uint32_t *c1 = (uint32_t *)(((uint8_t *)oa_report1) +
824 					    format.c_off);
825 		uint32_t delta = c1[j] - c0[j];
826 
827 		igt_debug("C%d: delta = %"PRIu32"\n", j, delta);
828 		igt_assert(delta <= max_delta);
829 	}
830 }
831 
832 static uint64_t
get_cs_timestamp_frequency(void)833 get_cs_timestamp_frequency(void)
834 {
835 	int cs_ts_freq = 0;
836 	drm_i915_getparam_t gp;
837 
838 	gp.param = I915_PARAM_CS_TIMESTAMP_FREQUENCY;
839 	gp.value = &cs_ts_freq;
840 	if (igt_ioctl(drm_fd, DRM_IOCTL_I915_GETPARAM, &gp) == 0)
841 		return cs_ts_freq;
842 
843 	igt_debug("Couldn't query CS timestamp frequency, trying to guess based on PCI-id\n");
844 
845 	if (IS_GEN7(devid) || IS_GEN8(devid))
846 		return 12500000;
847 	if (IS_SKYLAKE(devid) || IS_KABYLAKE(devid) || IS_COFFEELAKE(devid))
848 		return 12000000;
849 	if (IS_BROXTON(devid) || IS_GEMINILAKE(devid))
850 		return 19200000;
851 
852 	igt_skip("Kernel with PARAM_CS_TIMESTAMP_FREQUENCY support required\n");
853 }
854 
855 static bool
init_sys_info(void)856 init_sys_info(void)
857 {
858 	const char *test_set_name = NULL;
859 	const char *test_set_uuid = NULL;
860 	char buf[256];
861 
862 	igt_assert_neq(devid, 0);
863 
864 	timestamp_frequency = get_cs_timestamp_frequency();
865 	igt_assert_neq(timestamp_frequency, 0);
866 
867 	if (IS_HASWELL(devid)) {
868 		/* We don't have a TestOa metric set for Haswell so use
869 		 * RenderBasic
870 		 */
871 		test_set_name = "RenderBasic";
872 		test_set_uuid = "403d8832-1a27-4aa6-a64e-f5389ce7b212";
873 		test_oa_format = I915_OA_FORMAT_A45_B8_C8;
874 		undefined_a_counters = hsw_undefined_a_counters;
875 		read_report_ticks = hsw_read_report_ticks;
876 		sanity_check_reports = hsw_sanity_check_render_basic_reports;
877 
878 		if (intel_gt(devid) == 0)
879 			n_eus = 10;
880 		else if (intel_gt(devid) == 1)
881 			n_eus = 20;
882 		else if (intel_gt(devid) == 2)
883 			n_eus = 40;
884 		else {
885 			igt_assert(!"reached");
886 			return false;
887 		}
888 	} else {
889 		drm_i915_getparam_t gp;
890 
891 		test_set_name = "TestOa";
892 		test_oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
893 		undefined_a_counters = gen8_undefined_a_counters;
894 		read_report_ticks = gen8_read_report_ticks;
895 		sanity_check_reports = gen8_sanity_check_test_oa_reports;
896 
897 		if (IS_BROADWELL(devid)) {
898 			test_set_uuid = "d6de6f55-e526-4f79-a6a6-d7315c09044e";
899 		} else if (IS_CHERRYVIEW(devid)) {
900 			test_set_uuid = "4a534b07-cba3-414d-8d60-874830e883aa";
901 		} else if (IS_SKYLAKE(devid)) {
902 			switch (intel_gt(devid)) {
903 			case 1:
904 				test_set_uuid = "1651949f-0ac0-4cb1-a06f-dafd74a407d1";
905 				break;
906 			case 2:
907 				test_set_uuid = "2b985803-d3c9-4629-8a4f-634bfecba0e8";
908 				break;
909 			case 3:
910 				test_set_uuid = "882fa433-1f4a-4a67-a962-c741888fe5f5";
911 				break;
912 			default:
913 				igt_debug("unsupported Skylake GT size\n");
914 				return false;
915 			}
916 		} else if (IS_BROXTON(devid)) {
917 			test_set_uuid = "5ee72f5c-092f-421e-8b70-225f7c3e9612";
918 		} else if (IS_KABYLAKE(devid)) {
919 			switch (intel_gt(devid)) {
920 			case 1:
921 				test_set_uuid = "baa3c7e4-52b6-4b85-801e-465a94b746dd";
922 				break;
923 			case 2:
924 				test_set_uuid = "f1792f32-6db2-4b50-b4b2-557128f1688d";
925 				break;
926 			default:
927 				igt_debug("unsupported Kabylake GT size\n");
928 				return false;
929 			}
930 		} else if (IS_GEMINILAKE(devid)) {
931 			test_set_uuid = "dd3fd789-e783-4204-8cd0-b671bbccb0cf";
932 		} else if (IS_COFFEELAKE(devid)) {
933 			switch (intel_gt(devid)) {
934 			case 1:
935 				test_set_uuid = "74fb4902-d3d3-4237-9e90-cbdc68d0a446";
936 				break;
937 			case 2:
938 				test_set_uuid = "577e8e2c-3fa0-4875-8743-3538d585e3b0";
939 				break;
940 			default:
941 				igt_debug("unsupported Coffeelake GT size\n");
942 				return false;
943 			}
944 		} else if (IS_CANNONLAKE(devid)) {
945 			test_set_uuid = "db41edd4-d8e7-4730-ad11-b9a2d6833503";
946 		} else if (IS_ICELAKE(devid)) {
947 			test_set_uuid = "a291665e-244b-4b76-9b9a-01de9d3c8068";
948 		} else {
949 			igt_debug("unsupported GT\n");
950 			return false;
951 		}
952 
953 		gp.param = I915_PARAM_EU_TOTAL;
954 		gp.value = &n_eus;
955 		do_ioctl(drm_fd, DRM_IOCTL_I915_GETPARAM, &gp);
956 	}
957 
958 	igt_debug("%s metric set UUID = %s\n",
959 		  test_set_name,
960 		  test_set_uuid);
961 
962 	oa_exp_1_millisec = max_oa_exponent_for_period_lte(1000000);
963 
964 	snprintf(buf, sizeof(buf), "metrics/%s/id", test_set_uuid);
965 
966 	return try_sysfs_read_u64(buf, &test_metric_set_id);
967 }
968 
969 static int
i915_read_reports_until_timestamp(enum drm_i915_oa_format oa_format,uint8_t * buf,uint32_t max_size,uint32_t start_timestamp,uint32_t end_timestamp)970 i915_read_reports_until_timestamp(enum drm_i915_oa_format oa_format,
971 				  uint8_t *buf,
972 				  uint32_t max_size,
973 				  uint32_t start_timestamp,
974 				  uint32_t end_timestamp)
975 {
976 	size_t format_size = get_oa_format(oa_format).size;
977 	uint32_t last_seen_timestamp = start_timestamp;
978 	int total_len = 0;
979 
980 	while (last_seen_timestamp < end_timestamp) {
981 		int offset, len;
982 
983 		/* Running out of space. */
984 		if ((max_size - total_len) < format_size) {
985 			igt_warn("run out of space before reaching "
986 				 "end timestamp (%u/%u)\n",
987 				 last_seen_timestamp, end_timestamp);
988 			return -1;
989 		}
990 
991 		while ((len = read(stream_fd, &buf[total_len],
992 				   max_size - total_len)) < 0 &&
993 		       errno == EINTR)
994 			;
995 
996 		/* Intentionally return an error. */
997 		if (len <= 0) {
998 			if (errno == EAGAIN)
999 				return total_len;
1000 			else {
1001 				igt_warn("error read OA stream : %i\n", errno);
1002 				return -1;
1003 			}
1004 		}
1005 
1006 		offset = total_len;
1007 		total_len += len;
1008 
1009 		while (offset < total_len) {
1010 			const struct drm_i915_perf_record_header *header =
1011 				(const struct drm_i915_perf_record_header *) &buf[offset];
1012 			uint32_t *report = (uint32_t *) (header + 1);
1013 
1014 			if (header->type == DRM_I915_PERF_RECORD_SAMPLE)
1015 				last_seen_timestamp = report[1];
1016 
1017 			offset += header->size;
1018 		}
1019 	}
1020 
1021 	return total_len;
1022 }
1023 
1024 /* CAP_SYS_ADMIN is required to open system wide metrics, unless the system
1025  * control parameter dev.i915.perf_stream_paranoid == 0 */
1026 static void
test_system_wide_paranoid(void)1027 test_system_wide_paranoid(void)
1028 {
1029 	igt_fork(child, 1) {
1030 		uint64_t properties[] = {
1031 			/* Include OA reports in samples */
1032 			DRM_I915_PERF_PROP_SAMPLE_OA, true,
1033 
1034 			/* OA unit configuration */
1035 			DRM_I915_PERF_PROP_OA_METRICS_SET, test_metric_set_id,
1036 			DRM_I915_PERF_PROP_OA_FORMAT, test_oa_format,
1037 			DRM_I915_PERF_PROP_OA_EXPONENT, oa_exp_1_millisec,
1038 		};
1039 		struct drm_i915_perf_open_param param = {
1040 			.flags = I915_PERF_FLAG_FD_CLOEXEC |
1041 				I915_PERF_FLAG_FD_NONBLOCK,
1042 			.num_properties = sizeof(properties) / 16,
1043 			.properties_ptr = to_user_pointer(properties),
1044 		};
1045 
1046 		write_u64_file("/proc/sys/dev/i915/perf_stream_paranoid", 1);
1047 
1048 		igt_drop_root();
1049 
1050 		do_ioctl_err(drm_fd, DRM_IOCTL_I915_PERF_OPEN, &param, EACCES);
1051 	}
1052 
1053 	igt_waitchildren();
1054 
1055 	igt_fork(child, 1) {
1056 		uint64_t properties[] = {
1057 			/* Include OA reports in samples */
1058 			DRM_I915_PERF_PROP_SAMPLE_OA, true,
1059 
1060 			/* OA unit configuration */
1061 			DRM_I915_PERF_PROP_OA_METRICS_SET, test_metric_set_id,
1062 			DRM_I915_PERF_PROP_OA_FORMAT, test_oa_format,
1063 			DRM_I915_PERF_PROP_OA_EXPONENT, oa_exp_1_millisec,
1064 		};
1065 		struct drm_i915_perf_open_param param = {
1066 			.flags = I915_PERF_FLAG_FD_CLOEXEC |
1067 				I915_PERF_FLAG_FD_NONBLOCK,
1068 			.num_properties = sizeof(properties) / 16,
1069 			.properties_ptr = to_user_pointer(properties),
1070 		};
1071 		write_u64_file("/proc/sys/dev/i915/perf_stream_paranoid", 0);
1072 
1073 		igt_drop_root();
1074 
1075 		stream_fd = __perf_open(drm_fd, &param, false);
1076 		__perf_close(stream_fd);
1077 	}
1078 
1079 	igt_waitchildren();
1080 
1081 	/* leave in paranoid state */
1082 	write_u64_file("/proc/sys/dev/i915/perf_stream_paranoid", 1);
1083 }
1084 
1085 static void
test_invalid_open_flags(void)1086 test_invalid_open_flags(void)
1087 {
1088 	uint64_t properties[] = {
1089 		/* Include OA reports in samples */
1090 		DRM_I915_PERF_PROP_SAMPLE_OA, true,
1091 
1092 		/* OA unit configuration */
1093 		DRM_I915_PERF_PROP_OA_METRICS_SET, test_metric_set_id,
1094 		DRM_I915_PERF_PROP_OA_FORMAT, test_oa_format,
1095 		DRM_I915_PERF_PROP_OA_EXPONENT, oa_exp_1_millisec,
1096 	};
1097 	struct drm_i915_perf_open_param param = {
1098 		.flags = ~0, /* Undefined flag bits set! */
1099 		.num_properties = sizeof(properties) / 16,
1100 		.properties_ptr = to_user_pointer(properties),
1101 	};
1102 
1103 	do_ioctl_err(drm_fd, DRM_IOCTL_I915_PERF_OPEN, &param, EINVAL);
1104 }
1105 
1106 static void
test_invalid_oa_metric_set_id(void)1107 test_invalid_oa_metric_set_id(void)
1108 {
1109 	uint64_t properties[] = {
1110 		/* Include OA reports in samples */
1111 		DRM_I915_PERF_PROP_SAMPLE_OA, true,
1112 
1113 		/* OA unit configuration */
1114 		DRM_I915_PERF_PROP_OA_FORMAT, test_oa_format,
1115 		DRM_I915_PERF_PROP_OA_EXPONENT, oa_exp_1_millisec,
1116 		DRM_I915_PERF_PROP_OA_METRICS_SET, UINT64_MAX,
1117 	};
1118 	struct drm_i915_perf_open_param param = {
1119 		.flags = I915_PERF_FLAG_FD_CLOEXEC |
1120 			I915_PERF_FLAG_FD_NONBLOCK,
1121 		.num_properties = sizeof(properties) / 16,
1122 		.properties_ptr = to_user_pointer(properties),
1123 	};
1124 
1125 	do_ioctl_err(drm_fd, DRM_IOCTL_I915_PERF_OPEN, &param, EINVAL);
1126 
1127 	properties[ARRAY_SIZE(properties) - 1] = 0; /* ID 0 is also be reserved as invalid */
1128 	do_ioctl_err(drm_fd, DRM_IOCTL_I915_PERF_OPEN, &param, EINVAL);
1129 
1130 	/* Check that we aren't just seeing false positives... */
1131 	properties[ARRAY_SIZE(properties) - 1] = test_metric_set_id;
1132 	stream_fd = __perf_open(drm_fd, &param, false);
1133 	__perf_close(stream_fd);
1134 
1135 	/* There's no valid default OA metric set ID... */
1136 	param.num_properties--;
1137 	do_ioctl_err(drm_fd, DRM_IOCTL_I915_PERF_OPEN, &param, EINVAL);
1138 }
1139 
1140 static void
test_invalid_oa_format_id(void)1141 test_invalid_oa_format_id(void)
1142 {
1143 	uint64_t properties[] = {
1144 		/* Include OA reports in samples */
1145 		DRM_I915_PERF_PROP_SAMPLE_OA, true,
1146 
1147 		/* OA unit configuration */
1148 		DRM_I915_PERF_PROP_OA_METRICS_SET, test_metric_set_id,
1149 		DRM_I915_PERF_PROP_OA_EXPONENT, oa_exp_1_millisec,
1150 		DRM_I915_PERF_PROP_OA_FORMAT, UINT64_MAX,
1151 	};
1152 	struct drm_i915_perf_open_param param = {
1153 		.flags = I915_PERF_FLAG_FD_CLOEXEC |
1154 			I915_PERF_FLAG_FD_NONBLOCK,
1155 		.num_properties = sizeof(properties) / 16,
1156 		.properties_ptr = to_user_pointer(properties),
1157 	};
1158 
1159 	do_ioctl_err(drm_fd, DRM_IOCTL_I915_PERF_OPEN, &param, EINVAL);
1160 
1161 	properties[ARRAY_SIZE(properties) - 1] = 0; /* ID 0 is also be reserved as invalid */
1162 	do_ioctl_err(drm_fd, DRM_IOCTL_I915_PERF_OPEN, &param, EINVAL);
1163 
1164 	/* Check that we aren't just seeing false positives... */
1165 	properties[ARRAY_SIZE(properties) - 1] = test_oa_format;
1166 	stream_fd = __perf_open(drm_fd, &param, false);
1167 	__perf_close(stream_fd);
1168 
1169 	/* There's no valid default OA format... */
1170 	param.num_properties--;
1171 	do_ioctl_err(drm_fd, DRM_IOCTL_I915_PERF_OPEN, &param, EINVAL);
1172 }
1173 
1174 static void
test_missing_sample_flags(void)1175 test_missing_sample_flags(void)
1176 {
1177 	uint64_t properties[] = {
1178 		/* No _PROP_SAMPLE_xyz flags */
1179 
1180 		/* OA unit configuration */
1181 		DRM_I915_PERF_PROP_OA_METRICS_SET, test_metric_set_id,
1182 		DRM_I915_PERF_PROP_OA_EXPONENT, oa_exp_1_millisec,
1183 		DRM_I915_PERF_PROP_OA_FORMAT, test_oa_format,
1184 	};
1185 	struct drm_i915_perf_open_param param = {
1186 		.flags = I915_PERF_FLAG_FD_CLOEXEC,
1187 		.num_properties = sizeof(properties) / 16,
1188 		.properties_ptr = to_user_pointer(properties),
1189 	};
1190 
1191 	do_ioctl_err(drm_fd, DRM_IOCTL_I915_PERF_OPEN, &param, EINVAL);
1192 }
1193 
/* Read from stream_fd until two OA sample reports (of format_id's size) have
 * been captured into oa_report0/oa_report1. With timer_only, skip samples
 * that weren't triggered by the periodic timer configured with the given
 * exponent. Collection restarts on a _REPORT_LOST notification and asserts
 * on any unexpected record type; gives up (assert) after 1000 reads.
 */
static void
read_2_oa_reports(int format_id,
		  int exponent,
		  uint32_t *oa_report0,
		  uint32_t *oa_report1,
		  bool timer_only)
{
	size_t format_size = get_oa_format(format_id).size;
	size_t sample_size = (sizeof(struct drm_i915_perf_record_header) +
			      format_size);
	const struct drm_i915_perf_record_header *header;
	uint32_t exponent_mask = (1 << (exponent + 1)) - 1;

	/* Note: we allocate a large buffer so that each read() iteration
	 * should scrape *all* pending records.
	 *
	 * The largest buffer the OA unit supports is 16MB.
	 *
	 * Being sure we are fetching all buffered reports allows us to
	 * potentially throw away / skip all reports whenever we see
	 * a _REPORT_LOST notification as a way of being sure our
	 * measurements aren't skewed by a lost report.
	 *
	 * Note: this is useful for some tests but also not something
	 * applications would be expected to resort to. Lost reports are
	 * somewhat unpredictable but typically don't pose a problem - except
	 * to indicate that the OA unit may be over taxed if lots of reports
	 * are being lost.
	 */
	int max_reports = MAX_OA_BUF_SIZE / format_size;
	int buf_size = sample_size * max_reports * 1.5;
	uint8_t *buf = malloc(buf_size);
	int n = 0;	/* number of reports captured so far (0, 1 or 2) */

	for (int i = 0; i < 1000; i++) {
		ssize_t len;

		/* Retry reads interrupted by signals. */
		while ((len = read(stream_fd, buf, buf_size)) < 0 &&
		       errno == EINTR)
			;

		igt_assert(len > 0);
		igt_debug("read %d bytes\n", (int)len);

		/* Walk the variable-sized records in the returned buffer. */
		for (size_t offset = 0; offset < len; offset += header->size) {
			const uint32_t *report;

			header = (void *)(buf + offset);

			igt_assert_eq(header->pad, 0); /* Reserved */

			/* Currently the only test that should ever expect to
			 * see a _BUFFER_LOST error is the buffer_fill test,
			 * otherwise something bad has probably happened...
			 */
			igt_assert_neq(header->type, DRM_I915_PERF_RECORD_OA_BUFFER_LOST);

			/* At high sampling frequencies the OA HW might not be
			 * able to cope with all write requests and will notify
			 * us that a report was lost. We restart our read of
			 * two sequential reports due to the timeline blip this
			 * implies
			 */
			if (header->type == DRM_I915_PERF_RECORD_OA_REPORT_LOST) {
				igt_debug("read restart: OA trigger collision / report lost\n");
				n = 0;

				/* XXX: break, because we don't know where
				 * within the series of already read reports
				 * there could be a blip from the lost report.
				 */
				break;
			}

			/* Currently the only other record type expected is a
			 * _SAMPLE. Notably this test will need updating if
			 * i915-perf is extended in the future with additional
			 * record types.
			 */
			igt_assert_eq(header->type, DRM_I915_PERF_RECORD_SAMPLE);

			igt_assert_eq(header->size, sample_size);

			report = (const void *)(header + 1);

			igt_debug("read report: reason = %x, timestamp = %x, exponent mask=%x\n",
				  report[0], report[1], exponent_mask);

			/* Don't expect zero for timestamps */
			igt_assert_neq(report[1], 0);

			if (timer_only) {
				if (!oa_report_is_periodic(exponent, report)) {
					igt_debug("skipping non timer report\n");
					continue;
				}
			}

			if (n++ == 0)
				memcpy(oa_report0, report, format_size);
			else {
				memcpy(oa_report1, report, format_size);
				free(buf);
				return;
			}
		}
	}

	free(buf);

	igt_assert(!"reached");
}
1306 
1307 static void
open_and_read_2_oa_reports(int format_id,int exponent,uint32_t * oa_report0,uint32_t * oa_report1,bool timer_only)1308 open_and_read_2_oa_reports(int format_id,
1309 			   int exponent,
1310 			   uint32_t *oa_report0,
1311 			   uint32_t *oa_report1,
1312 			   bool timer_only)
1313 {
1314 	uint64_t properties[] = {
1315 		/* Include OA reports in samples */
1316 		DRM_I915_PERF_PROP_SAMPLE_OA, true,
1317 
1318 		/* OA unit configuration */
1319 		DRM_I915_PERF_PROP_OA_METRICS_SET, test_metric_set_id,
1320 		DRM_I915_PERF_PROP_OA_FORMAT, format_id,
1321 		DRM_I915_PERF_PROP_OA_EXPONENT, exponent,
1322 
1323 	};
1324 	struct drm_i915_perf_open_param param = {
1325 		.flags = I915_PERF_FLAG_FD_CLOEXEC,
1326 		.num_properties = sizeof(properties) / 16,
1327 		.properties_ptr = to_user_pointer(properties),
1328 	};
1329 
1330 	stream_fd = __perf_open(drm_fd, &param, false);
1331 
1332 	read_2_oa_reports(format_id, exponent,
1333 			  oa_report0, oa_report1, timer_only);
1334 
1335 	__perf_close(stream_fd);
1336 }
1337 
1338 static void
print_reports(uint32_t * oa_report0,uint32_t * oa_report1,int fmt)1339 print_reports(uint32_t *oa_report0, uint32_t *oa_report1, int fmt)
1340 {
1341 	struct oa_format format = get_oa_format(fmt);
1342 
1343 	igt_debug("TIMESTAMP: 1st = %"PRIu32", 2nd = %"PRIu32", delta = %"PRIu32"\n",
1344 		  oa_report0[1], oa_report1[1], oa_report1[1] - oa_report0[1]);
1345 
1346 	if (IS_HASWELL(devid) && format.n_c == 0) {
1347 		igt_debug("CLOCK = N/A\n");
1348 	} else {
1349 		uint32_t clock0 = read_report_ticks(oa_report0, fmt);
1350 		uint32_t clock1 = read_report_ticks(oa_report1, fmt);
1351 
1352 		igt_debug("CLOCK: 1st = %"PRIu32", 2nd = %"PRIu32", delta = %"PRIu32"\n",
1353 			  clock0, clock1, clock1 - clock0);
1354 	}
1355 
1356 	if (intel_gen(devid) >= 8) {
1357 		uint32_t slice_freq0, slice_freq1, unslice_freq0, unslice_freq1;
1358 		const char *reason0 = gen8_read_report_reason(oa_report0);
1359 		const char *reason1 = gen8_read_report_reason(oa_report1);
1360 
1361 		igt_debug("CTX ID: 1st = %"PRIu32", 2nd = %"PRIu32"\n",
1362 			  oa_report0[2], oa_report1[2]);
1363 
1364 		gen8_read_report_clock_ratios(oa_report0,
1365 					      &slice_freq0, &unslice_freq0);
1366 		gen8_read_report_clock_ratios(oa_report1,
1367 					      &slice_freq1, &unslice_freq1);
1368 
1369 		igt_debug("SLICE CLK: 1st = %umhz, 2nd = %umhz, delta = %d\n",
1370 			  slice_freq0, slice_freq1,
1371 			  ((int)slice_freq1 - (int)slice_freq0));
1372 		igt_debug("UNSLICE CLK: 1st = %umhz, 2nd = %umhz, delta = %d\n",
1373 			  unslice_freq0, unslice_freq1,
1374 			  ((int)unslice_freq1 - (int)unslice_freq0));
1375 
1376 		igt_debug("REASONS: 1st = \"%s\", 2nd = \"%s\"\n", reason0, reason1);
1377 	}
1378 
1379 	/* Gen8+ has some 40bit A counters... */
1380 	for (int j = 0; j < format.n_a40; j++) {
1381 		uint64_t value0 = gen8_read_40bit_a_counter(oa_report0, fmt, j);
1382 		uint64_t value1 = gen8_read_40bit_a_counter(oa_report1, fmt, j);
1383 		uint64_t delta = gen8_40bit_a_delta(value0, value1);
1384 
1385 		if (undefined_a_counters[j])
1386 			continue;
1387 
1388 		igt_debug("A%d: 1st = %"PRIu64", 2nd = %"PRIu64", delta = %"PRIu64"\n",
1389 			  j, value0, value1, delta);
1390 	}
1391 
1392 	for (int j = 0; j < format.n_a; j++) {
1393 		uint32_t *a0 = (uint32_t *)(((uint8_t *)oa_report0) +
1394 					    format.a_off);
1395 		uint32_t *a1 = (uint32_t *)(((uint8_t *)oa_report1) +
1396 					    format.a_off);
1397 		int a_id = format.first_a + j;
1398 		uint32_t delta = a1[j] - a0[j];
1399 
1400 		if (undefined_a_counters[a_id])
1401 			continue;
1402 
1403 		igt_debug("A%d: 1st = %"PRIu32", 2nd = %"PRIu32", delta = %"PRIu32"\n",
1404 			  a_id, a0[j], a1[j], delta);
1405 	}
1406 
1407 	for (int j = 0; j < format.n_b; j++) {
1408 		uint32_t *b0 = (uint32_t *)(((uint8_t *)oa_report0) +
1409 					    format.b_off);
1410 		uint32_t *b1 = (uint32_t *)(((uint8_t *)oa_report1) +
1411 					    format.b_off);
1412 		uint32_t delta = b1[j] - b0[j];
1413 
1414 		igt_debug("B%d: 1st = %"PRIu32", 2nd = %"PRIu32", delta = %"PRIu32"\n",
1415 			  j, b0[j], b1[j], delta);
1416 	}
1417 
1418 	for (int j = 0; j < format.n_c; j++) {
1419 		uint32_t *c0 = (uint32_t *)(((uint8_t *)oa_report0) +
1420 					    format.c_off);
1421 		uint32_t *c1 = (uint32_t *)(((uint8_t *)oa_report1) +
1422 					    format.c_off);
1423 		uint32_t delta = c1[j] - c0[j];
1424 
1425 		igt_debug("C%d: 1st = %"PRIu32", 2nd = %"PRIu32", delta = %"PRIu32"\n",
1426 			  j, c0[j], c1[j], delta);
1427 	}
1428 }
1429 
/* Debug function, only useful when reports don't make sense; compiled out by
 * default — flip the #if to enable it. Dumps all decoded fields of a single
 * raw OA report for the given format id.
 */
#if 0
static void
print_report(uint32_t *report, int fmt)
{
	struct oa_format format = get_oa_format(fmt);

	igt_debug("TIMESTAMP: %"PRIu32"\n", report[1]);

	if (IS_HASWELL(devid) && format.n_c == 0) {
		igt_debug("CLOCK = N/A\n");
	} else {
		uint32_t clock = read_report_ticks(report, fmt);

		igt_debug("CLOCK: %"PRIu32"\n", clock);
	}

	if (intel_gen(devid) >= 8) {
		uint32_t slice_freq, unslice_freq;
		const char *reason = gen8_read_report_reason(report);

		gen8_read_report_clock_ratios(report, &slice_freq, &unslice_freq);

		igt_debug("SLICE CLK: %umhz\n", slice_freq);
		igt_debug("UNSLICE CLK: %umhz\n", unslice_freq);
		igt_debug("REASON: \"%s\"\n", reason);
		igt_debug("CTX ID: %"PRIu32"/%"PRIx32"\n", report[2], report[2]);
	}

	/* Gen8+ has some 40bit A counters... */
	for (int j = 0; j < format.n_a40; j++) {
		uint64_t value = gen8_read_40bit_a_counter(report, fmt, j);

		if (undefined_a_counters[j])
			continue;

		igt_debug("A%d: %"PRIu64"\n", j, value);
	}

	for (int j = 0; j < format.n_a; j++) {
		uint32_t *a = (uint32_t *)(((uint8_t *)report) +
					   format.a_off);
		int a_id = format.first_a + j;

		if (undefined_a_counters[a_id])
			continue;

		igt_debug("A%d: %"PRIu32"\n", a_id, a[j]);
	}

	for (int j = 0; j < format.n_b; j++) {
		uint32_t *b = (uint32_t *)(((uint8_t *)report) +
					   format.b_off);

		igt_debug("B%d: %"PRIu32"\n", j, b[j]);
	}

	for (int j = 0; j < format.n_c; j++) {
		uint32_t *c = (uint32_t *)(((uint8_t *)report) +
					   format.c_off);

		igt_debug("C%d: %"PRIu32"\n", j, c[j]);
	}
}
#endif
1495 
/* For every OA format the driver knows about, open a stream, capture two
 * reports and sanity check that the decoded counters make sense.
 */
static void
test_oa_formats(void)
{
	for (int i = 0; i < I915_OA_FORMAT_MAX; i++) {
		struct oa_format format = get_oa_format(i);
		uint32_t oa_report0[64];
		uint32_t oa_report1[64];

		if (!format.name) /* sparse, indexed by ID */
			continue;

		igt_debug("Checking OA format %s\n", format.name);

		open_and_read_2_oa_reports(i,
					   oa_exp_1_millisec,
					   oa_report0,
					   oa_report1,
					   false); /* timer_only = false: accept any report */

		print_reports(oa_report0, oa_report1, i);
		sanity_check_reports(oa_report0, oa_report1, i);
	}
}
1519 
1520 
/* Load level requested of the forked GPU load helper. */
enum load {
	LOW,
	HIGH
};

/* Sleep inserted between copies when the helper runs at LOW load. */
#define LOAD_HELPER_PAUSE_USEC 500

/* State shared with the forked GPU load helper (see load_helper_*()). */
static struct load_helper {
	int devid;
	drm_intel_bufmgr *bufmgr;
	drm_intel_context *context;
	uint32_t context_id;	/* refreshed each loop via drm_intel_gem_context_get_id() */
	struct intel_batchbuffer *batch;
	enum load load;		/* toggled from the SIGUSR2 handler */
	bool exit;		/* set by the signal handler to stop the loop */
	struct igt_helper_process igt_proc;
	struct igt_buf src, dst;	/* 1920x1080 scratch surfaces for render_copy */
} lh = { 0, };
1539 
/* Helper-process signal handler: SIGUSR2 toggles between LOW and HIGH load,
 * any other registered signal (SIGUSR1) requests exit.
 *
 * NOTE(review): lh.load/lh.exit are plain fields (not volatile
 * sig_atomic_t) written from a signal handler and polled by the helper's
 * loop — works in practice here, but strictly unspecified; confirm.
 */
static void load_helper_signal_handler(int sig)
{
	if (sig == SIGUSR2)
		lh.load = lh.load == LOW ? HIGH : LOW;
	else
		lh.exit = true;
}
1547 
load_helper_set_load(enum load load)1548 static void load_helper_set_load(enum load load)
1549 {
1550 	igt_assert(lh.igt_proc.running);
1551 
1552 	if (lh.load == load)
1553 		return;
1554 
1555 	lh.load = load;
1556 	kill(lh.igt_proc.pid, SIGUSR2);
1557 }
1558 
/* Fork the GPU load helper (or retune it if already running): a child that
 * loops submitting 1920x1080 render copies, optionally pausing between
 * submissions when at LOW load.
 */
static void load_helper_run(enum load load)
{
	/*
	 * FIXME fork helpers won't get cleaned up when started from within a
	 * subtest, so handle the case where it sticks around a bit too long.
	 */
	if (lh.igt_proc.running) {
		load_helper_set_load(load);
		return;
	}

	lh.load = load;

	igt_fork_helper(&lh.igt_proc) {
		/* SIGUSR1 = exit, SIGUSR2 = toggle load (see handler). */
		signal(SIGUSR1, load_helper_signal_handler);
		signal(SIGUSR2, load_helper_signal_handler);

		while (!lh.exit) {
			int ret;

			render_copy(lh.batch,
				    lh.context,
				    &lh.src, 0, 0, 1920, 1080,
				    &lh.dst, 0, 0);

			intel_batchbuffer_flush_with_context(lh.batch,
							     lh.context);

			/* Refresh the cached context id for this context. */
			ret = drm_intel_gem_context_get_id(lh.context,
							   &lh.context_id);
			igt_assert_eq(ret, 0);

			/* Throttle to one outstanding copy at a time. */
			drm_intel_bo_wait_rendering(lh.dst.bo);

			/* Lower the load by pausing after every submitted
			 * write. */
			if (lh.load == LOW)
				usleep(LOAD_HELPER_PAUSE_USEC);
		}
	}
}
1600 
/* Signal the load helper to exit (SIGUSR1) and wait for it, asserting that
 * it terminated cleanly.
 */
static void load_helper_stop(void)
{
	kill(lh.igt_proc.pid, SIGUSR1);
	igt_assert(igt_wait_helper(&lh.igt_proc) == 0);
}
1606 
/* Set up the buffer manager, GPU context, batchbuffer and scratch surfaces
 * used by the load helper. Asserts on any allocation failure; requires
 * gen6+ (see comment below).
 */
static void load_helper_init(void)
{
	int ret;

	lh.devid = intel_get_drm_devid(drm_fd);

	/* MI_STORE_DATA can only use GTT address on gen4+/g33 and needs
	 * snoopable mem on pre-gen6. Hence load-helper only works on gen6+, but
	 * that's also all we care about for the rps testcase*/
	igt_assert(intel_gen(lh.devid) >= 6);
	lh.bufmgr = drm_intel_bufmgr_gem_init(drm_fd, 4096);
	igt_assert(lh.bufmgr);

	drm_intel_bufmgr_gem_enable_reuse(lh.bufmgr);

	lh.context = drm_intel_gem_context_create(lh.bufmgr);
	igt_assert(lh.context);

	/* Sanity check that a real context id is returned: seed with an
	 * invalid sentinel and verify it changed. */
	lh.context_id = 0xffffffff;
	ret = drm_intel_gem_context_get_id(lh.context, &lh.context_id);
	igt_assert_eq(ret, 0);
	igt_assert_neq(lh.context_id, 0xffffffff);

	lh.batch = intel_batchbuffer_alloc(lh.bufmgr, lh.devid);
	igt_assert(lh.batch);

	/* 1920x1080 source/destination surfaces for render_copy. */
	scratch_buf_init(lh.bufmgr, &lh.dst, 1920, 1080, 0);
	scratch_buf_init(lh.bufmgr, &lh.src, 1920, 1080, 0);
}
1636 
/* Tear down everything load_helper_init() created, stopping the helper
 * process first. Buffers are released before the batch, context and bufmgr
 * they were created from; each step is guarded so partial init is safe.
 */
static void load_helper_fini(void)
{
	if (lh.igt_proc.running)
		load_helper_stop();

	if (lh.src.bo)
		drm_intel_bo_unreference(lh.src.bo);
	if (lh.dst.bo)
		drm_intel_bo_unreference(lh.dst.bo);

	if (lh.batch)
		intel_batchbuffer_free(lh.batch);

	if (lh.context)
		drm_intel_gem_context_destroy(lh.context);

	if (lh.bufmgr)
		drm_intel_bufmgr_destroy(lh.bufmgr);
}
1656 
expected_report_timing_delta(uint32_t delta,uint32_t expected_delta)1657 static bool expected_report_timing_delta(uint32_t delta, uint32_t expected_delta)
1658 {
1659 	/*
1660 	 * On ICL, the OA unit appears to be a bit more relaxed about
1661 	 * its timing for emitting OA reports (often missing the
1662 	 * deadline by 1 timestamp).
1663 	 */
1664 	if (IS_ICELAKE(devid))
1665 		return delta <= (expected_delta + 3);
1666 	else
1667 		return delta <= expected_delta;
1668 }
1669 
1670 static void
test_oa_exponents(void)1671 test_oa_exponents(void)
1672 {
1673 	load_helper_init();
1674 	load_helper_run(HIGH);
1675 
1676 	/* It's asking a lot to sample with a 160 nanosecond period and the
1677 	 * test can fail due to buffer overflows if it wasn't possible to
1678 	 * keep up, so we don't start from an exponent of zero...
1679 	 */
1680 	for (int exponent = 5; exponent < 20; exponent++) {
1681 		uint64_t properties[] = {
1682 			/* Include OA reports in samples */
1683 			DRM_I915_PERF_PROP_SAMPLE_OA, true,
1684 
1685 			/* OA unit configuration */
1686 			DRM_I915_PERF_PROP_OA_METRICS_SET, test_metric_set_id,
1687 			DRM_I915_PERF_PROP_OA_FORMAT, test_oa_format,
1688 			DRM_I915_PERF_PROP_OA_EXPONENT, exponent,
1689 		};
1690 		struct drm_i915_perf_open_param param = {
1691 			.flags = I915_PERF_FLAG_FD_CLOEXEC,
1692 			.num_properties = ARRAY_SIZE(properties) / 2,
1693 			.properties_ptr = to_user_pointer(properties),
1694 		};
1695 		uint64_t expected_timestamp_delta = 2ULL << exponent;
1696 		size_t format_size = get_oa_format(test_oa_format).size;
1697 		size_t sample_size = (sizeof(struct drm_i915_perf_record_header) +
1698 				      format_size);
1699 		int max_reports = MAX_OA_BUF_SIZE / format_size;
1700 		int buf_size = sample_size * max_reports * 1.5;
1701 		uint8_t *buf = calloc(1, buf_size);
1702 		int ret, n_timer_reports = 0;
1703 		uint32_t matches = 0;
1704 		struct {
1705 			uint32_t report[64];
1706 		} timer_reports[30];
1707 
1708 		igt_debug("testing OA exponent %d,"
1709 			  " expected ts delta = %"PRIu64" (%"PRIu64"ns/%.2fus/%.2fms)\n",
1710 			  exponent, expected_timestamp_delta,
1711 			  oa_exponent_to_ns(exponent),
1712 			  oa_exponent_to_ns(exponent) / 1000.0,
1713 			  oa_exponent_to_ns(exponent) / (1000.0 * 1000.0));
1714 
1715 		stream_fd = __perf_open(drm_fd, &param, true /* prevent_pm */);
1716 
1717 		while (n_timer_reports < ARRAY_SIZE(timer_reports)) {
1718 			struct drm_i915_perf_record_header *header;
1719 
1720 			while ((ret = read(stream_fd, buf, buf_size)) < 0 &&
1721 			       errno == EINTR)
1722 				;
1723 
1724 			/* igt_debug(" > read %i bytes\n", ret); */
1725 
1726 			/* We should never have no data. */
1727 			igt_assert(ret > 0);
1728 
1729 			for (int offset = 0;
1730 			     offset < ret && n_timer_reports < ARRAY_SIZE(timer_reports);
1731 			     offset += header->size) {
1732 				uint32_t *report;
1733 
1734 				header = (void *)(buf + offset);
1735 
1736 				if (header->type == DRM_I915_PERF_RECORD_OA_BUFFER_LOST) {
1737 					igt_assert(!"reached");
1738 					break;
1739 				}
1740 
1741 				if (header->type == DRM_I915_PERF_RECORD_OA_REPORT_LOST)
1742 					igt_debug("report loss\n");
1743 
1744 				if (header->type != DRM_I915_PERF_RECORD_SAMPLE)
1745 					continue;
1746 
1747 				report = (void *)(header + 1);
1748 
1749 				if (!oa_report_is_periodic(exponent, report))
1750 					continue;
1751 
1752 				memcpy(timer_reports[n_timer_reports].report, report,
1753 				       sizeof(timer_reports[n_timer_reports].report));
1754 				n_timer_reports++;
1755 			}
1756 		}
1757 
1758 		__perf_close(stream_fd);
1759 
1760 		igt_debug("report%04i ts=%08x hw_id=0x%08x\n", 0,
1761 			  timer_reports[0].report[1],
1762 			  oa_report_get_ctx_id(timer_reports[0].report));
1763 		for (int i = 1; i < n_timer_reports; i++) {
1764 			uint32_t delta =
1765 				timer_reports[i].report[1] - timer_reports[i - 1].report[1];
1766 
1767 			igt_debug("report%04i ts=%08x hw_id=0x%08x delta=%u %s\n", i,
1768 				  timer_reports[i].report[1],
1769 				  oa_report_get_ctx_id(timer_reports[i].report),
1770 				  delta, expected_report_timing_delta(delta,
1771 								      expected_timestamp_delta) ? "" : "******");
1772 
1773 			matches += expected_report_timing_delta(delta,expected_timestamp_delta);
1774 		}
1775 
1776 		igt_debug("matches=%u/%u\n", matches, n_timer_reports - 1);
1777 
1778 		/* Allow for a couple of errors. */
1779 		igt_assert_lte(n_timer_reports - 3, matches);
1780 	}
1781 
1782 	load_helper_stop();
1783 	load_helper_fini();
1784 }
1785 
1786 /* The OA exponent selects a timestamp counter bit to trigger reports on.
1787  *
1788  * With a 64bit timestamp and least significant bit approx == 80ns then the MSB
1789  * equates to > 40 thousand years and isn't exposed via the i915 perf interface.
1790  *
1791  * The max exponent exposed is expected to be 31, which is still a fairly
1792  * ridiculous period (>5min) but is the maximum exponent where it's still
1793  * possible to use periodic sampling as a means for tracking the overflow of
1794  * 32bit OA report timestamps.
1795  */
1796 static void
test_invalid_oa_exponent(void)1797 test_invalid_oa_exponent(void)
1798 {
1799 	uint64_t properties[] = {
1800 		/* Include OA reports in samples */
1801 		DRM_I915_PERF_PROP_SAMPLE_OA, true,
1802 
1803 		/* OA unit configuration */
1804 		DRM_I915_PERF_PROP_OA_METRICS_SET, test_metric_set_id,
1805 		DRM_I915_PERF_PROP_OA_FORMAT, test_oa_format,
1806 		DRM_I915_PERF_PROP_OA_EXPONENT, 31, /* maximum exponent expected
1807 						       to be accepted */
1808 	};
1809 	struct drm_i915_perf_open_param param = {
1810 		.flags = I915_PERF_FLAG_FD_CLOEXEC,
1811 		.num_properties = sizeof(properties) / 16,
1812 		.properties_ptr = to_user_pointer(properties),
1813 	};
1814 
1815 	stream_fd = __perf_open(drm_fd, &param, false);
1816 
1817 	__perf_close(stream_fd);
1818 
1819 	for (int i = 32; i < 65; i++) {
1820 		properties[7] = i;
1821 		do_ioctl_err(drm_fd, DRM_IOCTL_I915_PERF_OPEN, &param, EINVAL);
1822 	}
1823 }
1824 
1825 /* The lowest periodic sampling exponent equates to a period of 160 nanoseconds
1826  * or a frequency of 6.25MHz which is only possible to request as root by
1827  * default. By default the maximum OA sampling rate is 100KHz
1828  */
1829 static void
test_low_oa_exponent_permissions(void)1830 test_low_oa_exponent_permissions(void)
1831 {
1832 	int max_freq = read_u64_file("/proc/sys/dev/i915/oa_max_sample_rate");
1833 	int bad_exponent = max_oa_exponent_for_freq_gt(max_freq);
1834 	int ok_exponent = bad_exponent + 1;
1835 	uint64_t properties[] = {
1836 		/* Include OA reports in samples */
1837 		DRM_I915_PERF_PROP_SAMPLE_OA, true,
1838 
1839 		/* OA unit configuration */
1840 		DRM_I915_PERF_PROP_OA_METRICS_SET, test_metric_set_id,
1841 		DRM_I915_PERF_PROP_OA_FORMAT, test_oa_format,
1842 		DRM_I915_PERF_PROP_OA_EXPONENT, bad_exponent,
1843 	};
1844 	struct drm_i915_perf_open_param param = {
1845 		.flags = I915_PERF_FLAG_FD_CLOEXEC,
1846 		.num_properties = sizeof(properties) / 16,
1847 		.properties_ptr = to_user_pointer(properties),
1848 	};
1849 	uint64_t oa_period, oa_freq;
1850 
1851 	igt_assert_eq(max_freq, 100000);
1852 
1853 	/* Avoid EACCES errors opening a stream without CAP_SYS_ADMIN */
1854 	write_u64_file("/proc/sys/dev/i915/perf_stream_paranoid", 0);
1855 
1856 	igt_fork(child, 1) {
1857 		igt_drop_root();
1858 
1859 		do_ioctl_err(drm_fd, DRM_IOCTL_I915_PERF_OPEN, &param, EACCES);
1860 	}
1861 
1862 	igt_waitchildren();
1863 
1864 	properties[7] = ok_exponent;
1865 
1866 	igt_fork(child, 1) {
1867 		igt_drop_root();
1868 
1869 		stream_fd = __perf_open(drm_fd, &param, false);
1870 		__perf_close(stream_fd);
1871 	}
1872 
1873 	igt_waitchildren();
1874 
1875 	oa_period = timebase_scale(2 << ok_exponent);
1876 	oa_freq = NSEC_PER_SEC / oa_period;
1877 	write_u64_file("/proc/sys/dev/i915/oa_max_sample_rate", oa_freq - 100);
1878 
1879 	igt_fork(child, 1) {
1880 		igt_drop_root();
1881 
1882 		do_ioctl_err(drm_fd, DRM_IOCTL_I915_PERF_OPEN, &param, EACCES);
1883 	}
1884 
1885 	igt_waitchildren();
1886 
1887 	/* restore the defaults */
1888 	write_u64_file("/proc/sys/dev/i915/oa_max_sample_rate", 100000);
1889 	write_u64_file("/proc/sys/dev/i915/perf_stream_paranoid", 1);
1890 }
1891 
1892 static void
test_per_context_mode_unprivileged(void)1893 test_per_context_mode_unprivileged(void)
1894 {
1895 	uint64_t properties[] = {
1896 		/* Single context sampling */
1897 		DRM_I915_PERF_PROP_CTX_HANDLE, UINT64_MAX, /* updated below */
1898 
1899 		/* Include OA reports in samples */
1900 		DRM_I915_PERF_PROP_SAMPLE_OA, true,
1901 
1902 		/* OA unit configuration */
1903 		DRM_I915_PERF_PROP_OA_METRICS_SET, test_metric_set_id,
1904 		DRM_I915_PERF_PROP_OA_FORMAT, test_oa_format,
1905 		DRM_I915_PERF_PROP_OA_EXPONENT, oa_exp_1_millisec,
1906 	};
1907 	struct drm_i915_perf_open_param param = {
1908 		.flags = I915_PERF_FLAG_FD_CLOEXEC,
1909 		.num_properties = sizeof(properties) / 16,
1910 		.properties_ptr = to_user_pointer(properties),
1911 	};
1912 
1913 	/* should be default, but just to be sure... */
1914 	write_u64_file("/proc/sys/dev/i915/perf_stream_paranoid", 1);
1915 
1916 	igt_fork(child, 1) {
1917 		drm_intel_context *context;
1918 		drm_intel_bufmgr *bufmgr;
1919 		uint32_t ctx_id = 0xffffffff; /* invalid id */
1920 		int ret;
1921 
1922 		igt_drop_root();
1923 
1924 		bufmgr = drm_intel_bufmgr_gem_init(drm_fd, 4096);
1925 		context = drm_intel_gem_context_create(bufmgr);
1926 
1927 		igt_assert(context);
1928 
1929 		ret = drm_intel_gem_context_get_id(context, &ctx_id);
1930 		igt_assert_eq(ret, 0);
1931 		igt_assert_neq(ctx_id, 0xffffffff);
1932 
1933 		properties[1] = ctx_id;
1934 
1935 		stream_fd = __perf_open(drm_fd, &param, false);
1936 		__perf_close(stream_fd);
1937 
1938 		drm_intel_gem_context_destroy(context);
1939 		drm_intel_bufmgr_destroy(bufmgr);
1940 	}
1941 
1942 	igt_waitchildren();
1943 }
1944 
1945 static int64_t
get_time(void)1946 get_time(void)
1947 {
1948 	struct timespec ts;
1949 
1950 	clock_gettime(CLOCK_MONOTONIC, &ts);
1951 
1952 	return ts.tv_sec * 1000000000 + ts.tv_nsec;
1953 }
1954 
1955 /* Note: The interface doesn't currently provide strict guarantees or control
1956  * over the upper bound for how long it might take for a POLLIN event after
1957  * some OA report is written by the OA unit.
1958  *
1959  * The plan is to add a property later that gives some control over the maximum
1960  * latency, but for now we expect it is tuned for a fairly low latency
1961  * suitable for applications wanting to provide live feedback for captured
1962  * metrics.
1963  *
1964  * At the time of writing this test the driver was using a fixed 200Hz hrtimer
1965  * regardless of the OA sampling exponent.
1966  *
1967  * There is no lower bound since a stream configured for periodic sampling may
1968  * still contain other automatically triggered reports.
1969  *
1970  * What we try and check for here is that blocking reads don't return EAGAIN
1971  * and that we aren't spending any significant time burning the cpu in
1972  * kernelspace.
1973  */
1974 static void
test_blocking(void)1975 test_blocking(void)
1976 {
1977 	/* ~40 milliseconds
1978 	 *
1979 	 * Having a period somewhat > sysconf(_SC_CLK_TCK) helps to stop
1980 	 * scheduling (liable to kick in when we make blocking poll()s/reads)
1981 	 * from interfering with the test.
1982 	 */
1983 	int oa_exponent = max_oa_exponent_for_period_lte(40000000);
1984 	uint64_t oa_period = oa_exponent_to_ns(oa_exponent);
1985 	uint64_t properties[] = {
1986 		/* Include OA reports in samples */
1987 		DRM_I915_PERF_PROP_SAMPLE_OA, true,
1988 
1989 		/* OA unit configuration */
1990 		DRM_I915_PERF_PROP_OA_METRICS_SET, test_metric_set_id,
1991 		DRM_I915_PERF_PROP_OA_FORMAT, test_oa_format,
1992 		DRM_I915_PERF_PROP_OA_EXPONENT, oa_exponent,
1993 	};
1994 	struct drm_i915_perf_open_param param = {
1995 		.flags = I915_PERF_FLAG_FD_CLOEXEC |
1996 			I915_PERF_FLAG_DISABLED,
1997 		.num_properties = sizeof(properties) / 16,
1998 		.properties_ptr = to_user_pointer(properties),
1999 	};
2000 	uint8_t buf[1024 * 1024];
2001 	struct tms start_times;
2002 	struct tms end_times;
2003 	int64_t user_ns, kernel_ns;
2004 	int64_t tick_ns = 1000000000 / sysconf(_SC_CLK_TCK);
2005 	int64_t test_duration_ns = tick_ns * 1000;
2006 
2007 	int max_iterations = (test_duration_ns / oa_period) + 2;
2008 	int n_extra_iterations = 0;
2009 
2010 	/* It's a bit tricky to put a lower limit here, but we expect a
2011 	 * relatively low latency for seeing reports, while we don't currently
2012 	 * give any control over this in the api.
2013 	 *
2014 	 * We assume a maximum latency of 6 millisecond to deliver a POLLIN and
2015 	 * read() after a new sample is written (46ms per iteration) considering
2016 	 * the knowledge that that the driver uses a 200Hz hrtimer (5ms period)
2017 	 * to check for data and giving some time to read().
2018 	 */
2019 	int min_iterations = (test_duration_ns / (oa_period + 6000000ull));
2020 
2021 	int64_t start, end;
2022 	int n = 0;
2023 
2024 	stream_fd = __perf_open(drm_fd, &param, true /* prevent_pm */);
2025 
2026 	times(&start_times);
2027 
2028 	igt_debug("tick length = %dns, test duration = %"PRIu64"ns, min iter. = %d,"
2029 		  " estimated max iter. = %d, oa_period = %"PRIu64"ns\n",
2030 		  (int)tick_ns, test_duration_ns,
2031 		  min_iterations, max_iterations, oa_period);
2032 
2033 	/* In the loop we perform blocking polls while the HW is sampling at
2034 	 * ~25Hz, with the expectation that we spend most of our time blocked
2035 	 * in the kernel, and shouldn't be burning cpu cycles in the kernel in
2036 	 * association with this process (verified by looking at stime before
2037 	 * and after loop).
2038 	 *
2039 	 * We're looking to assert that less than 1% of the test duration is
2040 	 * spent in the kernel dealing with polling and read()ing.
2041 	 *
2042 	 * The test runs for a relatively long time considering the very low
2043 	 * resolution of stime in ticks of typically 10 milliseconds. Since we
2044 	 * don't know the fractional part of tick values we read from userspace
2045 	 * so our minimum threshold needs to be >= one tick since any
2046 	 * measurement might really be +- tick_ns (assuming we effectively get
2047 	 * floor(real_stime)).
2048 	 *
2049 	 * We Loop for 1000 x tick_ns so one tick corresponds to 0.1%
2050 	 *
2051 	 * Also enable the stream just before poll/read to minimize
2052 	 * the error delta.
2053 	 */
2054 	start = get_time();
2055 	do_ioctl(stream_fd, I915_PERF_IOCTL_ENABLE, 0);
2056 	for (/* nop */; ((end = get_time()) - start) < test_duration_ns; /* nop */) {
2057 		struct drm_i915_perf_record_header *header;
2058 		bool timer_report_read = false;
2059 		bool non_timer_report_read = false;
2060 		int ret;
2061 
2062 		while ((ret = read(stream_fd, buf, sizeof(buf))) < 0 &&
2063 		       errno == EINTR)
2064 			;
2065 
2066 		igt_assert(ret > 0);
2067 
2068 		/* For Haswell reports don't contain a well defined reason
2069 		 * field we so assume all reports to be 'periodic'. For gen8+
2070 		 * we want to to consider that the HW automatically writes some
2071 		 * non periodic reports (e.g. on context switch) which might
2072 		 * lead to more successful read()s than expected due to
2073 		 * periodic sampling and we don't want these extra reads to
2074 		 * cause the test to fail...
2075 		 */
2076 		if (intel_gen(devid) >= 8) {
2077 			for (int offset = 0; offset < ret; offset += header->size) {
2078 				header = (void *)(buf + offset);
2079 
2080 				if (header->type == DRM_I915_PERF_RECORD_SAMPLE) {
2081 					uint32_t *report = (void *)(header + 1);
2082 
2083 					if (oa_report_is_periodic(oa_exponent,
2084 								  report))
2085 						timer_report_read = true;
2086 					else
2087 						non_timer_report_read = true;
2088 				}
2089 			}
2090 		}
2091 
2092 		if (non_timer_report_read && !timer_report_read)
2093 			n_extra_iterations++;
2094 
2095 		n++;
2096 	}
2097 
2098 	times(&end_times);
2099 
2100 	/* Using nanosecond units is fairly silly here, given the tick in-
2101 	 * precision - ah well, it's consistent with the get_time() units.
2102 	 */
2103 	user_ns = (end_times.tms_utime - start_times.tms_utime) * tick_ns;
2104 	kernel_ns = (end_times.tms_stime - start_times.tms_stime) * tick_ns;
2105 
2106 	igt_debug("%d blocking reads during test with ~25Hz OA sampling (expect no more than %d)\n",
2107 		  n, max_iterations);
2108 	igt_debug("%d extra iterations seen, not related to periodic sampling (e.g. context switches)\n",
2109 		  n_extra_iterations);
2110 	igt_debug("time in userspace = %"PRIu64"ns (+-%dns) (start utime = %d, end = %d)\n",
2111 		  user_ns, (int)tick_ns,
2112 		  (int)start_times.tms_utime, (int)end_times.tms_utime);
2113 	igt_debug("time in kernelspace = %"PRIu64"ns (+-%dns) (start stime = %d, end = %d)\n",
2114 		  kernel_ns, (int)tick_ns,
2115 		  (int)start_times.tms_stime, (int)end_times.tms_stime);
2116 
2117 	/* With completely broken blocking (but also not returning an error) we
2118 	 * could end up with an open loop,
2119 	 */
2120 	igt_assert(n <= (max_iterations + n_extra_iterations));
2121 
2122 	/* Make sure the driver is reporting new samples with a reasonably
2123 	 * low latency...
2124 	 */
2125 	igt_assert(n > (min_iterations + n_extra_iterations));
2126 
2127 	igt_assert(kernel_ns <= (test_duration_ns / 100ull));
2128 
2129 	__perf_close(stream_fd);
2130 }
2131 
2132 static void
test_polling(void)2133 test_polling(void)
2134 {
2135 	/* ~40 milliseconds
2136 	 *
2137 	 * Having a period somewhat > sysconf(_SC_CLK_TCK) helps to stop
2138 	 * scheduling (liable to kick in when we make blocking poll()s/reads)
2139 	 * from interfering with the test.
2140 	 */
2141 	int oa_exponent = max_oa_exponent_for_period_lte(40000000);
2142 	uint64_t oa_period = oa_exponent_to_ns(oa_exponent);
2143 	uint64_t properties[] = {
2144 		/* Include OA reports in samples */
2145 		DRM_I915_PERF_PROP_SAMPLE_OA, true,
2146 
2147 		/* OA unit configuration */
2148 		DRM_I915_PERF_PROP_OA_METRICS_SET, test_metric_set_id,
2149 		DRM_I915_PERF_PROP_OA_FORMAT, test_oa_format,
2150 		DRM_I915_PERF_PROP_OA_EXPONENT, oa_exponent,
2151 	};
2152 	struct drm_i915_perf_open_param param = {
2153 		.flags = I915_PERF_FLAG_FD_CLOEXEC |
2154 			I915_PERF_FLAG_DISABLED |
2155 			I915_PERF_FLAG_FD_NONBLOCK,
2156 		.num_properties = sizeof(properties) / 16,
2157 		.properties_ptr = to_user_pointer(properties),
2158 	};
2159 	uint8_t buf[1024 * 1024];
2160 	struct tms start_times;
2161 	struct tms end_times;
2162 	int64_t user_ns, kernel_ns;
2163 	int64_t tick_ns = 1000000000 / sysconf(_SC_CLK_TCK);
2164 	int64_t test_duration_ns = tick_ns * 1000;
2165 
2166 	int max_iterations = (test_duration_ns / oa_period) + 2;
2167 	int n_extra_iterations = 0;
2168 
2169 	/* It's a bit tricky to put a lower limit here, but we expect a
2170 	 * relatively low latency for seeing reports, while we don't currently
2171 	 * give any control over this in the api.
2172 	 *
2173 	 * We assume a maximum latency of 6 millisecond to deliver a POLLIN and
2174 	 * read() after a new sample is written (46ms per iteration) considering
2175 	 * the knowledge that that the driver uses a 200Hz hrtimer (5ms period)
2176 	 * to check for data and giving some time to read().
2177 	 */
2178 	int min_iterations = (test_duration_ns / (oa_period + 6000000ull));
2179 	int64_t start, end;
2180 	int n = 0;
2181 
2182 	stream_fd = __perf_open(drm_fd, &param, true /* prevent_pm */);
2183 
2184 	times(&start_times);
2185 
2186 	igt_debug("tick length = %dns, test duration = %"PRIu64"ns, min iter. = %d, max iter. = %d\n",
2187 		  (int)tick_ns, test_duration_ns,
2188 		  min_iterations, max_iterations);
2189 
2190 	/* In the loop we perform blocking polls while the HW is sampling at
2191 	 * ~25Hz, with the expectation that we spend most of our time blocked
2192 	 * in the kernel, and shouldn't be burning cpu cycles in the kernel in
2193 	 * association with this process (verified by looking at stime before
2194 	 * and after loop).
2195 	 *
2196 	 * We're looking to assert that less than 1% of the test duration is
2197 	 * spent in the kernel dealing with polling and read()ing.
2198 	 *
2199 	 * The test runs for a relatively long time considering the very low
2200 	 * resolution of stime in ticks of typically 10 milliseconds. Since we
2201 	 * don't know the fractional part of tick values we read from userspace
2202 	 * so our minimum threshold needs to be >= one tick since any
2203 	 * measurement might really be +- tick_ns (assuming we effectively get
2204 	 * floor(real_stime)).
2205 	 *
2206 	 * We Loop for 1000 x tick_ns so one tick corresponds to 0.1%
2207 	 *
2208 	 * Also enable the stream just before poll/read to minimize
2209 	 * the error delta.
2210 	 */
2211 	start = get_time();
2212 	do_ioctl(stream_fd, I915_PERF_IOCTL_ENABLE, 0);
2213 	for (/* nop */; ((end = get_time()) - start) < test_duration_ns; /* nop */) {
2214 		struct pollfd pollfd = { .fd = stream_fd, .events = POLLIN };
2215 		struct drm_i915_perf_record_header *header;
2216 		bool timer_report_read = false;
2217 		bool non_timer_report_read = false;
2218 		int ret;
2219 
2220 		while ((ret = poll(&pollfd, 1, -1)) < 0 &&
2221 		       errno == EINTR)
2222 			;
2223 		igt_assert_eq(ret, 1);
2224 		igt_assert(pollfd.revents & POLLIN);
2225 
2226 		while ((ret = read(stream_fd, buf, sizeof(buf))) < 0 &&
2227 		       errno == EINTR)
2228 			;
2229 
2230 		/* Don't expect to see EAGAIN if we've had a POLLIN event
2231 		 *
2232 		 * XXX: actually this is technically overly strict since we do
2233 		 * knowingly allow false positive POLLIN events. At least in
2234 		 * the future when supporting context filtering of metrics for
2235 		 * Gen8+ handled in the kernel then POLLIN events may be
2236 		 * delivered when we know there are pending reports to process
2237 		 * but before we've done any filtering to know for certain that
2238 		 * any reports are destined to be copied to userspace.
2239 		 *
2240 		 * Still, for now it's a reasonable sanity check.
2241 		 */
2242 		if (ret < 0)
2243 			igt_debug("Unexpected error when reading after poll = %d\n", errno);
2244 		igt_assert_neq(ret, -1);
2245 
2246 		/* For Haswell reports don't contain a well defined reason
2247 		 * field we so assume all reports to be 'periodic'. For gen8+
2248 		 * we want to to consider that the HW automatically writes some
2249 		 * non periodic reports (e.g. on context switch) which might
2250 		 * lead to more successful read()s than expected due to
2251 		 * periodic sampling and we don't want these extra reads to
2252 		 * cause the test to fail...
2253 		 */
2254 		if (intel_gen(devid) >= 8) {
2255 			for (int offset = 0; offset < ret; offset += header->size) {
2256 				header = (void *)(buf + offset);
2257 
2258 				if (header->type == DRM_I915_PERF_RECORD_SAMPLE) {
2259 					uint32_t *report = (void *)(header + 1);
2260 
2261 					if (oa_report_is_periodic(oa_exponent, report))
2262 						timer_report_read = true;
2263 					else
2264 						non_timer_report_read = true;
2265 				}
2266 			}
2267 		}
2268 
2269 		if (non_timer_report_read && !timer_report_read)
2270 			n_extra_iterations++;
2271 
2272 		/* At this point, after consuming pending reports (and hoping
2273 		 * the scheduler hasn't stopped us for too long we now
2274 		 * expect EAGAIN on read.
2275 		 */
2276 		while ((ret = read(stream_fd, buf, sizeof(buf))) < 0 &&
2277 		       errno == EINTR)
2278 			;
2279 		igt_assert_eq(ret, -1);
2280 		igt_assert_eq(errno, EAGAIN);
2281 
2282 		n++;
2283 	}
2284 
2285 	times(&end_times);
2286 
2287 	/* Using nanosecond units is fairly silly here, given the tick in-
2288 	 * precision - ah well, it's consistent with the get_time() units.
2289 	 */
2290 	user_ns = (end_times.tms_utime - start_times.tms_utime) * tick_ns;
2291 	kernel_ns = (end_times.tms_stime - start_times.tms_stime) * tick_ns;
2292 
2293 	igt_debug("%d blocking reads during test with ~25Hz OA sampling (expect no more than %d)\n",
2294 		  n, max_iterations);
2295 	igt_debug("%d extra iterations seen, not related to periodic sampling (e.g. context switches)\n",
2296 		  n_extra_iterations);
2297 	igt_debug("time in userspace = %"PRIu64"ns (+-%dns) (start utime = %d, end = %d)\n",
2298 		  user_ns, (int)tick_ns,
2299 		  (int)start_times.tms_utime, (int)end_times.tms_utime);
2300 	igt_debug("time in kernelspace = %"PRIu64"ns (+-%dns) (start stime = %d, end = %d)\n",
2301 		  kernel_ns, (int)tick_ns,
2302 		  (int)start_times.tms_stime, (int)end_times.tms_stime);
2303 
2304 	/* With completely broken blocking while polling (but still somehow
2305 	 * reporting a POLLIN event) we could end up with an open loop.
2306 	 */
2307 	igt_assert(n <= (max_iterations + n_extra_iterations));
2308 
2309 	/* Make sure the driver is reporting new samples with a reasonably
2310 	 * low latency...
2311 	 */
2312 	igt_assert(n > (min_iterations + n_extra_iterations));
2313 
2314 	igt_assert(kernel_ns <= (test_duration_ns / 100ull));
2315 
2316 	__perf_close(stream_fd);
2317 }
2318 
/* Check OA buffer overflow handling.
 *
 * For each of 5 iterations: first let the OA unit run for ~1.25x the time
 * it takes to fill its buffer and assert a _BUFFER_LOST record is seen,
 * then re-enable and read for half a fill duration, asserting the volume
 * of periodic reports collected is within 45%-55% of a full buffer (i.e.
 * no reports were silently dropped short of an overflow).
 */
static void
test_buffer_fill(void)
{
	/* ~5 micro second period */
	int oa_exponent = max_oa_exponent_for_period_lte(5000);
	uint64_t oa_period = oa_exponent_to_ns(oa_exponent);
	uint64_t properties[] = {
		/* Include OA reports in samples */
		DRM_I915_PERF_PROP_SAMPLE_OA, true,

		/* OA unit configuration */
		DRM_I915_PERF_PROP_OA_METRICS_SET, test_metric_set_id,
		DRM_I915_PERF_PROP_OA_FORMAT, test_oa_format,
		DRM_I915_PERF_PROP_OA_EXPONENT, oa_exponent,
	};
	struct drm_i915_perf_open_param param = {
		.flags = I915_PERF_FLAG_FD_CLOEXEC,
		.num_properties = sizeof(properties) / 16,
		.properties_ptr = to_user_pointer(properties),
	};
	struct drm_i915_perf_record_header *header;
	/* Large enough for 65536 records of sample data */
	int buf_size = 65536 * (256 + sizeof(struct drm_i915_perf_record_header));
	uint8_t *buf = malloc(buf_size);
	int len;
	size_t oa_buf_size = MAX_OA_BUF_SIZE;
	size_t report_size = get_oa_format(test_oa_format).size;
	int n_full_oa_reports = oa_buf_size / report_size;
	uint64_t fill_duration = n_full_oa_reports * oa_period;

	/* The sleeps below use .tv_nsec only, so a fill must be < 1s */
	igt_assert(fill_duration < 1000000000);

	stream_fd = __perf_open(drm_fd, &param, true /* prevent_pm */);

	for (int i = 0; i < 5; i++) {
		bool overflow_seen;
		uint32_t n_periodic_reports;
		uint32_t first_timestamp = 0, last_timestamp = 0;
		/* 256 bytes; assumes a report fits — TODO confirm for all formats */
		uint32_t last_periodic_report[64];

		do_ioctl(stream_fd, I915_PERF_IOCTL_ENABLE, 0);

		/* Sleep for longer than a fill duration to guarantee an
		 * overflow occurs.
		 */
		nanosleep(&(struct timespec){ .tv_sec = 0,
					      .tv_nsec = fill_duration * 1.25 },
			  NULL);

		while ((len = read(stream_fd, buf, buf_size)) == -1 && errno == EINTR)
			;

		igt_assert_neq(len, -1);

		/* header is assigned at the top of each loop body, before the
		 * offset += header->size increment ever runs.
		 */
		overflow_seen = false;
		for (int offset = 0; offset < len; offset += header->size) {
			header = (void *)(buf + offset);

			if (header->type == DRM_I915_PERF_RECORD_OA_BUFFER_LOST)
				overflow_seen = true;
		}

		igt_assert_eq(overflow_seen, true);

		do_ioctl(stream_fd, I915_PERF_IOCTL_DISABLE, 0);

		igt_debug("fill_duration = %"PRIu64"ns, oa_exponent = %u\n",
			  fill_duration, oa_exponent);

		/* Second pass: run for only half a fill duration and count
		 * the periodic reports we can collect.
		 */
		do_ioctl(stream_fd, I915_PERF_IOCTL_ENABLE, 0);

		nanosleep(&(struct timespec){ .tv_sec = 0,
					.tv_nsec = fill_duration / 2 },
			NULL);

		n_periodic_reports = 0;

		/* Because of the race condition between notification of new
		 * reports and reports landing in memory, we need to rely on
		 * timestamps to figure whether we've read enough of them.
		 */
		while (((last_timestamp - first_timestamp) * oa_period) < (fill_duration / 2)) {

			igt_debug("dts=%u elapsed=%"PRIu64" duration=%"PRIu64"\n",
				  last_timestamp - first_timestamp,
				  (last_timestamp - first_timestamp) * oa_period,
				  fill_duration / 2);

			while ((len = read(stream_fd, buf, buf_size)) == -1 && errno == EINTR)
				;

			igt_assert_neq(len, -1);

			for (int offset = 0; offset < len; offset += header->size) {
				uint32_t *report;

				header = (void *) (buf + offset);
				report = (void *) (header + 1);

				switch (header->type) {
				case DRM_I915_PERF_RECORD_OA_REPORT_LOST:
					igt_debug("report loss, trying again\n");
					break;
				case DRM_I915_PERF_RECORD_SAMPLE:
					igt_debug(" > report ts=%u"
						  " ts_delta_last_periodic=%8u is_timer=%i ctx_id=%8x nb_periodic=%u\n",
						  report[1],
						  n_periodic_reports > 0 ? report[1] - last_periodic_report[1] : 0,
						  oa_report_is_periodic(oa_exponent, report),
						  oa_report_get_ctx_id(report),
						  n_periodic_reports);

					/* report[1] is the report's raw timestamp field */
					if (first_timestamp == 0)
						first_timestamp = report[1];
					last_timestamp = report[1];

					if (oa_report_is_periodic(oa_exponent, report)) {
						memcpy(last_periodic_report, report,
						       sizeof(last_periodic_report));
						n_periodic_reports++;
					}
					break;
				case DRM_I915_PERF_RECORD_OA_BUFFER_LOST:
					/* Half a fill duration should never overflow */
					igt_assert(!"unexpected overflow");
					break;
				}
			}
		}

		do_ioctl(stream_fd, I915_PERF_IOCTL_DISABLE, 0);

		igt_debug("%f < %zu < %f\n",
			  report_size * n_full_oa_reports * 0.45,
			  n_periodic_reports * report_size,
			  report_size * n_full_oa_reports * 0.55);

		/* Expect roughly half a buffer's worth of periodic data */
		igt_assert(n_periodic_reports * report_size >
			   report_size * n_full_oa_reports * 0.45);
		igt_assert(n_periodic_reports * report_size <
			   report_size * n_full_oa_reports * 0.55);
	}

	free(buf);

	__perf_close(stream_fd);
}
2461 
/* Check the ENABLE/DISABLE ioctls: reads on a stream opened with
 * I915_PERF_FLAG_DISABLED must fail with EIO (before the first enable and
 * again after a disable), while an enabled stream delivers the expected
 * volume of periodic reports (45%-55% of a buffer fill in half a fill
 * duration), under a GPU load provided by the load helper.
 */
static void
test_enable_disable(void)
{
	/* ~5 micro second period */
	int oa_exponent = max_oa_exponent_for_period_lte(5000);
	uint64_t oa_period = oa_exponent_to_ns(oa_exponent);
	uint64_t properties[] = {
		/* Include OA reports in samples */
		DRM_I915_PERF_PROP_SAMPLE_OA, true,

		/* OA unit configuration */
		DRM_I915_PERF_PROP_OA_METRICS_SET, test_metric_set_id,
		DRM_I915_PERF_PROP_OA_FORMAT, test_oa_format,
		DRM_I915_PERF_PROP_OA_EXPONENT, oa_exponent,
	};
	struct drm_i915_perf_open_param param = {
		.flags = I915_PERF_FLAG_FD_CLOEXEC |
			 I915_PERF_FLAG_DISABLED, /* Verify we start disabled */
		.num_properties = sizeof(properties) / 16,
		.properties_ptr = to_user_pointer(properties),
	};
	/* Large enough for 65536 records of sample data */
	int buf_size = 65536 * (256 + sizeof(struct drm_i915_perf_record_header));
	uint8_t *buf = malloc(buf_size);
	size_t oa_buf_size = MAX_OA_BUF_SIZE;
	size_t report_size = get_oa_format(test_oa_format).size;
	int n_full_oa_reports = oa_buf_size / report_size;
	uint64_t fill_duration = n_full_oa_reports * oa_period;

	load_helper_init();
	load_helper_run(HIGH);

	stream_fd = __perf_open(drm_fd, &param, true /* prevent_pm */);

	for (int i = 0; i < 5; i++) {
		int len;
		uint32_t n_periodic_reports;
		struct drm_i915_perf_record_header *header;
		uint32_t first_timestamp = 0, last_timestamp = 0;
		/* 256 bytes; assumes a report fits — TODO confirm for all formats */
		uint32_t last_periodic_report[64];

		/* Giving enough time for an overflow might help catch whether
		 * the OA unit has been enabled even if the driver might at
		 * least avoid copying reports while disabled.
		 */
		nanosleep(&(struct timespec){ .tv_sec = 0,
					      .tv_nsec = fill_duration * 1.25 },
			  NULL);

		while ((len = read(stream_fd, buf, buf_size)) == -1 && errno == EINTR)
			;

		/* While disabled, reads must fail with EIO, not block */
		igt_assert_eq(len, -1);
		igt_assert_eq(errno, EIO);

		do_ioctl(stream_fd, I915_PERF_IOCTL_ENABLE, 0);

		nanosleep(&(struct timespec){ .tv_sec = 0,
					      .tv_nsec = fill_duration / 2 },
			NULL);

		n_periodic_reports = 0;

		/* Because of the race condition between notification of new
		 * reports and reports landing in memory, we need to rely on
		 * timestamps to figure whether we've read enough of them.
		 */
		while (((last_timestamp - first_timestamp) * oa_period) < (fill_duration / 2)) {

			while ((len = read(stream_fd, buf, buf_size)) == -1 && errno == EINTR)
				;

			igt_assert_neq(len, -1);

			for (int offset = 0; offset < len; offset += header->size) {
				uint32_t *report;

				header = (void *) (buf + offset);
				report = (void *) (header + 1);

				switch (header->type) {
				case DRM_I915_PERF_RECORD_OA_REPORT_LOST:
					break;
				case DRM_I915_PERF_RECORD_SAMPLE:
					/* report[1] is the report's raw timestamp field */
					if (first_timestamp == 0)
						first_timestamp = report[1];
					last_timestamp = report[1];

					igt_debug(" > report ts=%8x"
						  " ts_delta_last_periodic=%s%8u"
						  " is_timer=%i ctx_id=0x%8x\n",
						  report[1],
						  oa_report_is_periodic(oa_exponent, report) ? " " : "*",
						  n_periodic_reports > 0 ? (report[1] - last_periodic_report[1]) : 0,
						  oa_report_is_periodic(oa_exponent, report),
						  oa_report_get_ctx_id(report));

					if (oa_report_is_periodic(oa_exponent, report)) {
						memcpy(last_periodic_report, report,
						       sizeof(last_periodic_report));

						/* We want to measure only the
						 * periodic reports, ctx-switch
						 * might inflate the content of
						 * the buffer and skew our
						 * measurement.
						 */
						n_periodic_reports++;
					}
					break;
				case DRM_I915_PERF_RECORD_OA_BUFFER_LOST:
					/* Half a fill duration should never overflow */
					igt_assert(!"unexpected overflow");
					break;
				}
			}

		}

		do_ioctl(stream_fd, I915_PERF_IOCTL_DISABLE, 0);

		igt_debug("%f < %zu < %f\n",
			  report_size * n_full_oa_reports * 0.45,
			  n_periodic_reports * report_size,
			  report_size * n_full_oa_reports * 0.55);

		/* Expect roughly half a buffer's worth of periodic data */
		igt_assert((n_periodic_reports * report_size) >
			   (report_size * n_full_oa_reports * 0.45));
		igt_assert((n_periodic_reports * report_size) <
			   report_size * n_full_oa_reports * 0.55);


		/* It's considered an error to read a stream while it's disabled
		 * since it would block indefinitely...
		 */
		len = read(stream_fd, buf, buf_size);

		igt_assert_eq(len, -1);
		igt_assert_eq(errno, EIO);
	}

	free(buf);

	__perf_close(stream_fd);

	load_helper_stop();
	load_helper_fini();
}
2608 
2609 static void
test_short_reads(void)2610 test_short_reads(void)
2611 {
2612 	int oa_exponent = max_oa_exponent_for_period_lte(5000);
2613 	uint64_t properties[] = {
2614 		/* Include OA reports in samples */
2615 		DRM_I915_PERF_PROP_SAMPLE_OA, true,
2616 
2617 		/* OA unit configuration */
2618 		DRM_I915_PERF_PROP_OA_METRICS_SET, test_metric_set_id,
2619 		DRM_I915_PERF_PROP_OA_FORMAT, test_oa_format,
2620 		DRM_I915_PERF_PROP_OA_EXPONENT, oa_exponent,
2621 	};
2622 	struct drm_i915_perf_open_param param = {
2623 		.flags = I915_PERF_FLAG_FD_CLOEXEC,
2624 		.num_properties = sizeof(properties) / 16,
2625 		.properties_ptr = to_user_pointer(properties),
2626 	};
2627 	size_t record_size = 256 + sizeof(struct drm_i915_perf_record_header);
2628 	size_t page_size = sysconf(_SC_PAGE_SIZE);
2629 	int zero_fd = open("/dev/zero", O_RDWR|O_CLOEXEC);
2630 	uint8_t *pages = mmap(NULL, page_size * 2,
2631 			      PROT_READ|PROT_WRITE, MAP_PRIVATE, zero_fd, 0);
2632 	struct drm_i915_perf_record_header *header;
2633 	int ret;
2634 
2635 	igt_assert_neq(zero_fd, -1);
2636 	close(zero_fd);
2637 	zero_fd = -1;
2638 
2639 	igt_assert(pages);
2640 
2641 	ret = mprotect(pages + page_size, page_size, PROT_NONE);
2642 	igt_assert_eq(ret, 0);
2643 
2644 	stream_fd = __perf_open(drm_fd, &param, false);
2645 
2646 	nanosleep(&(struct timespec){ .tv_sec = 0, .tv_nsec = 5000000 }, NULL);
2647 
2648 	/* At this point there should be lots of pending reports to read */
2649 
2650 	/* A read that can return at least one record should result in a short
2651 	 * read not an EFAULT if the buffer is smaller than the requested read
2652 	 * size...
2653 	 *
2654 	 * Expect to see a sample record here, but at least skip over any
2655 	 * _RECORD_LOST notifications.
2656 	 */
2657 	do {
2658 		header = (void *)(pages + page_size - record_size);
2659 		ret = read(stream_fd,
2660 			   header,
2661 			   page_size);
2662 		igt_assert(ret > 0);
2663 	} while (header->type == DRM_I915_PERF_RECORD_OA_REPORT_LOST);
2664 
2665 	igt_assert_eq(ret, record_size);
2666 
2667 	/* A read that can't return a single record because it would result
2668 	 * in a fault on buffer overrun should result in an EFAULT error...
2669 	 */
2670 	ret = read(stream_fd, pages + page_size - 16, page_size);
2671 	igt_assert_eq(ret, -1);
2672 	igt_assert_eq(errno, EFAULT);
2673 
2674 	/* A read that can't return a single record because the buffer is too
2675 	 * small should result in an ENOSPC error..
2676 	 *
2677 	 * Again, skip over _RECORD_LOST records (smaller than record_size/2)
2678 	 */
2679 	do {
2680 		header = (void *)(pages + page_size - record_size / 2);
2681 		ret = read(stream_fd,
2682 			   header,
2683 			   record_size / 2);
2684 	} while (ret > 0 && header->type == DRM_I915_PERF_RECORD_OA_REPORT_LOST);
2685 
2686 	igt_assert_eq(ret, -1);
2687 	igt_assert_eq(errno, ENOSPC);
2688 
2689 	__perf_close(stream_fd);
2690 
2691 	munmap(pages, page_size * 2);
2692 }
2693 
2694 static void
test_non_sampling_read_error(void)2695 test_non_sampling_read_error(void)
2696 {
2697 	uint64_t properties[] = {
2698 		/* XXX: even without periodic sampling we have to
2699 		 * specify at least one sample layout property...
2700 		 */
2701 		DRM_I915_PERF_PROP_SAMPLE_OA, true,
2702 
2703 		/* OA unit configuration */
2704 		DRM_I915_PERF_PROP_OA_METRICS_SET, test_metric_set_id,
2705 		DRM_I915_PERF_PROP_OA_FORMAT, test_oa_format,
2706 
2707 		/* XXX: no sampling exponent */
2708 	};
2709 	struct drm_i915_perf_open_param param = {
2710 		.flags = I915_PERF_FLAG_FD_CLOEXEC,
2711 		.num_properties = sizeof(properties) / 16,
2712 		.properties_ptr = to_user_pointer(properties),
2713 	};
2714 	int ret;
2715 	uint8_t buf[1024];
2716 
2717 	stream_fd = __perf_open(drm_fd, &param, false);
2718 
2719 	ret = read(stream_fd, buf, sizeof(buf));
2720 	igt_assert_eq(ret, -1);
2721 	igt_assert_eq(errno, EIO);
2722 
2723 	__perf_close(stream_fd);
2724 }
2725 
2726 /* Check that attempts to read from a stream while it is disable will return
2727  * EIO instead of blocking indefinitely.
2728  */
2729 static void
test_disabled_read_error(void)2730 test_disabled_read_error(void)
2731 {
2732 	int oa_exponent = 5; /* 5 micro seconds */
2733 	uint64_t properties[] = {
2734 		/* XXX: even without periodic sampling we have to
2735 		 * specify at least one sample layout property...
2736 		 */
2737 		DRM_I915_PERF_PROP_SAMPLE_OA, true,
2738 
2739 		/* OA unit configuration */
2740 		DRM_I915_PERF_PROP_OA_METRICS_SET, test_metric_set_id,
2741 		DRM_I915_PERF_PROP_OA_FORMAT, test_oa_format,
2742 		DRM_I915_PERF_PROP_OA_EXPONENT, oa_exponent,
2743 	};
2744 	struct drm_i915_perf_open_param param = {
2745 		.flags = I915_PERF_FLAG_FD_CLOEXEC |
2746 			 I915_PERF_FLAG_DISABLED, /* XXX: open disabled */
2747 		.num_properties = sizeof(properties) / 16,
2748 		.properties_ptr = to_user_pointer(properties),
2749 	};
2750 	uint32_t oa_report0[64];
2751 	uint32_t oa_report1[64];
2752 	uint32_t buf[128] = { 0 };
2753 	int ret;
2754 
2755 	stream_fd = __perf_open(drm_fd, &param, false);
2756 
2757 	ret = read(stream_fd, buf, sizeof(buf));
2758 	igt_assert_eq(ret, -1);
2759 	igt_assert_eq(errno, EIO);
2760 
2761 	__perf_close(stream_fd);
2762 
2763 
2764 	param.flags &= ~I915_PERF_FLAG_DISABLED;
2765 	stream_fd = __perf_open(drm_fd, &param, false);
2766 
2767 	read_2_oa_reports(test_oa_format,
2768 			  oa_exponent,
2769 			  oa_report0,
2770 			  oa_report1,
2771 			  false); /* not just timer reports */
2772 
2773 	do_ioctl(stream_fd, I915_PERF_IOCTL_DISABLE, 0);
2774 
2775 	ret = read(stream_fd, buf, sizeof(buf));
2776 	igt_assert_eq(ret, -1);
2777 	igt_assert_eq(errno, EIO);
2778 
2779 	do_ioctl(stream_fd, I915_PERF_IOCTL_ENABLE, 0);
2780 
2781 	read_2_oa_reports(test_oa_format,
2782 			  oa_exponent,
2783 			  oa_report0,
2784 			  oa_report1,
2785 			  false); /* not just timer reports */
2786 
2787 	__perf_close(stream_fd);
2788 }
2789 
/* Check MI_REPORT_PERF_COUNT: with a stream open (so the OA unit is
 * configured), emit a report into a canary-filled bo and verify the
 * report ID is echoed back, the timestamp is non-zero and that exactly
 * one 256-byte report was written (no overrun past the report).
 */
static void
test_mi_rpc(void)
{
	uint64_t properties[] = {
		/* Note: we have to specify at least one sample property even
		 * though we aren't interested in samples in this case.
		 */
		DRM_I915_PERF_PROP_SAMPLE_OA, true,

		/* OA unit configuration */
		DRM_I915_PERF_PROP_OA_METRICS_SET, test_metric_set_id,
		DRM_I915_PERF_PROP_OA_FORMAT, test_oa_format,

		/* Note: no OA exponent specified in this case */
	};
	struct drm_i915_perf_open_param param = {
		.flags = I915_PERF_FLAG_FD_CLOEXEC,
		.num_properties = sizeof(properties) / 16,
		.properties_ptr = to_user_pointer(properties),
	};
	drm_intel_bufmgr *bufmgr = drm_intel_bufmgr_gem_init(drm_fd, 4096);
	drm_intel_context *context;
	struct intel_batchbuffer *batch;
	drm_intel_bo *bo;
	uint32_t *report32;
	int ret;

	stream_fd = __perf_open(drm_fd, &param, false);

	drm_intel_bufmgr_gem_enable_reuse(bufmgr);

	context = drm_intel_gem_context_create(bufmgr);
	igt_assert(context);

	batch = intel_batchbuffer_alloc(bufmgr, devid);

	bo = drm_intel_bo_alloc(bufmgr, "mi_rpc dest bo", 4096, 64);

	ret = drm_intel_bo_map(bo, true);
	igt_assert_eq(ret, 0);

	/* Fill with a 0x80 canary so we can detect exactly how many bytes
	 * the report write touched.
	 */
	memset(bo->virtual, 0x80, 4096);
	drm_intel_bo_unmap(bo);

	emit_report_perf_count(batch,
			       bo, /* dst */
			       0, /* dst offset in bytes */
			       0xdeadbeef); /* report ID */

	intel_batchbuffer_flush_with_context(batch, context);

	/* Mapping for read blocks until the GPU has written the report */
	ret = drm_intel_bo_map(bo, false /* write enable */);
	igt_assert_eq(ret, 0);

	report32 = bo->virtual;
	igt_assert_eq(report32[0], 0xdeadbeef); /* report ID */
	igt_assert_neq(report32[1], 0); /* timestamp */

	igt_assert_neq(report32[63], 0x80808080); /* end of report */
	igt_assert_eq(report32[64], 0x80808080); /* after 256 byte report */

	drm_intel_bo_unmap(bo);
	drm_intel_bo_unreference(bo);
	intel_batchbuffer_free(batch);
	drm_intel_gem_context_destroy(context);
	drm_intel_bufmgr_destroy(bufmgr);
	__perf_close(stream_fd);
}
2858 
2859 static void
emit_stall_timestamp_and_rpc(struct intel_batchbuffer * batch,drm_intel_bo * dst,int timestamp_offset,int report_dst_offset,uint32_t report_id)2860 emit_stall_timestamp_and_rpc(struct intel_batchbuffer *batch,
2861 			     drm_intel_bo *dst,
2862 			     int timestamp_offset,
2863 			     int report_dst_offset,
2864 			     uint32_t report_id)
2865 {
2866 	uint32_t pipe_ctl_flags = (PIPE_CONTROL_CS_STALL |
2867 				   PIPE_CONTROL_RENDER_TARGET_FLUSH |
2868 				   PIPE_CONTROL_WRITE_TIMESTAMP);
2869 
2870 	if (intel_gen(devid) >= 8) {
2871 		BEGIN_BATCH(5, 1);
2872 		OUT_BATCH(GFX_OP_PIPE_CONTROL | (6 - 2));
2873 		OUT_BATCH(pipe_ctl_flags);
2874 		OUT_RELOC(dst, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2875 			  timestamp_offset);
2876 		OUT_BATCH(0); /* imm lower */
2877 		OUT_BATCH(0); /* imm upper */
2878 		ADVANCE_BATCH();
2879 	} else {
2880 		BEGIN_BATCH(5, 1);
2881 		OUT_BATCH(GFX_OP_PIPE_CONTROL | (5 - 2));
2882 		OUT_BATCH(pipe_ctl_flags);
2883 		OUT_RELOC(dst, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2884 			  timestamp_offset);
2885 		OUT_BATCH(0); /* imm lower */
2886 		OUT_BATCH(0); /* imm upper */
2887 		ADVANCE_BATCH();
2888 	}
2889 
2890 	emit_report_perf_count(batch, dst, report_dst_offset, report_id);
2891 }
2892 
2893 /* Tests the INTEL_performance_query use case where an unprivileged process
2894  * should be able to configure the OA unit for per-context metrics (for a
2895  * context associated with that process' drm file descriptor) and the counters
2896  * should only relate to that specific context.
2897  *
2898  * Unfortunately only Haswell limits the progression of OA counters for a
2899  * single context and so this unit test is Haswell specific. For Gen8+ although
2900  * reports read via i915 perf can be filtered for a single context the counters
2901  * themselves always progress as global/system-wide counters affected by all
2902  * contexts.
2903  */
2904 static void
hsw_test_single_ctx_counters(void)2905 hsw_test_single_ctx_counters(void)
2906 {
2907 	uint64_t properties[] = {
2908 		DRM_I915_PERF_PROP_CTX_HANDLE, UINT64_MAX, /* updated below */
2909 
2910 		/* Note: we have to specify at least one sample property even
2911 		 * though we aren't interested in samples in this case
2912 		 */
2913 		DRM_I915_PERF_PROP_SAMPLE_OA, true,
2914 
2915 		/* OA unit configuration */
2916 		DRM_I915_PERF_PROP_OA_METRICS_SET, test_metric_set_id,
2917 		DRM_I915_PERF_PROP_OA_FORMAT, test_oa_format,
2918 
2919 		/* Note: no OA exponent specified in this case */
2920 	};
2921 	struct drm_i915_perf_open_param param = {
2922 		.flags = I915_PERF_FLAG_FD_CLOEXEC,
2923 		.num_properties = sizeof(properties) / 16,
2924 		.properties_ptr = to_user_pointer(properties),
2925 	};
2926 
2927 	/* should be default, but just to be sure... */
2928 	write_u64_file("/proc/sys/dev/i915/perf_stream_paranoid", 1);
2929 
2930 	igt_fork(child, 1) {
2931 		drm_intel_bufmgr *bufmgr;
2932 		drm_intel_context *context0, *context1;
2933 		struct intel_batchbuffer *batch;
2934 		struct igt_buf src[3], dst[3];
2935 		drm_intel_bo *bo;
2936 		uint32_t *report0_32, *report1_32;
2937 		uint64_t timestamp0_64, timestamp1_64;
2938 		uint32_t delta_ts64, delta_oa32;
2939 		uint64_t delta_ts64_ns, delta_oa32_ns;
2940 		uint32_t delta_delta;
2941 		int n_samples_written;
2942 		int width = 800;
2943 		int height = 600;
2944 		uint32_t ctx_id = 0xffffffff; /* invalid id */
2945 		int ret;
2946 
2947 		igt_drop_root();
2948 
2949 		bufmgr = drm_intel_bufmgr_gem_init(drm_fd, 4096);
2950 		drm_intel_bufmgr_gem_enable_reuse(bufmgr);
2951 
2952 		for (int i = 0; i < ARRAY_SIZE(src); i++) {
2953 			scratch_buf_init(bufmgr, &src[i], width, height, 0xff0000ff);
2954 			scratch_buf_init(bufmgr, &dst[i], width, height, 0x00ff00ff);
2955 		}
2956 
2957 		batch = intel_batchbuffer_alloc(bufmgr, devid);
2958 
2959 		context0 = drm_intel_gem_context_create(bufmgr);
2960 		igt_assert(context0);
2961 
2962 		context1 = drm_intel_gem_context_create(bufmgr);
2963 		igt_assert(context1);
2964 
2965 		igt_debug("submitting warm up render_copy\n");
2966 
2967 		/* Submit some early, unmeasured, work to the context we want
2968 		 * to measure to try and catch issues with i915-perf
2969 		 * initializing the HW context ID for filtering.
2970 		 *
2971 		 * We do this because i915-perf single context filtering had
2972 		 * previously only relied on a hook into context pinning to
2973 		 * initialize the HW context ID, instead of also trying to
2974 		 * determine the HW ID while opening the stream, in case it
2975 		 * has already been pinned.
2976 		 *
2977 		 * This wasn't noticed by the previous unit test because we
2978 		 * were opening the stream while the context hadn't been
2979 		 * touched or pinned yet and so it worked out correctly to wait
2980 		 * for the pinning hook.
2981 		 *
2982 		 * Now a buggy version of i915-perf will fail to measure
2983 		 * anything for context0 once this initial render_copy() ends
2984 		 * up pinning the context since there won't ever be a pinning
2985 		 * hook callback.
2986 		 */
2987 		render_copy(batch,
2988 			    context0,
2989 			    &src[0], 0, 0, width, height,
2990 			    &dst[0], 0, 0);
2991 
2992 		ret = drm_intel_gem_context_get_id(context0, &ctx_id);
2993 		igt_assert_eq(ret, 0);
2994 		igt_assert_neq(ctx_id, 0xffffffff);
2995 		properties[1] = ctx_id;
2996 
2997 		intel_batchbuffer_flush_with_context(batch, context0);
2998 
2999 		scratch_buf_memset(src[0].bo, width, height, 0xff0000ff);
3000 		scratch_buf_memset(dst[0].bo, width, height, 0x00ff00ff);
3001 
3002 		igt_debug("opening i915-perf stream\n");
3003 		stream_fd = __perf_open(drm_fd, &param, false);
3004 
3005 		bo = drm_intel_bo_alloc(bufmgr, "mi_rpc dest bo", 4096, 64);
3006 
3007 		ret = drm_intel_bo_map(bo, true /* write enable */);
3008 		igt_assert_eq(ret, 0);
3009 
3010 		memset(bo->virtual, 0x80, 4096);
3011 		drm_intel_bo_unmap(bo);
3012 
3013 		emit_stall_timestamp_and_rpc(batch,
3014 					     bo,
3015 					     512 /* timestamp offset */,
3016 					     0, /* report dst offset */
3017 					     0xdeadbeef); /* report id */
3018 
3019 		/* Explicitly flush here (even though the render_copy() call
3020 		 * will itself flush before/after the copy) to clarify that
3021 		 * that the PIPE_CONTROL + MI_RPC commands will be in a
3022 		 * separate batch from the copy.
3023 		 */
3024 		intel_batchbuffer_flush_with_context(batch, context0);
3025 
3026 		render_copy(batch,
3027 			    context0,
3028 			    &src[0], 0, 0, width, height,
3029 			    &dst[0], 0, 0);
3030 
3031 		/* Another redundant flush to clarify batch bo is free to reuse */
3032 		intel_batchbuffer_flush_with_context(batch, context0);
3033 
3034 		/* submit two copies on the other context to avoid a false
3035 		 * positive in case the driver somehow ended up filtering for
3036 		 * context1
3037 		 */
3038 		render_copy(batch,
3039 			    context1,
3040 			    &src[1], 0, 0, width, height,
3041 			    &dst[1], 0, 0);
3042 
3043 		render_copy(batch,
3044 			    context1,
3045 			    &src[2], 0, 0, width, height,
3046 			    &dst[2], 0, 0);
3047 
3048 		/* And another */
3049 		intel_batchbuffer_flush_with_context(batch, context1);
3050 
3051 		emit_stall_timestamp_and_rpc(batch,
3052 					     bo,
3053 					     520 /* timestamp offset */,
3054 					     256, /* report dst offset */
3055 					     0xbeefbeef); /* report id */
3056 
3057 		intel_batchbuffer_flush_with_context(batch, context0);
3058 
3059 		ret = drm_intel_bo_map(bo, false /* write enable */);
3060 		igt_assert_eq(ret, 0);
3061 
3062 		report0_32 = bo->virtual;
3063 		igt_assert_eq(report0_32[0], 0xdeadbeef); /* report ID */
3064 		igt_assert_neq(report0_32[1], 0); /* timestamp */
3065 
3066 		report1_32 = report0_32 + 64;
3067 		igt_assert_eq(report1_32[0], 0xbeefbeef); /* report ID */
3068 		igt_assert_neq(report1_32[1], 0); /* timestamp */
3069 
3070 		print_reports(report0_32, report1_32,
3071 			      lookup_format(test_oa_format));
3072 
3073 		/* A40 == N samples written to all render targets */
3074 		n_samples_written = report1_32[43] - report0_32[43];
3075 
3076 		igt_debug("n samples written = %d\n", n_samples_written);
3077 		igt_assert_eq(n_samples_written, width * height);
3078 
3079 		igt_debug("timestamp32 0 = %u\n", report0_32[1]);
3080 		igt_debug("timestamp32 1 = %u\n", report1_32[1]);
3081 
3082 		timestamp0_64 = *(uint64_t *)(((uint8_t *)bo->virtual) + 512);
3083 		timestamp1_64 = *(uint64_t *)(((uint8_t *)bo->virtual) + 520);
3084 
3085 		igt_debug("timestamp64 0 = %"PRIu64"\n", timestamp0_64);
3086 		igt_debug("timestamp64 1 = %"PRIu64"\n", timestamp1_64);
3087 
3088 		delta_ts64 = timestamp1_64 - timestamp0_64;
3089 		delta_oa32 = report1_32[1] - report0_32[1];
3090 
3091 		/* sanity check that we can pass the delta to timebase_scale */
3092 		igt_assert(delta_ts64 < UINT32_MAX);
3093 		delta_oa32_ns = timebase_scale(delta_oa32);
3094 		delta_ts64_ns = timebase_scale(delta_ts64);
3095 
3096 		igt_debug("ts32 delta = %u, = %uns\n",
3097 			  delta_oa32, (unsigned)delta_oa32_ns);
3098 		igt_debug("ts64 delta = %u, = %uns\n",
3099 			  delta_ts64, (unsigned)delta_ts64_ns);
3100 
3101 		/* The delta as calculated via the PIPE_CONTROL timestamp or
3102 		 * the OA report timestamps should be almost identical but
3103 		 * allow a 320 nanoseconds margin.
3104 		 */
3105 		delta_delta = delta_ts64_ns > delta_oa32_ns ?
3106 			(delta_ts64_ns - delta_oa32_ns) :
3107 			(delta_oa32_ns - delta_ts64_ns);
3108 		igt_assert(delta_delta <= 320);
3109 
3110 		for (int i = 0; i < ARRAY_SIZE(src); i++) {
3111 			drm_intel_bo_unreference(src[i].bo);
3112 			drm_intel_bo_unreference(dst[i].bo);
3113 		}
3114 
3115 		drm_intel_bo_unmap(bo);
3116 		drm_intel_bo_unreference(bo);
3117 		intel_batchbuffer_free(batch);
3118 		drm_intel_gem_context_destroy(context0);
3119 		drm_intel_gem_context_destroy(context1);
3120 		drm_intel_bufmgr_destroy(bufmgr);
3121 		__perf_close(stream_fd);
3122 	}
3123 
3124 	igt_waitchildren();
3125 }
3126 
3127 /* Tests the INTEL_performance_query use case where an unprivileged process
3128  * should be able to configure the OA unit for per-context metrics (for a
3129  * context associated with that process' drm file descriptor) and the counters
3130  * should only relate to that specific context.
3131  *
3132  * For Gen8+ although reports read via i915 perf can be filtered for a single
3133  * context the counters themselves always progress as global/system-wide
3134  * counters affected by all contexts. To support the INTEL_performance_query
3135  * use case on Gen8+ it's necessary to combine OABUFFER and
3136  * MI_REPORT_PERF_COUNT reports so that counter normalisation can take into
3137  * account context-switch reports and factor out any counter progression not
3138  * associated with the current context.
3139  */
3140 static void
gen8_test_single_ctx_render_target_writes_a_counter(void)3141 gen8_test_single_ctx_render_target_writes_a_counter(void)
3142 {
3143 	int oa_exponent = max_oa_exponent_for_period_lte(1000000);
3144 	uint64_t properties[] = {
3145 		DRM_I915_PERF_PROP_CTX_HANDLE, UINT64_MAX, /* updated below */
3146 
3147 		/* Note: we have to specify at least one sample property even
3148 		 * though we aren't interested in samples in this case
3149 		 */
3150 		DRM_I915_PERF_PROP_SAMPLE_OA, true,
3151 
3152 		/* OA unit configuration */
3153 		DRM_I915_PERF_PROP_OA_METRICS_SET, test_metric_set_id,
3154 		DRM_I915_PERF_PROP_OA_FORMAT, test_oa_format,
3155 		DRM_I915_PERF_PROP_OA_EXPONENT, oa_exponent,
3156 
3157 		/* Note: no OA exponent specified in this case */
3158 	};
3159 	struct drm_i915_perf_open_param param = {
3160 		.flags = I915_PERF_FLAG_FD_CLOEXEC,
3161 		.num_properties = ARRAY_SIZE(properties) / 2,
3162 		.properties_ptr = to_user_pointer(properties),
3163 	};
3164 	size_t format_size = get_oa_format(test_oa_format).size;
3165 	size_t sample_size = (sizeof(struct drm_i915_perf_record_header) +
3166 			      format_size);
3167 	int max_reports = MAX_OA_BUF_SIZE / format_size;
3168 	int buf_size = sample_size * max_reports * 1.5;
3169 	int child_ret;
3170 	uint8_t *buf = malloc(buf_size);
3171 	ssize_t len;
3172 	struct igt_helper_process child = {};
3173 
3174 	/* should be default, but just to be sure... */
3175 	write_u64_file("/proc/sys/dev/i915/perf_stream_paranoid", 1);
3176 
3177 	do {
3178 
3179 		igt_fork_helper(&child) {
3180 			struct drm_i915_perf_record_header *header;
3181 			drm_intel_bufmgr *bufmgr;
3182 			drm_intel_context *context0, *context1;
3183 			struct intel_batchbuffer *batch;
3184 			struct igt_buf src[3], dst[3];
3185 			drm_intel_bo *bo;
3186 			uint32_t *report0_32, *report1_32;
3187 			uint32_t *prev, *lprev = NULL;
3188 			uint64_t timestamp0_64, timestamp1_64;
3189 			uint32_t delta_ts64, delta_oa32;
3190 			uint64_t delta_ts64_ns, delta_oa32_ns;
3191 			uint32_t delta_delta;
3192 			int width = 800;
3193 			int height = 600;
3194 			uint32_t ctx_id = 0xffffffff; /* invalid handle */
3195 			uint32_t ctx1_id = 0xffffffff;  /* invalid handle */
3196 			uint32_t current_ctx_id = 0xffffffff;
3197 			uint32_t n_invalid_ctx = 0;
3198 			int ret;
3199 			struct accumulator accumulator = {
3200 				.format = test_oa_format
3201 			};
3202 
3203 			bufmgr = drm_intel_bufmgr_gem_init(drm_fd, 4096);
3204 			drm_intel_bufmgr_gem_enable_reuse(bufmgr);
3205 
3206 			for (int i = 0; i < ARRAY_SIZE(src); i++) {
3207 				scratch_buf_init(bufmgr, &src[i], width, height, 0xff0000ff);
3208 				scratch_buf_init(bufmgr, &dst[i], width, height, 0x00ff00ff);
3209 			}
3210 
3211 			batch = intel_batchbuffer_alloc(bufmgr, devid);
3212 
3213 			context0 = drm_intel_gem_context_create(bufmgr);
3214 			igt_assert(context0);
3215 
3216 			context1 = drm_intel_gem_context_create(bufmgr);
3217 			igt_assert(context1);
3218 
3219 			igt_debug("submitting warm up render_copy\n");
3220 
3221 			/* Submit some early, unmeasured, work to the context we want
3222 			 * to measure to try and catch issues with i915-perf
3223 			 * initializing the HW context ID for filtering.
3224 			 *
3225 			 * We do this because i915-perf single context filtering had
3226 			 * previously only relied on a hook into context pinning to
3227 			 * initialize the HW context ID, instead of also trying to
3228 			 * determine the HW ID while opening the stream, in case it
3229 			 * has already been pinned.
3230 			 *
3231 			 * This wasn't noticed by the previous unit test because we
3232 			 * were opening the stream while the context hadn't been
3233 			 * touched or pinned yet and so it worked out correctly to wait
3234 			 * for the pinning hook.
3235 			 *
3236 			 * Now a buggy version of i915-perf will fail to measure
3237 			 * anything for context0 once this initial render_copy() ends
3238 			 * up pinning the context since there won't ever be a pinning
3239 			 * hook callback.
3240 			 */
3241 			render_copy(batch,
3242 				    context0,
3243 				    &src[0], 0, 0, width, height,
3244 				    &dst[0], 0, 0);
3245 
3246 			ret = drm_intel_gem_context_get_id(context0, &ctx_id);
3247 			igt_assert_eq(ret, 0);
3248 			igt_assert_neq(ctx_id, 0xffffffff);
3249 			properties[1] = ctx_id;
3250 
3251 			scratch_buf_memset(src[0].bo, width, height, 0xff0000ff);
3252 			scratch_buf_memset(dst[0].bo, width, height, 0x00ff00ff);
3253 
3254 			igt_debug("opening i915-perf stream\n");
3255 			stream_fd = __perf_open(drm_fd, &param, false);
3256 
3257 			bo = drm_intel_bo_alloc(bufmgr, "mi_rpc dest bo", 4096, 64);
3258 
3259 			ret = drm_intel_bo_map(bo, true /* write enable */);
3260 			igt_assert_eq(ret, 0);
3261 
3262 			memset(bo->virtual, 0x80, 4096);
3263 			drm_intel_bo_unmap(bo);
3264 
3265 			emit_stall_timestamp_and_rpc(batch,
3266 						     bo,
3267 						     512 /* timestamp offset */,
3268 						     0, /* report dst offset */
3269 						     0xdeadbeef); /* report id */
3270 
3271 			/* Explicitly flush here (even though the render_copy() call
3272 			 * will itself flush before/after the copy) to clarify that
3273 			 * that the PIPE_CONTROL + MI_RPC commands will be in a
3274 			 * separate batch from the copy.
3275 			 */
3276 			intel_batchbuffer_flush_with_context(batch, context0);
3277 
3278 			render_copy(batch,
3279 				    context0,
3280 				    &src[0], 0, 0, width, height,
3281 				    &dst[0], 0, 0);
3282 
3283 			/* Another redundant flush to clarify batch bo is free to reuse */
3284 			intel_batchbuffer_flush_with_context(batch, context0);
3285 
3286 			/* submit two copies on the other context to avoid a false
3287 			 * positive in case the driver somehow ended up filtering for
3288 			 * context1
3289 			 */
3290 			render_copy(batch,
3291 				    context1,
3292 				    &src[1], 0, 0, width, height,
3293 				    &dst[1], 0, 0);
3294 
3295 			ret = drm_intel_gem_context_get_id(context1, &ctx1_id);
3296 			igt_assert_eq(ret, 0);
3297 			igt_assert_neq(ctx1_id, 0xffffffff);
3298 
3299 			render_copy(batch,
3300 				    context1,
3301 				    &src[2], 0, 0, width, height,
3302 				    &dst[2], 0, 0);
3303 
3304 			/* And another */
3305 			intel_batchbuffer_flush_with_context(batch, context1);
3306 
3307 			emit_stall_timestamp_and_rpc(batch,
3308 						     bo,
3309 						     520 /* timestamp offset */,
3310 						     256, /* report dst offset */
3311 						     0xbeefbeef); /* report id */
3312 
3313 			intel_batchbuffer_flush_with_context(batch, context1);
3314 
3315 			ret = drm_intel_bo_map(bo, false /* write enable */);
3316 			igt_assert_eq(ret, 0);
3317 
3318 			report0_32 = bo->virtual;
3319 			igt_assert_eq(report0_32[0], 0xdeadbeef); /* report ID */
3320 			igt_assert_neq(report0_32[1], 0); /* timestamp */
3321 			prev = report0_32;
3322 			ctx_id = prev[2];
3323 			igt_debug("MI_RPC(start) CTX ID: %u\n", ctx_id);
3324 
3325 			report1_32 = report0_32 + 64; /* 64 uint32_t = 256bytes offset */
3326 			igt_assert_eq(report1_32[0], 0xbeefbeef); /* report ID */
3327 			igt_assert_neq(report1_32[1], 0); /* timestamp */
3328 			ctx1_id = report1_32[2];
3329 
3330 			memset(accumulator.deltas, 0, sizeof(accumulator.deltas));
3331 			accumulate_reports(&accumulator, report0_32, report1_32);
3332 			igt_debug("total: A0 = %"PRIu64", A21 = %"PRIu64", A26 = %"PRIu64"\n",
3333 				  accumulator.deltas[2 + 0], /* skip timestamp + clock cycles */
3334 				  accumulator.deltas[2 + 21],
3335 				  accumulator.deltas[2 + 26]);
3336 
3337 			igt_debug("oa_timestamp32 0 = %u\n", report0_32[1]);
3338 			igt_debug("oa_timestamp32 1 = %u\n", report1_32[1]);
3339 			igt_debug("ctx_id 0 = %u\n", report0_32[2]);
3340 			igt_debug("ctx_id 1 = %u\n", report1_32[2]);
3341 
3342 			timestamp0_64 = *(uint64_t *)(((uint8_t *)bo->virtual) + 512);
3343 			timestamp1_64 = *(uint64_t *)(((uint8_t *)bo->virtual) + 520);
3344 
3345 			igt_debug("ts_timestamp64 0 = %"PRIu64"\n", timestamp0_64);
3346 			igt_debug("ts_timestamp64 1 = %"PRIu64"\n", timestamp1_64);
3347 
3348 			delta_ts64 = timestamp1_64 - timestamp0_64;
3349 			delta_oa32 = report1_32[1] - report0_32[1];
3350 
3351 			/* sanity check that we can pass the delta to timebase_scale */
3352 			igt_assert(delta_ts64 < UINT32_MAX);
3353 			delta_oa32_ns = timebase_scale(delta_oa32);
3354 			delta_ts64_ns = timebase_scale(delta_ts64);
3355 
3356 			igt_debug("oa32 delta = %u, = %uns\n",
3357 				  delta_oa32, (unsigned)delta_oa32_ns);
3358 			igt_debug("ts64 delta = %u, = %uns\n",
3359 				  delta_ts64, (unsigned)delta_ts64_ns);
3360 
3361 			/* The delta as calculated via the PIPE_CONTROL timestamp or
3362 			 * the OA report timestamps should be almost identical but
3363 			 * allow a 500 nanoseconds margin.
3364 			 */
3365 			delta_delta = delta_ts64_ns > delta_oa32_ns ?
3366 				(delta_ts64_ns - delta_oa32_ns) :
3367 				(delta_oa32_ns - delta_ts64_ns);
3368 			if (delta_delta > 500) {
3369 				igt_debug("skipping\n");
3370 				exit(EAGAIN);
3371 			}
3372 
3373 			len = i915_read_reports_until_timestamp(test_oa_format,
3374 								buf, buf_size,
3375 								report0_32[1],
3376 								report1_32[1]);
3377 
3378 			igt_assert(len > 0);
3379 			igt_debug("read %d bytes\n", (int)len);
3380 
3381 			memset(accumulator.deltas, 0, sizeof(accumulator.deltas));
3382 
3383 			for (size_t offset = 0; offset < len; offset += header->size) {
3384 				uint32_t *report;
3385 				uint32_t reason;
3386 				const char *skip_reason = NULL, *report_reason = NULL;
3387 				struct accumulator laccumulator = {
3388 					.format = test_oa_format
3389 				};
3390 
3391 
3392 				header = (void *)(buf + offset);
3393 
3394 				igt_assert_eq(header->pad, 0); /* Reserved */
3395 
3396 				/* Currently the only test that should ever expect to
3397 				 * see a _BUFFER_LOST error is the buffer_fill test,
3398 				 * otherwise something bad has probably happened...
3399 				 */
3400 				igt_assert_neq(header->type, DRM_I915_PERF_RECORD_OA_BUFFER_LOST);
3401 
3402 				/* At high sampling frequencies the OA HW might not be
3403 				 * able to cope with all write requests and will notify
3404 				 * us that a report was lost.
3405 				 *
3406 				 * XXX: we should maybe restart the test in this case?
3407 				 */
3408 				if (header->type == DRM_I915_PERF_RECORD_OA_REPORT_LOST) {
3409 					igt_debug("OA trigger collision / report lost\n");
3410 					exit(EAGAIN);
3411 				}
3412 
3413 				/* Currently the only other record type expected is a
3414 				 * _SAMPLE. Notably this test will need updating if
3415 				 * i915-perf is extended in the future with additional
3416 				 * record types.
3417 				 */
3418 				igt_assert_eq(header->type, DRM_I915_PERF_RECORD_SAMPLE);
3419 
3420 				igt_assert_eq(header->size, sample_size);
3421 
3422 				report = (void *)(header + 1);
3423 
3424 				/* Don't expect zero for timestamps */
3425 				igt_assert_neq(report[1], 0);
3426 
3427 				igt_debug("report %p:\n", report);
3428 
3429 				/* Discard reports not contained in between the
3430 				 * timestamps we're looking at. */
3431 				{
3432 					uint32_t time_delta = report[1] - report0_32[1];
3433 
3434 					if (timebase_scale(time_delta) > 1000000000) {
3435 						skip_reason = "prior first mi-rpc";
3436 					}
3437 				}
3438 
3439 				{
3440 					uint32_t time_delta = report[1] - report1_32[1];
3441 
3442 					if (timebase_scale(time_delta) <= 1000000000) {
3443 						igt_debug("    comes after last MI_RPC (%u)\n",
3444 							  report1_32[1]);
3445 						report = report1_32;
3446 					}
3447 				}
3448 
3449 				/* Print out deltas for a few significant
3450 				 * counters for each report. */
3451 				if (lprev) {
3452 					memset(laccumulator.deltas, 0, sizeof(laccumulator.deltas));
3453 					accumulate_reports(&laccumulator, lprev, report);
3454 					igt_debug("    deltas: A0=%"PRIu64" A21=%"PRIu64", A26=%"PRIu64"\n",
3455 						  laccumulator.deltas[2 + 0], /* skip timestamp + clock cycles */
3456 						  laccumulator.deltas[2 + 21],
3457 						  laccumulator.deltas[2 + 26]);
3458 				}
3459 				lprev = report;
3460 
3461 				/* Print out reason for the report. */
3462 				reason = ((report[0] >> OAREPORT_REASON_SHIFT) &
3463 					  OAREPORT_REASON_MASK);
3464 
3465 				if (reason & OAREPORT_REASON_CTX_SWITCH) {
3466 					report_reason = "ctx-load";
3467 				} else if (reason & OAREPORT_REASON_TIMER) {
3468 					report_reason = "timer";
3469 				} else if (reason & OAREPORT_REASON_INTERNAL ||
3470 					   reason & OAREPORT_REASON_GO ||
3471 					   reason & OAREPORT_REASON_CLK_RATIO) {
3472 					report_reason = "internal/go/clk-ratio";
3473 				} else {
3474 					report_reason = "end-mi-rpc";
3475 				}
3476 				igt_debug("    ctx_id=%u/%x reason=%s oa_timestamp32=%u\n",
3477 					  report[2], report[2], report_reason, report[1]);
3478 
3479 				/* Should we skip this report?
3480 				 *
3481 				 *   Only if the current context id of
3482 				 *   the stream is not the one we want
3483 				 *   to measure.
3484 				 */
3485 				if (current_ctx_id != ctx_id) {
3486 					skip_reason = "not our context";
3487 				}
3488 
3489 				if (n_invalid_ctx > 1) {
3490 					skip_reason = "too many invalid context events";
3491 				}
3492 
3493 				if (!skip_reason) {
3494 					accumulate_reports(&accumulator, prev, report);
3495 					igt_debug(" -> Accumulated deltas A0=%"PRIu64" A21=%"PRIu64", A26=%"PRIu64"\n",
3496 						  accumulator.deltas[2 + 0], /* skip timestamp + clock cycles */
3497 						  accumulator.deltas[2 + 21],
3498 						  accumulator.deltas[2 + 26]);
3499 				} else {
3500 					igt_debug(" -> Skipping: %s\n", skip_reason);
3501 				}
3502 
3503 
3504 				/* Finally update current-ctx_id, only possible
3505 				 * with a valid context id. */
3506 				if (oa_report_ctx_is_valid(report)) {
3507 					current_ctx_id = report[2];
3508 					n_invalid_ctx = 0;
3509 				} else {
3510 					n_invalid_ctx++;
3511 				}
3512 
3513 				prev = report;
3514 
3515 				if (report == report1_32) {
3516 					igt_debug("Breaking on end of report\n");
3517 					print_reports(report0_32, report1_32,
3518 						      lookup_format(test_oa_format));
3519 					break;
3520 				}
3521 			}
3522 
3523 			igt_debug("n samples written = %"PRIu64"/%"PRIu64" (%ix%i)\n",
3524 				  accumulator.deltas[2 + 21],/* skip timestamp + clock cycles */
3525 				  accumulator.deltas[2 + 26],
3526 				  width, height);
3527 			accumulator_print(&accumulator, "filtered");
3528 
3529 			ret = drm_intel_bo_map(src[0].bo, false /* write enable */);
3530 			igt_assert_eq(ret, 0);
3531 			ret = drm_intel_bo_map(dst[0].bo, false /* write enable */);
3532 			igt_assert_eq(ret, 0);
3533 
3534 			ret = memcmp(src[0].bo->virtual, dst[0].bo->virtual, 4 * width * height);
3535 			if (ret != 0) {
3536 				accumulator_print(&accumulator, "total");
3537 				/* This needs to be investigated... From time
3538 				 * to time, the work we kick off doesn't seem
3539 				 * to happen. WTH?? */
3540 				exit(EAGAIN);
3541 			}
3542 
3543 			drm_intel_bo_unmap(src[0].bo);
3544 			drm_intel_bo_unmap(dst[0].bo);
3545 
3546 			igt_assert_eq(accumulator.deltas[2 + 26], width * height);
3547 
3548 			for (int i = 0; i < ARRAY_SIZE(src); i++) {
3549 				drm_intel_bo_unreference(src[i].bo);
3550 				drm_intel_bo_unreference(dst[i].bo);
3551 			}
3552 
3553 			drm_intel_bo_unmap(bo);
3554 			drm_intel_bo_unreference(bo);
3555 			intel_batchbuffer_free(batch);
3556 			drm_intel_gem_context_destroy(context0);
3557 			drm_intel_gem_context_destroy(context1);
3558 			drm_intel_bufmgr_destroy(bufmgr);
3559 			__perf_close(stream_fd);
3560 		}
3561 
3562 		child_ret = igt_wait_helper(&child);
3563 
3564 		igt_assert(WEXITSTATUS(child_ret) == EAGAIN ||
3565 			   WEXITSTATUS(child_ret) == 0);
3566 
3567 	} while (WEXITSTATUS(child_ret) == EAGAIN);
3568 }
3569 
/* Read the GPU's cumulative RC6 residency counter (in ms) from sysfs. */
static unsigned long rc6_residency_ms(void)
{
	unsigned long residency_ms = sysfs_read("power/rc6_residency_ms");

	return residency_ms;
}
3574 
3575 static void
test_rc6_disable(void)3576 test_rc6_disable(void)
3577 {
3578 	uint64_t properties[] = {
3579 		/* Include OA reports in samples */
3580 		DRM_I915_PERF_PROP_SAMPLE_OA, true,
3581 
3582 		/* OA unit configuration */
3583 		DRM_I915_PERF_PROP_OA_METRICS_SET, test_metric_set_id,
3584 		DRM_I915_PERF_PROP_OA_FORMAT, test_oa_format,
3585 		DRM_I915_PERF_PROP_OA_EXPONENT, oa_exp_1_millisec,
3586 	};
3587 	struct drm_i915_perf_open_param param = {
3588 		.flags = I915_PERF_FLAG_FD_CLOEXEC,
3589 		.num_properties = sizeof(properties) / 16,
3590 		.properties_ptr = to_user_pointer(properties),
3591 	};
3592 	unsigned long n_events_start, n_events_end;
3593 	unsigned long rc6_enabled;
3594 
3595 	rc6_enabled = 0;
3596 	igt_sysfs_scanf(sysfs, "power/rc6_enable", "%lu", &rc6_enabled);
3597 	igt_require(rc6_enabled);
3598 
3599 	stream_fd = __perf_open(drm_fd, &param, false);
3600 
3601 	n_events_start = rc6_residency_ms();
3602 	nanosleep(&(struct timespec){ .tv_sec = 0, .tv_nsec = 500000000 }, NULL);
3603 	n_events_end = rc6_residency_ms();
3604 	igt_assert_eq(n_events_end - n_events_start, 0);
3605 
3606 	__perf_close(stream_fd);
3607 	gem_quiescent_gpu(drm_fd);
3608 
3609 	n_events_start = rc6_residency_ms();
3610 	nanosleep(&(struct timespec){ .tv_sec = 1, .tv_nsec = 0 }, NULL);
3611 	n_events_end = rc6_residency_ms();
3612 	igt_assert_neq(n_events_end - n_events_start, 0);
3613 }
3614 
__i915_perf_add_config(int fd,struct drm_i915_perf_oa_config * config)3615 static int __i915_perf_add_config(int fd, struct drm_i915_perf_oa_config *config)
3616 {
3617 	int ret = igt_ioctl(fd, DRM_IOCTL_I915_PERF_ADD_CONFIG, config);
3618 	if (ret < 0)
3619 		ret = -errno;
3620 	return ret;
3621 }
3622 
/* Add an OA config, asserting success; returns the new config id (> 0). */
static int i915_perf_add_config(int fd, struct drm_i915_perf_oa_config *config)
{
	int id = __i915_perf_add_config(fd, config);

	igt_debug("config_id=%i\n", id);
	igt_assert(id > 0);

	return id;
}
3632 
i915_perf_remove_config(int fd,uint64_t config_id)3633 static void i915_perf_remove_config(int fd, uint64_t config_id)
3634 {
3635 	igt_assert_eq(igt_ioctl(fd, DRM_IOCTL_I915_PERF_REMOVE_CONFIG,
3636 				&config_id), 0);
3637 }
3638 
has_i915_perf_userspace_config(int fd)3639 static bool has_i915_perf_userspace_config(int fd)
3640 {
3641 	uint64_t config = 0;
3642 	int ret = igt_ioctl(fd, DRM_IOCTL_I915_PERF_REMOVE_CONFIG, &config);
3643 	igt_assert_eq(ret, -1);
3644 
3645 	igt_debug("errno=%i\n", errno);
3646 
3647 	return errno != EINVAL;
3648 }
3649 
3650 static void
test_invalid_create_userspace_config(void)3651 test_invalid_create_userspace_config(void)
3652 {
3653 	struct drm_i915_perf_oa_config config;
3654 	const char *uuid = "01234567-0123-0123-0123-0123456789ab";
3655 	const char *invalid_uuid = "blablabla-wrong";
3656 	uint32_t mux_regs[] = { 0x9888 /* NOA_WRITE */, 0x0 };
3657 	uint32_t invalid_mux_regs[] = { 0x12345678 /* invalid register */, 0x0 };
3658 
3659 	igt_require(has_i915_perf_userspace_config(drm_fd));
3660 
3661 	memset(&config, 0, sizeof(config));
3662 
3663 	/* invalid uuid */
3664 	strncpy(config.uuid, invalid_uuid, sizeof(config.uuid));
3665 	config.n_mux_regs = 1;
3666 	config.mux_regs_ptr = to_user_pointer(mux_regs);
3667 	config.n_boolean_regs = 0;
3668 	config.n_flex_regs = 0;
3669 
3670 	igt_assert_eq(__i915_perf_add_config(drm_fd, &config), -EINVAL);
3671 
3672 	/* invalid mux_regs */
3673 	memcpy(config.uuid, uuid, sizeof(config.uuid));
3674 	config.n_mux_regs = 1;
3675 	config.mux_regs_ptr = to_user_pointer(invalid_mux_regs);
3676 	config.n_boolean_regs = 0;
3677 	config.n_flex_regs = 0;
3678 
3679 	igt_assert_eq(__i915_perf_add_config(drm_fd, &config), -EINVAL);
3680 
3681 	/* empty config */
3682 	memcpy(config.uuid, uuid, sizeof(config.uuid));
3683 	config.n_mux_regs = 0;
3684 	config.mux_regs_ptr = to_user_pointer(mux_regs);
3685 	config.n_boolean_regs = 0;
3686 	config.n_flex_regs = 0;
3687 
3688 	igt_assert_eq(__i915_perf_add_config(drm_fd, &config), -EINVAL);
3689 
3690 	/* empty config with null pointers */
3691 	memcpy(config.uuid, uuid, sizeof(config.uuid));
3692 	config.n_mux_regs = 1;
3693 	config.mux_regs_ptr = to_user_pointer(NULL);
3694 	config.n_boolean_regs = 2;
3695 	config.boolean_regs_ptr = to_user_pointer(NULL);
3696 	config.n_flex_regs = 3;
3697 	config.flex_regs_ptr = to_user_pointer(NULL);
3698 
3699 	igt_assert_eq(__i915_perf_add_config(drm_fd, &config), -EINVAL);
3700 
3701 	/* invalid pointers */
3702 	memcpy(config.uuid, uuid, sizeof(config.uuid));
3703 	config.n_mux_regs = 42;
3704 	config.mux_regs_ptr = to_user_pointer((void *) 0xDEADBEEF);
3705 	config.n_boolean_regs = 0;
3706 	config.n_flex_regs = 0;
3707 
3708 	igt_assert_eq(__i915_perf_add_config(drm_fd, &config), -EFAULT);
3709 }
3710 
/* Exercise the failure modes of DRM_IOCTL_I915_PERF_REMOVE_CONFIG:
 * removal without root privileges (EACCES) and removal of a config id
 * that does not exist (ENOENT).
 */
static void
test_invalid_remove_userspace_config(void)
{
	struct drm_i915_perf_oa_config config;
	const char *uuid = "01234567-0123-0123-0123-0123456789ab";
	uint32_t mux_regs[] = { 0x9888 /* NOA_WRITE */, 0x0 };
	uint64_t config_id, wrong_config_id = 999999999;
	char path[512];

	igt_require(has_i915_perf_userspace_config(drm_fd));

	/* sysfs exposes each registered config under metrics/<uuid>/id. */
	snprintf(path, sizeof(path), "metrics/%s/id", uuid);

	/* Destroy previous configuration if present */
	if (try_sysfs_read_u64(path, &config_id))
		i915_perf_remove_config(drm_fd, config_id);

	memset(&config, 0, sizeof(config));

	/* Copies exactly sizeof(config.uuid) bytes, deliberately without a
	 * NUL terminator — presumably the uapi uuid field holds the bare
	 * 36-character UUID string (TODO confirm against i915_drm.h).
	 */
	memcpy(config.uuid, uuid, sizeof(config.uuid));

	config.n_mux_regs = 1;
	config.mux_regs_ptr = to_user_pointer(mux_regs);
	config.n_boolean_regs = 0;
	config.n_flex_regs = 0;

	config_id = i915_perf_add_config(drm_fd, &config);

	/* Removing configs without permissions should fail. */
	igt_fork(child, 1) {
		igt_drop_root();

		do_ioctl_err(drm_fd, DRM_IOCTL_I915_PERF_REMOVE_CONFIG, &config_id, EACCES);
	}
	igt_waitchildren();

	/* Removing invalid config ID should fail. */
	do_ioctl_err(drm_fd, DRM_IOCTL_I915_PERF_REMOVE_CONFIG, &wrong_config_id, ENOENT);

	/* Clean up the config we actually added. */
	i915_perf_remove_config(drm_fd, config_id);
}
3752 
/* Create a userspace OA config, open a stream that uses it, and verify
 * that removing and re-adding the config while the stream is alive works
 * without error.  Also checks permission (EACCES) and duplicate-uuid
 * (EADDRINUSE) rejection.
 */
static void
test_create_destroy_userspace_config(void)
{
	struct drm_i915_perf_oa_config config;
	const char *uuid = "01234567-0123-0123-0123-0123456789ab";
	uint32_t mux_regs[] = { 0x9888 /* NOA_WRITE */, 0x0 };
	uint32_t flex_regs[100];
	int i;
	uint64_t config_id;
	uint64_t properties[] = {
		DRM_I915_PERF_PROP_OA_METRICS_SET, 0, /* Filled later */

		/* OA unit configuration */
		DRM_I915_PERF_PROP_SAMPLE_OA, true,
		DRM_I915_PERF_PROP_OA_FORMAT, test_oa_format,
		DRM_I915_PERF_PROP_OA_EXPONENT, oa_exp_1_millisec,
		/* NOTE(review): this trailing key has no value and is never
		 * consumed — num_properties below is ARRAY_SIZE / 2 == 4,
		 * so integer division drops this 9th element.  Looks like a
		 * leftover; confirm before removing.
		 */
		DRM_I915_PERF_PROP_OA_METRICS_SET
	};
	struct drm_i915_perf_open_param param = {
		.flags = I915_PERF_FLAG_FD_CLOEXEC |
		I915_PERF_FLAG_FD_NONBLOCK |
		I915_PERF_FLAG_DISABLED,
		.num_properties = ARRAY_SIZE(properties) / 2,
		.properties_ptr = to_user_pointer(properties),
	};
	char path[512];

	igt_require(has_i915_perf_userspace_config(drm_fd));

	snprintf(path, sizeof(path), "metrics/%s/id", uuid);

	/* Destroy previous configuration if present */
	if (try_sysfs_read_u64(path, &config_id))
		i915_perf_remove_config(drm_fd, config_id);

	memset(&config, 0, sizeof(config));
	/* Exactly sizeof(config.uuid) bytes, no NUL terminator. */
	memcpy(config.uuid, uuid, sizeof(config.uuid));

	config.n_mux_regs = 1;
	config.mux_regs_ptr = to_user_pointer(mux_regs);

	/* Flex EU counters are only available on gen8+ */
	if (intel_gen(devid) >= 8) {
		/* Fill all 50 (reg, value) pairs with EU_PERF_CNTL0. */
		for (i = 0; i < ARRAY_SIZE(flex_regs) / 2; i++) {
			flex_regs[i * 2] = 0xe458; /* EU_PERF_CNTL0 */
			flex_regs[i * 2 + 1] = 0x0;
		}
		config.flex_regs_ptr = to_user_pointer(flex_regs);
		config.n_flex_regs = ARRAY_SIZE(flex_regs) / 2;
	}

	config.n_boolean_regs = 0;

	/* Creating configs without permissions shouldn't work. */
	igt_fork(child, 1) {
		igt_drop_root();

		igt_assert_eq(__i915_perf_add_config(drm_fd, &config), -EACCES);
	}
	igt_waitchildren();

	/* Create a new config */
	config_id = i915_perf_add_config(drm_fd, &config);

	/* Verify that adding the another config with the same uuid fails. */
	igt_assert_eq(__i915_perf_add_config(drm_fd, &config), -EADDRINUSE);

	/* Try to use the new config */
	properties[1] = config_id;
	stream_fd = __perf_open(drm_fd, &param, false);

	/* Verify that destroying the config doesn't yield any error. */
	i915_perf_remove_config(drm_fd, config_id);

	/* Read the config to verify shouldn't raise any issue. */
	config_id = i915_perf_add_config(drm_fd, &config);

	__perf_close(stream_fd);

	i915_perf_remove_config(drm_fd, config_id);
}
3834 
3835 /* Registers required by userspace. This list should be maintained by
3836  * the OA configs developers and agreed upon with kernel developers as
3837  * some of the registers have bits used by the kernel (for workarounds
3838  * for instance) and other bits that need to be set by the OA configs.
3839  */
3840 static void
test_whitelisted_registers_userspace_config(void)3841 test_whitelisted_registers_userspace_config(void)
3842 {
3843 	struct drm_i915_perf_oa_config config;
3844 	const char *uuid = "01234567-0123-0123-0123-0123456789ab";
3845 	uint32_t mux_regs[200];
3846 	uint32_t b_counters_regs[200];
3847 	uint32_t flex_regs[200];
3848 	uint32_t i;
3849 	uint64_t config_id;
3850 	char path[512];
3851 	int ret;
3852 	const uint32_t flex[] = {
3853 		0xe458,
3854 		0xe558,
3855 		0xe658,
3856 		0xe758,
3857 		0xe45c,
3858 		0xe55c,
3859 		0xe65c
3860 	};
3861 
3862 	igt_require(has_i915_perf_userspace_config(drm_fd));
3863 
3864 	snprintf(path, sizeof(path), "metrics/%s/id", uuid);
3865 
3866 	if (try_sysfs_read_u64(path, &config_id))
3867 		i915_perf_remove_config(drm_fd, config_id);
3868 
3869 	memset(&config, 0, sizeof(config));
3870 	memcpy(config.uuid, uuid, sizeof(config.uuid));
3871 
3872 	/* OASTARTTRIG[1-8] */
3873 	for (i = 0x2710; i <= 0x272c; i += 4) {
3874 		b_counters_regs[config.n_boolean_regs * 2] = i;
3875 		b_counters_regs[config.n_boolean_regs * 2 + 1] = 0;
3876 		config.n_boolean_regs++;
3877 	}
3878 	/* OAREPORTTRIG[1-8] */
3879 	for (i = 0x2740; i <= 0x275c; i += 4) {
3880 		b_counters_regs[config.n_boolean_regs * 2] = i;
3881 		b_counters_regs[config.n_boolean_regs * 2 + 1] = 0;
3882 		config.n_boolean_regs++;
3883 	}
3884 	config.boolean_regs_ptr = (uintptr_t) b_counters_regs;
3885 
3886 	if (intel_gen(devid) >= 8) {
3887 		/* Flex EU registers, only from Gen8+. */
3888 		for (i = 0; i < ARRAY_SIZE(flex); i++) {
3889 			flex_regs[config.n_flex_regs * 2] = flex[i];
3890 			flex_regs[config.n_flex_regs * 2 + 1] = 0;
3891 			config.n_flex_regs++;
3892 		}
3893 		config.flex_regs_ptr = (uintptr_t) flex_regs;
3894 	}
3895 
3896 	/* Mux registers (too many of them, just checking bounds) */
3897 	i = 0;
3898 
3899 	/* NOA_WRITE */
3900 	mux_regs[i++] = 0x9800;
3901 	mux_regs[i++] = 0;
3902 
3903 	if (IS_HASWELL(devid)) {
3904 		/* Haswell specific. undocumented... */
3905 		mux_regs[i++] = 0x9ec0;
3906 		mux_regs[i++] = 0;
3907 
3908 		mux_regs[i++] = 0x25100;
3909 		mux_regs[i++] = 0;
3910 		mux_regs[i++] = 0x2ff90;
3911 		mux_regs[i++] = 0;
3912 	}
3913 
3914 	if (intel_gen(devid) >= 8 && !IS_CHERRYVIEW(devid)) {
3915 		/* NOA_CONFIG */
3916 		mux_regs[i++] = 0xD04;
3917 		mux_regs[i++] = 0;
3918 		mux_regs[i++] = 0xD2C;
3919 		mux_regs[i++] = 0;
3920 		/* WAIT_FOR_RC6_EXIT */
3921 		mux_regs[i++] = 0x20CC;
3922 		mux_regs[i++] = 0;
3923 	}
3924 
3925 	/* HALF_SLICE_CHICKEN2 (shared with kernel workaround) */
3926 	mux_regs[i++] = 0xE180;
3927 	mux_regs[i++] = 0;
3928 
3929 	if (IS_CHERRYVIEW(devid)) {
3930 		/* Cherryview specific. undocumented... */
3931 		mux_regs[i++] = 0x182300;
3932 		mux_regs[i++] = 0;
3933 		mux_regs[i++] = 0x1823A4;
3934 		mux_regs[i++] = 0;
3935 	}
3936 
3937 	/* PERFCNT[12] */
3938 	mux_regs[i++] = 0x91B8;
3939 	mux_regs[i++] = 0;
3940 	/* PERFMATRIX */
3941 	mux_regs[i++] = 0x91C8;
3942 	mux_regs[i++] = 0;
3943 
3944 	config.mux_regs_ptr = (uintptr_t) mux_regs;
3945 	config.n_mux_regs = i / 2;
3946 
3947 	/* Create a new config */
3948 	ret = igt_ioctl(drm_fd, DRM_IOCTL_I915_PERF_ADD_CONFIG, &config);
3949 	igt_assert(ret > 0); /* Config 0 should be used by the kernel */
3950 	config_id = ret;
3951 
3952 	i915_perf_remove_config(drm_fd, config_id);
3953 }
3954 
/* Parse /proc/modules and return the reference count of the i915 module.
 *
 * Each /proc/modules line looks like "name size refcount deps ...", so we
 * scan for the "i915 " prefix and pull the second numeric field.  Aborts
 * if /proc/modules cannot be opened or i915 is not listed.
 *
 * Fix: getline() returns ssize_t; store it in a ssize_t instead of
 * narrowing to int.
 */
static unsigned
read_i915_module_ref(void)
{
	FILE *fp = fopen("/proc/modules", "r");
	char *line = NULL;
	size_t line_buf_size = 0;
	ssize_t len;
	unsigned ref_count;

	igt_assert(fp);

	while ((len = getline(&line, &line_buf_size, fp)) > 0) {
		if (strncmp(line, "i915 ", 5) == 0) {
			unsigned long mem;
			int ret = sscanf(line + 5, "%lu %u", &mem, &ref_count);
			igt_assert(ret == 2);
			goto done;
		}
	}

	/* i915 must be loaded for this test to make sense. */
	igt_assert(!"reached");

done:
	free(line);
	fclose(fp);
	return ref_count;
}
3982 
3983 /* check that an open i915 perf stream holds a reference on the drm i915 module
3984  * including in the corner case where the original drm fd has been closed.
3985  */
3986 static void
test_i915_ref_count(void)3987 test_i915_ref_count(void)
3988 {
3989 	uint64_t properties[] = {
3990 		/* Include OA reports in samples */
3991 		DRM_I915_PERF_PROP_SAMPLE_OA, true,
3992 
3993 		/* OA unit configuration */
3994 		DRM_I915_PERF_PROP_OA_METRICS_SET, 0 /* updated below */,
3995 		DRM_I915_PERF_PROP_OA_FORMAT, 0, /* update below */
3996 		DRM_I915_PERF_PROP_OA_EXPONENT, 0, /* update below */
3997 	};
3998 	struct drm_i915_perf_open_param param = {
3999 		.flags = I915_PERF_FLAG_FD_CLOEXEC,
4000 		.num_properties = sizeof(properties) / 16,
4001 		.properties_ptr = to_user_pointer(properties),
4002 	};
4003 	unsigned baseline, ref_count0, ref_count1;
4004 	uint32_t oa_report0[64];
4005 	uint32_t oa_report1[64];
4006 
4007 	/* This should be the first test before the first fixture so no drm_fd
4008 	 * should have been opened so far...
4009 	 */
4010 	igt_assert_eq(drm_fd, -1);
4011 
4012 	baseline = read_i915_module_ref();
4013 	igt_debug("baseline ref count (drm fd closed) = %u\n", baseline);
4014 
4015 	drm_fd = __drm_open_driver(DRIVER_INTEL);
4016 	devid = intel_get_drm_devid(drm_fd);
4017 	sysfs = igt_sysfs_open(drm_fd);
4018 
4019 	/* Note: these global variables are only initialized after calling
4020 	 * init_sys_info()...
4021 	 */
4022 	igt_require(init_sys_info());
4023 	properties[3] = test_metric_set_id;
4024 	properties[5] = test_oa_format;
4025 	properties[7] = oa_exp_1_millisec;
4026 
4027 	ref_count0 = read_i915_module_ref();
4028 	igt_debug("initial ref count with drm_fd open = %u\n", ref_count0);
4029 	igt_assert(ref_count0 > baseline);
4030 
4031 	stream_fd = __perf_open(drm_fd, &param, false);
4032 	ref_count1 = read_i915_module_ref();
4033 	igt_debug("ref count after opening i915 perf stream = %u\n", ref_count1);
4034 	igt_assert(ref_count1 > ref_count0);
4035 
4036 	close(drm_fd);
4037 	close(sysfs);
4038 	drm_fd = -1;
4039 	sysfs = -1;
4040 	ref_count0 = read_i915_module_ref();
4041 	igt_debug("ref count after closing drm fd = %u\n", ref_count0);
4042 
4043 	igt_assert(ref_count0 > baseline);
4044 
4045 	read_2_oa_reports(test_oa_format,
4046 			  oa_exp_1_millisec,
4047 			  oa_report0,
4048 			  oa_report1,
4049 			  false); /* not just timer reports */
4050 
4051 	__perf_close(stream_fd);
4052 	ref_count0 = read_i915_module_ref();
4053 	igt_debug("ref count after closing i915 perf stream fd = %u\n", ref_count0);
4054 	igt_assert_eq(ref_count0, baseline);
4055 }
4056 
/* Check the i915 perf sysctls ship with the expected default values. */
static void
test_sysctl_defaults(void)
{
	int stream_paranoid = read_u64_file("/proc/sys/dev/i915/perf_stream_paranoid");
	int sample_rate = read_u64_file("/proc/sys/dev/i915/oa_max_sample_rate");

	/* Paranoid mode on, OA sampling capped at 100kHz by default. */
	igt_assert_eq(stream_paranoid, 1);
	igt_assert_eq(sample_rate, 100000);
}
4066 
igt_main
{
	igt_skip_on_simulation();

	igt_fixture {
		struct stat sb;

		/* The i915 perf interface advertises itself through these
		 * sysctls; skip the whole binary if they are absent.
		 */
		igt_require(stat("/proc/sys/dev/i915/perf_stream_paranoid", &sb)
			    == 0);
		igt_require(stat("/proc/sys/dev/i915/oa_max_sample_rate", &sb)
			    == 0);
	}

	/* Must run before the fixture below opens drm_fd: the test takes a
	 * module refcount baseline with no fd open.
	 */
	igt_subtest("i915-ref-count")
		test_i915_ref_count();

	igt_subtest("sysctl-defaults")
		test_sysctl_defaults();

	igt_fixture {
		/* We expect that the ref count test before these fixtures
		 * should have closed drm_fd...
		 */
		igt_assert_eq(drm_fd, -1);

		drm_fd = drm_open_driver(DRIVER_INTEL);
		igt_require_gem(drm_fd);

		devid = intel_get_drm_devid(drm_fd);
		sysfs = igt_sysfs_open(drm_fd);

		igt_require(init_sys_info());

		/* Known sysctl state for all subsequent subtests. */
		write_u64_file("/proc/sys/dev/i915/perf_stream_paranoid", 1);
		write_u64_file("/proc/sys/dev/i915/oa_max_sample_rate", 100000);

		gt_max_freq_mhz = sysfs_read("gt_boost_freq_mhz");

		render_copy = igt_get_render_copyfunc(devid);
		igt_require_f(render_copy, "no render-copy function\n");
	}

	igt_subtest("non-system-wide-paranoid")
		test_system_wide_paranoid();

	igt_subtest("invalid-open-flags")
		test_invalid_open_flags();

	igt_subtest("invalid-oa-metric-set-id")
		test_invalid_oa_metric_set_id();

	igt_subtest("invalid-oa-format-id")
		test_invalid_oa_format_id();

	igt_subtest("missing-sample-flags")
		test_missing_sample_flags();

	igt_subtest("oa-formats")
		test_oa_formats();

	igt_subtest("invalid-oa-exponent")
		test_invalid_oa_exponent();
	igt_subtest("low-oa-exponent-permissions")
		test_low_oa_exponent_permissions();
	igt_subtest("oa-exponents")
		test_oa_exponents();

	igt_subtest("per-context-mode-unprivileged") {
		igt_require(IS_HASWELL(devid));
		test_per_context_mode_unprivileged();
	}

	igt_subtest("buffer-fill")
		test_buffer_fill();

	igt_subtest("disabled-read-error")
		test_disabled_read_error();
	igt_subtest("non-sampling-read-error")
		test_non_sampling_read_error();

	igt_subtest("enable-disable")
		test_enable_disable();

	igt_subtest("blocking")
		test_blocking();

	igt_subtest("polling")
		test_polling();

	igt_subtest("short-reads")
		test_short_reads();

	igt_subtest("mi-rpc")
		test_mi_rpc();

	igt_subtest("unprivileged-single-ctx-counters") {
		igt_require(IS_HASWELL(devid));
		hsw_test_single_ctx_counters();
	}

	igt_subtest("gen8-unprivileged-single-ctx-counters") {
		/* For Gen8+ the OA unit can no longer be made to clock gate
		 * for a specific context. Additionally the partial-replacement
		 * functionality to HW filter timer reports for a specific
		 * context (SKL+) can't stop multiple applications viewing
		 * system-wide data via MI_REPORT_PERF_COUNT commands.
		 */
		igt_require(intel_gen(devid) >= 8);
		gen8_test_single_ctx_render_target_writes_a_counter();
	}

	igt_subtest("rc6-disable")
		test_rc6_disable();

	igt_subtest("invalid-create-userspace-config")
		test_invalid_create_userspace_config();

	igt_subtest("invalid-remove-userspace-config")
		test_invalid_remove_userspace_config();

	igt_subtest("create-destroy-userspace-config")
		test_create_destroy_userspace_config();

	igt_subtest("whitelisted-registers-userspace-config")
		test_whitelisted_registers_userspace_config();

	igt_fixture {
		/* leave sysctl options in their default state... */
		write_u64_file("/proc/sys/dev/i915/oa_max_sample_rate", 100000);
		write_u64_file("/proc/sys/dev/i915/perf_stream_paranoid", 1);

		close(drm_fd);
	}
}
4201