1 /*
2 * Copyright © 2016 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 */
24
25 #include <stdlib.h>
26 #include <stdio.h>
27 #include <string.h>
28 #include <fcntl.h>
29 #include <inttypes.h>
30 #include <errno.h>
31 #include <signal.h>
32 #include <sys/stat.h>
33 #include <sys/time.h>
34 #include <sys/times.h>
35 #include <sys/types.h>
36 #include <dirent.h>
37 #include <time.h>
38 #include <poll.h>
39 #include <math.h>
40
41 #include "igt.h"
42 #include "igt_sysfs.h"
43 #include "drm.h"
44
45 IGT_TEST_DESCRIPTION("Test the i915 perf metrics streaming interface");
46
/* MI_REPORT_PERF_COUNT opcodes; low bits encode (n_dwords - 2) and gen8+
 * needs one extra dword for the 64bit destination address.
 */
#define GEN6_MI_REPORT_PERF_COUNT ((0x28 << 23) | (3 - 2))
#define GEN8_MI_REPORT_PERF_COUNT ((0x28 << 23) | (4 - 2))

/* Report reason field in dword 0 of gen8+ OA reports */
#define OAREPORT_REASON_MASK 0x3f
#define OAREPORT_REASON_SHIFT 19
#define OAREPORT_REASON_TIMER (1<<0)
#define OAREPORT_REASON_INTERNAL (3<<1)
#define OAREPORT_REASON_CTX_SWITCH (1<<3)
#define OAREPORT_REASON_GO (1<<4)
#define OAREPORT_REASON_CLK_RATIO (1<<5)

/* PIPE_CONTROL command and flag bits (used when flushing render work) */
#define GFX_OP_PIPE_CONTROL ((3 << 29) | (3 << 27) | (2 << 24))
#define PIPE_CONTROL_CS_STALL (1 << 20)
#define PIPE_CONTROL_GLOBAL_SNAPSHOT_COUNT_RESET (1 << 19)
#define PIPE_CONTROL_TLB_INVALIDATE (1 << 18)
#define PIPE_CONTROL_SYNC_GFDT (1 << 17)
#define PIPE_CONTROL_MEDIA_STATE_CLEAR (1 << 16)
#define PIPE_CONTROL_NO_WRITE (0 << 14)
#define PIPE_CONTROL_WRITE_IMMEDIATE (1 << 14)
#define PIPE_CONTROL_WRITE_DEPTH_COUNT (2 << 14)
#define PIPE_CONTROL_WRITE_TIMESTAMP (3 << 14)
#define PIPE_CONTROL_DEPTH_STALL (1 << 13)
#define PIPE_CONTROL_RENDER_TARGET_FLUSH (1 << 12)
#define PIPE_CONTROL_INSTRUCTION_INVALIDATE (1 << 11)
#define PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE (1 << 10) /* GM45+ only */
#define PIPE_CONTROL_ISP_DIS (1 << 9)
#define PIPE_CONTROL_INTERRUPT_ENABLE (1 << 8)
#define PIPE_CONTROL_FLUSH_ENABLE (1 << 7) /* Gen7+ only */
/* GT */
#define PIPE_CONTROL_DATA_CACHE_INVALIDATE (1 << 5)
#define PIPE_CONTROL_VF_CACHE_INVALIDATE (1 << 4)
#define PIPE_CONTROL_CONST_CACHE_INVALIDATE (1 << 3)
#define PIPE_CONTROL_STATE_CACHE_INVALIDATE (1 << 2)
#define PIPE_CONTROL_STALL_AT_SCOREBOARD (1 << 1)
#define PIPE_CONTROL_DEPTH_CACHE_FLUSH (1 << 0)
#define PIPE_CONTROL_PPGTT_WRITE (0 << 2)
#define PIPE_CONTROL_GLOBAL_GTT_WRITE (1 << 2)

#define MAX_OA_BUF_SIZE (16 * 1024 * 1024)
86
/* Wrap-corrected counter deltas accumulated between two OA reports. */
struct accumulator {
#define MAX_RAW_OA_COUNTERS 62
	/* layout of the reports the deltas were accumulated from */
	enum drm_i915_oa_format format;

	/* deltas in accumulate_reports() order: time, [clock], A40s, As, Bs, Cs */
	uint64_t deltas[MAX_RAW_OA_COUNTERS];
};
93
/*
 * Memory layout of one OA report format: byte offsets and counts of the
 * A (aggregating), B (boolean) and C (custom) counter blocks.
 */
struct oa_format {
	const char *name;	/* human readable name; NULL => slot unused */
	size_t size;		/* total report size in bytes */
	int a40_high_off; /* bytes; high 8 bits of the 40bit A counters */
	int a40_low_off;	/* bytes; low 32 bits of the 40bit A counters */
	int n_a40;		/* number of 40bit A counters */
	int a_off;		/* bytes; start of 32bit A counters */
	int n_a;		/* number of 32bit A counters */
	int first_a;		/* global index of the first 32bit A counter */
	int b_off;		/* bytes; start of B counters */
	int n_b;		/* number of B counters */
	int c_off;		/* bytes; start of C counters */
	int n_c;		/* number of C counters */
};
108
/* OA report layouts selectable on Haswell, indexed by i915 format id. */
static struct oa_format hsw_oa_formats[I915_OA_FORMAT_MAX] = {
	[I915_OA_FORMAT_A13] = { /* HSW only */
		"A13", .size = 64,
		.a_off = 12, .n_a = 13, },
	[I915_OA_FORMAT_A29] = { /* HSW only */
		"A29", .size = 128,
		.a_off = 12, .n_a = 29, },
	[I915_OA_FORMAT_A13_B8_C8] = { /* HSW only */
		"A13_B8_C8", .size = 128,
		.a_off = 12, .n_a = 13,
		.b_off = 64, .n_b = 8,
		.c_off = 96, .n_c = 8, },
	[I915_OA_FORMAT_A45_B8_C8] = { /* HSW only */
		"A45_B8_C8", .size = 256,
		.a_off = 12, .n_a = 45,
		.b_off = 192, .n_b = 8,
		.c_off = 224, .n_c = 8, },
	[I915_OA_FORMAT_B4_C8] = { /* HSW only */
		"B4_C8", .size = 64,
		.b_off = 16, .n_b = 4,
		.c_off = 32, .n_c = 8, },
	[I915_OA_FORMAT_B4_C8_A16] = { /* HSW only */
		"B4_C8_A16", .size = 128,
		.b_off = 16, .n_b = 4,
		.c_off = 32, .n_c = 8,
		.a_off = 60, .n_a = 16, .first_a = 29, },
	[I915_OA_FORMAT_C4_B8] = { /* HSW+ (header differs from HSW-Gen8+) */
		"C4_B8", .size = 64,
		.c_off = 16, .n_c = 4,
		.b_off = 28, .n_b = 8 },
};
140
/* OA report layouts selectable on Gen8+, indexed by i915 format id. */
static struct oa_format gen8_oa_formats[I915_OA_FORMAT_MAX] = {
	[I915_OA_FORMAT_A12] = {
		"A12", .size = 64,
		.a_off = 12, .n_a = 12, .first_a = 7, },
	[I915_OA_FORMAT_A12_B8_C8] = {
		"A12_B8_C8", .size = 128,
		.a_off = 12, .n_a = 12,
		.b_off = 64, .n_b = 8,
		.c_off = 96, .n_c = 8, .first_a = 7, },
	[I915_OA_FORMAT_A32u40_A4u32_B8_C8] = {
		"A32u40_A4u32_B8_C8", .size = 256,
		.a40_high_off = 160, .a40_low_off = 16, .n_a40 = 32,
		.a_off = 144, .n_a = 4, .first_a = 32,
		.b_off = 192, .n_b = 8,
		.c_off = 224, .n_c = 8, },
	[I915_OA_FORMAT_C4_B8] = {
		"C4_B8", .size = 64,
		.c_off = 16, .n_c = 4,
		.b_off = 32, .n_b = 8, },
};
161
/* A counter indices that are reserved/undefined on Haswell; deltas for
 * these slots are skipped by the sanity checks.
 */
static bool hsw_undefined_a_counters[45] = {
	[4] = true,
	[6] = true,
	[9] = true,
	[11] = true,
	[14] = true,
	[16] = true,
	[19] = true,
	[21] = true,
	[24] = true,
	[26] = true,
	[29] = true,
	[31] = true,
	[34] = true,
	[43] = true,
	[44] = true,
};

/* No A counters currently reserved/undefined for gen8+ so far */
static bool gen8_undefined_a_counters[45];
182
static int drm_fd = -1;		/* DRM device fd */
static int sysfs = -1;		/* sysfs dir fd for the DRM device */
static int pm_fd = -1;		/* /dev/cpu_dma_latency fd; holds a PM QoS request */
static int stream_fd = -1;	/* currently open i915 perf stream */
static uint32_t devid;		/* PCI device id */
static int n_eus;		/* total EUs; bounds per-clock counter deltas */

/* sysfs id of the (platform dependent) metric set used by the tests */
static uint64_t test_metric_set_id = UINT64_MAX;

static uint64_t timestamp_frequency = 12500000;	/* CS timestamp freq, Hz */
static uint64_t gt_max_freq_mhz = 0;
static enum drm_i915_oa_format test_oa_format;
static bool *undefined_a_counters;	/* per-platform reserved A counter map */
static uint64_t oa_exp_1_millisec;	/* OA exponent giving a <= 1ms period */

/* Per-platform hooks, selected in init_sys_info() */
static igt_render_copyfunc_t render_copy = NULL;
static uint32_t (*read_report_ticks)(uint32_t *report,
				     enum drm_i915_oa_format format);
static void (*sanity_check_reports)(uint32_t *oa_report0, uint32_t *oa_report1,
				    enum drm_i915_oa_format format);
203
204 static struct oa_format
get_oa_format(enum drm_i915_oa_format format)205 get_oa_format(enum drm_i915_oa_format format)
206 {
207 if (IS_HASWELL(devid))
208 return hsw_oa_formats[format];
209 return gen8_oa_formats[format];
210 }
211
212 static void
__perf_close(int fd)213 __perf_close(int fd)
214 {
215 close(fd);
216 stream_fd = -1;
217
218 if (pm_fd >= 0) {
219 close(pm_fd);
220 pm_fd = -1;
221 }
222 }
223
/*
 * Open an i915 perf stream with @param, first closing any previously
 * opened stream (and its PM QoS hold).
 *
 * If @prevent_pm is set, open /dev/cpu_dma_latency and write a 0us wakeup
 * latency request; the request stays in effect for as long as pm_fd is
 * kept open, preventing deep CPU idle states from distorting timing.
 *
 * Returns the (asserted valid) stream fd.
 */
static int
__perf_open(int fd, struct drm_i915_perf_open_param *param, bool prevent_pm)
{
	int ret;
	int32_t pm_value = 0;

	if (stream_fd >= 0)
		__perf_close(stream_fd);
	if (pm_fd >= 0) {
		close(pm_fd);
		pm_fd = -1;
	}

	ret = igt_ioctl(fd, DRM_IOCTL_I915_PERF_OPEN, param);

	igt_assert(ret >= 0);
	/* reset errno so later EAGAIN checks aren't confused by stale values */
	errno = 0;

	if (prevent_pm) {
		pm_fd = open("/dev/cpu_dma_latency", O_RDWR);
		igt_assert(pm_fd >= 0);

		igt_assert_eq(write(pm_fd, &pm_value, sizeof(pm_value)), sizeof(pm_value));
	}

	return ret;
}
251
252 static int
lookup_format(int i915_perf_fmt_id)253 lookup_format(int i915_perf_fmt_id)
254 {
255 igt_assert(i915_perf_fmt_id < I915_OA_FORMAT_MAX);
256 igt_assert(get_oa_format(i915_perf_fmt_id).name);
257
258 return i915_perf_fmt_id;
259 }
260
/* Read a single decimal uint64 from the file at @path. */
static uint64_t
read_u64_file(const char *path)
{
	uint64_t value;
	FILE *file = fopen(path, "r");

	igt_assert(file);
	igt_assert_eq(fscanf(file, "%"PRIu64, &value), 1);
	fclose(file);

	return value;
}
276
/* Write @val as a decimal string to the file at @path. */
static void
write_u64_file(const char *path, uint64_t val)
{
	FILE *file = fopen(path, "w");

	igt_assert(file);
	igt_assert(fprintf(file, "%"PRIu64, val) > 0);
	fclose(file);
}
289
290 static bool
try_sysfs_read_u64(const char * path,uint64_t * val)291 try_sysfs_read_u64(const char *path, uint64_t *val)
292 {
293 return igt_sysfs_scanf(sysfs, path, "%"PRIu64, val) == 1;
294 }
295
296 static unsigned long
sysfs_read(const char * path)297 sysfs_read(const char *path)
298 {
299 unsigned long value;
300
301 igt_assert(igt_sysfs_scanf(sysfs, path, "%lu", &value) == 1);
302
303 return value;
304 }
305
306 /* XXX: For Haswell this utility is only applicable to the render basic
307 * metric set.
308 *
309 * C2 corresponds to a clock counter for the Haswell render basic metric set
310 * but it's not included in all of the formats.
311 */
312 static uint32_t
hsw_read_report_ticks(uint32_t * report,enum drm_i915_oa_format format)313 hsw_read_report_ticks(uint32_t *report, enum drm_i915_oa_format format)
314 {
315 uint32_t *c = (uint32_t *)(((uint8_t *)report) + get_oa_format(format).c_off);
316
317 igt_assert_neq(get_oa_format(format).n_c, 0);
318
319 return c[2];
320 }
321
322 static uint32_t
gen8_read_report_ticks(uint32_t * report,enum drm_i915_oa_format format)323 gen8_read_report_ticks(uint32_t *report, enum drm_i915_oa_format format)
324 {
325 return report[3];
326 }
327
/*
 * Decode the slice/unslice clock frequencies (in MHz) packed into the
 * first dword of a Gen8+ OA report.
 */
static void
gen8_read_report_clock_ratios(uint32_t *report,
			      uint32_t *slice_freq_mhz,
			      uint32_t *unslice_freq_mhz)
{
	uint32_t dword0 = report[0];
	uint32_t unslice_units = dword0 & 0x1ff;
	/* slice frequency is split: 7 low bits at [31:25], 2 high bits at [10:9] */
	uint32_t slice_units = ((dword0 >> 25) & 0x7f) |
			       (((dword0 >> 9) & 0x3) << 7);

	/* each unit represents 16.666 MHz */
	*slice_freq_mhz = (slice_units * 16666) / 1000;
	*unslice_freq_mhz = (unslice_units * 16666) / 1000;
}
341
342 static const char *
gen8_read_report_reason(const uint32_t * report)343 gen8_read_report_reason(const uint32_t *report)
344 {
345 uint32_t reason = ((report[0] >> OAREPORT_REASON_SHIFT) &
346 OAREPORT_REASON_MASK);
347
348 if (reason & (1<<0))
349 return "timer";
350 else if (reason & (1<<1))
351 return "internal trigger 1";
352 else if (reason & (1<<2))
353 return "internal trigger 2";
354 else if (reason & (1<<3))
355 return "context switch";
356 else if (reason & (1<<4))
357 return "GO 1->0 transition (enter RC6)";
358 else if (reason & (1<<5))
359 return "[un]slice clock ratio change";
360 else
361 return "unknown";
362 }
363
364 static uint64_t
timebase_scale(uint32_t u32_delta)365 timebase_scale(uint32_t u32_delta)
366 {
367 return ((uint64_t)u32_delta * NSEC_PER_SEC) / timestamp_frequency;
368 }
369
/* Returns: the largest OA exponent that will still result in a sampling period
 * less than or equal to the given @period.
 */
static int
max_oa_exponent_for_period_lte(uint64_t period)
{
	/* NB: timebase_scale() takes a uint32_t and an exponent of 30
	 * would already represent a period of ~3 minutes so there's
	 * really no need to consider higher exponents.
	 */
	for (int i = 0; i < 30; i++) {
		/* exponent i corresponds to a period of 2^(i + 1) timestamp ticks */
		uint64_t oa_period = timebase_scale(2 << i);

		if (oa_period > period)
			return max(0, i - 1);
	}

	igt_assert(!"reached");
	return -1;
}
390
391 /* Return: the largest OA exponent that will still result in a sampling
392 * frequency greater than the given @frequency.
393 */
394 static int
max_oa_exponent_for_freq_gt(uint64_t frequency)395 max_oa_exponent_for_freq_gt(uint64_t frequency)
396 {
397 uint64_t period = NSEC_PER_SEC / frequency;
398
399 igt_assert_neq(period, 0);
400
401 return max_oa_exponent_for_period_lte(period - 1);
402 }
403
404 static uint64_t
oa_exponent_to_ns(int exponent)405 oa_exponent_to_ns(int exponent)
406 {
407 return 1000000000ULL * (2ULL << exponent) / timestamp_frequency;
408 }
409
/* Whether @report was produced by the periodic OA timer (as opposed to a
 * context switch, GO transition, MI_RPC, etc).
 */
static bool
oa_report_is_periodic(uint32_t oa_exponent, const uint32_t *report)
{
	if (IS_HASWELL(devid)) {
		/* For Haswell we don't have a documented report reason field
		 * (though empirically report[0] bit 10 does seem to correlate
		 * with a timer trigger reason) so we instead infer which
		 * reports are timer triggered by checking if the least
		 * significant bits are zero and the exponent bit is set.
		 */
		uint32_t oa_exponent_mask = (1 << (oa_exponent + 1)) - 1;

		if ((report[1] & oa_exponent_mask) == (1 << oa_exponent))
			return true;
	} else {
		/* Gen8+ reports carry an explicit reason field. */
		if ((report[0] >> OAREPORT_REASON_SHIFT) &
		    OAREPORT_REASON_TIMER)
			return true;
	}

	return false;
}
432
433 static bool
oa_report_ctx_is_valid(uint32_t * report)434 oa_report_ctx_is_valid(uint32_t *report)
435 {
436 if (IS_HASWELL(devid)) {
437 return false; /* TODO */
438 } else if (IS_GEN8(devid)) {
439 return report[0] & (1ul << 25);
440 } else if (AT_LEAST_GEN(devid, 9)) {
441 return report[0] & (1ul << 16);
442 }
443
444 igt_assert(!"Please update this function for newer Gen");
445 }
446
/* Context ID of @report, or 0xffffffff when the report carries none. */
static uint32_t
oa_report_get_ctx_id(uint32_t *report)
{
	return oa_report_ctx_is_valid(report) ? report[2] : 0xffffffff;
}
454
455 static void
scratch_buf_memset(drm_intel_bo * bo,int width,int height,uint32_t color)456 scratch_buf_memset(drm_intel_bo *bo, int width, int height, uint32_t color)
457 {
458 int ret;
459
460 ret = drm_intel_bo_map(bo, true /* writable */);
461 igt_assert_eq(ret, 0);
462
463 for (int i = 0; i < width * height; i++)
464 ((uint32_t *)bo->virtual)[i] = color;
465
466 drm_intel_bo_unmap(bo);
467 }
468
/*
 * Allocate a @width x @height 32bpp linear buffer, fill it with @color
 * and initialise @buf to describe it for the render copy functions.
 */
static void
scratch_buf_init(drm_intel_bufmgr *bufmgr,
		 struct igt_buf *buf,
		 int width, int height,
		 uint32_t color)
{
	size_t stride = width * 4; /* 4 bytes per pixel */
	size_t size = stride * height;
	drm_intel_bo *bo = drm_intel_bo_alloc(bufmgr, "", size, 4096);

	scratch_buf_memset(bo, width, height, color);

	memset(buf, 0, sizeof(*buf));

	buf->bo = bo;
	buf->stride = stride;
	buf->tiling = I915_TILING_NONE;
	buf->size = size;
	buf->bpp = 32;
}
489
490 static void
emit_report_perf_count(struct intel_batchbuffer * batch,drm_intel_bo * dst_bo,int dst_offset,uint32_t report_id)491 emit_report_perf_count(struct intel_batchbuffer *batch,
492 drm_intel_bo *dst_bo,
493 int dst_offset,
494 uint32_t report_id)
495 {
496 if (IS_HASWELL(devid)) {
497 BEGIN_BATCH(3, 1);
498 OUT_BATCH(GEN6_MI_REPORT_PERF_COUNT);
499 OUT_RELOC(dst_bo, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
500 dst_offset);
501 OUT_BATCH(report_id);
502 ADVANCE_BATCH();
503 } else {
504 /* XXX: NB: n dwords arg is actually magic since it internally
505 * automatically accounts for larger addresses on gen >= 8...
506 */
507 BEGIN_BATCH(3, 1);
508 OUT_BATCH(GEN8_MI_REPORT_PERF_COUNT);
509 OUT_RELOC(dst_bo, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
510 dst_offset);
511 OUT_BATCH(report_id);
512 ADVANCE_BATCH();
513 }
514 }
515
/*
 * Sanity check two consecutive OA reports from the Haswell RenderBasic
 * metric set: derive (or estimate) the GPU clock delta between them, then
 * assert no raw A/B/C counter advanced faster than theoretically possible
 * (at most one increment per EU per clock => clock_delta * n_eus).
 */
static void
hsw_sanity_check_render_basic_reports(uint32_t *oa_report0, uint32_t *oa_report1,
				      enum drm_i915_oa_format fmt)
{
	/* dword 1 is the report timestamp on HSW */
	uint32_t time_delta = timebase_scale(oa_report1[1] - oa_report0[1]);
	uint32_t clock_delta;
	uint32_t max_delta;
	struct oa_format format = get_oa_format(fmt);

	igt_assert_neq(time_delta, 0);

	/* As a special case we have to consider that on Haswell we
	 * can't explicitly derive a clock delta for all OA report
	 * formats...
	 */
	if (format.n_c == 0) {
		/* Assume running at max freq for sake of
		 * below sanity check on counters... */
		clock_delta = (gt_max_freq_mhz *
			       (uint64_t)time_delta) / 1000;
	} else {
		uint32_t ticks0 = read_report_ticks(oa_report0, fmt);
		uint32_t ticks1 = read_report_ticks(oa_report1, fmt);
		uint64_t freq;

		clock_delta = ticks1 - ticks0;

		igt_assert_neq(clock_delta, 0);

		/* The implied frequency (MHz) must not exceed the GT max */
		freq = ((uint64_t)clock_delta * 1000) / time_delta;
		igt_debug("freq = %"PRIu64"\n", freq);

		igt_assert(freq <= gt_max_freq_mhz);
	}

	igt_debug("clock delta = %"PRIu32"\n", clock_delta);

	/* The maximum rate for any HSW counter =
	 * clock_delta * N EUs
	 *
	 * Sanity check that no counters exceed this delta.
	 */
	max_delta = clock_delta * n_eus;

	/* 40bit A counters were only introduced for Gen8+ */
	igt_assert_eq(format.n_a40, 0);

	for (int j = 0; j < format.n_a; j++) {
		uint32_t *a0 = (uint32_t *)(((uint8_t *)oa_report0) +
					    format.a_off);
		uint32_t *a1 = (uint32_t *)(((uint8_t *)oa_report1) +
					    format.a_off);
		int a_id = format.first_a + j;
		uint32_t delta = a1[j] - a0[j];

		/* skip the A counter slots that are reserved on HSW */
		if (undefined_a_counters[a_id])
			continue;

		igt_debug("A%d: delta = %"PRIu32"\n", a_id, delta);
		igt_assert(delta <= max_delta);
	}

	for (int j = 0; j < format.n_b; j++) {
		uint32_t *b0 = (uint32_t *)(((uint8_t *)oa_report0) +
					    format.b_off);
		uint32_t *b1 = (uint32_t *)(((uint8_t *)oa_report1) +
					    format.b_off);
		uint32_t delta = b1[j] - b0[j];

		igt_debug("B%d: delta = %"PRIu32"\n", j, delta);
		igt_assert(delta <= max_delta);
	}

	for (int j = 0; j < format.n_c; j++) {
		uint32_t *c0 = (uint32_t *)(((uint8_t *)oa_report0) +
					    format.c_off);
		uint32_t *c1 = (uint32_t *)(((uint8_t *)oa_report1) +
					    format.c_off);
		uint32_t delta = c1[j] - c0[j];

		igt_debug("C%d: delta = %"PRIu32"\n", j, delta);
		igt_assert(delta <= max_delta);
	}
}
600
601 static uint64_t
gen8_read_40bit_a_counter(uint32_t * report,enum drm_i915_oa_format fmt,int a_id)602 gen8_read_40bit_a_counter(uint32_t *report, enum drm_i915_oa_format fmt, int a_id)
603 {
604 struct oa_format format = get_oa_format(fmt);
605 uint8_t *a40_high = (((uint8_t *)report) + format.a40_high_off);
606 uint32_t *a40_low = (uint32_t *)(((uint8_t *)report) +
607 format.a40_low_off);
608 uint64_t high = (uint64_t)(a40_high[a_id]) << 32;
609
610 return a40_low[a_id] | high;
611 }
612
/* Delta between two 40bit counter samples, accounting for a single wrap. */
static uint64_t
gen8_40bit_a_delta(uint64_t value0, uint64_t value1)
{
	uint64_t delta = value1 - value0;

	if (value0 > value1)
		delta += 1ULL << 40;

	return delta;
}
621
/* Add the 32bit-wrap-safe delta of the counter at byte @offset in the two
 * reports to *delta.
 */
static void
accumulate_uint32(size_t offset,
		  uint32_t *report0,
		  uint32_t *report1,
		  uint64_t *delta)
{
	uint32_t before = *(uint32_t *)(((uint8_t *)report0) + offset);
	uint32_t after = *(uint32_t *)(((uint8_t *)report1) + offset);

	/* unsigned 32bit subtraction handles a single wrap correctly */
	*delta += (uint32_t)(after - before);
}
633
634 static void
accumulate_uint40(int a_index,uint32_t * report0,uint32_t * report1,enum drm_i915_oa_format format,uint64_t * delta)635 accumulate_uint40(int a_index,
636 uint32_t *report0,
637 uint32_t *report1,
638 enum drm_i915_oa_format format,
639 uint64_t *delta)
640 {
641 uint64_t value0 = gen8_read_40bit_a_counter(report0, format, a_index),
642 value1 = gen8_read_40bit_a_counter(report1, format, a_index);
643
644 *delta += gen8_40bit_a_delta(value0, value1);
645 }
646
/*
 * Accumulate all counter deltas between reports @start and @end into
 * accumulator->deltas, laid out as: time, [clock (gen8+)], 40bit As,
 * 32bit As, Bs, Cs. accumulator_print() must use the same ordering.
 */
static void
accumulate_reports(struct accumulator *accumulator,
		   uint32_t *start,
		   uint32_t *end)
{
	struct oa_format format = get_oa_format(accumulator->format);
	uint64_t *deltas = accumulator->deltas;
	int idx = 0;

	if (intel_gen(devid) >= 8) {
		/* timestamp */
		accumulate_uint32(4, start, end, deltas + idx++);

		/* clock cycles */
		accumulate_uint32(12, start, end, deltas + idx++);
	} else {
		/* timestamp */
		accumulate_uint32(4, start, end, deltas + idx++);
	}

	for (int i = 0; i < format.n_a40; i++) {
		accumulate_uint40(i, start, end, accumulator->format,
				  deltas + idx++);
	}

	for (int i = 0; i < format.n_a; i++) {
		accumulate_uint32(format.a_off + 4 * i,
				  start, end, deltas + idx++);
	}

	for (int i = 0; i < format.n_b; i++) {
		accumulate_uint32(format.b_off + 4 * i,
				  start, end, deltas + idx++);
	}

	for (int i = 0; i < format.n_c; i++) {
		accumulate_uint32(format.c_off + 4 * i,
				  start, end, deltas + idx++);
	}
}
687
688 static void
accumulator_print(struct accumulator * accumulator,const char * title)689 accumulator_print(struct accumulator *accumulator, const char *title)
690 {
691 struct oa_format format = get_oa_format(accumulator->format);
692 uint64_t *deltas = accumulator->deltas;
693 int idx = 0;
694
695 igt_debug("%s:\n", title);
696 if (intel_gen(devid) >= 8) {
697 igt_debug("\ttime delta = %"PRIu64"\n", deltas[idx++]);
698 igt_debug("\tclock cycle delta = %"PRIu64"\n", deltas[idx++]);
699
700 for (int i = 0; i < format.n_a40; i++)
701 igt_debug("\tA%u = %"PRIu64"\n", i, deltas[idx++]);
702 } else {
703 igt_debug("\ttime delta = %"PRIu64"\n", deltas[idx++]);
704 }
705
706 for (int i = 0; i < format.n_a; i++) {
707 int a_id = format.first_a + i;
708 igt_debug("\tA%u = %"PRIu64"\n", a_id, deltas[idx++]);
709 }
710
711 for (int i = 0; i < format.n_a; i++)
712 igt_debug("\tB%u = %"PRIu64"\n", i, deltas[idx++]);
713
714 for (int i = 0; i < format.n_c; i++)
715 igt_debug("\tC%u = %"PRIu64"\n", i, deltas[idx++]);
716 }
717
/* The TestOa metric set is designed so that its B counters are simple,
 * fixed functions of the GPU clock, which lets us check exact (or off by
 * one) relationships between the B deltas and the measured clock delta.
 */
static void
gen8_sanity_check_test_oa_reports(uint32_t *oa_report0, uint32_t *oa_report1,
				  enum drm_i915_oa_format fmt)
{
	struct oa_format format = get_oa_format(fmt);
	/* dword 1 is the report timestamp on gen8+ */
	uint32_t time_delta = timebase_scale(oa_report1[1] - oa_report0[1]);
	uint32_t ticks0 = read_report_ticks(oa_report0, fmt);
	uint32_t ticks1 = read_report_ticks(oa_report1, fmt);
	uint32_t clock_delta = ticks1 - ticks0;
	uint32_t max_delta;
	uint64_t freq;
	uint32_t *rpt0_b = (uint32_t *)(((uint8_t *)oa_report0) +
					format.b_off);
	uint32_t *rpt1_b = (uint32_t *)(((uint8_t *)oa_report1) +
					format.b_off);
	uint32_t b;
	uint32_t ref;


	igt_assert_neq(time_delta, 0);
	igt_assert_neq(clock_delta, 0);

	/* implied frequency (MHz) must not exceed the GT max */
	freq = ((uint64_t)clock_delta * 1000) / time_delta;
	igt_debug("freq = %"PRIu64"\n", freq);

	igt_assert(freq <= gt_max_freq_mhz);

	igt_debug("clock delta = %"PRIu32"\n", clock_delta);

	/* no counter can advance faster than one per EU per clock */
	max_delta = clock_delta * n_eus;

	/* Gen8+ has some 40bit A counters... */
	for (int j = 0; j < format.n_a40; j++) {
		uint64_t value0 = gen8_read_40bit_a_counter(oa_report0, fmt, j);
		uint64_t value1 = gen8_read_40bit_a_counter(oa_report1, fmt, j);
		uint64_t delta = gen8_40bit_a_delta(value0, value1);

		if (undefined_a_counters[j])
			continue;

		igt_debug("A%d: delta = %"PRIu64"\n", j, delta);
		igt_assert(delta <= max_delta);
	}

	for (int j = 0; j < format.n_a; j++) {
		uint32_t *a0 = (uint32_t *)(((uint8_t *)oa_report0) +
					    format.a_off);
		uint32_t *a1 = (uint32_t *)(((uint8_t *)oa_report1) +
					    format.a_off);
		int a_id = format.first_a + j;
		uint32_t delta = a1[j] - a0[j];

		if (undefined_a_counters[a_id])
			continue;

		igt_debug("A%d: delta = %"PRIu32"\n", a_id, delta);
		igt_assert(delta <= max_delta);
	}

	/* The TestOa metric set defines all B counters to be a
	 * multiple of the gpu clock
	 */
	if (format.n_b) {
		/* B0 is constant => zero delta */
		b = rpt1_b[0] - rpt0_b[0];
		igt_debug("B0: delta = %"PRIu32"\n", b);
		igt_assert_eq(b, 0);

		/* B1/B2 tick every clock */
		b = rpt1_b[1] - rpt0_b[1];
		igt_debug("B1: delta = %"PRIu32"\n", b);
		igt_assert_eq(b, clock_delta);

		b = rpt1_b[2] - rpt0_b[2];
		igt_debug("B2: delta = %"PRIu32"\n", b);
		igt_assert_eq(b, clock_delta);

		/* B3..B7 are clock/2, /3, /3, /6 and *2/3; allow +-1 for
		 * rounding/sampling phase
		 */
		b = rpt1_b[3] - rpt0_b[3];
		ref = clock_delta / 2;
		igt_debug("B3: delta = %"PRIu32"\n", b);
		igt_assert(b >= ref - 1 && b <= ref + 1);

		b = rpt1_b[4] - rpt0_b[4];
		ref = clock_delta / 3;
		igt_debug("B4: delta = %"PRIu32"\n", b);
		igt_assert(b >= ref - 1 && b <= ref + 1);

		b = rpt1_b[5] - rpt0_b[5];
		ref = clock_delta / 3;
		igt_debug("B5: delta = %"PRIu32"\n", b);
		igt_assert(b >= ref - 1 && b <= ref + 1);

		b = rpt1_b[6] - rpt0_b[6];
		ref = clock_delta / 6;
		igt_debug("B6: delta = %"PRIu32"\n", b);
		igt_assert(b >= ref - 1 && b <= ref + 1);

		b = rpt1_b[7] - rpt0_b[7];
		ref = clock_delta * 2 / 3;
		igt_debug("B7: delta = %"PRIu32"\n", b);
		igt_assert(b >= ref - 1 && b <= ref + 1);
	}

	for (int j = 0; j < format.n_c; j++) {
		uint32_t *c0 = (uint32_t *)(((uint8_t *)oa_report0) +
					    format.c_off);
		uint32_t *c1 = (uint32_t *)(((uint8_t *)oa_report1) +
					    format.c_off);
		uint32_t delta = c1[j] - c0[j];

		igt_debug("C%d: delta = %"PRIu32"\n", j, delta);
		igt_assert(delta <= max_delta);
	}
}
831
/*
 * Query the command streamer timestamp frequency (Hz) from the kernel.
 *
 * Falls back to hardcoded per-platform values when the kernel predates
 * I915_PARAM_CS_TIMESTAMP_FREQUENCY, and skips the test for platforms
 * with no known fallback.
 */
static uint64_t
get_cs_timestamp_frequency(void)
{
	int cs_ts_freq = 0;
	drm_i915_getparam_t gp;

	gp.param = I915_PARAM_CS_TIMESTAMP_FREQUENCY;
	gp.value = &cs_ts_freq;
	if (igt_ioctl(drm_fd, DRM_IOCTL_I915_GETPARAM, &gp) == 0)
		return cs_ts_freq;

	igt_debug("Couldn't query CS timestamp frequency, trying to guess based on PCI-id\n");

	if (IS_GEN7(devid) || IS_GEN8(devid))
		return 12500000;
	if (IS_SKYLAKE(devid) || IS_KABYLAKE(devid) || IS_COFFEELAKE(devid))
		return 12000000;
	if (IS_BROXTON(devid) || IS_GEMINILAKE(devid))
		return 19200000;

	/* igt_skip() does not return */
	igt_skip("Kernel with PARAM_CS_TIMESTAMP_FREQUENCY support required\n");
}
854
/*
 * Select the per-platform test configuration: metric set (name + UUID),
 * OA report format, undefined-A-counter map, report helpers and EU count;
 * then resolve the metric set's sysfs id into test_metric_set_id.
 *
 * Returns false when the platform (or GT variant) is not supported.
 */
static bool
init_sys_info(void)
{
	const char *test_set_name = NULL;
	const char *test_set_uuid = NULL;
	char buf[256];

	igt_assert_neq(devid, 0);

	timestamp_frequency = get_cs_timestamp_frequency();
	igt_assert_neq(timestamp_frequency, 0);

	if (IS_HASWELL(devid)) {
		/* We don't have a TestOa metric set for Haswell so use
		 * RenderBasic
		 */
		test_set_name = "RenderBasic";
		test_set_uuid = "403d8832-1a27-4aa6-a64e-f5389ce7b212";
		test_oa_format = I915_OA_FORMAT_A45_B8_C8;
		undefined_a_counters = hsw_undefined_a_counters;
		read_report_ticks = hsw_read_report_ticks;
		sanity_check_reports = hsw_sanity_check_render_basic_reports;

		/* EU counts per HSW GT variant (no EU_TOTAL query needed) */
		if (intel_gt(devid) == 0)
			n_eus = 10;
		else if (intel_gt(devid) == 1)
			n_eus = 20;
		else if (intel_gt(devid) == 2)
			n_eus = 40;
		else {
			igt_assert(!"reached");
			return false;
		}
	} else {
		drm_i915_getparam_t gp;

		test_set_name = "TestOa";
		test_oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
		undefined_a_counters = gen8_undefined_a_counters;
		read_report_ticks = gen8_read_report_ticks;
		sanity_check_reports = gen8_sanity_check_test_oa_reports;

		/* The TestOa UUID differs per platform (and per GT size on
		 * some platforms) because the register configs differ.
		 */
		if (IS_BROADWELL(devid)) {
			test_set_uuid = "d6de6f55-e526-4f79-a6a6-d7315c09044e";
		} else if (IS_CHERRYVIEW(devid)) {
			test_set_uuid = "4a534b07-cba3-414d-8d60-874830e883aa";
		} else if (IS_SKYLAKE(devid)) {
			switch (intel_gt(devid)) {
			case 1:
				test_set_uuid = "1651949f-0ac0-4cb1-a06f-dafd74a407d1";
				break;
			case 2:
				test_set_uuid = "2b985803-d3c9-4629-8a4f-634bfecba0e8";
				break;
			case 3:
				test_set_uuid = "882fa433-1f4a-4a67-a962-c741888fe5f5";
				break;
			default:
				igt_debug("unsupported Skylake GT size\n");
				return false;
			}
		} else if (IS_BROXTON(devid)) {
			test_set_uuid = "5ee72f5c-092f-421e-8b70-225f7c3e9612";
		} else if (IS_KABYLAKE(devid)) {
			switch (intel_gt(devid)) {
			case 1:
				test_set_uuid = "baa3c7e4-52b6-4b85-801e-465a94b746dd";
				break;
			case 2:
				test_set_uuid = "f1792f32-6db2-4b50-b4b2-557128f1688d";
				break;
			default:
				igt_debug("unsupported Kabylake GT size\n");
				return false;
			}
		} else if (IS_GEMINILAKE(devid)) {
			test_set_uuid = "dd3fd789-e783-4204-8cd0-b671bbccb0cf";
		} else if (IS_COFFEELAKE(devid)) {
			switch (intel_gt(devid)) {
			case 1:
				test_set_uuid = "74fb4902-d3d3-4237-9e90-cbdc68d0a446";
				break;
			case 2:
				test_set_uuid = "577e8e2c-3fa0-4875-8743-3538d585e3b0";
				break;
			default:
				igt_debug("unsupported Coffeelake GT size\n");
				return false;
			}
		} else if (IS_CANNONLAKE(devid)) {
			test_set_uuid = "db41edd4-d8e7-4730-ad11-b9a2d6833503";
		} else if (IS_ICELAKE(devid)) {
			test_set_uuid = "a291665e-244b-4b76-9b9a-01de9d3c8068";
		} else {
			igt_debug("unsupported GT\n");
			return false;
		}

		gp.param = I915_PARAM_EU_TOTAL;
		gp.value = &n_eus;
		do_ioctl(drm_fd, DRM_IOCTL_I915_GETPARAM, &gp);
	}

	igt_debug("%s metric set UUID = %s\n",
		  test_set_name,
		  test_set_uuid);

	oa_exp_1_millisec = max_oa_exponent_for_period_lte(1000000);

	/* resolve the metric set's id via sysfs: metrics/<uuid>/id */
	snprintf(buf, sizeof(buf), "metrics/%s/id", test_set_uuid);

	return try_sysfs_read_u64(buf, &test_metric_set_id);
}
968
/*
 * Read OA records from stream_fd into @buf until a sample with a
 * timestamp >= @end_timestamp has been seen, the stream would block
 * (EAGAIN), or @buf fills up.
 *
 * Returns the total number of bytes read into @buf, or -1 on read error
 * or if @max_size is exhausted before reaching @end_timestamp.
 */
static int
i915_read_reports_until_timestamp(enum drm_i915_oa_format oa_format,
				  uint8_t *buf,
				  uint32_t max_size,
				  uint32_t start_timestamp,
				  uint32_t end_timestamp)
{
	size_t format_size = get_oa_format(oa_format).size;
	uint32_t last_seen_timestamp = start_timestamp;
	int total_len = 0;

	while (last_seen_timestamp < end_timestamp) {
		int offset, len;

		/* Running out of space. */
		if ((max_size - total_len) < format_size) {
			igt_warn("run out of space before reaching "
				 "end timestamp (%u/%u)\n",
				 last_seen_timestamp, end_timestamp);
			return -1;
		}

		/* restart reads interrupted by signals */
		while ((len = read(stream_fd, &buf[total_len],
				   max_size - total_len)) < 0 &&
		       errno == EINTR)
			;

		/* Intentionally return an error. */
		if (len <= 0) {
			if (errno == EAGAIN)
				/* nonblocking stream drained: return what we have */
				return total_len;
			else {
				igt_warn("error read OA stream : %i\n", errno);
				return -1;
			}
		}

		offset = total_len;
		total_len += len;

		/* walk the newly read records, tracking the last sample's
		 * timestamp (dword 1 of the report following the header)
		 */
		while (offset < total_len) {
			const struct drm_i915_perf_record_header *header =
				(const struct drm_i915_perf_record_header *) &buf[offset];
			uint32_t *report = (uint32_t *) (header + 1);

			if (header->type == DRM_I915_PERF_RECORD_SAMPLE)
				last_seen_timestamp = report[1];

			offset += header->size;
		}
	}

	return total_len;
}
1023
1024 /* CAP_SYS_ADMIN is required to open system wide metrics, unless the system
1025 * control parameter dev.i915.perf_stream_paranoid == 0 */
1026 static void
test_system_wide_paranoid(void)1027 test_system_wide_paranoid(void)
1028 {
1029 igt_fork(child, 1) {
1030 uint64_t properties[] = {
1031 /* Include OA reports in samples */
1032 DRM_I915_PERF_PROP_SAMPLE_OA, true,
1033
1034 /* OA unit configuration */
1035 DRM_I915_PERF_PROP_OA_METRICS_SET, test_metric_set_id,
1036 DRM_I915_PERF_PROP_OA_FORMAT, test_oa_format,
1037 DRM_I915_PERF_PROP_OA_EXPONENT, oa_exp_1_millisec,
1038 };
1039 struct drm_i915_perf_open_param param = {
1040 .flags = I915_PERF_FLAG_FD_CLOEXEC |
1041 I915_PERF_FLAG_FD_NONBLOCK,
1042 .num_properties = sizeof(properties) / 16,
1043 .properties_ptr = to_user_pointer(properties),
1044 };
1045
1046 write_u64_file("/proc/sys/dev/i915/perf_stream_paranoid", 1);
1047
1048 igt_drop_root();
1049
1050 do_ioctl_err(drm_fd, DRM_IOCTL_I915_PERF_OPEN, ¶m, EACCES);
1051 }
1052
1053 igt_waitchildren();
1054
1055 igt_fork(child, 1) {
1056 uint64_t properties[] = {
1057 /* Include OA reports in samples */
1058 DRM_I915_PERF_PROP_SAMPLE_OA, true,
1059
1060 /* OA unit configuration */
1061 DRM_I915_PERF_PROP_OA_METRICS_SET, test_metric_set_id,
1062 DRM_I915_PERF_PROP_OA_FORMAT, test_oa_format,
1063 DRM_I915_PERF_PROP_OA_EXPONENT, oa_exp_1_millisec,
1064 };
1065 struct drm_i915_perf_open_param param = {
1066 .flags = I915_PERF_FLAG_FD_CLOEXEC |
1067 I915_PERF_FLAG_FD_NONBLOCK,
1068 .num_properties = sizeof(properties) / 16,
1069 .properties_ptr = to_user_pointer(properties),
1070 };
1071 write_u64_file("/proc/sys/dev/i915/perf_stream_paranoid", 0);
1072
1073 igt_drop_root();
1074
1075 stream_fd = __perf_open(drm_fd, ¶m, false);
1076 __perf_close(stream_fd);
1077 }
1078
1079 igt_waitchildren();
1080
1081 /* leave in paranoid state */
1082 write_u64_file("/proc/sys/dev/i915/perf_stream_paranoid", 1);
1083 }
1084
1085 static void
test_invalid_open_flags(void)1086 test_invalid_open_flags(void)
1087 {
1088 uint64_t properties[] = {
1089 /* Include OA reports in samples */
1090 DRM_I915_PERF_PROP_SAMPLE_OA, true,
1091
1092 /* OA unit configuration */
1093 DRM_I915_PERF_PROP_OA_METRICS_SET, test_metric_set_id,
1094 DRM_I915_PERF_PROP_OA_FORMAT, test_oa_format,
1095 DRM_I915_PERF_PROP_OA_EXPONENT, oa_exp_1_millisec,
1096 };
1097 struct drm_i915_perf_open_param param = {
1098 .flags = ~0, /* Undefined flag bits set! */
1099 .num_properties = sizeof(properties) / 16,
1100 .properties_ptr = to_user_pointer(properties),
1101 };
1102
1103 do_ioctl_err(drm_fd, DRM_IOCTL_I915_PERF_OPEN, ¶m, EINVAL);
1104 }
1105
1106 static void
test_invalid_oa_metric_set_id(void)1107 test_invalid_oa_metric_set_id(void)
1108 {
1109 uint64_t properties[] = {
1110 /* Include OA reports in samples */
1111 DRM_I915_PERF_PROP_SAMPLE_OA, true,
1112
1113 /* OA unit configuration */
1114 DRM_I915_PERF_PROP_OA_FORMAT, test_oa_format,
1115 DRM_I915_PERF_PROP_OA_EXPONENT, oa_exp_1_millisec,
1116 DRM_I915_PERF_PROP_OA_METRICS_SET, UINT64_MAX,
1117 };
1118 struct drm_i915_perf_open_param param = {
1119 .flags = I915_PERF_FLAG_FD_CLOEXEC |
1120 I915_PERF_FLAG_FD_NONBLOCK,
1121 .num_properties = sizeof(properties) / 16,
1122 .properties_ptr = to_user_pointer(properties),
1123 };
1124
1125 do_ioctl_err(drm_fd, DRM_IOCTL_I915_PERF_OPEN, ¶m, EINVAL);
1126
1127 properties[ARRAY_SIZE(properties) - 1] = 0; /* ID 0 is also be reserved as invalid */
1128 do_ioctl_err(drm_fd, DRM_IOCTL_I915_PERF_OPEN, ¶m, EINVAL);
1129
1130 /* Check that we aren't just seeing false positives... */
1131 properties[ARRAY_SIZE(properties) - 1] = test_metric_set_id;
1132 stream_fd = __perf_open(drm_fd, ¶m, false);
1133 __perf_close(stream_fd);
1134
1135 /* There's no valid default OA metric set ID... */
1136 param.num_properties--;
1137 do_ioctl_err(drm_fd, DRM_IOCTL_I915_PERF_OPEN, ¶m, EINVAL);
1138 }
1139
1140 static void
test_invalid_oa_format_id(void)1141 test_invalid_oa_format_id(void)
1142 {
1143 uint64_t properties[] = {
1144 /* Include OA reports in samples */
1145 DRM_I915_PERF_PROP_SAMPLE_OA, true,
1146
1147 /* OA unit configuration */
1148 DRM_I915_PERF_PROP_OA_METRICS_SET, test_metric_set_id,
1149 DRM_I915_PERF_PROP_OA_EXPONENT, oa_exp_1_millisec,
1150 DRM_I915_PERF_PROP_OA_FORMAT, UINT64_MAX,
1151 };
1152 struct drm_i915_perf_open_param param = {
1153 .flags = I915_PERF_FLAG_FD_CLOEXEC |
1154 I915_PERF_FLAG_FD_NONBLOCK,
1155 .num_properties = sizeof(properties) / 16,
1156 .properties_ptr = to_user_pointer(properties),
1157 };
1158
1159 do_ioctl_err(drm_fd, DRM_IOCTL_I915_PERF_OPEN, ¶m, EINVAL);
1160
1161 properties[ARRAY_SIZE(properties) - 1] = 0; /* ID 0 is also be reserved as invalid */
1162 do_ioctl_err(drm_fd, DRM_IOCTL_I915_PERF_OPEN, ¶m, EINVAL);
1163
1164 /* Check that we aren't just seeing false positives... */
1165 properties[ARRAY_SIZE(properties) - 1] = test_oa_format;
1166 stream_fd = __perf_open(drm_fd, ¶m, false);
1167 __perf_close(stream_fd);
1168
1169 /* There's no valid default OA format... */
1170 param.num_properties--;
1171 do_ioctl_err(drm_fd, DRM_IOCTL_I915_PERF_OPEN, ¶m, EINVAL);
1172 }
1173
1174 static void
test_missing_sample_flags(void)1175 test_missing_sample_flags(void)
1176 {
1177 uint64_t properties[] = {
1178 /* No _PROP_SAMPLE_xyz flags */
1179
1180 /* OA unit configuration */
1181 DRM_I915_PERF_PROP_OA_METRICS_SET, test_metric_set_id,
1182 DRM_I915_PERF_PROP_OA_EXPONENT, oa_exp_1_millisec,
1183 DRM_I915_PERF_PROP_OA_FORMAT, test_oa_format,
1184 };
1185 struct drm_i915_perf_open_param param = {
1186 .flags = I915_PERF_FLAG_FD_CLOEXEC,
1187 .num_properties = sizeof(properties) / 16,
1188 .properties_ptr = to_user_pointer(properties),
1189 };
1190
1191 do_ioctl_err(drm_fd, DRM_IOCTL_I915_PERF_OPEN, ¶m, EINVAL);
1192 }
1193
static void
read_2_oa_reports(int format_id,
		  int exponent,
		  uint32_t *oa_report0,
		  uint32_t *oa_report1,
		  bool timer_only)
{
	/* Read from stream_fd until two consecutive OA sample reports have
	 * been captured into oa_report0/oa_report1 (each must be at least
	 * format_size bytes). With timer_only, non-periodic reports are
	 * skipped. Asserts on unexpected record types or if no pair is
	 * found within 1000 reads.
	 */
	size_t format_size = get_oa_format(format_id).size;
	size_t sample_size = (sizeof(struct drm_i915_perf_record_header) +
			      format_size);
	const struct drm_i915_perf_record_header *header;
	uint32_t exponent_mask = (1 << (exponent + 1)) - 1;

	/* Note: we allocate a large buffer so that each read() iteration
	 * should scrape *all* pending records.
	 *
	 * The largest buffer the OA unit supports is 16MB.
	 *
	 * Being sure we are fetching all buffered reports allows us to
	 * potentially throw away / skip all reports whenever we see
	 * a _REPORT_LOST notification as a way of being sure our
	 * measurements aren't skewed by a lost report.
	 *
	 * Note: that this is useful for some tests but also not something
	 * applications would be expected to resort to. Lost reports are
	 * somewhat unpredictable but typically don't pose a problem - except
	 * to indicate that the OA unit may be over taxed if lots of reports
	 * are being lost.
	 */
	int max_reports = MAX_OA_BUF_SIZE / format_size;
	int buf_size = sample_size * max_reports * 1.5;
	uint8_t *buf = malloc(buf_size);
	int n = 0;	/* number of consecutive reports captured so far */

	for (int i = 0; i < 1000; i++) {
		ssize_t len;

		/* Retry reads interrupted by signals. */
		while ((len = read(stream_fd, buf, buf_size)) < 0 &&
		       errno == EINTR)
			;

		igt_assert(len > 0);
		igt_debug("read %d bytes\n", (int)len);

		/* Walk every record returned by this read(). */
		for (size_t offset = 0; offset < len; offset += header->size) {
			const uint32_t *report;

			header = (void *)(buf + offset);

			igt_assert_eq(header->pad, 0); /* Reserved */

			/* Currently the only test that should ever expect to
			 * see a _BUFFER_LOST error is the buffer_fill test,
			 * otherwise something bad has probably happened...
			 */
			igt_assert_neq(header->type, DRM_I915_PERF_RECORD_OA_BUFFER_LOST);

			/* At high sampling frequencies the OA HW might not be
			 * able to cope with all write requests and will notify
			 * us that a report was lost. We restart our read of
			 * two sequential reports due to the timeline blip this
			 * implies
			 */
			if (header->type == DRM_I915_PERF_RECORD_OA_REPORT_LOST) {
				igt_debug("read restart: OA trigger collision / report lost\n");
				n = 0;

				/* XXX: break, because we don't know where
				 * within the series of already read reports
				 * there could be a blip from the lost report.
				 */
				break;
			}

			/* Currently the only other record type expected is a
			 * _SAMPLE. Notably this test will need updating if
			 * i915-perf is extended in the future with additional
			 * record types.
			 */
			igt_assert_eq(header->type, DRM_I915_PERF_RECORD_SAMPLE);

			igt_assert_eq(header->size, sample_size);

			report = (const void *)(header + 1);

			/* report[0] is the reason field, report[1] the
			 * 32-bit OA timestamp. */
			igt_debug("read report: reason = %x, timestamp = %x, exponent mask=%x\n",
				  report[0], report[1], exponent_mask);

			/* Don't expect zero for timestamps */
			igt_assert_neq(report[1], 0);

			if (timer_only) {
				if (!oa_report_is_periodic(exponent, report)) {
					igt_debug("skipping non timer report\n");
					continue;
				}
			}

			if (n++ == 0)
				memcpy(oa_report0, report, format_size);
			else {
				memcpy(oa_report1, report, format_size);
				free(buf);
				return;
			}
		}
	}

	free(buf);

	/* Failed to capture two consecutive reports within 1000 reads. */
	igt_assert(!"reached");
}
1306
1307 static void
open_and_read_2_oa_reports(int format_id,int exponent,uint32_t * oa_report0,uint32_t * oa_report1,bool timer_only)1308 open_and_read_2_oa_reports(int format_id,
1309 int exponent,
1310 uint32_t *oa_report0,
1311 uint32_t *oa_report1,
1312 bool timer_only)
1313 {
1314 uint64_t properties[] = {
1315 /* Include OA reports in samples */
1316 DRM_I915_PERF_PROP_SAMPLE_OA, true,
1317
1318 /* OA unit configuration */
1319 DRM_I915_PERF_PROP_OA_METRICS_SET, test_metric_set_id,
1320 DRM_I915_PERF_PROP_OA_FORMAT, format_id,
1321 DRM_I915_PERF_PROP_OA_EXPONENT, exponent,
1322
1323 };
1324 struct drm_i915_perf_open_param param = {
1325 .flags = I915_PERF_FLAG_FD_CLOEXEC,
1326 .num_properties = sizeof(properties) / 16,
1327 .properties_ptr = to_user_pointer(properties),
1328 };
1329
1330 stream_fd = __perf_open(drm_fd, ¶m, false);
1331
1332 read_2_oa_reports(format_id, exponent,
1333 oa_report0, oa_report1, timer_only);
1334
1335 __perf_close(stream_fd);
1336 }
1337
static void
print_reports(uint32_t *oa_report0, uint32_t *oa_report1, int fmt)
{
	/* Dump two OA reports of the given format side by side via
	 * igt_debug(): timestamps, clocks, context IDs / reasons (gen8+),
	 * and each defined A/B/C counter with its delta. Debug aid only. */
	struct oa_format format = get_oa_format(fmt);

	igt_debug("TIMESTAMP: 1st = %"PRIu32", 2nd = %"PRIu32", delta = %"PRIu32"\n",
		  oa_report0[1], oa_report1[1], oa_report1[1] - oa_report0[1]);

	/* Haswell formats without C counters carry no clock information. */
	if (IS_HASWELL(devid) && format.n_c == 0) {
		igt_debug("CLOCK = N/A\n");
	} else {
		uint32_t clock0 = read_report_ticks(oa_report0, fmt);
		uint32_t clock1 = read_report_ticks(oa_report1, fmt);

		igt_debug("CLOCK: 1st = %"PRIu32", 2nd = %"PRIu32", delta = %"PRIu32"\n",
			  clock0, clock1, clock1 - clock0);
	}

	/* Gen8+ reports additionally expose context ID, slice/unslice clock
	 * ratios and a report reason field. */
	if (intel_gen(devid) >= 8) {
		uint32_t slice_freq0, slice_freq1, unslice_freq0, unslice_freq1;
		const char *reason0 = gen8_read_report_reason(oa_report0);
		const char *reason1 = gen8_read_report_reason(oa_report1);

		igt_debug("CTX ID: 1st = %"PRIu32", 2nd = %"PRIu32"\n",
			  oa_report0[2], oa_report1[2]);

		gen8_read_report_clock_ratios(oa_report0,
					      &slice_freq0, &unslice_freq0);
		gen8_read_report_clock_ratios(oa_report1,
					      &slice_freq1, &unslice_freq1);

		igt_debug("SLICE CLK: 1st = %umhz, 2nd = %umhz, delta = %d\n",
			  slice_freq0, slice_freq1,
			  ((int)slice_freq1 - (int)slice_freq0));
		igt_debug("UNSLICE CLK: 1st = %umhz, 2nd = %umhz, delta = %d\n",
			  unslice_freq0, unslice_freq1,
			  ((int)unslice_freq1 - (int)unslice_freq0));

		igt_debug("REASONS: 1st = \"%s\", 2nd = \"%s\"\n", reason0, reason1);
	}

	/* Gen8+ has some 40bit A counters... */
	for (int j = 0; j < format.n_a40; j++) {
		uint64_t value0 = gen8_read_40bit_a_counter(oa_report0, fmt, j);
		uint64_t value1 = gen8_read_40bit_a_counter(oa_report1, fmt, j);
		uint64_t delta = gen8_40bit_a_delta(value0, value1);

		if (undefined_a_counters[j])
			continue;

		igt_debug("A%d: 1st = %"PRIu64", 2nd = %"PRIu64", delta = %"PRIu64"\n",
			  j, value0, value1, delta);
	}

	/* 32bit A counters (numbered after any 40bit ones via first_a). */
	for (int j = 0; j < format.n_a; j++) {
		uint32_t *a0 = (uint32_t *)(((uint8_t *)oa_report0) +
					    format.a_off);
		uint32_t *a1 = (uint32_t *)(((uint8_t *)oa_report1) +
					    format.a_off);
		int a_id = format.first_a + j;
		uint32_t delta = a1[j] - a0[j];

		if (undefined_a_counters[a_id])
			continue;

		igt_debug("A%d: 1st = %"PRIu32", 2nd = %"PRIu32", delta = %"PRIu32"\n",
			  a_id, a0[j], a1[j], delta);
	}

	for (int j = 0; j < format.n_b; j++) {
		uint32_t *b0 = (uint32_t *)(((uint8_t *)oa_report0) +
					    format.b_off);
		uint32_t *b1 = (uint32_t *)(((uint8_t *)oa_report1) +
					    format.b_off);
		uint32_t delta = b1[j] - b0[j];

		igt_debug("B%d: 1st = %"PRIu32", 2nd = %"PRIu32", delta = %"PRIu32"\n",
			  j, b0[j], b1[j], delta);
	}

	for (int j = 0; j < format.n_c; j++) {
		uint32_t *c0 = (uint32_t *)(((uint8_t *)oa_report0) +
					    format.c_off);
		uint32_t *c1 = (uint32_t *)(((uint8_t *)oa_report1) +
					    format.c_off);
		uint32_t delta = c1[j] - c0[j];

		igt_debug("C%d: 1st = %"PRIu32", 2nd = %"PRIu32", delta = %"PRIu32"\n",
			  j, c0[j], c1[j], delta);
	}
}
1429
1430 /* Debug function, only useful when reports don't make sense. */
/* Compiled out: single-report variant of print_reports() above, kept for
 * ad-hoc debugging when reports don't make sense. Enable by flipping the
 * #if 0 below. */
#if 0
static void
print_report(uint32_t *report, int fmt)
{
	struct oa_format format = get_oa_format(fmt);

	igt_debug("TIMESTAMP: %"PRIu32"\n", report[1]);

	/* Haswell formats without C counters carry no clock information. */
	if (IS_HASWELL(devid) && format.n_c == 0) {
		igt_debug("CLOCK = N/A\n");
	} else {
		uint32_t clock = read_report_ticks(report, fmt);

		igt_debug("CLOCK: %"PRIu32"\n", clock);
	}

	if (intel_gen(devid) >= 8) {
		uint32_t slice_freq, unslice_freq;
		const char *reason = gen8_read_report_reason(report);

		gen8_read_report_clock_ratios(report, &slice_freq, &unslice_freq);

		igt_debug("SLICE CLK: %umhz\n", slice_freq);
		igt_debug("UNSLICE CLK: %umhz\n", unslice_freq);
		igt_debug("REASON: \"%s\"\n", reason);
		igt_debug("CTX ID: %"PRIu32"/%"PRIx32"\n", report[2], report[2]);
	}

	/* Gen8+ has some 40bit A counters... */
	for (int j = 0; j < format.n_a40; j++) {
		uint64_t value = gen8_read_40bit_a_counter(report, fmt, j);

		if (undefined_a_counters[j])
			continue;

		igt_debug("A%d: %"PRIu64"\n", j, value);
	}

	for (int j = 0; j < format.n_a; j++) {
		uint32_t *a = (uint32_t *)(((uint8_t *)report) +
					   format.a_off);
		int a_id = format.first_a + j;

		if (undefined_a_counters[a_id])
			continue;

		igt_debug("A%d: %"PRIu32"\n", a_id, a[j]);
	}

	for (int j = 0; j < format.n_b; j++) {
		uint32_t *b = (uint32_t *)(((uint8_t *)report) +
					   format.b_off);

		igt_debug("B%d: %"PRIu32"\n", j, b[j]);
	}

	for (int j = 0; j < format.n_c; j++) {
		uint32_t *c = (uint32_t *)(((uint8_t *)report) +
					   format.c_off);

		igt_debug("C%d: %"PRIu32"\n", j, c[j]);
	}
}
#endif
1495
static void
test_oa_formats(void)
{
	/* For every OA format known to the test, capture two consecutive
	 * reports and sanity check them. */
	for (int i = 0; i < I915_OA_FORMAT_MAX; i++) {
		struct oa_format format = get_oa_format(i);
		uint32_t oa_report0[64];
		uint32_t oa_report1[64];

		if (!format.name) /* sparse, indexed by ID */
			continue;

		igt_debug("Checking OA format %s\n", format.name);

		open_and_read_2_oa_reports(i,
					   oa_exp_1_millisec,
					   oa_report0,
					   oa_report1,
					   false); /* timer_only = false:
						    * accept any report type */

		print_reports(oa_report0, oa_report1, i);
		sanity_check_reports(oa_report0, oa_report1, i);
	}
}
1519
1520
/* Load level requested of the GPU load helper process. */
enum load {
	LOW,
	HIGH
};

/* Sleep inserted between render copies to simulate a LOW load. */
#define LOAD_HELPER_PAUSE_USEC 500

/* State for the forked helper process that keeps the GPU busy with render
 * copies while sampling tests run. */
static struct load_helper {
	int devid;
	drm_intel_bufmgr *bufmgr;
	drm_intel_context *context;
	uint32_t context_id;
	struct intel_batchbuffer *batch;
	enum load load;	/* current load level, toggled by SIGUSR2 */
	bool exit;	/* set from the signal handler to stop the loop */
	struct igt_helper_process igt_proc;
	struct igt_buf src, dst;	/* 1920x1080 scratch surfaces */
} lh = { 0, };
1539
load_helper_signal_handler(int sig)1540 static void load_helper_signal_handler(int sig)
1541 {
1542 if (sig == SIGUSR2)
1543 lh.load = lh.load == LOW ? HIGH : LOW;
1544 else
1545 lh.exit = true;
1546 }
1547
load_helper_set_load(enum load load)1548 static void load_helper_set_load(enum load load)
1549 {
1550 igt_assert(lh.igt_proc.running);
1551
1552 if (lh.load == load)
1553 return;
1554
1555 lh.load = load;
1556 kill(lh.igt_proc.pid, SIGUSR2);
1557 }
1558
load_helper_run(enum load load)1559 static void load_helper_run(enum load load)
1560 {
1561 /*
1562 * FIXME fork helpers won't get cleaned up when started from within a
1563 * subtest, so handle the case where it sticks around a bit too long.
1564 */
1565 if (lh.igt_proc.running) {
1566 load_helper_set_load(load);
1567 return;
1568 }
1569
1570 lh.load = load;
1571
1572 igt_fork_helper(&lh.igt_proc) {
1573 signal(SIGUSR1, load_helper_signal_handler);
1574 signal(SIGUSR2, load_helper_signal_handler);
1575
1576 while (!lh.exit) {
1577 int ret;
1578
1579 render_copy(lh.batch,
1580 lh.context,
1581 &lh.src, 0, 0, 1920, 1080,
1582 &lh.dst, 0, 0);
1583
1584 intel_batchbuffer_flush_with_context(lh.batch,
1585 lh.context);
1586
1587 ret = drm_intel_gem_context_get_id(lh.context,
1588 &lh.context_id);
1589 igt_assert_eq(ret, 0);
1590
1591 drm_intel_bo_wait_rendering(lh.dst.bo);
1592
1593 /* Lower the load by pausing after every submitted
1594 * write. */
1595 if (lh.load == LOW)
1596 usleep(LOAD_HELPER_PAUSE_USEC);
1597 }
1598 }
1599 }
1600
load_helper_stop(void)1601 static void load_helper_stop(void)
1602 {
1603 kill(lh.igt_proc.pid, SIGUSR1);
1604 igt_assert(igt_wait_helper(&lh.igt_proc) == 0);
1605 }
1606
load_helper_init(void)1607 static void load_helper_init(void)
1608 {
1609 int ret;
1610
1611 lh.devid = intel_get_drm_devid(drm_fd);
1612
1613 /* MI_STORE_DATA can only use GTT address on gen4+/g33 and needs
1614 * snoopable mem on pre-gen6. Hence load-helper only works on gen6+, but
1615 * that's also all we care about for the rps testcase*/
1616 igt_assert(intel_gen(lh.devid) >= 6);
1617 lh.bufmgr = drm_intel_bufmgr_gem_init(drm_fd, 4096);
1618 igt_assert(lh.bufmgr);
1619
1620 drm_intel_bufmgr_gem_enable_reuse(lh.bufmgr);
1621
1622 lh.context = drm_intel_gem_context_create(lh.bufmgr);
1623 igt_assert(lh.context);
1624
1625 lh.context_id = 0xffffffff;
1626 ret = drm_intel_gem_context_get_id(lh.context, &lh.context_id);
1627 igt_assert_eq(ret, 0);
1628 igt_assert_neq(lh.context_id, 0xffffffff);
1629
1630 lh.batch = intel_batchbuffer_alloc(lh.bufmgr, lh.devid);
1631 igt_assert(lh.batch);
1632
1633 scratch_buf_init(lh.bufmgr, &lh.dst, 1920, 1080, 0);
1634 scratch_buf_init(lh.bufmgr, &lh.src, 1920, 1080, 0);
1635 }
1636
load_helper_fini(void)1637 static void load_helper_fini(void)
1638 {
1639 if (lh.igt_proc.running)
1640 load_helper_stop();
1641
1642 if (lh.src.bo)
1643 drm_intel_bo_unreference(lh.src.bo);
1644 if (lh.dst.bo)
1645 drm_intel_bo_unreference(lh.dst.bo);
1646
1647 if (lh.batch)
1648 intel_batchbuffer_free(lh.batch);
1649
1650 if (lh.context)
1651 drm_intel_gem_context_destroy(lh.context);
1652
1653 if (lh.bufmgr)
1654 drm_intel_bufmgr_destroy(lh.bufmgr);
1655 }
1656
expected_report_timing_delta(uint32_t delta,uint32_t expected_delta)1657 static bool expected_report_timing_delta(uint32_t delta, uint32_t expected_delta)
1658 {
1659 /*
1660 * On ICL, the OA unit appears to be a bit more relaxed about
1661 * its timing for emitting OA reports (often missing the
1662 * deadline by 1 timestamp).
1663 */
1664 if (IS_ICELAKE(devid))
1665 return delta <= (expected_delta + 3);
1666 else
1667 return delta <= expected_delta;
1668 }
1669
1670 static void
test_oa_exponents(void)1671 test_oa_exponents(void)
1672 {
1673 load_helper_init();
1674 load_helper_run(HIGH);
1675
1676 /* It's asking a lot to sample with a 160 nanosecond period and the
1677 * test can fail due to buffer overflows if it wasn't possible to
1678 * keep up, so we don't start from an exponent of zero...
1679 */
1680 for (int exponent = 5; exponent < 20; exponent++) {
1681 uint64_t properties[] = {
1682 /* Include OA reports in samples */
1683 DRM_I915_PERF_PROP_SAMPLE_OA, true,
1684
1685 /* OA unit configuration */
1686 DRM_I915_PERF_PROP_OA_METRICS_SET, test_metric_set_id,
1687 DRM_I915_PERF_PROP_OA_FORMAT, test_oa_format,
1688 DRM_I915_PERF_PROP_OA_EXPONENT, exponent,
1689 };
1690 struct drm_i915_perf_open_param param = {
1691 .flags = I915_PERF_FLAG_FD_CLOEXEC,
1692 .num_properties = ARRAY_SIZE(properties) / 2,
1693 .properties_ptr = to_user_pointer(properties),
1694 };
1695 uint64_t expected_timestamp_delta = 2ULL << exponent;
1696 size_t format_size = get_oa_format(test_oa_format).size;
1697 size_t sample_size = (sizeof(struct drm_i915_perf_record_header) +
1698 format_size);
1699 int max_reports = MAX_OA_BUF_SIZE / format_size;
1700 int buf_size = sample_size * max_reports * 1.5;
1701 uint8_t *buf = calloc(1, buf_size);
1702 int ret, n_timer_reports = 0;
1703 uint32_t matches = 0;
1704 struct {
1705 uint32_t report[64];
1706 } timer_reports[30];
1707
1708 igt_debug("testing OA exponent %d,"
1709 " expected ts delta = %"PRIu64" (%"PRIu64"ns/%.2fus/%.2fms)\n",
1710 exponent, expected_timestamp_delta,
1711 oa_exponent_to_ns(exponent),
1712 oa_exponent_to_ns(exponent) / 1000.0,
1713 oa_exponent_to_ns(exponent) / (1000.0 * 1000.0));
1714
1715 stream_fd = __perf_open(drm_fd, ¶m, true /* prevent_pm */);
1716
1717 while (n_timer_reports < ARRAY_SIZE(timer_reports)) {
1718 struct drm_i915_perf_record_header *header;
1719
1720 while ((ret = read(stream_fd, buf, buf_size)) < 0 &&
1721 errno == EINTR)
1722 ;
1723
1724 /* igt_debug(" > read %i bytes\n", ret); */
1725
1726 /* We should never have no data. */
1727 igt_assert(ret > 0);
1728
1729 for (int offset = 0;
1730 offset < ret && n_timer_reports < ARRAY_SIZE(timer_reports);
1731 offset += header->size) {
1732 uint32_t *report;
1733
1734 header = (void *)(buf + offset);
1735
1736 if (header->type == DRM_I915_PERF_RECORD_OA_BUFFER_LOST) {
1737 igt_assert(!"reached");
1738 break;
1739 }
1740
1741 if (header->type == DRM_I915_PERF_RECORD_OA_REPORT_LOST)
1742 igt_debug("report loss\n");
1743
1744 if (header->type != DRM_I915_PERF_RECORD_SAMPLE)
1745 continue;
1746
1747 report = (void *)(header + 1);
1748
1749 if (!oa_report_is_periodic(exponent, report))
1750 continue;
1751
1752 memcpy(timer_reports[n_timer_reports].report, report,
1753 sizeof(timer_reports[n_timer_reports].report));
1754 n_timer_reports++;
1755 }
1756 }
1757
1758 __perf_close(stream_fd);
1759
1760 igt_debug("report%04i ts=%08x hw_id=0x%08x\n", 0,
1761 timer_reports[0].report[1],
1762 oa_report_get_ctx_id(timer_reports[0].report));
1763 for (int i = 1; i < n_timer_reports; i++) {
1764 uint32_t delta =
1765 timer_reports[i].report[1] - timer_reports[i - 1].report[1];
1766
1767 igt_debug("report%04i ts=%08x hw_id=0x%08x delta=%u %s\n", i,
1768 timer_reports[i].report[1],
1769 oa_report_get_ctx_id(timer_reports[i].report),
1770 delta, expected_report_timing_delta(delta,
1771 expected_timestamp_delta) ? "" : "******");
1772
1773 matches += expected_report_timing_delta(delta,expected_timestamp_delta);
1774 }
1775
1776 igt_debug("matches=%u/%u\n", matches, n_timer_reports - 1);
1777
1778 /* Allow for a couple of errors. */
1779 igt_assert_lte(n_timer_reports - 3, matches);
1780 }
1781
1782 load_helper_stop();
1783 load_helper_fini();
1784 }
1785
1786 /* The OA exponent selects a timestamp counter bit to trigger reports on.
1787 *
1788 * With a 64bit timestamp and least significant bit approx == 80ns then the MSB
1789 * equates to > 40 thousand years and isn't exposed via the i915 perf interface.
1790 *
1791 * The max exponent exposed is expected to be 31, which is still a fairly
1792 * ridiculous period (>5min) but is the maximum exponent where it's still
1793 * possible to use periodic sampling as a means for tracking the overflow of
1794 * 32bit OA report timestamps.
1795 */
1796 static void
test_invalid_oa_exponent(void)1797 test_invalid_oa_exponent(void)
1798 {
1799 uint64_t properties[] = {
1800 /* Include OA reports in samples */
1801 DRM_I915_PERF_PROP_SAMPLE_OA, true,
1802
1803 /* OA unit configuration */
1804 DRM_I915_PERF_PROP_OA_METRICS_SET, test_metric_set_id,
1805 DRM_I915_PERF_PROP_OA_FORMAT, test_oa_format,
1806 DRM_I915_PERF_PROP_OA_EXPONENT, 31, /* maximum exponent expected
1807 to be accepted */
1808 };
1809 struct drm_i915_perf_open_param param = {
1810 .flags = I915_PERF_FLAG_FD_CLOEXEC,
1811 .num_properties = sizeof(properties) / 16,
1812 .properties_ptr = to_user_pointer(properties),
1813 };
1814
1815 stream_fd = __perf_open(drm_fd, ¶m, false);
1816
1817 __perf_close(stream_fd);
1818
1819 for (int i = 32; i < 65; i++) {
1820 properties[7] = i;
1821 do_ioctl_err(drm_fd, DRM_IOCTL_I915_PERF_OPEN, ¶m, EINVAL);
1822 }
1823 }
1824
1825 /* The lowest periodic sampling exponent equates to a period of 160 nanoseconds
1826 * or a frequency of 6.25MHz which is only possible to request as root by
1827 * default. By default the maximum OA sampling rate is 100KHz
1828 */
1829 static void
test_low_oa_exponent_permissions(void)1830 test_low_oa_exponent_permissions(void)
1831 {
1832 int max_freq = read_u64_file("/proc/sys/dev/i915/oa_max_sample_rate");
1833 int bad_exponent = max_oa_exponent_for_freq_gt(max_freq);
1834 int ok_exponent = bad_exponent + 1;
1835 uint64_t properties[] = {
1836 /* Include OA reports in samples */
1837 DRM_I915_PERF_PROP_SAMPLE_OA, true,
1838
1839 /* OA unit configuration */
1840 DRM_I915_PERF_PROP_OA_METRICS_SET, test_metric_set_id,
1841 DRM_I915_PERF_PROP_OA_FORMAT, test_oa_format,
1842 DRM_I915_PERF_PROP_OA_EXPONENT, bad_exponent,
1843 };
1844 struct drm_i915_perf_open_param param = {
1845 .flags = I915_PERF_FLAG_FD_CLOEXEC,
1846 .num_properties = sizeof(properties) / 16,
1847 .properties_ptr = to_user_pointer(properties),
1848 };
1849 uint64_t oa_period, oa_freq;
1850
1851 igt_assert_eq(max_freq, 100000);
1852
1853 /* Avoid EACCES errors opening a stream without CAP_SYS_ADMIN */
1854 write_u64_file("/proc/sys/dev/i915/perf_stream_paranoid", 0);
1855
1856 igt_fork(child, 1) {
1857 igt_drop_root();
1858
1859 do_ioctl_err(drm_fd, DRM_IOCTL_I915_PERF_OPEN, ¶m, EACCES);
1860 }
1861
1862 igt_waitchildren();
1863
1864 properties[7] = ok_exponent;
1865
1866 igt_fork(child, 1) {
1867 igt_drop_root();
1868
1869 stream_fd = __perf_open(drm_fd, ¶m, false);
1870 __perf_close(stream_fd);
1871 }
1872
1873 igt_waitchildren();
1874
1875 oa_period = timebase_scale(2 << ok_exponent);
1876 oa_freq = NSEC_PER_SEC / oa_period;
1877 write_u64_file("/proc/sys/dev/i915/oa_max_sample_rate", oa_freq - 100);
1878
1879 igt_fork(child, 1) {
1880 igt_drop_root();
1881
1882 do_ioctl_err(drm_fd, DRM_IOCTL_I915_PERF_OPEN, ¶m, EACCES);
1883 }
1884
1885 igt_waitchildren();
1886
1887 /* restore the defaults */
1888 write_u64_file("/proc/sys/dev/i915/oa_max_sample_rate", 100000);
1889 write_u64_file("/proc/sys/dev/i915/perf_stream_paranoid", 1);
1890 }
1891
1892 static void
test_per_context_mode_unprivileged(void)1893 test_per_context_mode_unprivileged(void)
1894 {
1895 uint64_t properties[] = {
1896 /* Single context sampling */
1897 DRM_I915_PERF_PROP_CTX_HANDLE, UINT64_MAX, /* updated below */
1898
1899 /* Include OA reports in samples */
1900 DRM_I915_PERF_PROP_SAMPLE_OA, true,
1901
1902 /* OA unit configuration */
1903 DRM_I915_PERF_PROP_OA_METRICS_SET, test_metric_set_id,
1904 DRM_I915_PERF_PROP_OA_FORMAT, test_oa_format,
1905 DRM_I915_PERF_PROP_OA_EXPONENT, oa_exp_1_millisec,
1906 };
1907 struct drm_i915_perf_open_param param = {
1908 .flags = I915_PERF_FLAG_FD_CLOEXEC,
1909 .num_properties = sizeof(properties) / 16,
1910 .properties_ptr = to_user_pointer(properties),
1911 };
1912
1913 /* should be default, but just to be sure... */
1914 write_u64_file("/proc/sys/dev/i915/perf_stream_paranoid", 1);
1915
1916 igt_fork(child, 1) {
1917 drm_intel_context *context;
1918 drm_intel_bufmgr *bufmgr;
1919 uint32_t ctx_id = 0xffffffff; /* invalid id */
1920 int ret;
1921
1922 igt_drop_root();
1923
1924 bufmgr = drm_intel_bufmgr_gem_init(drm_fd, 4096);
1925 context = drm_intel_gem_context_create(bufmgr);
1926
1927 igt_assert(context);
1928
1929 ret = drm_intel_gem_context_get_id(context, &ctx_id);
1930 igt_assert_eq(ret, 0);
1931 igt_assert_neq(ctx_id, 0xffffffff);
1932
1933 properties[1] = ctx_id;
1934
1935 stream_fd = __perf_open(drm_fd, ¶m, false);
1936 __perf_close(stream_fd);
1937
1938 drm_intel_gem_context_destroy(context);
1939 drm_intel_bufmgr_destroy(bufmgr);
1940 }
1941
1942 igt_waitchildren();
1943 }
1944
/* Return the CLOCK_MONOTONIC time in nanoseconds.
 *
 * tv_sec is widened to int64_t before scaling: on platforms with a 32-bit
 * time_t the multiplication by 1e9 would otherwise overflow. */
static int64_t
get_time(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);

	return (int64_t)ts.tv_sec * 1000000000 + ts.tv_nsec;
}
1954
1955 /* Note: The interface doesn't currently provide strict guarantees or control
1956 * over the upper bound for how long it might take for a POLLIN event after
1957 * some OA report is written by the OA unit.
1958 *
1959 * The plan is to add a property later that gives some control over the maximum
1960 * latency, but for now we expect it is tuned for a fairly low latency
1961 * suitable for applications wanting to provide live feedback for captured
1962 * metrics.
1963 *
1964 * At the time of writing this test the driver was using a fixed 200Hz hrtimer
1965 * regardless of the OA sampling exponent.
1966 *
1967 * There is no lower bound since a stream configured for periodic sampling may
1968 * still contain other automatically triggered reports.
1969 *
1970 * What we try and check for here is that blocking reads don't return EAGAIN
1971 * and that we aren't spending any significant time burning the cpu in
1972 * kernelspace.
1973 */
1974 static void
test_blocking(void)1975 test_blocking(void)
1976 {
1977 /* ~40 milliseconds
1978 *
1979 * Having a period somewhat > sysconf(_SC_CLK_TCK) helps to stop
1980 * scheduling (liable to kick in when we make blocking poll()s/reads)
1981 * from interfering with the test.
1982 */
1983 int oa_exponent = max_oa_exponent_for_period_lte(40000000);
1984 uint64_t oa_period = oa_exponent_to_ns(oa_exponent);
1985 uint64_t properties[] = {
1986 /* Include OA reports in samples */
1987 DRM_I915_PERF_PROP_SAMPLE_OA, true,
1988
1989 /* OA unit configuration */
1990 DRM_I915_PERF_PROP_OA_METRICS_SET, test_metric_set_id,
1991 DRM_I915_PERF_PROP_OA_FORMAT, test_oa_format,
1992 DRM_I915_PERF_PROP_OA_EXPONENT, oa_exponent,
1993 };
1994 struct drm_i915_perf_open_param param = {
1995 .flags = I915_PERF_FLAG_FD_CLOEXEC |
1996 I915_PERF_FLAG_DISABLED,
1997 .num_properties = sizeof(properties) / 16,
1998 .properties_ptr = to_user_pointer(properties),
1999 };
2000 uint8_t buf[1024 * 1024];
2001 struct tms start_times;
2002 struct tms end_times;
2003 int64_t user_ns, kernel_ns;
2004 int64_t tick_ns = 1000000000 / sysconf(_SC_CLK_TCK);
2005 int64_t test_duration_ns = tick_ns * 1000;
2006
2007 int max_iterations = (test_duration_ns / oa_period) + 2;
2008 int n_extra_iterations = 0;
2009
2010 /* It's a bit tricky to put a lower limit here, but we expect a
2011 * relatively low latency for seeing reports, while we don't currently
2012 * give any control over this in the api.
2013 *
2014 * We assume a maximum latency of 6 millisecond to deliver a POLLIN and
2015 * read() after a new sample is written (46ms per iteration) considering
2016 * the knowledge that that the driver uses a 200Hz hrtimer (5ms period)
2017 * to check for data and giving some time to read().
2018 */
2019 int min_iterations = (test_duration_ns / (oa_period + 6000000ull));
2020
2021 int64_t start, end;
2022 int n = 0;
2023
2024 stream_fd = __perf_open(drm_fd, ¶m, true /* prevent_pm */);
2025
2026 times(&start_times);
2027
2028 igt_debug("tick length = %dns, test duration = %"PRIu64"ns, min iter. = %d,"
2029 " estimated max iter. = %d, oa_period = %"PRIu64"ns\n",
2030 (int)tick_ns, test_duration_ns,
2031 min_iterations, max_iterations, oa_period);
2032
2033 /* In the loop we perform blocking polls while the HW is sampling at
2034 * ~25Hz, with the expectation that we spend most of our time blocked
2035 * in the kernel, and shouldn't be burning cpu cycles in the kernel in
2036 * association with this process (verified by looking at stime before
2037 * and after loop).
2038 *
2039 * We're looking to assert that less than 1% of the test duration is
2040 * spent in the kernel dealing with polling and read()ing.
2041 *
2042 * The test runs for a relatively long time considering the very low
2043 * resolution of stime in ticks of typically 10 milliseconds. Since we
2044 * don't know the fractional part of tick values we read from userspace
2045 * so our minimum threshold needs to be >= one tick since any
2046 * measurement might really be +- tick_ns (assuming we effectively get
2047 * floor(real_stime)).
2048 *
2049 * We Loop for 1000 x tick_ns so one tick corresponds to 0.1%
2050 *
2051 * Also enable the stream just before poll/read to minimize
2052 * the error delta.
2053 */
2054 start = get_time();
2055 do_ioctl(stream_fd, I915_PERF_IOCTL_ENABLE, 0);
2056 for (/* nop */; ((end = get_time()) - start) < test_duration_ns; /* nop */) {
2057 struct drm_i915_perf_record_header *header;
2058 bool timer_report_read = false;
2059 bool non_timer_report_read = false;
2060 int ret;
2061
2062 while ((ret = read(stream_fd, buf, sizeof(buf))) < 0 &&
2063 errno == EINTR)
2064 ;
2065
2066 igt_assert(ret > 0);
2067
2068 /* For Haswell reports don't contain a well defined reason
2069 * field we so assume all reports to be 'periodic'. For gen8+
2070 * we want to to consider that the HW automatically writes some
2071 * non periodic reports (e.g. on context switch) which might
2072 * lead to more successful read()s than expected due to
2073 * periodic sampling and we don't want these extra reads to
2074 * cause the test to fail...
2075 */
2076 if (intel_gen(devid) >= 8) {
2077 for (int offset = 0; offset < ret; offset += header->size) {
2078 header = (void *)(buf + offset);
2079
2080 if (header->type == DRM_I915_PERF_RECORD_SAMPLE) {
2081 uint32_t *report = (void *)(header + 1);
2082
2083 if (oa_report_is_periodic(oa_exponent,
2084 report))
2085 timer_report_read = true;
2086 else
2087 non_timer_report_read = true;
2088 }
2089 }
2090 }
2091
2092 if (non_timer_report_read && !timer_report_read)
2093 n_extra_iterations++;
2094
2095 n++;
2096 }
2097
2098 times(&end_times);
2099
2100 /* Using nanosecond units is fairly silly here, given the tick in-
2101 * precision - ah well, it's consistent with the get_time() units.
2102 */
2103 user_ns = (end_times.tms_utime - start_times.tms_utime) * tick_ns;
2104 kernel_ns = (end_times.tms_stime - start_times.tms_stime) * tick_ns;
2105
2106 igt_debug("%d blocking reads during test with ~25Hz OA sampling (expect no more than %d)\n",
2107 n, max_iterations);
2108 igt_debug("%d extra iterations seen, not related to periodic sampling (e.g. context switches)\n",
2109 n_extra_iterations);
2110 igt_debug("time in userspace = %"PRIu64"ns (+-%dns) (start utime = %d, end = %d)\n",
2111 user_ns, (int)tick_ns,
2112 (int)start_times.tms_utime, (int)end_times.tms_utime);
2113 igt_debug("time in kernelspace = %"PRIu64"ns (+-%dns) (start stime = %d, end = %d)\n",
2114 kernel_ns, (int)tick_ns,
2115 (int)start_times.tms_stime, (int)end_times.tms_stime);
2116
2117 /* With completely broken blocking (but also not returning an error) we
2118 * could end up with an open loop,
2119 */
2120 igt_assert(n <= (max_iterations + n_extra_iterations));
2121
2122 /* Make sure the driver is reporting new samples with a reasonably
2123 * low latency...
2124 */
2125 igt_assert(n > (min_iterations + n_extra_iterations));
2126
2127 igt_assert(kernel_ns <= (test_duration_ns / 100ull));
2128
2129 __perf_close(stream_fd);
2130 }
2131
2132 static void
test_polling(void)2133 test_polling(void)
2134 {
2135 /* ~40 milliseconds
2136 *
2137 * Having a period somewhat > sysconf(_SC_CLK_TCK) helps to stop
2138 * scheduling (liable to kick in when we make blocking poll()s/reads)
2139 * from interfering with the test.
2140 */
2141 int oa_exponent = max_oa_exponent_for_period_lte(40000000);
2142 uint64_t oa_period = oa_exponent_to_ns(oa_exponent);
2143 uint64_t properties[] = {
2144 /* Include OA reports in samples */
2145 DRM_I915_PERF_PROP_SAMPLE_OA, true,
2146
2147 /* OA unit configuration */
2148 DRM_I915_PERF_PROP_OA_METRICS_SET, test_metric_set_id,
2149 DRM_I915_PERF_PROP_OA_FORMAT, test_oa_format,
2150 DRM_I915_PERF_PROP_OA_EXPONENT, oa_exponent,
2151 };
2152 struct drm_i915_perf_open_param param = {
2153 .flags = I915_PERF_FLAG_FD_CLOEXEC |
2154 I915_PERF_FLAG_DISABLED |
2155 I915_PERF_FLAG_FD_NONBLOCK,
2156 .num_properties = sizeof(properties) / 16,
2157 .properties_ptr = to_user_pointer(properties),
2158 };
2159 uint8_t buf[1024 * 1024];
2160 struct tms start_times;
2161 struct tms end_times;
2162 int64_t user_ns, kernel_ns;
2163 int64_t tick_ns = 1000000000 / sysconf(_SC_CLK_TCK);
2164 int64_t test_duration_ns = tick_ns * 1000;
2165
2166 int max_iterations = (test_duration_ns / oa_period) + 2;
2167 int n_extra_iterations = 0;
2168
2169 /* It's a bit tricky to put a lower limit here, but we expect a
2170 * relatively low latency for seeing reports, while we don't currently
2171 * give any control over this in the api.
2172 *
2173 * We assume a maximum latency of 6 millisecond to deliver a POLLIN and
2174 * read() after a new sample is written (46ms per iteration) considering
2175 * the knowledge that that the driver uses a 200Hz hrtimer (5ms period)
2176 * to check for data and giving some time to read().
2177 */
2178 int min_iterations = (test_duration_ns / (oa_period + 6000000ull));
2179 int64_t start, end;
2180 int n = 0;
2181
2182 stream_fd = __perf_open(drm_fd, ¶m, true /* prevent_pm */);
2183
2184 times(&start_times);
2185
2186 igt_debug("tick length = %dns, test duration = %"PRIu64"ns, min iter. = %d, max iter. = %d\n",
2187 (int)tick_ns, test_duration_ns,
2188 min_iterations, max_iterations);
2189
2190 /* In the loop we perform blocking polls while the HW is sampling at
2191 * ~25Hz, with the expectation that we spend most of our time blocked
2192 * in the kernel, and shouldn't be burning cpu cycles in the kernel in
2193 * association with this process (verified by looking at stime before
2194 * and after loop).
2195 *
2196 * We're looking to assert that less than 1% of the test duration is
2197 * spent in the kernel dealing with polling and read()ing.
2198 *
2199 * The test runs for a relatively long time considering the very low
2200 * resolution of stime in ticks of typically 10 milliseconds. Since we
2201 * don't know the fractional part of tick values we read from userspace
2202 * so our minimum threshold needs to be >= one tick since any
2203 * measurement might really be +- tick_ns (assuming we effectively get
2204 * floor(real_stime)).
2205 *
2206 * We Loop for 1000 x tick_ns so one tick corresponds to 0.1%
2207 *
2208 * Also enable the stream just before poll/read to minimize
2209 * the error delta.
2210 */
2211 start = get_time();
2212 do_ioctl(stream_fd, I915_PERF_IOCTL_ENABLE, 0);
2213 for (/* nop */; ((end = get_time()) - start) < test_duration_ns; /* nop */) {
2214 struct pollfd pollfd = { .fd = stream_fd, .events = POLLIN };
2215 struct drm_i915_perf_record_header *header;
2216 bool timer_report_read = false;
2217 bool non_timer_report_read = false;
2218 int ret;
2219
2220 while ((ret = poll(&pollfd, 1, -1)) < 0 &&
2221 errno == EINTR)
2222 ;
2223 igt_assert_eq(ret, 1);
2224 igt_assert(pollfd.revents & POLLIN);
2225
2226 while ((ret = read(stream_fd, buf, sizeof(buf))) < 0 &&
2227 errno == EINTR)
2228 ;
2229
2230 /* Don't expect to see EAGAIN if we've had a POLLIN event
2231 *
2232 * XXX: actually this is technically overly strict since we do
2233 * knowingly allow false positive POLLIN events. At least in
2234 * the future when supporting context filtering of metrics for
2235 * Gen8+ handled in the kernel then POLLIN events may be
2236 * delivered when we know there are pending reports to process
2237 * but before we've done any filtering to know for certain that
2238 * any reports are destined to be copied to userspace.
2239 *
2240 * Still, for now it's a reasonable sanity check.
2241 */
2242 if (ret < 0)
2243 igt_debug("Unexpected error when reading after poll = %d\n", errno);
2244 igt_assert_neq(ret, -1);
2245
2246 /* For Haswell reports don't contain a well defined reason
2247 * field we so assume all reports to be 'periodic'. For gen8+
2248 * we want to to consider that the HW automatically writes some
2249 * non periodic reports (e.g. on context switch) which might
2250 * lead to more successful read()s than expected due to
2251 * periodic sampling and we don't want these extra reads to
2252 * cause the test to fail...
2253 */
2254 if (intel_gen(devid) >= 8) {
2255 for (int offset = 0; offset < ret; offset += header->size) {
2256 header = (void *)(buf + offset);
2257
2258 if (header->type == DRM_I915_PERF_RECORD_SAMPLE) {
2259 uint32_t *report = (void *)(header + 1);
2260
2261 if (oa_report_is_periodic(oa_exponent, report))
2262 timer_report_read = true;
2263 else
2264 non_timer_report_read = true;
2265 }
2266 }
2267 }
2268
2269 if (non_timer_report_read && !timer_report_read)
2270 n_extra_iterations++;
2271
2272 /* At this point, after consuming pending reports (and hoping
2273 * the scheduler hasn't stopped us for too long we now
2274 * expect EAGAIN on read.
2275 */
2276 while ((ret = read(stream_fd, buf, sizeof(buf))) < 0 &&
2277 errno == EINTR)
2278 ;
2279 igt_assert_eq(ret, -1);
2280 igt_assert_eq(errno, EAGAIN);
2281
2282 n++;
2283 }
2284
2285 times(&end_times);
2286
2287 /* Using nanosecond units is fairly silly here, given the tick in-
2288 * precision - ah well, it's consistent with the get_time() units.
2289 */
2290 user_ns = (end_times.tms_utime - start_times.tms_utime) * tick_ns;
2291 kernel_ns = (end_times.tms_stime - start_times.tms_stime) * tick_ns;
2292
2293 igt_debug("%d blocking reads during test with ~25Hz OA sampling (expect no more than %d)\n",
2294 n, max_iterations);
2295 igt_debug("%d extra iterations seen, not related to periodic sampling (e.g. context switches)\n",
2296 n_extra_iterations);
2297 igt_debug("time in userspace = %"PRIu64"ns (+-%dns) (start utime = %d, end = %d)\n",
2298 user_ns, (int)tick_ns,
2299 (int)start_times.tms_utime, (int)end_times.tms_utime);
2300 igt_debug("time in kernelspace = %"PRIu64"ns (+-%dns) (start stime = %d, end = %d)\n",
2301 kernel_ns, (int)tick_ns,
2302 (int)start_times.tms_stime, (int)end_times.tms_stime);
2303
2304 /* With completely broken blocking while polling (but still somehow
2305 * reporting a POLLIN event) we could end up with an open loop.
2306 */
2307 igt_assert(n <= (max_iterations + n_extra_iterations));
2308
2309 /* Make sure the driver is reporting new samples with a reasonably
2310 * low latency...
2311 */
2312 igt_assert(n > (min_iterations + n_extra_iterations));
2313
2314 igt_assert(kernel_ns <= (test_duration_ns / 100ull));
2315
2316 __perf_close(stream_fd);
2317 }
2318
2319 static void
test_buffer_fill(void)2320 test_buffer_fill(void)
2321 {
2322 /* ~5 micro second period */
2323 int oa_exponent = max_oa_exponent_for_period_lte(5000);
2324 uint64_t oa_period = oa_exponent_to_ns(oa_exponent);
2325 uint64_t properties[] = {
2326 /* Include OA reports in samples */
2327 DRM_I915_PERF_PROP_SAMPLE_OA, true,
2328
2329 /* OA unit configuration */
2330 DRM_I915_PERF_PROP_OA_METRICS_SET, test_metric_set_id,
2331 DRM_I915_PERF_PROP_OA_FORMAT, test_oa_format,
2332 DRM_I915_PERF_PROP_OA_EXPONENT, oa_exponent,
2333 };
2334 struct drm_i915_perf_open_param param = {
2335 .flags = I915_PERF_FLAG_FD_CLOEXEC,
2336 .num_properties = sizeof(properties) / 16,
2337 .properties_ptr = to_user_pointer(properties),
2338 };
2339 struct drm_i915_perf_record_header *header;
2340 int buf_size = 65536 * (256 + sizeof(struct drm_i915_perf_record_header));
2341 uint8_t *buf = malloc(buf_size);
2342 int len;
2343 size_t oa_buf_size = MAX_OA_BUF_SIZE;
2344 size_t report_size = get_oa_format(test_oa_format).size;
2345 int n_full_oa_reports = oa_buf_size / report_size;
2346 uint64_t fill_duration = n_full_oa_reports * oa_period;
2347
2348 igt_assert(fill_duration < 1000000000);
2349
2350 stream_fd = __perf_open(drm_fd, ¶m, true /* prevent_pm */);
2351
2352 for (int i = 0; i < 5; i++) {
2353 bool overflow_seen;
2354 uint32_t n_periodic_reports;
2355 uint32_t first_timestamp = 0, last_timestamp = 0;
2356 uint32_t last_periodic_report[64];
2357
2358 do_ioctl(stream_fd, I915_PERF_IOCTL_ENABLE, 0);
2359
2360 nanosleep(&(struct timespec){ .tv_sec = 0,
2361 .tv_nsec = fill_duration * 1.25 },
2362 NULL);
2363
2364 while ((len = read(stream_fd, buf, buf_size)) == -1 && errno == EINTR)
2365 ;
2366
2367 igt_assert_neq(len, -1);
2368
2369 overflow_seen = false;
2370 for (int offset = 0; offset < len; offset += header->size) {
2371 header = (void *)(buf + offset);
2372
2373 if (header->type == DRM_I915_PERF_RECORD_OA_BUFFER_LOST)
2374 overflow_seen = true;
2375 }
2376
2377 igt_assert_eq(overflow_seen, true);
2378
2379 do_ioctl(stream_fd, I915_PERF_IOCTL_DISABLE, 0);
2380
2381 igt_debug("fill_duration = %"PRIu64"ns, oa_exponent = %u\n",
2382 fill_duration, oa_exponent);
2383
2384 do_ioctl(stream_fd, I915_PERF_IOCTL_ENABLE, 0);
2385
2386 nanosleep(&(struct timespec){ .tv_sec = 0,
2387 .tv_nsec = fill_duration / 2 },
2388 NULL);
2389
2390 n_periodic_reports = 0;
2391
2392 /* Because of the race condition between notification of new
2393 * reports and reports landing in memory, we need to rely on
2394 * timestamps to figure whether we've read enough of them.
2395 */
2396 while (((last_timestamp - first_timestamp) * oa_period) < (fill_duration / 2)) {
2397
2398 igt_debug("dts=%u elapsed=%"PRIu64" duration=%"PRIu64"\n",
2399 last_timestamp - first_timestamp,
2400 (last_timestamp - first_timestamp) * oa_period,
2401 fill_duration / 2);
2402
2403 while ((len = read(stream_fd, buf, buf_size)) == -1 && errno == EINTR)
2404 ;
2405
2406 igt_assert_neq(len, -1);
2407
2408 for (int offset = 0; offset < len; offset += header->size) {
2409 uint32_t *report;
2410
2411 header = (void *) (buf + offset);
2412 report = (void *) (header + 1);
2413
2414 switch (header->type) {
2415 case DRM_I915_PERF_RECORD_OA_REPORT_LOST:
2416 igt_debug("report loss, trying again\n");
2417 break;
2418 case DRM_I915_PERF_RECORD_SAMPLE:
2419 igt_debug(" > report ts=%u"
2420 " ts_delta_last_periodic=%8u is_timer=%i ctx_id=%8x nb_periodic=%u\n",
2421 report[1],
2422 n_periodic_reports > 0 ? report[1] - last_periodic_report[1] : 0,
2423 oa_report_is_periodic(oa_exponent, report),
2424 oa_report_get_ctx_id(report),
2425 n_periodic_reports);
2426
2427 if (first_timestamp == 0)
2428 first_timestamp = report[1];
2429 last_timestamp = report[1];
2430
2431 if (oa_report_is_periodic(oa_exponent, report)) {
2432 memcpy(last_periodic_report, report,
2433 sizeof(last_periodic_report));
2434 n_periodic_reports++;
2435 }
2436 break;
2437 case DRM_I915_PERF_RECORD_OA_BUFFER_LOST:
2438 igt_assert(!"unexpected overflow");
2439 break;
2440 }
2441 }
2442 }
2443
2444 do_ioctl(stream_fd, I915_PERF_IOCTL_DISABLE, 0);
2445
2446 igt_debug("%f < %zu < %f\n",
2447 report_size * n_full_oa_reports * 0.45,
2448 n_periodic_reports * report_size,
2449 report_size * n_full_oa_reports * 0.55);
2450
2451 igt_assert(n_periodic_reports * report_size >
2452 report_size * n_full_oa_reports * 0.45);
2453 igt_assert(n_periodic_reports * report_size <
2454 report_size * n_full_oa_reports * 0.55);
2455 }
2456
2457 free(buf);
2458
2459 __perf_close(stream_fd);
2460 }
2461
2462 static void
test_enable_disable(void)2463 test_enable_disable(void)
2464 {
2465 /* ~5 micro second period */
2466 int oa_exponent = max_oa_exponent_for_period_lte(5000);
2467 uint64_t oa_period = oa_exponent_to_ns(oa_exponent);
2468 uint64_t properties[] = {
2469 /* Include OA reports in samples */
2470 DRM_I915_PERF_PROP_SAMPLE_OA, true,
2471
2472 /* OA unit configuration */
2473 DRM_I915_PERF_PROP_OA_METRICS_SET, test_metric_set_id,
2474 DRM_I915_PERF_PROP_OA_FORMAT, test_oa_format,
2475 DRM_I915_PERF_PROP_OA_EXPONENT, oa_exponent,
2476 };
2477 struct drm_i915_perf_open_param param = {
2478 .flags = I915_PERF_FLAG_FD_CLOEXEC |
2479 I915_PERF_FLAG_DISABLED, /* Verify we start disabled */
2480 .num_properties = sizeof(properties) / 16,
2481 .properties_ptr = to_user_pointer(properties),
2482 };
2483 int buf_size = 65536 * (256 + sizeof(struct drm_i915_perf_record_header));
2484 uint8_t *buf = malloc(buf_size);
2485 size_t oa_buf_size = MAX_OA_BUF_SIZE;
2486 size_t report_size = get_oa_format(test_oa_format).size;
2487 int n_full_oa_reports = oa_buf_size / report_size;
2488 uint64_t fill_duration = n_full_oa_reports * oa_period;
2489
2490 load_helper_init();
2491 load_helper_run(HIGH);
2492
2493 stream_fd = __perf_open(drm_fd, ¶m, true /* prevent_pm */);
2494
2495 for (int i = 0; i < 5; i++) {
2496 int len;
2497 uint32_t n_periodic_reports;
2498 struct drm_i915_perf_record_header *header;
2499 uint32_t first_timestamp = 0, last_timestamp = 0;
2500 uint32_t last_periodic_report[64];
2501
2502 /* Giving enough time for an overflow might help catch whether
2503 * the OA unit has been enabled even if the driver might at
2504 * least avoid copying reports while disabled.
2505 */
2506 nanosleep(&(struct timespec){ .tv_sec = 0,
2507 .tv_nsec = fill_duration * 1.25 },
2508 NULL);
2509
2510 while ((len = read(stream_fd, buf, buf_size)) == -1 && errno == EINTR)
2511 ;
2512
2513 igt_assert_eq(len, -1);
2514 igt_assert_eq(errno, EIO);
2515
2516 do_ioctl(stream_fd, I915_PERF_IOCTL_ENABLE, 0);
2517
2518 nanosleep(&(struct timespec){ .tv_sec = 0,
2519 .tv_nsec = fill_duration / 2 },
2520 NULL);
2521
2522 n_periodic_reports = 0;
2523
2524 /* Because of the race condition between notification of new
2525 * reports and reports landing in memory, we need to rely on
2526 * timestamps to figure whether we've read enough of them.
2527 */
2528 while (((last_timestamp - first_timestamp) * oa_period) < (fill_duration / 2)) {
2529
2530 while ((len = read(stream_fd, buf, buf_size)) == -1 && errno == EINTR)
2531 ;
2532
2533 igt_assert_neq(len, -1);
2534
2535 for (int offset = 0; offset < len; offset += header->size) {
2536 uint32_t *report;
2537
2538 header = (void *) (buf + offset);
2539 report = (void *) (header + 1);
2540
2541 switch (header->type) {
2542 case DRM_I915_PERF_RECORD_OA_REPORT_LOST:
2543 break;
2544 case DRM_I915_PERF_RECORD_SAMPLE:
2545 if (first_timestamp == 0)
2546 first_timestamp = report[1];
2547 last_timestamp = report[1];
2548
2549 igt_debug(" > report ts=%8x"
2550 " ts_delta_last_periodic=%s%8u"
2551 " is_timer=%i ctx_id=0x%8x\n",
2552 report[1],
2553 oa_report_is_periodic(oa_exponent, report) ? " " : "*",
2554 n_periodic_reports > 0 ? (report[1] - last_periodic_report[1]) : 0,
2555 oa_report_is_periodic(oa_exponent, report),
2556 oa_report_get_ctx_id(report));
2557
2558 if (oa_report_is_periodic(oa_exponent, report)) {
2559 memcpy(last_periodic_report, report,
2560 sizeof(last_periodic_report));
2561
2562 /* We want to measure only the
2563 * periodic reports, ctx-switch
2564 * might inflate the content of
2565 * the buffer and skew or
2566 * measurement.
2567 */
2568 n_periodic_reports++;
2569 }
2570 break;
2571 case DRM_I915_PERF_RECORD_OA_BUFFER_LOST:
2572 igt_assert(!"unexpected overflow");
2573 break;
2574 }
2575 }
2576
2577 }
2578
2579 do_ioctl(stream_fd, I915_PERF_IOCTL_DISABLE, 0);
2580
2581 igt_debug("%f < %zu < %f\n",
2582 report_size * n_full_oa_reports * 0.45,
2583 n_periodic_reports * report_size,
2584 report_size * n_full_oa_reports * 0.55);
2585
2586 igt_assert((n_periodic_reports * report_size) >
2587 (report_size * n_full_oa_reports * 0.45));
2588 igt_assert((n_periodic_reports * report_size) <
2589 report_size * n_full_oa_reports * 0.55);
2590
2591
2592 /* It's considered an error to read a stream while it's disabled
2593 * since it would block indefinitely...
2594 */
2595 len = read(stream_fd, buf, buf_size);
2596
2597 igt_assert_eq(len, -1);
2598 igt_assert_eq(errno, EIO);
2599 }
2600
2601 free(buf);
2602
2603 __perf_close(stream_fd);
2604
2605 load_helper_stop();
2606 load_helper_fini();
2607 }
2608
2609 static void
test_short_reads(void)2610 test_short_reads(void)
2611 {
2612 int oa_exponent = max_oa_exponent_for_period_lte(5000);
2613 uint64_t properties[] = {
2614 /* Include OA reports in samples */
2615 DRM_I915_PERF_PROP_SAMPLE_OA, true,
2616
2617 /* OA unit configuration */
2618 DRM_I915_PERF_PROP_OA_METRICS_SET, test_metric_set_id,
2619 DRM_I915_PERF_PROP_OA_FORMAT, test_oa_format,
2620 DRM_I915_PERF_PROP_OA_EXPONENT, oa_exponent,
2621 };
2622 struct drm_i915_perf_open_param param = {
2623 .flags = I915_PERF_FLAG_FD_CLOEXEC,
2624 .num_properties = sizeof(properties) / 16,
2625 .properties_ptr = to_user_pointer(properties),
2626 };
2627 size_t record_size = 256 + sizeof(struct drm_i915_perf_record_header);
2628 size_t page_size = sysconf(_SC_PAGE_SIZE);
2629 int zero_fd = open("/dev/zero", O_RDWR|O_CLOEXEC);
2630 uint8_t *pages = mmap(NULL, page_size * 2,
2631 PROT_READ|PROT_WRITE, MAP_PRIVATE, zero_fd, 0);
2632 struct drm_i915_perf_record_header *header;
2633 int ret;
2634
2635 igt_assert_neq(zero_fd, -1);
2636 close(zero_fd);
2637 zero_fd = -1;
2638
2639 igt_assert(pages);
2640
2641 ret = mprotect(pages + page_size, page_size, PROT_NONE);
2642 igt_assert_eq(ret, 0);
2643
2644 stream_fd = __perf_open(drm_fd, ¶m, false);
2645
2646 nanosleep(&(struct timespec){ .tv_sec = 0, .tv_nsec = 5000000 }, NULL);
2647
2648 /* At this point there should be lots of pending reports to read */
2649
2650 /* A read that can return at least one record should result in a short
2651 * read not an EFAULT if the buffer is smaller than the requested read
2652 * size...
2653 *
2654 * Expect to see a sample record here, but at least skip over any
2655 * _RECORD_LOST notifications.
2656 */
2657 do {
2658 header = (void *)(pages + page_size - record_size);
2659 ret = read(stream_fd,
2660 header,
2661 page_size);
2662 igt_assert(ret > 0);
2663 } while (header->type == DRM_I915_PERF_RECORD_OA_REPORT_LOST);
2664
2665 igt_assert_eq(ret, record_size);
2666
2667 /* A read that can't return a single record because it would result
2668 * in a fault on buffer overrun should result in an EFAULT error...
2669 */
2670 ret = read(stream_fd, pages + page_size - 16, page_size);
2671 igt_assert_eq(ret, -1);
2672 igt_assert_eq(errno, EFAULT);
2673
2674 /* A read that can't return a single record because the buffer is too
2675 * small should result in an ENOSPC error..
2676 *
2677 * Again, skip over _RECORD_LOST records (smaller than record_size/2)
2678 */
2679 do {
2680 header = (void *)(pages + page_size - record_size / 2);
2681 ret = read(stream_fd,
2682 header,
2683 record_size / 2);
2684 } while (ret > 0 && header->type == DRM_I915_PERF_RECORD_OA_REPORT_LOST);
2685
2686 igt_assert_eq(ret, -1);
2687 igt_assert_eq(errno, ENOSPC);
2688
2689 __perf_close(stream_fd);
2690
2691 munmap(pages, page_size * 2);
2692 }
2693
2694 static void
test_non_sampling_read_error(void)2695 test_non_sampling_read_error(void)
2696 {
2697 uint64_t properties[] = {
2698 /* XXX: even without periodic sampling we have to
2699 * specify at least one sample layout property...
2700 */
2701 DRM_I915_PERF_PROP_SAMPLE_OA, true,
2702
2703 /* OA unit configuration */
2704 DRM_I915_PERF_PROP_OA_METRICS_SET, test_metric_set_id,
2705 DRM_I915_PERF_PROP_OA_FORMAT, test_oa_format,
2706
2707 /* XXX: no sampling exponent */
2708 };
2709 struct drm_i915_perf_open_param param = {
2710 .flags = I915_PERF_FLAG_FD_CLOEXEC,
2711 .num_properties = sizeof(properties) / 16,
2712 .properties_ptr = to_user_pointer(properties),
2713 };
2714 int ret;
2715 uint8_t buf[1024];
2716
2717 stream_fd = __perf_open(drm_fd, ¶m, false);
2718
2719 ret = read(stream_fd, buf, sizeof(buf));
2720 igt_assert_eq(ret, -1);
2721 igt_assert_eq(errno, EIO);
2722
2723 __perf_close(stream_fd);
2724 }
2725
/* Check that attempts to read from a stream while it is disabled will
 * return EIO instead of blocking indefinitely.
 */
2729 static void
test_disabled_read_error(void)2730 test_disabled_read_error(void)
2731 {
2732 int oa_exponent = 5; /* 5 micro seconds */
2733 uint64_t properties[] = {
2734 /* XXX: even without periodic sampling we have to
2735 * specify at least one sample layout property...
2736 */
2737 DRM_I915_PERF_PROP_SAMPLE_OA, true,
2738
2739 /* OA unit configuration */
2740 DRM_I915_PERF_PROP_OA_METRICS_SET, test_metric_set_id,
2741 DRM_I915_PERF_PROP_OA_FORMAT, test_oa_format,
2742 DRM_I915_PERF_PROP_OA_EXPONENT, oa_exponent,
2743 };
2744 struct drm_i915_perf_open_param param = {
2745 .flags = I915_PERF_FLAG_FD_CLOEXEC |
2746 I915_PERF_FLAG_DISABLED, /* XXX: open disabled */
2747 .num_properties = sizeof(properties) / 16,
2748 .properties_ptr = to_user_pointer(properties),
2749 };
2750 uint32_t oa_report0[64];
2751 uint32_t oa_report1[64];
2752 uint32_t buf[128] = { 0 };
2753 int ret;
2754
2755 stream_fd = __perf_open(drm_fd, ¶m, false);
2756
2757 ret = read(stream_fd, buf, sizeof(buf));
2758 igt_assert_eq(ret, -1);
2759 igt_assert_eq(errno, EIO);
2760
2761 __perf_close(stream_fd);
2762
2763
2764 param.flags &= ~I915_PERF_FLAG_DISABLED;
2765 stream_fd = __perf_open(drm_fd, ¶m, false);
2766
2767 read_2_oa_reports(test_oa_format,
2768 oa_exponent,
2769 oa_report0,
2770 oa_report1,
2771 false); /* not just timer reports */
2772
2773 do_ioctl(stream_fd, I915_PERF_IOCTL_DISABLE, 0);
2774
2775 ret = read(stream_fd, buf, sizeof(buf));
2776 igt_assert_eq(ret, -1);
2777 igt_assert_eq(errno, EIO);
2778
2779 do_ioctl(stream_fd, I915_PERF_IOCTL_ENABLE, 0);
2780
2781 read_2_oa_reports(test_oa_format,
2782 oa_exponent,
2783 oa_report0,
2784 oa_report1,
2785 false); /* not just timer reports */
2786
2787 __perf_close(stream_fd);
2788 }
2789
2790 static void
test_mi_rpc(void)2791 test_mi_rpc(void)
2792 {
2793 uint64_t properties[] = {
2794 /* Note: we have to specify at least one sample property even
2795 * though we aren't interested in samples in this case.
2796 */
2797 DRM_I915_PERF_PROP_SAMPLE_OA, true,
2798
2799 /* OA unit configuration */
2800 DRM_I915_PERF_PROP_OA_METRICS_SET, test_metric_set_id,
2801 DRM_I915_PERF_PROP_OA_FORMAT, test_oa_format,
2802
2803 /* Note: no OA exponent specified in this case */
2804 };
2805 struct drm_i915_perf_open_param param = {
2806 .flags = I915_PERF_FLAG_FD_CLOEXEC,
2807 .num_properties = sizeof(properties) / 16,
2808 .properties_ptr = to_user_pointer(properties),
2809 };
2810 drm_intel_bufmgr *bufmgr = drm_intel_bufmgr_gem_init(drm_fd, 4096);
2811 drm_intel_context *context;
2812 struct intel_batchbuffer *batch;
2813 drm_intel_bo *bo;
2814 uint32_t *report32;
2815 int ret;
2816
2817 stream_fd = __perf_open(drm_fd, ¶m, false);
2818
2819 drm_intel_bufmgr_gem_enable_reuse(bufmgr);
2820
2821 context = drm_intel_gem_context_create(bufmgr);
2822 igt_assert(context);
2823
2824 batch = intel_batchbuffer_alloc(bufmgr, devid);
2825
2826 bo = drm_intel_bo_alloc(bufmgr, "mi_rpc dest bo", 4096, 64);
2827
2828 ret = drm_intel_bo_map(bo, true);
2829 igt_assert_eq(ret, 0);
2830
2831 memset(bo->virtual, 0x80, 4096);
2832 drm_intel_bo_unmap(bo);
2833
2834 emit_report_perf_count(batch,
2835 bo, /* dst */
2836 0, /* dst offset in bytes */
2837 0xdeadbeef); /* report ID */
2838
2839 intel_batchbuffer_flush_with_context(batch, context);
2840
2841 ret = drm_intel_bo_map(bo, false /* write enable */);
2842 igt_assert_eq(ret, 0);
2843
2844 report32 = bo->virtual;
2845 igt_assert_eq(report32[0], 0xdeadbeef); /* report ID */
2846 igt_assert_neq(report32[1], 0); /* timestamp */
2847
2848 igt_assert_neq(report32[63], 0x80808080); /* end of report */
2849 igt_assert_eq(report32[64], 0x80808080); /* after 256 byte report */
2850
2851 drm_intel_bo_unmap(bo);
2852 drm_intel_bo_unreference(bo);
2853 intel_batchbuffer_free(batch);
2854 drm_intel_gem_context_destroy(context);
2855 drm_intel_bufmgr_destroy(bufmgr);
2856 __perf_close(stream_fd);
2857 }
2858
/* Emit a stalling PIPE_CONTROL that writes the current GPU timestamp into
 * dst at timestamp_offset, followed by an MI_REPORT_PERF_COUNT writing an
 * OA report (tagged with report_id) into dst at report_dst_offset.
 *
 * The CS stall + render target flush make sure previously submitted work
 * has completed before the timestamp and OA report are captured.
 */
static void
emit_stall_timestamp_and_rpc(struct intel_batchbuffer *batch,
			     drm_intel_bo *dst,
			     int timestamp_offset,
			     int report_dst_offset,
			     uint32_t report_id)
{
	uint32_t pipe_ctl_flags = (PIPE_CONTROL_CS_STALL |
				   PIPE_CONTROL_RENDER_TARGET_FLUSH |
				   PIPE_CONTROL_WRITE_TIMESTAMP);

	if (intel_gen(devid) >= 8) {
		/* Gen8+: PIPE_CONTROL is 6 dwords (48-bit address).
		 * NOTE(review): BEGIN_BATCH(5, 1) matches the pre-gen8 branch;
		 * presumably OUT_RELOC accounts for the extra address dword on
		 * gen8 - confirm against intel_batchbuffer's reloc handling.
		 */
		BEGIN_BATCH(5, 1);
		OUT_BATCH(GFX_OP_PIPE_CONTROL | (6 - 2));
		OUT_BATCH(pipe_ctl_flags);
		OUT_RELOC(dst, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
			  timestamp_offset);
		OUT_BATCH(0); /* imm lower */
		OUT_BATCH(0); /* imm upper */
		ADVANCE_BATCH();
	} else {
		/* Pre-gen8: PIPE_CONTROL is 5 dwords (32-bit address). */
		BEGIN_BATCH(5, 1);
		OUT_BATCH(GFX_OP_PIPE_CONTROL | (5 - 2));
		OUT_BATCH(pipe_ctl_flags);
		OUT_RELOC(dst, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
			  timestamp_offset);
		OUT_BATCH(0); /* imm lower */
		OUT_BATCH(0); /* imm upper */
		ADVANCE_BATCH();
	}

	emit_report_perf_count(batch, dst, report_dst_offset, report_id);
}
2892
2893 /* Tests the INTEL_performance_query use case where an unprivileged process
2894 * should be able to configure the OA unit for per-context metrics (for a
2895 * context associated with that process' drm file descriptor) and the counters
2896 * should only relate to that specific context.
2897 *
2898 * Unfortunately only Haswell limits the progression of OA counters for a
2899 * single context and so this unit test is Haswell specific. For Gen8+ although
2900 * reports read via i915 perf can be filtered for a single context the counters
2901 * themselves always progress as global/system-wide counters affected by all
2902 * contexts.
2903 */
2904 static void
hsw_test_single_ctx_counters(void)2905 hsw_test_single_ctx_counters(void)
2906 {
2907 uint64_t properties[] = {
2908 DRM_I915_PERF_PROP_CTX_HANDLE, UINT64_MAX, /* updated below */
2909
2910 /* Note: we have to specify at least one sample property even
2911 * though we aren't interested in samples in this case
2912 */
2913 DRM_I915_PERF_PROP_SAMPLE_OA, true,
2914
2915 /* OA unit configuration */
2916 DRM_I915_PERF_PROP_OA_METRICS_SET, test_metric_set_id,
2917 DRM_I915_PERF_PROP_OA_FORMAT, test_oa_format,
2918
2919 /* Note: no OA exponent specified in this case */
2920 };
2921 struct drm_i915_perf_open_param param = {
2922 .flags = I915_PERF_FLAG_FD_CLOEXEC,
2923 .num_properties = sizeof(properties) / 16,
2924 .properties_ptr = to_user_pointer(properties),
2925 };
2926
2927 /* should be default, but just to be sure... */
2928 write_u64_file("/proc/sys/dev/i915/perf_stream_paranoid", 1);
2929
2930 igt_fork(child, 1) {
2931 drm_intel_bufmgr *bufmgr;
2932 drm_intel_context *context0, *context1;
2933 struct intel_batchbuffer *batch;
2934 struct igt_buf src[3], dst[3];
2935 drm_intel_bo *bo;
2936 uint32_t *report0_32, *report1_32;
2937 uint64_t timestamp0_64, timestamp1_64;
2938 uint32_t delta_ts64, delta_oa32;
2939 uint64_t delta_ts64_ns, delta_oa32_ns;
2940 uint32_t delta_delta;
2941 int n_samples_written;
2942 int width = 800;
2943 int height = 600;
2944 uint32_t ctx_id = 0xffffffff; /* invalid id */
2945 int ret;
2946
2947 igt_drop_root();
2948
2949 bufmgr = drm_intel_bufmgr_gem_init(drm_fd, 4096);
2950 drm_intel_bufmgr_gem_enable_reuse(bufmgr);
2951
2952 for (int i = 0; i < ARRAY_SIZE(src); i++) {
2953 scratch_buf_init(bufmgr, &src[i], width, height, 0xff0000ff);
2954 scratch_buf_init(bufmgr, &dst[i], width, height, 0x00ff00ff);
2955 }
2956
2957 batch = intel_batchbuffer_alloc(bufmgr, devid);
2958
2959 context0 = drm_intel_gem_context_create(bufmgr);
2960 igt_assert(context0);
2961
2962 context1 = drm_intel_gem_context_create(bufmgr);
2963 igt_assert(context1);
2964
2965 igt_debug("submitting warm up render_copy\n");
2966
2967 /* Submit some early, unmeasured, work to the context we want
2968 * to measure to try and catch issues with i915-perf
2969 * initializing the HW context ID for filtering.
2970 *
2971 * We do this because i915-perf single context filtering had
2972 * previously only relied on a hook into context pinning to
2973 * initialize the HW context ID, instead of also trying to
2974 * determine the HW ID while opening the stream, in case it
2975 * has already been pinned.
2976 *
2977 * This wasn't noticed by the previous unit test because we
2978 * were opening the stream while the context hadn't been
2979 * touched or pinned yet and so it worked out correctly to wait
2980 * for the pinning hook.
2981 *
2982 * Now a buggy version of i915-perf will fail to measure
2983 * anything for context0 once this initial render_copy() ends
2984 * up pinning the context since there won't ever be a pinning
2985 * hook callback.
2986 */
2987 render_copy(batch,
2988 context0,
2989 &src[0], 0, 0, width, height,
2990 &dst[0], 0, 0);
2991
2992 ret = drm_intel_gem_context_get_id(context0, &ctx_id);
2993 igt_assert_eq(ret, 0);
2994 igt_assert_neq(ctx_id, 0xffffffff);
2995 properties[1] = ctx_id;
2996
2997 intel_batchbuffer_flush_with_context(batch, context0);
2998
2999 scratch_buf_memset(src[0].bo, width, height, 0xff0000ff);
3000 scratch_buf_memset(dst[0].bo, width, height, 0x00ff00ff);
3001
3002 igt_debug("opening i915-perf stream\n");
3003 stream_fd = __perf_open(drm_fd, ¶m, false);
3004
3005 bo = drm_intel_bo_alloc(bufmgr, "mi_rpc dest bo", 4096, 64);
3006
3007 ret = drm_intel_bo_map(bo, true /* write enable */);
3008 igt_assert_eq(ret, 0);
3009
3010 memset(bo->virtual, 0x80, 4096);
3011 drm_intel_bo_unmap(bo);
3012
3013 emit_stall_timestamp_and_rpc(batch,
3014 bo,
3015 512 /* timestamp offset */,
3016 0, /* report dst offset */
3017 0xdeadbeef); /* report id */
3018
3019 /* Explicitly flush here (even though the render_copy() call
3020 * will itself flush before/after the copy) to clarify that
3021 * that the PIPE_CONTROL + MI_RPC commands will be in a
3022 * separate batch from the copy.
3023 */
3024 intel_batchbuffer_flush_with_context(batch, context0);
3025
3026 render_copy(batch,
3027 context0,
3028 &src[0], 0, 0, width, height,
3029 &dst[0], 0, 0);
3030
3031 /* Another redundant flush to clarify batch bo is free to reuse */
3032 intel_batchbuffer_flush_with_context(batch, context0);
3033
3034 /* submit two copies on the other context to avoid a false
3035 * positive in case the driver somehow ended up filtering for
3036 * context1
3037 */
3038 render_copy(batch,
3039 context1,
3040 &src[1], 0, 0, width, height,
3041 &dst[1], 0, 0);
3042
3043 render_copy(batch,
3044 context1,
3045 &src[2], 0, 0, width, height,
3046 &dst[2], 0, 0);
3047
3048 /* And another */
3049 intel_batchbuffer_flush_with_context(batch, context1);
3050
3051 emit_stall_timestamp_and_rpc(batch,
3052 bo,
3053 520 /* timestamp offset */,
3054 256, /* report dst offset */
3055 0xbeefbeef); /* report id */
3056
3057 intel_batchbuffer_flush_with_context(batch, context0);
3058
3059 ret = drm_intel_bo_map(bo, false /* write enable */);
3060 igt_assert_eq(ret, 0);
3061
3062 report0_32 = bo->virtual;
3063 igt_assert_eq(report0_32[0], 0xdeadbeef); /* report ID */
3064 igt_assert_neq(report0_32[1], 0); /* timestamp */
3065
3066 report1_32 = report0_32 + 64;
3067 igt_assert_eq(report1_32[0], 0xbeefbeef); /* report ID */
3068 igt_assert_neq(report1_32[1], 0); /* timestamp */
3069
3070 print_reports(report0_32, report1_32,
3071 lookup_format(test_oa_format));
3072
3073 /* A40 == N samples written to all render targets */
3074 n_samples_written = report1_32[43] - report0_32[43];
3075
3076 igt_debug("n samples written = %d\n", n_samples_written);
3077 igt_assert_eq(n_samples_written, width * height);
3078
3079 igt_debug("timestamp32 0 = %u\n", report0_32[1]);
3080 igt_debug("timestamp32 1 = %u\n", report1_32[1]);
3081
3082 timestamp0_64 = *(uint64_t *)(((uint8_t *)bo->virtual) + 512);
3083 timestamp1_64 = *(uint64_t *)(((uint8_t *)bo->virtual) + 520);
3084
3085 igt_debug("timestamp64 0 = %"PRIu64"\n", timestamp0_64);
3086 igt_debug("timestamp64 1 = %"PRIu64"\n", timestamp1_64);
3087
3088 delta_ts64 = timestamp1_64 - timestamp0_64;
3089 delta_oa32 = report1_32[1] - report0_32[1];
3090
3091 /* sanity check that we can pass the delta to timebase_scale */
3092 igt_assert(delta_ts64 < UINT32_MAX);
3093 delta_oa32_ns = timebase_scale(delta_oa32);
3094 delta_ts64_ns = timebase_scale(delta_ts64);
3095
3096 igt_debug("ts32 delta = %u, = %uns\n",
3097 delta_oa32, (unsigned)delta_oa32_ns);
3098 igt_debug("ts64 delta = %u, = %uns\n",
3099 delta_ts64, (unsigned)delta_ts64_ns);
3100
3101 /* The delta as calculated via the PIPE_CONTROL timestamp or
3102 * the OA report timestamps should be almost identical but
3103 * allow a 320 nanoseconds margin.
3104 */
3105 delta_delta = delta_ts64_ns > delta_oa32_ns ?
3106 (delta_ts64_ns - delta_oa32_ns) :
3107 (delta_oa32_ns - delta_ts64_ns);
3108 igt_assert(delta_delta <= 320);
3109
3110 for (int i = 0; i < ARRAY_SIZE(src); i++) {
3111 drm_intel_bo_unreference(src[i].bo);
3112 drm_intel_bo_unreference(dst[i].bo);
3113 }
3114
3115 drm_intel_bo_unmap(bo);
3116 drm_intel_bo_unreference(bo);
3117 intel_batchbuffer_free(batch);
3118 drm_intel_gem_context_destroy(context0);
3119 drm_intel_gem_context_destroy(context1);
3120 drm_intel_bufmgr_destroy(bufmgr);
3121 __perf_close(stream_fd);
3122 }
3123
3124 igt_waitchildren();
3125 }
3126
3127 /* Tests the INTEL_performance_query use case where an unprivileged process
3128 * should be able to configure the OA unit for per-context metrics (for a
3129 * context associated with that process' drm file descriptor) and the counters
3130 * should only relate to that specific context.
3131 *
3132 * For Gen8+ although reports read via i915 perf can be filtered for a single
3133 * context the counters themselves always progress as global/system-wide
3134 * counters affected by all contexts. To support the INTEL_performance_query
3135 * use case on Gen8+ it's necessary to combine OABUFFER and
3136 * MI_REPORT_PERF_COUNT reports so that counter normalisation can take into
3137 * account context-switch reports and factor out any counter progression not
3138 * associated with the current context.
3139 */
static void
gen8_test_single_ctx_render_target_writes_a_counter(void)
{
	int oa_exponent = max_oa_exponent_for_period_lte(1000000);
	uint64_t properties[] = {
		DRM_I915_PERF_PROP_CTX_HANDLE, UINT64_MAX, /* updated below */

		/* Note: we have to specify at least one sample property even
		 * though we aren't interested in samples in this case
		 */
		DRM_I915_PERF_PROP_SAMPLE_OA, true,

		/* OA unit configuration */
		DRM_I915_PERF_PROP_OA_METRICS_SET, test_metric_set_id,
		DRM_I915_PERF_PROP_OA_FORMAT, test_oa_format,
		DRM_I915_PERF_PROP_OA_EXPONENT, oa_exponent,

		/* Note: unlike the HSW variant of this test an OA exponent
		 * *is* specified here; periodic reports are needed on Gen8+
		 * so we can spot context switches and filter out other
		 * contexts' counter progression. */
	};
	struct drm_i915_perf_open_param param = {
		.flags = I915_PERF_FLAG_FD_CLOEXEC,
		.num_properties = ARRAY_SIZE(properties) / 2,
		.properties_ptr = to_user_pointer(properties),
	};
	size_t format_size = get_oa_format(test_oa_format).size;
	size_t sample_size = (sizeof(struct drm_i915_perf_record_header) +
			      format_size);
	/* Size the read buffer for 1.5x a full OA buffer's worth of samples
	 * so a single read pass can cover everything between our MI_RPCs. */
	int max_reports = MAX_OA_BUF_SIZE / format_size;
	int buf_size = sample_size * max_reports * 1.5;
	int child_ret;
	uint8_t *buf = malloc(buf_size);
	ssize_t len;
	struct igt_helper_process child = {};

	/* should be default, but just to be sure... */
	write_u64_file("/proc/sys/dev/i915/perf_stream_paranoid", 1);

	/* The child exits with EAGAIN for transient failures (report lost,
	 * timestamp divergence, work not observed); keep retrying until it
	 * exits 0 or fails an assertion. */
	do {

		igt_fork_helper(&child) {
			struct drm_i915_perf_record_header *header;
			drm_intel_bufmgr *bufmgr;
			drm_intel_context *context0, *context1;
			struct intel_batchbuffer *batch;
			struct igt_buf src[3], dst[3];
			drm_intel_bo *bo;
			uint32_t *report0_32, *report1_32;
			uint32_t *prev, *lprev = NULL;
			uint64_t timestamp0_64, timestamp1_64;
			uint32_t delta_ts64, delta_oa32;
			uint64_t delta_ts64_ns, delta_oa32_ns;
			uint32_t delta_delta;
			int width = 800;
			int height = 600;
			uint32_t ctx_id = 0xffffffff; /* invalid handle */
			uint32_t ctx1_id = 0xffffffff; /* invalid handle */
			uint32_t current_ctx_id = 0xffffffff;
			uint32_t n_invalid_ctx = 0;
			int ret;
			struct accumulator accumulator = {
				.format = test_oa_format
			};

			bufmgr = drm_intel_bufmgr_gem_init(drm_fd, 4096);
			drm_intel_bufmgr_gem_enable_reuse(bufmgr);

			for (int i = 0; i < ARRAY_SIZE(src); i++) {
				scratch_buf_init(bufmgr, &src[i], width, height, 0xff0000ff);
				scratch_buf_init(bufmgr, &dst[i], width, height, 0x00ff00ff);
			}

			batch = intel_batchbuffer_alloc(bufmgr, devid);

			context0 = drm_intel_gem_context_create(bufmgr);
			igt_assert(context0);

			context1 = drm_intel_gem_context_create(bufmgr);
			igt_assert(context1);

			igt_debug("submitting warm up render_copy\n");

			/* Submit some early, unmeasured, work to the context we want
			 * to measure to try and catch issues with i915-perf
			 * initializing the HW context ID for filtering.
			 *
			 * We do this because i915-perf single context filtering had
			 * previously only relied on a hook into context pinning to
			 * initialize the HW context ID, instead of also trying to
			 * determine the HW ID while opening the stream, in case it
			 * has already been pinned.
			 *
			 * This wasn't noticed by the previous unit test because we
			 * were opening the stream while the context hadn't been
			 * touched or pinned yet and so it worked out correctly to wait
			 * for the pinning hook.
			 *
			 * Now a buggy version of i915-perf will fail to measure
			 * anything for context0 once this initial render_copy() ends
			 * up pinning the context since there won't ever be a pinning
			 * hook callback.
			 */
			render_copy(batch,
				    context0,
				    &src[0], 0, 0, width, height,
				    &dst[0], 0, 0);

			ret = drm_intel_gem_context_get_id(context0, &ctx_id);
			igt_assert_eq(ret, 0);
			igt_assert_neq(ctx_id, 0xffffffff);
			properties[1] = ctx_id;

			scratch_buf_memset(src[0].bo, width, height, 0xff0000ff);
			scratch_buf_memset(dst[0].bo, width, height, 0x00ff00ff);

			igt_debug("opening i915-perf stream\n");
			stream_fd = __perf_open(drm_fd, &param, false);

			bo = drm_intel_bo_alloc(bufmgr, "mi_rpc dest bo", 4096, 64);

			ret = drm_intel_bo_map(bo, true /* write enable */);
			igt_assert_eq(ret, 0);

			memset(bo->virtual, 0x80, 4096);
			drm_intel_bo_unmap(bo);

			/* bo layout: OA reports at byte offsets 0 and 256,
			 * PIPE_CONTROL timestamps at byte offsets 512 and 520. */
			emit_stall_timestamp_and_rpc(batch,
						     bo,
						     512 /* timestamp offset */,
						     0, /* report dst offset */
						     0xdeadbeef); /* report id */

			/* Explicitly flush here (even though the render_copy() call
			 * will itself flush before/after the copy) to clarify that
			 * that the PIPE_CONTROL + MI_RPC commands will be in a
			 * separate batch from the copy.
			 */
			intel_batchbuffer_flush_with_context(batch, context0);

			render_copy(batch,
				    context0,
				    &src[0], 0, 0, width, height,
				    &dst[0], 0, 0);

			/* Another redundant flush to clarify batch bo is free to reuse */
			intel_batchbuffer_flush_with_context(batch, context0);

			/* submit two copies on the other context to avoid a false
			 * positive in case the driver somehow ended up filtering for
			 * context1
			 */
			render_copy(batch,
				    context1,
				    &src[1], 0, 0, width, height,
				    &dst[1], 0, 0);

			ret = drm_intel_gem_context_get_id(context1, &ctx1_id);
			igt_assert_eq(ret, 0);
			igt_assert_neq(ctx1_id, 0xffffffff);

			render_copy(batch,
				    context1,
				    &src[2], 0, 0, width, height,
				    &dst[2], 0, 0);

			/* And another */
			intel_batchbuffer_flush_with_context(batch, context1);

			emit_stall_timestamp_and_rpc(batch,
						     bo,
						     520 /* timestamp offset */,
						     256, /* report dst offset */
						     0xbeefbeef); /* report id */

			intel_batchbuffer_flush_with_context(batch, context1);

			ret = drm_intel_bo_map(bo, false /* write enable */);
			igt_assert_eq(ret, 0);

			report0_32 = bo->virtual;
			igt_assert_eq(report0_32[0], 0xdeadbeef); /* report ID */
			igt_assert_neq(report0_32[1], 0); /* timestamp */
			prev = report0_32;
			ctx_id = prev[2];
			igt_debug("MI_RPC(start) CTX ID: %u\n", ctx_id);

			report1_32 = report0_32 + 64; /* 64 uint32_t = 256bytes offset */
			igt_assert_eq(report1_32[0], 0xbeefbeef); /* report ID */
			igt_assert_neq(report1_32[1], 0); /* timestamp */
			ctx1_id = report1_32[2];

			/* Unfiltered delta across the whole measured window,
			 * printed for comparison with the filtered total below. */
			memset(accumulator.deltas, 0, sizeof(accumulator.deltas));
			accumulate_reports(&accumulator, report0_32, report1_32);
			igt_debug("total: A0 = %"PRIu64", A21 = %"PRIu64", A26 = %"PRIu64"\n",
				  accumulator.deltas[2 + 0], /* skip timestamp + clock cycles */
				  accumulator.deltas[2 + 21],
				  accumulator.deltas[2 + 26]);

			igt_debug("oa_timestamp32 0 = %u\n", report0_32[1]);
			igt_debug("oa_timestamp32 1 = %u\n", report1_32[1]);
			igt_debug("ctx_id 0 = %u\n", report0_32[2]);
			igt_debug("ctx_id 1 = %u\n", report1_32[2]);

			timestamp0_64 = *(uint64_t *)(((uint8_t *)bo->virtual) + 512);
			timestamp1_64 = *(uint64_t *)(((uint8_t *)bo->virtual) + 520);

			igt_debug("ts_timestamp64 0 = %"PRIu64"\n", timestamp0_64);
			igt_debug("ts_timestamp64 1 = %"PRIu64"\n", timestamp1_64);

			delta_ts64 = timestamp1_64 - timestamp0_64;
			delta_oa32 = report1_32[1] - report0_32[1];

			/* sanity check that we can pass the delta to timebase_scale */
			igt_assert(delta_ts64 < UINT32_MAX);
			delta_oa32_ns = timebase_scale(delta_oa32);
			delta_ts64_ns = timebase_scale(delta_ts64);

			igt_debug("oa32 delta = %u, = %uns\n",
				  delta_oa32, (unsigned)delta_oa32_ns);
			igt_debug("ts64 delta = %u, = %uns\n",
				  delta_ts64, (unsigned)delta_ts64_ns);

			/* The delta as calculated via the PIPE_CONTROL timestamp or
			 * the OA report timestamps should be almost identical but
			 * allow a 500 nanoseconds margin.
			 */
			delta_delta = delta_ts64_ns > delta_oa32_ns ?
				(delta_ts64_ns - delta_oa32_ns) :
				(delta_oa32_ns - delta_ts64_ns);
			if (delta_delta > 500) {
				igt_debug("skipping\n");
				exit(EAGAIN); /* transient: parent retries */
			}

			len = i915_read_reports_until_timestamp(test_oa_format,
								buf, buf_size,
								report0_32[1],
								report1_32[1]);

			igt_assert(len > 0);
			igt_debug("read %d bytes\n", (int)len);

			memset(accumulator.deltas, 0, sizeof(accumulator.deltas));

			/* Walk every record read from the stream, accumulating
			 * counter deltas only over spans where the HW context
			 * was the one we're measuring. */
			for (size_t offset = 0; offset < len; offset += header->size) {
				uint32_t *report;
				uint32_t reason;
				const char *skip_reason = NULL, *report_reason = NULL;
				struct accumulator laccumulator = {
					.format = test_oa_format
				};


				header = (void *)(buf + offset);

				igt_assert_eq(header->pad, 0); /* Reserved */

				/* Currently the only test that should ever expect to
				 * see a _BUFFER_LOST error is the buffer_fill test,
				 * otherwise something bad has probably happened...
				 */
				igt_assert_neq(header->type, DRM_I915_PERF_RECORD_OA_BUFFER_LOST);

				/* At high sampling frequencies the OA HW might not be
				 * able to cope with all write requests and will notify
				 * us that a report was lost.
				 *
				 * XXX: we should maybe restart the test in this case?
				 */
				if (header->type == DRM_I915_PERF_RECORD_OA_REPORT_LOST) {
					igt_debug("OA trigger collision / report lost\n");
					exit(EAGAIN);
				}

				/* Currently the only other record type expected is a
				 * _SAMPLE. Notably this test will need updating if
				 * i915-perf is extended in the future with additional
				 * record types.
				 */
				igt_assert_eq(header->type, DRM_I915_PERF_RECORD_SAMPLE);

				igt_assert_eq(header->size, sample_size);

				report = (void *)(header + 1);

				/* Don't expect zero for timestamps */
				igt_assert_neq(report[1], 0);

				igt_debug("report %p:\n", report);

				/* Discard reports not contained in between the
				 * timestamps we're looking at. */
				{
					uint32_t time_delta = report[1] - report0_32[1];

					/* 32-bit wrap-around delta: a "negative"
					 * (huge) delta means the report predates
					 * the first MI_RPC. */
					if (timebase_scale(time_delta) > 1000000000) {
						skip_reason = "prior first mi-rpc";
					}
				}

				{
					uint32_t time_delta = report[1] - report1_32[1];

					if (timebase_scale(time_delta) <= 1000000000) {
						igt_debug("    comes after last MI_RPC (%u)\n",
							  report1_32[1]);
						/* Clamp: treat the second MI_RPC report
						 * as the final one to accumulate to. */
						report = report1_32;
					}
				}

				/* Print out deltas for a few significant
				 * counters for each report. */
				if (lprev) {
					memset(laccumulator.deltas, 0, sizeof(laccumulator.deltas));
					accumulate_reports(&laccumulator, lprev, report);
					igt_debug("    deltas: A0=%"PRIu64" A21=%"PRIu64", A26=%"PRIu64"\n",
						  laccumulator.deltas[2 + 0], /* skip timestamp + clock cycles */
						  laccumulator.deltas[2 + 21],
						  laccumulator.deltas[2 + 26]);
				}
				lprev = report;

				/* Print out reason for the report. */
				reason = ((report[0] >> OAREPORT_REASON_SHIFT) &
					  OAREPORT_REASON_MASK);

				if (reason & OAREPORT_REASON_CTX_SWITCH) {
					report_reason = "ctx-load";
				} else if (reason & OAREPORT_REASON_TIMER) {
					report_reason = "timer";
				} else if (reason & OAREPORT_REASON_INTERNAL ||
					   reason & OAREPORT_REASON_GO ||
					   reason & OAREPORT_REASON_CLK_RATIO) {
					report_reason = "internal/go/clk-ratio";
				} else {
					report_reason = "end-mi-rpc";
				}
				igt_debug("    ctx_id=%u/%x reason=%s oa_timestamp32=%u\n",
					  report[2], report[2], report_reason, report[1]);

				/* Should we skip this report?
				 *
				 *   Only if the current context id of
				 *   the stream is not the one we want
				 *   to measure.
				 */
				if (current_ctx_id != ctx_id) {
					skip_reason = "not our context";
				}

				if (n_invalid_ctx > 1) {
					skip_reason = "too many invalid context events";
				}

				if (!skip_reason) {
					accumulate_reports(&accumulator, prev, report);
					igt_debug(" -> Accumulated deltas A0=%"PRIu64" A21=%"PRIu64", A26=%"PRIu64"\n",
						  accumulator.deltas[2 + 0], /* skip timestamp + clock cycles */
						  accumulator.deltas[2 + 21],
						  accumulator.deltas[2 + 26]);
				} else {
					igt_debug(" -> Skipping: %s\n", skip_reason);
				}


				/* Finally update current-ctx_id, only possible
				 * with a valid context id. */
				if (oa_report_ctx_is_valid(report)) {
					current_ctx_id = report[2];
					n_invalid_ctx = 0;
				} else {
					n_invalid_ctx++;
				}

				prev = report;

				if (report == report1_32) {
					igt_debug("Breaking on end of report\n");
					print_reports(report0_32, report1_32,
						      lookup_format(test_oa_format));
					break;
				}
			}

			igt_debug("n samples written = %"PRIu64"/%"PRIu64" (%ix%i)\n",
				  accumulator.deltas[2 + 21],/* skip timestamp + clock cycles */
				  accumulator.deltas[2 + 26],
				  width, height);
			accumulator_print(&accumulator, "filtered");

			ret = drm_intel_bo_map(src[0].bo, false /* write enable */);
			igt_assert_eq(ret, 0);
			ret = drm_intel_bo_map(dst[0].bo, false /* write enable */);
			igt_assert_eq(ret, 0);

			ret = memcmp(src[0].bo->virtual, dst[0].bo->virtual, 4 * width * height);
			if (ret != 0) {
				accumulator_print(&accumulator, "total");
				/* This needs to be investigated... From time
				 * to time, the work we kick off doesn't seem
				 * to happen. WTH?? */
				exit(EAGAIN);
			}

			drm_intel_bo_unmap(src[0].bo);
			drm_intel_bo_unmap(dst[0].bo);

			/* A26 == N samples written to all render targets:
			 * only context0's copy may have been counted. */
			igt_assert_eq(accumulator.deltas[2 + 26], width * height);

			for (int i = 0; i < ARRAY_SIZE(src); i++) {
				drm_intel_bo_unreference(src[i].bo);
				drm_intel_bo_unreference(dst[i].bo);
			}

			drm_intel_bo_unmap(bo);
			drm_intel_bo_unreference(bo);
			intel_batchbuffer_free(batch);
			drm_intel_gem_context_destroy(context0);
			drm_intel_gem_context_destroy(context1);
			drm_intel_bufmgr_destroy(bufmgr);
			__perf_close(stream_fd);
		}

		child_ret = igt_wait_helper(&child);

		igt_assert(WEXITSTATUS(child_ret) == EAGAIN ||
			   WEXITSTATUS(child_ret) == 0);

	} while (WEXITSTATUS(child_ret) == EAGAIN);
}
3569
static unsigned long rc6_residency_ms(void)
{
	/* Cumulative RC6 residency (ms) from the device's sysfs node. */
	unsigned long residency = sysfs_read("power/rc6_residency_ms");

	return residency;
}
3574
3575 static void
test_rc6_disable(void)3576 test_rc6_disable(void)
3577 {
3578 uint64_t properties[] = {
3579 /* Include OA reports in samples */
3580 DRM_I915_PERF_PROP_SAMPLE_OA, true,
3581
3582 /* OA unit configuration */
3583 DRM_I915_PERF_PROP_OA_METRICS_SET, test_metric_set_id,
3584 DRM_I915_PERF_PROP_OA_FORMAT, test_oa_format,
3585 DRM_I915_PERF_PROP_OA_EXPONENT, oa_exp_1_millisec,
3586 };
3587 struct drm_i915_perf_open_param param = {
3588 .flags = I915_PERF_FLAG_FD_CLOEXEC,
3589 .num_properties = sizeof(properties) / 16,
3590 .properties_ptr = to_user_pointer(properties),
3591 };
3592 unsigned long n_events_start, n_events_end;
3593 unsigned long rc6_enabled;
3594
3595 rc6_enabled = 0;
3596 igt_sysfs_scanf(sysfs, "power/rc6_enable", "%lu", &rc6_enabled);
3597 igt_require(rc6_enabled);
3598
3599 stream_fd = __perf_open(drm_fd, ¶m, false);
3600
3601 n_events_start = rc6_residency_ms();
3602 nanosleep(&(struct timespec){ .tv_sec = 0, .tv_nsec = 500000000 }, NULL);
3603 n_events_end = rc6_residency_ms();
3604 igt_assert_eq(n_events_end - n_events_start, 0);
3605
3606 __perf_close(stream_fd);
3607 gem_quiescent_gpu(drm_fd);
3608
3609 n_events_start = rc6_residency_ms();
3610 nanosleep(&(struct timespec){ .tv_sec = 1, .tv_nsec = 0 }, NULL);
3611 n_events_end = rc6_residency_ms();
3612 igt_assert_neq(n_events_end - n_events_start, 0);
3613 }
3614
__i915_perf_add_config(int fd,struct drm_i915_perf_oa_config * config)3615 static int __i915_perf_add_config(int fd, struct drm_i915_perf_oa_config *config)
3616 {
3617 int ret = igt_ioctl(fd, DRM_IOCTL_I915_PERF_ADD_CONFIG, config);
3618 if (ret < 0)
3619 ret = -errno;
3620 return ret;
3621 }
3622
/* Like __i915_perf_add_config() but asserts success; returns the config id. */
static int i915_perf_add_config(int fd, struct drm_i915_perf_oa_config *config)
{
	int id = __i915_perf_add_config(fd, config);

	igt_debug("config_id=%i\n", id);
	igt_assert(id > 0);

	return id;
}
3632
i915_perf_remove_config(int fd,uint64_t config_id)3633 static void i915_perf_remove_config(int fd, uint64_t config_id)
3634 {
3635 igt_assert_eq(igt_ioctl(fd, DRM_IOCTL_I915_PERF_REMOVE_CONFIG,
3636 &config_id), 0);
3637 }
3638
has_i915_perf_userspace_config(int fd)3639 static bool has_i915_perf_userspace_config(int fd)
3640 {
3641 uint64_t config = 0;
3642 int ret = igt_ioctl(fd, DRM_IOCTL_I915_PERF_REMOVE_CONFIG, &config);
3643 igt_assert_eq(ret, -1);
3644
3645 igt_debug("errno=%i\n", errno);
3646
3647 return errno != EINVAL;
3648 }
3649
/* Negative tests for DRM_IOCTL_I915_PERF_ADD_CONFIG: each malformed
 * config below must be rejected with the expected errno. */
static void
test_invalid_create_userspace_config(void)
{
	struct drm_i915_perf_oa_config config;
	const char *uuid = "01234567-0123-0123-0123-0123456789ab";
	const char *invalid_uuid = "blablabla-wrong";
	uint32_t mux_regs[] = { 0x9888 /* NOA_WRITE */, 0x0 };
	uint32_t invalid_mux_regs[] = { 0x12345678 /* invalid register */, 0x0 };

	igt_require(has_i915_perf_userspace_config(drm_fd));

	memset(&config, 0, sizeof(config));

	/* invalid uuid */
	strncpy(config.uuid, invalid_uuid, sizeof(config.uuid));
	config.n_mux_regs = 1;
	config.mux_regs_ptr = to_user_pointer(mux_regs);
	config.n_boolean_regs = 0;
	config.n_flex_regs = 0;

	igt_assert_eq(__i915_perf_add_config(drm_fd, &config), -EINVAL);

	/* invalid mux_regs: register address not on the kernel whitelist */
	memcpy(config.uuid, uuid, sizeof(config.uuid));
	config.n_mux_regs = 1;
	config.mux_regs_ptr = to_user_pointer(invalid_mux_regs);
	config.n_boolean_regs = 0;
	config.n_flex_regs = 0;

	igt_assert_eq(__i915_perf_add_config(drm_fd, &config), -EINVAL);

	/* empty config: no registers at all must be rejected */
	memcpy(config.uuid, uuid, sizeof(config.uuid));
	config.n_mux_regs = 0;
	config.mux_regs_ptr = to_user_pointer(mux_regs);
	config.n_boolean_regs = 0;
	config.n_flex_regs = 0;

	igt_assert_eq(__i915_perf_add_config(drm_fd, &config), -EINVAL);

	/* empty config with null pointers: non-zero counts but NULL arrays */
	memcpy(config.uuid, uuid, sizeof(config.uuid));
	config.n_mux_regs = 1;
	config.mux_regs_ptr = to_user_pointer(NULL);
	config.n_boolean_regs = 2;
	config.boolean_regs_ptr = to_user_pointer(NULL);
	config.n_flex_regs = 3;
	config.flex_regs_ptr = to_user_pointer(NULL);

	igt_assert_eq(__i915_perf_add_config(drm_fd, &config), -EINVAL);

	/* invalid pointers: unmapped user address must fault, not crash */
	memcpy(config.uuid, uuid, sizeof(config.uuid));
	config.n_mux_regs = 42;
	config.mux_regs_ptr = to_user_pointer((void *) 0xDEADBEEF);
	config.n_boolean_regs = 0;
	config.n_flex_regs = 0;

	igt_assert_eq(__i915_perf_add_config(drm_fd, &config), -EFAULT);
}
3710
/* Negative tests for DRM_IOCTL_I915_PERF_REMOVE_CONFIG: removal must
 * require CAP_SYS_ADMIN and reject unknown config ids. */
static void
test_invalid_remove_userspace_config(void)
{
	struct drm_i915_perf_oa_config config;
	const char *uuid = "01234567-0123-0123-0123-0123456789ab";
	uint32_t mux_regs[] = { 0x9888 /* NOA_WRITE */, 0x0 };
	uint64_t config_id, wrong_config_id = 999999999;
	char path[512];

	igt_require(has_i915_perf_userspace_config(drm_fd));

	snprintf(path, sizeof(path), "metrics/%s/id", uuid);

	/* Destroy previous configuration if present */
	if (try_sysfs_read_u64(path, &config_id))
		i915_perf_remove_config(drm_fd, config_id);

	memset(&config, 0, sizeof(config));

	memcpy(config.uuid, uuid, sizeof(config.uuid));

	config.n_mux_regs = 1;
	config.mux_regs_ptr = to_user_pointer(mux_regs);
	config.n_boolean_regs = 0;
	config.n_flex_regs = 0;

	config_id = i915_perf_add_config(drm_fd, &config);

	/* Removing configs without permissions should fail. */
	igt_fork(child, 1) {
		igt_drop_root();

		do_ioctl_err(drm_fd, DRM_IOCTL_I915_PERF_REMOVE_CONFIG, &config_id, EACCES);
	}
	igt_waitchildren();

	/* Removing invalid config ID should fail. */
	do_ioctl_err(drm_fd, DRM_IOCTL_I915_PERF_REMOVE_CONFIG, &wrong_config_id, ENOENT);

	/* Clean up the config we registered above. */
	i915_perf_remove_config(drm_fd, config_id);
}
3752
3753 static void
test_create_destroy_userspace_config(void)3754 test_create_destroy_userspace_config(void)
3755 {
3756 struct drm_i915_perf_oa_config config;
3757 const char *uuid = "01234567-0123-0123-0123-0123456789ab";
3758 uint32_t mux_regs[] = { 0x9888 /* NOA_WRITE */, 0x0 };
3759 uint32_t flex_regs[100];
3760 int i;
3761 uint64_t config_id;
3762 uint64_t properties[] = {
3763 DRM_I915_PERF_PROP_OA_METRICS_SET, 0, /* Filled later */
3764
3765 /* OA unit configuration */
3766 DRM_I915_PERF_PROP_SAMPLE_OA, true,
3767 DRM_I915_PERF_PROP_OA_FORMAT, test_oa_format,
3768 DRM_I915_PERF_PROP_OA_EXPONENT, oa_exp_1_millisec,
3769 DRM_I915_PERF_PROP_OA_METRICS_SET
3770 };
3771 struct drm_i915_perf_open_param param = {
3772 .flags = I915_PERF_FLAG_FD_CLOEXEC |
3773 I915_PERF_FLAG_FD_NONBLOCK |
3774 I915_PERF_FLAG_DISABLED,
3775 .num_properties = ARRAY_SIZE(properties) / 2,
3776 .properties_ptr = to_user_pointer(properties),
3777 };
3778 char path[512];
3779
3780 igt_require(has_i915_perf_userspace_config(drm_fd));
3781
3782 snprintf(path, sizeof(path), "metrics/%s/id", uuid);
3783
3784 /* Destroy previous configuration if present */
3785 if (try_sysfs_read_u64(path, &config_id))
3786 i915_perf_remove_config(drm_fd, config_id);
3787
3788 memset(&config, 0, sizeof(config));
3789 memcpy(config.uuid, uuid, sizeof(config.uuid));
3790
3791 config.n_mux_regs = 1;
3792 config.mux_regs_ptr = to_user_pointer(mux_regs);
3793
3794 /* Flex EU counters are only available on gen8+ */
3795 if (intel_gen(devid) >= 8) {
3796 for (i = 0; i < ARRAY_SIZE(flex_regs) / 2; i++) {
3797 flex_regs[i * 2] = 0xe458; /* EU_PERF_CNTL0 */
3798 flex_regs[i * 2 + 1] = 0x0;
3799 }
3800 config.flex_regs_ptr = to_user_pointer(flex_regs);
3801 config.n_flex_regs = ARRAY_SIZE(flex_regs) / 2;
3802 }
3803
3804 config.n_boolean_regs = 0;
3805
3806 /* Creating configs without permissions shouldn't work. */
3807 igt_fork(child, 1) {
3808 igt_drop_root();
3809
3810 igt_assert_eq(__i915_perf_add_config(drm_fd, &config), -EACCES);
3811 }
3812 igt_waitchildren();
3813
3814 /* Create a new config */
3815 config_id = i915_perf_add_config(drm_fd, &config);
3816
3817 /* Verify that adding the another config with the same uuid fails. */
3818 igt_assert_eq(__i915_perf_add_config(drm_fd, &config), -EADDRINUSE);
3819
3820 /* Try to use the new config */
3821 properties[1] = config_id;
3822 stream_fd = __perf_open(drm_fd, ¶m, false);
3823
3824 /* Verify that destroying the config doesn't yield any error. */
3825 i915_perf_remove_config(drm_fd, config_id);
3826
3827 /* Read the config to verify shouldn't raise any issue. */
3828 config_id = i915_perf_add_config(drm_fd, &config);
3829
3830 __perf_close(stream_fd);
3831
3832 i915_perf_remove_config(drm_fd, config_id);
3833 }
3834
3835 /* Registers required by userspace. This list should be maintained by
3836 * the OA configs developers and agreed upon with kernel developers as
3837 * some of the registers have bits used by the kernel (for workarounds
3838 * for instance) and other bits that need to be set by the OA configs.
3839 */
static void
test_whitelisted_registers_userspace_config(void)
{
	struct drm_i915_perf_oa_config config;
	const char *uuid = "01234567-0123-0123-0123-0123456789ab";
	/* Each register entry is an (address, value) pair, hence the *2
	 * indexing below; 200 uint32_t gives room for 100 pairs per table.
	 */
	uint32_t mux_regs[200];
	uint32_t b_counters_regs[200];
	uint32_t flex_regs[200];
	uint32_t i;
	uint64_t config_id;
	char path[512];
	int ret;
	/* Flex EU counter registers (EU_PERF_CNTL0..) — Gen8+ only. */
	const uint32_t flex[] = {
		0xe458,
		0xe558,
		0xe658,
		0xe758,
		0xe45c,
		0xe55c,
		0xe65c
	};

	igt_require(has_i915_perf_userspace_config(drm_fd));

	/* If a previous run left a config with our UUID behind, remove it
	 * first so ADD_CONFIG below doesn't fail with EEXIST.
	 */
	snprintf(path, sizeof(path), "metrics/%s/id", uuid);

	if (try_sysfs_read_u64(path, &config_id))
		i915_perf_remove_config(drm_fd, config_id);

	memset(&config, 0, sizeof(config));
	memcpy(config.uuid, uuid, sizeof(config.uuid));

	/* OASTARTTRIG[1-8] */
	for (i = 0x2710; i <= 0x272c; i += 4) {
		b_counters_regs[config.n_boolean_regs * 2] = i;
		b_counters_regs[config.n_boolean_regs * 2 + 1] = 0;
		config.n_boolean_regs++;
	}
	/* OAREPORTTRIG[1-8] */
	for (i = 0x2740; i <= 0x275c; i += 4) {
		b_counters_regs[config.n_boolean_regs * 2] = i;
		b_counters_regs[config.n_boolean_regs * 2 + 1] = 0;
		config.n_boolean_regs++;
	}
	config.boolean_regs_ptr = (uintptr_t) b_counters_regs;

	if (intel_gen(devid) >= 8) {
		/* Flex EU registers, only from Gen8+. */
		for (i = 0; i < ARRAY_SIZE(flex); i++) {
			flex_regs[config.n_flex_regs * 2] = flex[i];
			flex_regs[config.n_flex_regs * 2 + 1] = 0;
			config.n_flex_regs++;
		}
		config.flex_regs_ptr = (uintptr_t) flex_regs;
	}

	/* Mux registers (too many of them, just checking bounds) */
	i = 0;

	/* NOA_WRITE */
	mux_regs[i++] = 0x9800;
	mux_regs[i++] = 0;

	if (IS_HASWELL(devid)) {
		/* Haswell specific. undocumented... */
		mux_regs[i++] = 0x9ec0;
		mux_regs[i++] = 0;

		mux_regs[i++] = 0x25100;
		mux_regs[i++] = 0;
		mux_regs[i++] = 0x2ff90;
		mux_regs[i++] = 0;
	}

	if (intel_gen(devid) >= 8 && !IS_CHERRYVIEW(devid)) {
		/* NOA_CONFIG */
		mux_regs[i++] = 0xD04;
		mux_regs[i++] = 0;
		mux_regs[i++] = 0xD2C;
		mux_regs[i++] = 0;
		/* WAIT_FOR_RC6_EXIT */
		mux_regs[i++] = 0x20CC;
		mux_regs[i++] = 0;
	}

	/* HALF_SLICE_CHICKEN2 (shared with kernel workaround) */
	mux_regs[i++] = 0xE180;
	mux_regs[i++] = 0;

	if (IS_CHERRYVIEW(devid)) {
		/* Cherryview specific. undocumented... */
		mux_regs[i++] = 0x182300;
		mux_regs[i++] = 0;
		mux_regs[i++] = 0x1823A4;
		mux_regs[i++] = 0;
	}

	/* PERFCNT[12] */
	mux_regs[i++] = 0x91B8;
	mux_regs[i++] = 0;
	/* PERFMATRIX */
	mux_regs[i++] = 0x91C8;
	mux_regs[i++] = 0;

	config.mux_regs_ptr = (uintptr_t) mux_regs;
	/* i counted individual uint32_t entries; pairs = i / 2. */
	config.n_mux_regs = i / 2;

	/* Create a new config */
	ret = igt_ioctl(drm_fd, DRM_IOCTL_I915_PERF_ADD_CONFIG, &config);
	igt_assert(ret > 0); /* Config 0 should be used by the kernel */
	config_id = ret;

	i915_perf_remove_config(drm_fd, config_id);
}
3954
/* Parse /proc/modules and return the current reference count held on the
 * i915 kernel module. Asserts (aborting the test) if /proc/modules cannot
 * be opened or no "i915 " line is found, i.e. the module is not loaded.
 *
 * Line format is "name size refcount dependents ...", so the ref count is
 * the second field after the module name.
 */
static unsigned
read_i915_module_ref(void)
{
	FILE *fp = fopen("/proc/modules", "r");
	char *line = NULL;
	size_t line_buf_size = 0;
	ssize_t len; /* getline() returns ssize_t, not int */
	unsigned ref_count;

	igt_assert(fp);

	while ((len = getline(&line, &line_buf_size, fp)) > 0) {
		/* Match "i915 " exactly so we don't catch i915_* helpers. */
		if (strncmp(line, "i915 ", 5) == 0) {
			unsigned long mem;
			int ret = sscanf(line + 5, "%lu %u", &mem, &ref_count);
			igt_assert(ret == 2);
			goto done;
		}
	}

	igt_assert(!"reached");

done:
	free(line);
	fclose(fp);
	return ref_count;
}
3982
3983 /* check that an open i915 perf stream holds a reference on the drm i915 module
3984 * including in the corner case where the original drm fd has been closed.
3985 */
3986 static void
test_i915_ref_count(void)3987 test_i915_ref_count(void)
3988 {
3989 uint64_t properties[] = {
3990 /* Include OA reports in samples */
3991 DRM_I915_PERF_PROP_SAMPLE_OA, true,
3992
3993 /* OA unit configuration */
3994 DRM_I915_PERF_PROP_OA_METRICS_SET, 0 /* updated below */,
3995 DRM_I915_PERF_PROP_OA_FORMAT, 0, /* update below */
3996 DRM_I915_PERF_PROP_OA_EXPONENT, 0, /* update below */
3997 };
3998 struct drm_i915_perf_open_param param = {
3999 .flags = I915_PERF_FLAG_FD_CLOEXEC,
4000 .num_properties = sizeof(properties) / 16,
4001 .properties_ptr = to_user_pointer(properties),
4002 };
4003 unsigned baseline, ref_count0, ref_count1;
4004 uint32_t oa_report0[64];
4005 uint32_t oa_report1[64];
4006
4007 /* This should be the first test before the first fixture so no drm_fd
4008 * should have been opened so far...
4009 */
4010 igt_assert_eq(drm_fd, -1);
4011
4012 baseline = read_i915_module_ref();
4013 igt_debug("baseline ref count (drm fd closed) = %u\n", baseline);
4014
4015 drm_fd = __drm_open_driver(DRIVER_INTEL);
4016 devid = intel_get_drm_devid(drm_fd);
4017 sysfs = igt_sysfs_open(drm_fd);
4018
4019 /* Note: these global variables are only initialized after calling
4020 * init_sys_info()...
4021 */
4022 igt_require(init_sys_info());
4023 properties[3] = test_metric_set_id;
4024 properties[5] = test_oa_format;
4025 properties[7] = oa_exp_1_millisec;
4026
4027 ref_count0 = read_i915_module_ref();
4028 igt_debug("initial ref count with drm_fd open = %u\n", ref_count0);
4029 igt_assert(ref_count0 > baseline);
4030
4031 stream_fd = __perf_open(drm_fd, ¶m, false);
4032 ref_count1 = read_i915_module_ref();
4033 igt_debug("ref count after opening i915 perf stream = %u\n", ref_count1);
4034 igt_assert(ref_count1 > ref_count0);
4035
4036 close(drm_fd);
4037 close(sysfs);
4038 drm_fd = -1;
4039 sysfs = -1;
4040 ref_count0 = read_i915_module_ref();
4041 igt_debug("ref count after closing drm fd = %u\n", ref_count0);
4042
4043 igt_assert(ref_count0 > baseline);
4044
4045 read_2_oa_reports(test_oa_format,
4046 oa_exp_1_millisec,
4047 oa_report0,
4048 oa_report1,
4049 false); /* not just timer reports */
4050
4051 __perf_close(stream_fd);
4052 ref_count0 = read_i915_module_ref();
4053 igt_debug("ref count after closing i915 perf stream fd = %u\n", ref_count0);
4054 igt_assert_eq(ref_count0, baseline);
4055 }
4056
/* Check the i915 perf sysctl knobs are at their expected default values. */
static void
test_sysctl_defaults(void)
{
	int stream_paranoid =
		read_u64_file("/proc/sys/dev/i915/perf_stream_paranoid");
	int oa_max_rate =
		read_u64_file("/proc/sys/dev/i915/oa_max_sample_rate");

	igt_assert_eq(stream_paranoid, 1);
	igt_assert_eq(oa_max_rate, 100000);
}
4066
igt_main
{
	igt_skip_on_simulation();

	/* Bail out early if the i915 perf sysctl interface isn't there at
	 * all (kernel without i915 perf support).
	 */
	igt_fixture {
		struct stat sb;

		igt_require(stat("/proc/sys/dev/i915/perf_stream_paranoid", &sb)
			    == 0);
		igt_require(stat("/proc/sys/dev/i915/oa_max_sample_rate", &sb)
			    == 0);
	}

	/* Note: must stay first — it measures the module ref count baseline
	 * before any fixture opens drm_fd (asserted in the fixture below).
	 */
	igt_subtest("i915-ref-count")
		test_i915_ref_count();

	igt_subtest("sysctl-defaults")
		test_sysctl_defaults();

	igt_fixture {
		/* We expect that the ref count test before these fixtures
		 * should have closed drm_fd...
		 */
		igt_assert_eq(drm_fd, -1);

		drm_fd = drm_open_driver(DRIVER_INTEL);
		igt_require_gem(drm_fd);

		devid = intel_get_drm_devid(drm_fd);
		sysfs = igt_sysfs_open(drm_fd);

		igt_require(init_sys_info());

		write_u64_file("/proc/sys/dev/i915/perf_stream_paranoid", 1);
		write_u64_file("/proc/sys/dev/i915/oa_max_sample_rate", 100000);

		gt_max_freq_mhz = sysfs_read("gt_boost_freq_mhz");

		render_copy = igt_get_render_copyfunc(devid);
		igt_require_f(render_copy, "no render-copy function\n");
	}

	igt_subtest("non-system-wide-paranoid")
		test_system_wide_paranoid();

	igt_subtest("invalid-open-flags")
		test_invalid_open_flags();

	igt_subtest("invalid-oa-metric-set-id")
		test_invalid_oa_metric_set_id();

	igt_subtest("invalid-oa-format-id")
		test_invalid_oa_format_id();

	igt_subtest("missing-sample-flags")
		test_missing_sample_flags();

	igt_subtest("oa-formats")
		test_oa_formats();

	igt_subtest("invalid-oa-exponent")
		test_invalid_oa_exponent();
	igt_subtest("low-oa-exponent-permissions")
		test_low_oa_exponent_permissions();
	igt_subtest("oa-exponents")
		test_oa_exponents();

	igt_subtest("per-context-mode-unprivileged") {
		igt_require(IS_HASWELL(devid));
		test_per_context_mode_unprivileged();
	}

	igt_subtest("buffer-fill")
		test_buffer_fill();

	igt_subtest("disabled-read-error")
		test_disabled_read_error();
	igt_subtest("non-sampling-read-error")
		test_non_sampling_read_error();

	igt_subtest("enable-disable")
		test_enable_disable();

	igt_subtest("blocking")
		test_blocking();

	igt_subtest("polling")
		test_polling();

	igt_subtest("short-reads")
		test_short_reads();

	igt_subtest("mi-rpc")
		test_mi_rpc();

	igt_subtest("unprivileged-single-ctx-counters") {
		igt_require(IS_HASWELL(devid));
		hsw_test_single_ctx_counters();
	}

	igt_subtest("gen8-unprivileged-single-ctx-counters") {
		/* For Gen8+ the OA unit can no longer be made to clock gate
		 * for a specific context. Additionally the partial-replacement
		 * functionality to HW filter timer reports for a specific
		 * context (SKL+) can't stop multiple applications viewing
		 * system-wide data via MI_REPORT_PERF_COUNT commands.
		 */
		igt_require(intel_gen(devid) >= 8);
		gen8_test_single_ctx_render_target_writes_a_counter();
	}

	igt_subtest("rc6-disable")
		test_rc6_disable();

	igt_subtest("invalid-create-userspace-config")
		test_invalid_create_userspace_config();

	igt_subtest("invalid-remove-userspace-config")
		test_invalid_remove_userspace_config();

	igt_subtest("create-destroy-userspace-config")
		test_create_destroy_userspace_config();

	igt_subtest("whitelisted-registers-userspace-config")
		test_whitelisted_registers_userspace_config();

	igt_fixture {
		/* leave sysctl options in their default state... */
		write_u64_file("/proc/sys/dev/i915/oa_max_sample_rate", 100000);
		write_u64_file("/proc/sys/dev/i915/perf_stream_paranoid", 1);

		close(drm_fd);
	}
}
4201