/*
 * Copyright © 2011 Daniel Vetter
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Daniel Vetter <[email protected]>
 *
 * Partially based upon gem_tiled_fence_blits.c
 */

/** @file gem_stress.c
 *
 * This is a general gem coherency test. It's designed to eventually replicate
 * any possible sequence of access patterns. It works by copying a set of tiles
 * between two sets of backing buffer objects, randomly permuting the assigned
 * position on each copy operation.
 *
 * The copy operations are done in tiny portions (to reduce any race windows
 * for corruptions, hence increasing the chances of observing one) and are
 * constantly switched between all means to copy stuff (fenced blitter, unfenced
 * render, mmap, pwrite/read).
 *
 * After every complete move of a set, the tiling parameters of the buffers are
 * randomly changed to simulate the effects of libdrm caching.
 *
 * Buffers are 1MB big to nicely fit into fences on gen2/3. A few are further
 * split up to test relaxed fencing. Using this to push the average working set
 * size over the available gtt space forces objects to be mapped as unfenceable
 * (and as a side-effect tests gtt map/unmap coherency).
 *
 * In short: designed for maximum evilness.
 */

#include "igt.h"
#include <stdlib.h>
#include <sys/ioctl.h>
#include <stdio.h>
#include <string.h>
#include <fcntl.h>
#include <inttypes.h>
#include <errno.h>
#include <sys/stat.h>
#include <sys/time.h>

#include <drm.h>

#include "intel_bufmgr.h"

IGT_TEST_DESCRIPTION("General gem coherency test.");

#define CMD_POLY_STIPPLE_OFFSET       0x7906

#define DUCTAPE 0xdead0001
#define TILESZ	0xdead0002
#define CHCK_RENDER 0xdead0003

/** TODO:
 * - beat on relaxed fencing (i.e. mappable/fenceable tracking in the kernel)
 * - render copy (to check fence tracking and cache coherency management by the
 *   kernel)
 * - multi-threading: probably just a wrapper script to launch multiple
 *   instances + an option to accordingly reduce the working set
 * - gen6 inter-ring coherency (needs render copy, first)
 * - variable buffer size
 * - add an option to fork a second process that randomly sends signals to the
 *   first one (to check consistency of the kernel recovery paths)
 */

drm_intel_bufmgr *bufmgr;
struct intel_batchbuffer *batch;
int drm_fd;
int devid;
int num_fences;

drm_intel_bo *busy_bo;

struct option_struct {
    unsigned scratch_buf_size;
    unsigned max_dimension;
    unsigned num_buffers;
    int trace_tile;
    int no_hw;
    int gpu_busy_load;
    int use_render;
    int use_blt;
    int forced_tiling;
    int use_cpu_maps;
    int total_rounds;
    int fail;
    int tiles_per_buf;
    int ducttape;
    int tile_size;
    int check_render_cpyfn;
    int use_signal_helper;
};

#define MAX_BUFS		4096
#define SCRATCH_BUF_SIZE	1024*1024
#define BUSY_BUF_SIZE		(256*4096)
#define TILE_BYTES(size)	((size)*(size)*sizeof(uint32_t))

struct option_struct options = {
	.scratch_buf_size = BUSY_BUF_SIZE,
	.no_hw = 0,
	.use_signal_helper = 1,
	.gpu_busy_load = 0,
	.num_buffers = 0,
	.trace_tile = -1,
	.use_render = 1,
	.use_blt = 1,
	.forced_tiling = -1,
	.use_cpu_maps = 0,
	.total_rounds = 512,
	.fail = 1,
	.ducttape = 1,
	.tile_size = 16,
	.tiles_per_buf = BUSY_BUF_SIZE / TILE_BYTES(16),
	.check_render_cpyfn = 0,
};

static struct igt_buf buffers[2][MAX_BUFS];
/* tile i is at logical position tile_permutation[i] */
static unsigned *tile_permutation;
static unsigned num_buffers = 0;
static unsigned current_set = 0;
static unsigned target_set = 0;
static unsigned num_total_tiles = 0;

int fence_storm = 0;
static int gpu_busy_load = 10;

struct {
	unsigned num_failed;
	unsigned max_failed_reads;
} stats;

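/* Map a linear tile index within a buffer to the (x, y) position (in units of
 * uint32_t) of the tile's top-left corner, given the buffer's stride. */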
static void tile2xy(struct igt_buf *buf, unsigned tile, unsigned *x, unsigned *y)
{
	igt_assert(tile < buf->num_tiles);
	*x = (tile*options.tile_size) % (buf->stride/sizeof(uint32_t));
	*y = ((tile*options.tile_size) / (buf->stride/sizeof(uint32_t))) * options.tile_size;
}

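/* Emit a fenced XY_SRC_COPY blit of a w x h region at 32 bpp. On gen4+
 * (IS_965) the pitch of tiled surfaces is programmed in dwords rather than
 * bytes, hence the division by 4 when the tiled bits are set. The trailing
 * XY_SETUP_CLIP_BLT on gen6+ resets the blitter clip state. */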
static void emit_blt(drm_intel_bo *src_bo, uint32_t src_tiling, unsigned src_pitch,
		     unsigned src_x, unsigned src_y, unsigned w, unsigned h,
		     drm_intel_bo *dst_bo, uint32_t dst_tiling, unsigned dst_pitch,
		     unsigned dst_x, unsigned dst_y)
{
	uint32_t cmd_bits = 0;

	if (IS_965(devid) && src_tiling) {
		src_pitch /= 4;
		cmd_bits |= XY_SRC_COPY_BLT_SRC_TILED;
	}

	if (IS_965(devid) && dst_tiling) {
		dst_pitch /= 4;
		cmd_bits |= XY_SRC_COPY_BLT_DST_TILED;
	}

	BLIT_COPY_BATCH_START(cmd_bits);
	OUT_BATCH((3 << 24) | /* 32 bits */
		  (0xcc << 16) | /* copy ROP */
		  dst_pitch);
	OUT_BATCH(dst_y << 16 | dst_x);
	OUT_BATCH((dst_y+h) << 16 | (dst_x+w));
	OUT_RELOC_FENCED(dst_bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0);
	OUT_BATCH(src_y << 16 | src_x);
	OUT_BATCH(src_pitch);
	OUT_RELOC_FENCED(src_bo, I915_GEM_DOMAIN_RENDER, 0, 0);
	ADVANCE_BATCH();

	if (batch->gen >= 6) {
		BEGIN_BATCH(3, 0);
		OUT_BATCH(XY_SETUP_CLIP_BLT_CMD);
		OUT_BATCH(0);
		OUT_BATCH(0);
		ADVANCE_BATCH();
	}
}


/* All this gem trashing wastes too much cpu time, so give the gpu something to
 * do to increase chances for races. */
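/* The busy blit below copies a (1 << gpu_busy_load) pixel wide, 128 row high
 * strip within busy_bo, so the load factor scales the gpu work exponentially. */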
static void keep_gpu_busy(void)
{
	int tmp;

	tmp = 1 << gpu_busy_load;
	igt_assert_lte(tmp, 1024);

	/* copy lower half to upper half */
	emit_blt(busy_bo, 0, 4096, 0, 0, tmp, 128,
		 busy_bo, 0, 4096, 0, 128);
}

static void set_to_cpu_domain(struct igt_buf *buf, int writing)
{
	gem_set_domain(drm_fd, buf->bo->handle, I915_GEM_DOMAIN_CPU,
		       writing ? I915_GEM_DOMAIN_CPU : 0);
}

static unsigned int copyfunc_seq = 0;
static void (*copyfunc)(struct igt_buf *src, unsigned src_x, unsigned src_y,
			struct igt_buf *dst, unsigned dst_x, unsigned dst_y,
			unsigned logical_tile_no);

/* stride, x, y in units of uint32_t! */
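/* Copy one tile on the cpu while verifying that the source contains the
 * canonical pattern for logical_tile_no: dword k of the tile must hold
 * logical_tile_no * tile_size^2 + k. Mismatches are reported and, unless
 * --no-fail is set, abort the test; otherwise the destination is fixed up. */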
static void cpucpy2d(uint32_t *src, unsigned src_stride, unsigned src_x, unsigned src_y,
		     uint32_t *dst, unsigned dst_stride, unsigned dst_x, unsigned dst_y,
		     unsigned logical_tile_no)
{
	int i, j;
	int failed = 0;

	for (i = 0; i < options.tile_size; i++) {
		for (j = 0; j < options.tile_size; j++) {
			unsigned dst_ofs = dst_x + j + dst_stride * (dst_y + i);
			unsigned src_ofs = src_x + j + src_stride * (src_y + i);
			unsigned expect = logical_tile_no*options.tile_size*options.tile_size
			    + i*options.tile_size + j;
			uint32_t tmp = src[src_ofs];
			if (tmp != expect) {
				igt_info("mismatch at tile %i pos %i, read %i, expected %i, diff %i\n", logical_tile_no, i * options.tile_size + j, tmp, expect, (int)tmp - expect);
				igt_fail_on(options.trace_tile >= 0 && options.fail);
				failed++;
			}
			/* when not aborting, correct any errors */
			dst[dst_ofs] = expect;
		}
	}
	igt_fail_on(failed && options.fail);

	if (failed > stats.max_failed_reads)
		stats.max_failed_reads = failed;
	if (failed)
		stats.num_failed++;
}

static void cpu_copyfunc(struct igt_buf *src, unsigned src_x, unsigned src_y,
			 struct igt_buf *dst, unsigned dst_x, unsigned dst_y,
			 unsigned logical_tile_no)
{
	igt_assert(batch->ptr == batch->buffer);

	if (options.ducttape)
		drm_intel_bo_wait_rendering(dst->bo);

	if (options.use_cpu_maps) {
		set_to_cpu_domain(src, 0);
		set_to_cpu_domain(dst, 1);
	}

	cpucpy2d(src->data, src->stride/sizeof(uint32_t), src_x, src_y,
		 dst->data, dst->stride/sizeof(uint32_t), dst_x, dst_y,
		 logical_tile_no);
}

static void prw_copyfunc(struct igt_buf *src, unsigned src_x, unsigned src_y,
			 struct igt_buf *dst, unsigned dst_x, unsigned dst_y,
			 unsigned logical_tile_no)
{
	uint32_t tmp_tile[options.tile_size*options.tile_size];
	int i;

	igt_assert(batch->ptr == batch->buffer);

	if (options.ducttape)
		drm_intel_bo_wait_rendering(dst->bo);

	if (src->tiling == I915_TILING_NONE) {
		for (i = 0; i < options.tile_size; i++) {
			unsigned ofs = src_x*sizeof(uint32_t) + src->stride*(src_y + i);
			drm_intel_bo_get_subdata(src->bo, ofs,
						 options.tile_size*sizeof(uint32_t),
						 tmp_tile + options.tile_size*i);
		}
	} else {
		if (options.use_cpu_maps)
			set_to_cpu_domain(src, 0);

		cpucpy2d(src->data, src->stride/sizeof(uint32_t), src_x, src_y,
			 tmp_tile, options.tile_size, 0, 0, logical_tile_no);
	}

	if (dst->tiling == I915_TILING_NONE) {
		for (i = 0; i < options.tile_size; i++) {
			unsigned ofs = dst_x*sizeof(uint32_t) + dst->stride*(dst_y + i);
			drm_intel_bo_subdata(dst->bo, ofs,
					     options.tile_size*sizeof(uint32_t),
					     tmp_tile + options.tile_size*i);
		}
	} else {
		if (options.use_cpu_maps)
			set_to_cpu_domain(dst, 1);

		cpucpy2d(tmp_tile, options.tile_size, 0, 0,
			 dst->data, dst->stride/sizeof(uint32_t), dst_x, dst_y,
			 logical_tile_no);
	}
}

static void blitter_copyfunc(struct igt_buf *src, unsigned src_x, unsigned src_y,
			     struct igt_buf *dst, unsigned dst_x, unsigned dst_y,
			     unsigned logical_tile_no)
{
	static unsigned keep_gpu_busy_counter = 0;

	/* check both edges of the fence usage */
	if (keep_gpu_busy_counter & 1 && !fence_storm)
		keep_gpu_busy();

	emit_blt(src->bo, src->tiling, src->stride, src_x, src_y,
		 options.tile_size, options.tile_size,
		 dst->bo, dst->tiling, dst->stride, dst_x, dst_y);

	if (!(keep_gpu_busy_counter & 1) && !fence_storm)
		keep_gpu_busy();

	keep_gpu_busy_counter++;

	if (src->tiling)
		fence_storm--;
	if (dst->tiling)
		fence_storm--;

	if (fence_storm <= 1) {
		fence_storm = 0;
		intel_batchbuffer_flush(batch);
	}
}

static void render_copyfunc(struct igt_buf *src, unsigned src_x, unsigned src_y,
			    struct igt_buf *dst, unsigned dst_x, unsigned dst_y,
			    unsigned logical_tile_no)
{
	static unsigned keep_gpu_busy_counter = 0;
	igt_render_copyfunc_t rendercopy = igt_get_render_copyfunc(devid);

	/* check both edges of the fence usage */
	if (keep_gpu_busy_counter & 1)
		keep_gpu_busy();

	if (rendercopy) {
		/*
		 * Flush outstanding blts so that they don't end up on
		 * the render ring when that's not allowed (gen6+).
		 */
		intel_batchbuffer_flush(batch);
		rendercopy(batch, NULL, src, src_x, src_y,
			   options.tile_size, options.tile_size,
			   dst, dst_x, dst_y);
	} else
		blitter_copyfunc(src, src_x, src_y,
				 dst, dst_x, dst_y,
				 logical_tile_no);
	if (!(keep_gpu_busy_counter & 1))
		keep_gpu_busy();

	keep_gpu_busy_counter++;
	intel_batchbuffer_flush(batch);
}

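/* Pick the copy implementation for the next tile. The moduli (61, 17, 19, 3)
 * are distinct primes, so over many invocations every copy path gets paired
 * with every tile; every 61st copy additionally queues a storm of fenced
 * blits to stress fence stealing. */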
static void next_copyfunc(int tile)
{
	if (fence_storm) {
		if (tile == options.trace_tile)
			igt_info(" using fence storm\n");
		return;
	}

	if (copyfunc_seq % 61 == 0
			&& options.forced_tiling != I915_TILING_NONE) {
		if (tile == options.trace_tile)
			igt_info(" using fence storm\n");
		fence_storm = num_fences;
		copyfunc = blitter_copyfunc;
	} else if (copyfunc_seq % 17 == 0) {
		if (tile == options.trace_tile)
			igt_info(" using cpu\n");
		copyfunc = cpu_copyfunc;
	} else if (copyfunc_seq % 19 == 0) {
		if (tile == options.trace_tile)
			igt_info(" using prw\n");
		copyfunc = prw_copyfunc;
	} else if (copyfunc_seq % 3 == 0 && options.use_render) {
		if (tile == options.trace_tile)
			igt_info(" using render\n");
		copyfunc = render_copyfunc;
	} else if (options.use_blt) {
		if (tile == options.trace_tile)
			igt_info(" using blitter\n");
		copyfunc = blitter_copyfunc;
	} else if (options.use_render) {
		if (tile == options.trace_tile)
			igt_info(" using render\n");
		copyfunc = render_copyfunc;
	} else {
		copyfunc = cpu_copyfunc;
	}

	copyfunc_seq++;
}

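/* Fill the current buffer set with the canonical pattern (tile i holds the
 * sequential dwords starting at i * tile_size^2) and reset tile_permutation
 * to the identity. */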
static void fan_out(void)
{
	uint32_t tmp_tile[options.tile_size*options.tile_size];
	uint32_t seq = 0;
	int i, k;
	unsigned tile, buf_idx, x, y;

	for (i = 0; i < num_total_tiles; i++) {
		tile = i;
		buf_idx = tile / options.tiles_per_buf;
		tile %= options.tiles_per_buf;

		tile2xy(&buffers[current_set][buf_idx], tile, &x, &y);

		for (k = 0; k < options.tile_size*options.tile_size; k++)
			tmp_tile[k] = seq++;

		if (options.use_cpu_maps)
			set_to_cpu_domain(&buffers[current_set][buf_idx], 1);

		cpucpy2d(tmp_tile, options.tile_size, 0, 0,
			 buffers[current_set][buf_idx].data,
			 buffers[current_set][buf_idx].stride / sizeof(uint32_t),
			 x, y, i);
	}

	for (i = 0; i < num_total_tiles; i++)
		tile_permutation[i] = i;
}

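/* Gather every tile from its current position (via tile_permutation) and
 * check that it still carries the canonical pattern. */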
static void fan_in_and_check(void)
{
	uint32_t tmp_tile[options.tile_size*options.tile_size];
	unsigned tile, buf_idx, x, y;
	int i;

	for (i = 0; i < num_total_tiles; i++) {
		tile = tile_permutation[i];
		buf_idx = tile / options.tiles_per_buf;
		tile %= options.tiles_per_buf;

		tile2xy(&buffers[current_set][buf_idx], tile, &x, &y);

		if (options.use_cpu_maps)
			set_to_cpu_domain(&buffers[current_set][buf_idx], 0);

		cpucpy2d(buffers[current_set][buf_idx].data,
			 buffers[current_set][buf_idx].stride / sizeof(uint32_t),
			 x, y,
			 tmp_tile, options.tile_size, 0, 0,
			 i);
	}
}

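/* Clamp the stride so that buffer width and height stay between tile_size
 * and max_dimension; the test caps strides at 8192 bytes. */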
static void sanitize_stride(struct igt_buf *buf)
{
	if (igt_buf_height(buf) > options.max_dimension)
		buf->stride = buf->size / options.max_dimension;

	if (igt_buf_height(buf) < options.tile_size)
		buf->stride = buf->size / options.tile_size;

	if (igt_buf_width(buf) < options.tile_size)
		buf->stride = options.tile_size * sizeof(uint32_t);

	igt_assert(buf->stride <= 8192);
	igt_assert(igt_buf_width(buf) <= options.max_dimension);
	igt_assert(igt_buf_height(buf) <= options.max_dimension);

	igt_assert(igt_buf_width(buf) >= options.tile_size);
	igt_assert(igt_buf_height(buf) >= options.tile_size);
}

static void init_buffer(struct igt_buf *buf, unsigned size)
{
	memset(buf, 0, sizeof(*buf));

	buf->bo = drm_intel_bo_alloc(bufmgr, "tiled bo", size, 4096);
	buf->size = size;
	igt_assert(buf->bo);
	buf->tiling = I915_TILING_NONE;
	buf->stride = 4096;
	buf->bpp = 32;

	sanitize_stride(buf);

	if (options.no_hw)
		buf->data = malloc(size);
	else {
		if (options.use_cpu_maps)
			drm_intel_bo_map(buf->bo, 1);
		else
			drm_intel_gem_bo_map_gtt(buf->bo);
		buf->data = buf->bo->virtual;
	}

	buf->num_tiles = options.tiles_per_buf;
}

static void exchange_buf(void *array, unsigned i, unsigned j)
{
	struct igt_buf *buf_arr, tmp;
	buf_arr = array;

	memcpy(&tmp, &buf_arr[i], sizeof(struct igt_buf));
	memcpy(&buf_arr[i], &buf_arr[j], sizeof(struct igt_buf));
	memcpy(&buf_arr[j], &tmp, sizeof(struct igt_buf));
}

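/* Randomize tiling mode and stride of the buffers in a set, simulating what
 * the libdrm bo cache does when buffers get recycled. Roughly a quarter of
 * the buffers are touched each round, most of them switching to X-tiled. */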
static void init_set(unsigned set)
{
	long int r;
	int i;

	igt_permute_array(buffers[set], num_buffers, exchange_buf);

	if (current_set == 1 && options.gpu_busy_load == 0) {
		gpu_busy_load++;
		if (gpu_busy_load > 10)
			gpu_busy_load = 6;
	}

	for (i = 0; i < num_buffers; i++) {
		r = random();
		if ((r & 3) != 0)
			continue;
		r >>= 2;

		if ((r & 3) != 0)
			buffers[set][i].tiling = I915_TILING_X;
		else
			buffers[set][i].tiling = I915_TILING_NONE;
		r >>= 2;
		if (options.forced_tiling >= 0)
			buffers[set][i].tiling = options.forced_tiling;

		if (buffers[set][i].tiling == I915_TILING_NONE) {
			/* min 64 byte stride */
			r %= 8;
			buffers[set][i].stride = 64 * (1 << r);
		} else if (IS_GEN2(devid)) {
			/* min 128 byte stride */
			r %= 7;
			buffers[set][i].stride = 128 * (1 << r);
		} else {
			/* min 512 byte stride */
			r %= 5;
			buffers[set][i].stride = 512 * (1 << r);
		}

		sanitize_stride(&buffers[set][i]);

		gem_set_tiling(drm_fd, buffers[set][i].bo->handle,
			       buffers[set][i].tiling,
			       buffers[set][i].stride);

		if (options.trace_tile != -1 && i == options.trace_tile/options.tiles_per_buf)
			igt_info("changing buffer %i containing tile %i: tiling %i, stride %i\n", i, options.trace_tile, buffers[set][i].tiling, buffers[set][i].stride);
	}
}

static void exchange_uint(void *array, unsigned i, unsigned j)
{
	unsigned *i_arr = array;

	igt_swap(i_arr[i], i_arr[j]);
}

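/* Copy every tile to its new position given by permutation, either with a
 * pure cpu copy (--no-hw) or through the randomly selected copy paths. */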
static void copy_tiles(unsigned *permutation)
{
	unsigned src_tile, src_buf_idx, src_x, src_y;
	unsigned dst_tile, dst_buf_idx, dst_x, dst_y;
	struct igt_buf *src_buf, *dst_buf;
	int i, idx;

	for (i = 0; i < num_total_tiles; i++) {
		/* tile_permutation is independent of current_permutation, so
		 * abuse it to randomize the order of the src bos */
		idx  = tile_permutation[i];
		src_buf_idx = idx / options.tiles_per_buf;
		src_tile = idx % options.tiles_per_buf;
		src_buf = &buffers[current_set][src_buf_idx];

		tile2xy(src_buf, src_tile, &src_x, &src_y);

		dst_buf_idx = permutation[idx] / options.tiles_per_buf;
		dst_tile = permutation[idx] % options.tiles_per_buf;
		dst_buf = &buffers[target_set][dst_buf_idx];

		tile2xy(dst_buf, dst_tile, &dst_x, &dst_y);

		if (options.trace_tile == i)
			igt_info("copying tile %i from %i (%i, %i) to %i (%i, %i)", i, tile_permutation[i], src_buf_idx, src_tile, permutation[idx], dst_buf_idx, dst_tile);

		if (options.no_hw) {
			cpucpy2d(src_buf->data,
				 src_buf->stride / sizeof(uint32_t),
				 src_x, src_y,
				 dst_buf->data,
				 dst_buf->stride / sizeof(uint32_t),
				 dst_x, dst_y,
				 i);
		} else {
			next_copyfunc(i);

			copyfunc(src_buf, src_x, src_y, dst_buf, dst_x, dst_y,
				 i);
		}
	}

	intel_batchbuffer_flush(batch);
}

static void sanitize_tiles_per_buf(void)
{
	if (options.tiles_per_buf > options.scratch_buf_size / TILE_BYTES(options.tile_size))
		options.tiles_per_buf = options.scratch_buf_size / TILE_BYTES(options.tile_size);
}

static int parse_options(int opt, int opt_index, void *data)
{
	int tmp;

	switch(opt) {
	case 'd':
		options.no_hw = 1;
		igt_info("no-hw debug mode\n");
		break;
	case 'S':
		options.use_signal_helper = 0;
		igt_info("disabling that pesky nuisance who keeps interrupting us\n");
		break;
	case 's':
		tmp = atoi(optarg);
		if (tmp < options.tile_size*8192)
			igt_info("scratch buffer size needs to be at least %i\n", options.tile_size * 8192);
		else if (tmp & (tmp - 1)) {
			igt_info("scratch buffer size needs to be a power-of-two\n");
		} else {
			igt_info("fixed scratch buffer size to %u\n", tmp);
			options.scratch_buf_size = tmp;
			sanitize_tiles_per_buf();
		}
		break;
	case 'g':
		tmp = atoi(optarg);
		if (tmp < 0 || tmp > 10)
			igt_info("gpu busy load needs to be in the range 0 to 10\n");
		else {
			igt_info("gpu busy load factor set to %i\n", tmp);
			gpu_busy_load = options.gpu_busy_load = tmp;
		}
		break;
	case 'c':
		options.num_buffers = atoi(optarg);
		igt_info("buffer count set to %i\n", options.num_buffers);
		break;
	case 't':
		options.trace_tile = atoi(optarg);
		igt_info("tracing tile %i\n", options.trace_tile);
		break;
	case 'r':
		options.use_render = 0;
		igt_info("disabling render copy\n");
		break;
	case 'b':
		options.use_blt = 0;
		igt_info("disabling blt copy\n");
		break;
	case 'u':
		options.forced_tiling = I915_TILING_NONE;
		igt_info("disabling tiling\n");
		break;
	case 'x':
		if (options.use_cpu_maps) {
			igt_info("tiling not possible with cpu maps\n");
		} else {
			options.forced_tiling = I915_TILING_X;
			igt_info("using only X-tiling\n");
		}
		break;
	case 'm':
		options.use_cpu_maps = 1;
		options.forced_tiling = I915_TILING_NONE;
		igt_info("disabling tiling\n");
		break;
	case 'o':
		options.total_rounds = atoi(optarg);
		igt_info("total rounds %i\n", options.total_rounds);
		break;
	case 'f':
		options.fail = 0;
		igt_info("not failing when detecting errors\n");
		break;
	case 'p':
		options.tiles_per_buf = atoi(optarg);
		igt_info("tiles per buffer %i\n", options.tiles_per_buf);
		break;
	case DUCTAPE:
		options.ducttape = 0;
		igt_info("removing duct-tape\n");
		break;
	case TILESZ:
		options.tile_size = atoi(optarg);
		sanitize_tiles_per_buf();
		igt_info("tile size %i\n", options.tile_size);
		break;
	case CHCK_RENDER:
		options.check_render_cpyfn = 1;
		igt_info("checking render copy function\n");
		break;
	default:
		return IGT_OPT_HANDLER_ERROR;
	}

	/* actually 32767, according to docs, but that kills our nice
	 * power-of-two calculations. */
	options.max_dimension = 16*1024;
	if (options.use_render) {
		if (IS_GEN2(devid) || IS_GEN3(devid))
			options.max_dimension = 2048;
		else
			options.max_dimension = 8192;
	}
	igt_info("Limiting buffer to %dx%d\n", options.max_dimension, options.max_dimension);

	return IGT_OPT_HANDLER_SUCCESS;
}

static void init(void)
{
	int i;
	unsigned tmp;

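	/* Autosize the buffer count so that the two buffer sets together use
	 * roughly two thirds of the (capped) aperture. */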
	if (options.num_buffers == 0) {
		tmp = gem_aperture_size(drm_fd);
		tmp = min(256 * (1024 * 1024), tmp);
		num_buffers = 2 * tmp / options.scratch_buf_size / 3;
		num_buffers /= 2;
		igt_info("using %u buffers\n", num_buffers);
	} else
		num_buffers = options.num_buffers;

	bufmgr = drm_intel_bufmgr_gem_init(drm_fd, 4096);
	drm_intel_bufmgr_gem_enable_reuse(bufmgr);
	drm_intel_bufmgr_gem_enable_fenced_relocs(bufmgr);
	num_fences = gem_available_fences(drm_fd);
	igt_assert_lt(4, num_fences);
	batch = intel_batchbuffer_alloc(bufmgr, devid);

	busy_bo = drm_intel_bo_alloc(bufmgr, "tiled bo", BUSY_BUF_SIZE, 4096);
	if (options.forced_tiling >= 0)
		gem_set_tiling(drm_fd, busy_bo->handle, options.forced_tiling, 4096);

	for (i = 0; i < num_buffers; i++) {
		init_buffer(&buffers[0][i], options.scratch_buf_size);
		init_buffer(&buffers[1][i], options.scratch_buf_size);

		num_total_tiles += buffers[0][i].num_tiles;
	}
	current_set = 0;

	/* just in case it helps reproducibility */
	srandom(0xdeadbeef);
}

static void check_render_copyfunc(void)
{
	struct igt_buf src, dst;
	uint32_t *ptr;
	int i, j, pass;

	if (!options.check_render_cpyfn)
		return;

	init_buffer(&src, options.scratch_buf_size);
	init_buffer(&dst, options.scratch_buf_size);

	for (pass = 0; pass < 16; pass++) {
		int sx = random() % (igt_buf_width(&src)-options.tile_size);
		int sy = random() % (igt_buf_height(&src)-options.tile_size);
		int dx = random() % (igt_buf_width(&dst)-options.tile_size);
		int dy = random() % (igt_buf_height(&dst)-options.tile_size);

		if (options.use_cpu_maps)
			set_to_cpu_domain(&src, 1);

		memset(src.data, 0xff, options.scratch_buf_size);
		for (j = 0; j < options.tile_size; j++) {
			ptr = (uint32_t*)((char *)src.data + sx*4 + (sy+j) * src.stride);
			for (i = 0; i < options.tile_size; i++)
				ptr[i] = j * options.tile_size + i;
		}

		render_copyfunc(&src, sx, sy, &dst, dx, dy, 0);

		if (options.use_cpu_maps)
			set_to_cpu_domain(&dst, 0);

		for (j = 0; j < options.tile_size; j++) {
			ptr = (uint32_t*)((char *)dst.data + dx*4 + (dy+j) * dst.stride);
			for (i = 0; i < options.tile_size; i++)
				if (ptr[i] != j * options.tile_size + i) {
					igt_info("render copyfunc mismatch at (%d, %d): found %d, expected %d\n", i, j, ptr[i], j * options.tile_size + i);
				}
		}
	}
}

static struct option long_options[] = {
	{"no-hw", 0, 0, 'd'},
	{"buf-size", 1, 0, 's'},
	{"gpu-busy-load", 1, 0, 'g'},
	{"no-signals", 0, 0, 'S'},
	{"buffer-count", 1, 0, 'c'},
	{"trace-tile", 1, 0, 't'},
	{"disable-blt", 0, 0, 'b'},
	{"disable-render", 0, 0, 'r'},
	{"untiled", 0, 0, 'u'},
	{"x-tiled", 0, 0, 'x'},
	{"use-cpu-maps", 0, 0, 'm'},
	{"rounds", 1, 0, 'o'},
	{"no-fail", 0, 0, 'f'},
	{"tiles-per-buf", 1, 0, 'p'},
	{"remove-duct-tape", 0, 0, DUCTAPE},
	{"tile-size", 1, 0, TILESZ},
	{"check-render-cpyfn", 0, 0, CHCK_RENDER},
	{NULL, 0, 0, 0},
};

igt_simple_main_args("ds:g:c:t:rbuxmo:fp:",
		     long_options, NULL, parse_options, NULL)
{
	int i, j;
	unsigned *current_permutation, *tmp_permutation;

	drm_fd = drm_open_driver(DRIVER_INTEL);
	devid = intel_get_drm_devid(drm_fd);

	/* start our little helper early before too many allocations occur */
	if (options.use_signal_helper)
		igt_fork_signal_helper();

	init();

	check_render_copyfunc();

	tile_permutation = malloc(num_total_tiles*sizeof(uint32_t));
	current_permutation = malloc(num_total_tiles*sizeof(uint32_t));
	tmp_permutation = malloc(num_total_tiles*sizeof(uint32_t));
	igt_assert(tile_permutation);
	igt_assert(current_permutation);
	igt_assert(tmp_permutation);

	fan_out();

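	/* Each round shuffles every tile into the other buffer set; every
	 * 64th round the full pattern is read back and verified. */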
	for (i = 0; i < options.total_rounds; i++) {
		igt_info("round %i\n", i);
		if (i % 64 == 63) {
			fan_in_and_check();
			igt_info("everything correct after %i rounds\n", i + 1);
		}

		target_set = (current_set + 1) & 1;
		init_set(target_set);

		for (j = 0; j < num_total_tiles; j++)
			current_permutation[j] = j;
		igt_permute_array(current_permutation, num_total_tiles, exchange_uint);

		copy_tiles(current_permutation);

		memcpy(tmp_permutation, tile_permutation, sizeof(unsigned)*num_total_tiles);

		/* accumulate the permutations */
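		/* tile_permutation becomes current_permutation o tile_permutation,
		 * so it keeps mapping each logical tile to its current slot */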
		for (j = 0; j < num_total_tiles; j++)
			tile_permutation[j] = current_permutation[tmp_permutation[j]];

		current_set = target_set;
	}

	fan_in_and_check();

	igt_info("num failed tiles %u, max incoherent bytes %zd\n", stats.num_failed, stats.max_failed_reads * sizeof(uint32_t));

	intel_batchbuffer_free(batch);
	drm_intel_bufmgr_destroy(bufmgr);

	close(drm_fd);

	igt_stop_signal_helper();
}